Merge branch 'main' into shuguang/error_handling

2026-06-17 15:25:17 +02:00 · 2025-03-03 13:12:31 -08:00 · 2025-03-03 13:12:31 -08:00 · 6be6cc6346
commit 6be6cc6346
parent d00034dab0 10cad4d0b7
150 changed files with 2416 additions and 6445 deletions
--- a/.github/workflows/docker-push.yml
+++ b/.github/workflows/docker-push.yml
@ -1,40 +1,97 @@
 name: Publish Docker image

+env:
+  DOCKER_IMAGE: katanemo/archgw
+
 on:
-  release:
-    types: [published]
+  push:
+    branches:
+      - main

 jobs:
-  push_to_registry:
-    name: Push Docker image to Docker Hub
-    runs-on: ubuntu-latest
-    permissions:
-      packages: write
-      contents: read
-      attestations: write
-      id-token: write
+  # Build ARM64 image on native ARM64 runner
+  build-arm64:
+    runs-on: [linux-arm64]
    steps:
-      - name: Check out the repo
+      - name: Checkout Repository
        uses: actions/checkout@v4

      - name: Log in to Docker Hub
-        uses: docker/login-action@f4ef78c080cd8ba55a85445d5b36e214a81df20a
+        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}

      - name: Extract metadata (tags, labels) for Docker
        id: meta
-        uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
+        uses: docker/metadata-action@v5
        with:
-          images: katanemo/archgw
+          images: ${{ env.DOCKER_IMAGE }}
+          tags: |
+            type=raw,value=latest  # Force the tag to be "latest"

-      - name: Build and push Docker image
-        id: push
-        uses: docker/build-push-action@3b5e8027fcad23fda98b2e3ac259d8d67585f671
+      - name: Build and Push ARM64 Image
+        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./arch/Dockerfile
+          platforms: linux/arm64
          push: true
-          tags: ${{ steps.meta.outputs.tags }}
-          labels: ${{ steps.meta.outputs.labels }}
+          tags: ${{ steps.meta.outputs.tags }}-arm64
+
+  # Build AMD64 image on GitHub's AMD64 runner
+  build-amd64:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v4
+
+      - name: Log in to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_PASSWORD }}
+
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.DOCKER_IMAGE }}
+          tags: |
+            type=raw,value=latest  # Force the tag to be "latest"
+
+      - name: Build and Push AMD64 Image
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: ./arch/Dockerfile
+          platforms: linux/amd64
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}-amd64
+
+
+  # Combine ARM64 and AMD64 images into a multi-arch manifest
+  create-manifest:
+    runs-on: ubuntu-latest
+    needs: [build-arm64, build-amd64]  # Wait for both builds
+    steps:
+      - name: Log in to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_PASSWORD }}
+
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.DOCKER_IMAGE }}
+          tags: |
+            type=raw,value=latest  # Force the tag to be "latest"
+
+      - name: Create Multi-Arch Manifest
+        run: |
+          # Combine the architecture-specific images into a "latest" manifest
+          docker buildx imagetools create -t ${{ steps.meta.outputs.tags }} \
+            ${{ env.DOCKER_IMAGE }}:latest-arm64 \
+            ${{ env.DOCKER_IMAGE }}:latest-amd64
--- a/.github/workflows/e2e_archgw.yml
+++ b/.github/workflows/e2e_archgw.yml
@ -7,7 +7,7 @@ on:
  pull_request:

 jobs:
-  test:
+  e2e_archgw_tests:
    runs-on: ubuntu-latest-m
    defaults:
      run:
--- a/.github/workflows/e2e_model_server.yml
+++ b/.github/workflows/e2e_model_server.yml
@ -7,7 +7,7 @@ on:
  pull_request:

 jobs:
-  test:
+  e2e_model_server_tests:
    runs-on: ubuntu-latest-m
    defaults:
      run:
--- a/.github/workflows/e2e_test_demos.yml
+++ b/.github/workflows/e2e_test_demos.yml
@ -7,7 +7,7 @@ on:
  pull_request:

 jobs:
-  test:
+  e2e_demo_tests:
    runs-on: ubuntu-latest-m

    steps:
@ -37,7 +37,7 @@ jobs:
          source venv/bin/activate
          cd model_server/ && echo "installing model server" && poetry install
          cd ../arch/tools && echo "installing archgw cli" && poetry install
-          cd ../../demos/test_runner && echo "installing test dependencies" && poetry install
+          cd ../../demos/shared/test_runner && echo "installing test dependencies" && poetry install

      - name: run demo tests
        env:
@ -45,4 +45,4 @@ jobs:
          MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
        run: |
          source venv/bin/activate
-          cd demos/test_runner && sh run_demo_tests.sh
+          cd demos/shared/test_runner && sh run_demo_tests.sh
--- a/.github/workflows/e2e_tests.yml
+++ b/.github/workflows/e2e_tests.yml
@ -7,7 +7,7 @@ on:
  pull_request:

 jobs:
-  test:
+  e2e_tests:
    runs-on: ubuntu-latest

    steps:
--- a/.github/workflows/validate_arch_config.yml
+++ b/.github/workflows/validate_arch_config.yml
@ -0,0 +1,31 @@
+name: arch config tests
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+
+jobs:
+  validate_arch_config:
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: .
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.12"
+
+      - name: build arch docker image
+        run: |
+          docker build  -f arch/Dockerfile . -t katanemo/archgw
+
+      - name: validate arch config
+        run: |
+          bash arch/validate_arch_config.sh
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -25,9 +25,7 @@ repos:
        name: cargo-test
        language: system
        types: [file, rust]
-        # --lib is to only test the library, since when integration tests are made,
-        # they will be in a seperate tests directory
-        entry: bash -c "cd crates/llm_gateway && cargo test --lib"
+        entry: bash -c "cd crates && cargo test --lib"

  - repo: https://github.com/psf/black
    rev: 23.1.0
--- a/README.md
+++ b/README.md
@ -1,30 +1,41 @@
-<p align="center">
+<div align="center">
  <img src="docs/source/_static/img/arch-logo.png" alt="Arch Logo" width="75%" heigh=auto>
-</p>
-<p align="center">
-<a href="https://www.producthunt.com/posts/arch-3?embed=true&utm_source=badge-top-post-badge&utm_medium=badge&utm_souce=badge-arch&#0045;3" target="_blank"><img src="https://api.producthunt.com/widgets/embed-image/v1/top-post-badge.svg?post_id=565761&theme=light&period=daily" alt="Arch - Build&#0032;fast&#0044;&#0032;hyper&#0045;personalized&#0032;agents&#0032;with&#0032;intelligent&#0032;infra | Product Hunt" style="width: 250px; height: 54px;" width="250" height="54" /></a>
-</p>
+</div>
+<div align="center">

-Arch is an **intelligent (edge and LLM) proxy designed for agentic applications** - to help you protect, observe, and build agentic tasks by simply connecting (existing) APIs.
+_Arch is an intelligent (edge and LLM) proxy designed for agentic applications - to help you protect, observe, and build agentic tasks by simply connecting (existing) APIs._

-Built by the contributors of [Envoy Proxy](https://www.envoyproxy.io/) with the belief that:
-
->Prompts are nuanced and opaque user requests, which require the same capabilities as traditional HTTP requests including secure handling, intelligent routing, robust observability, and integration with backend (API) systems for personalization – outside core business logic.*
+[Quickstart](#Quickstart) •
+[Demos](#Demos) •
+[Build agentic apps with Arch](#Build-AI-Agent-with-Arch-Gateway) •
+[Use Arch as an LLM router](#Use-Arch-Gateway-as-LLM-Router) •
+[Documentation](https://docs.archgw.com) •
+[Contact](#Contact)

 [![pre-commit](https://github.com/katanemo/arch/actions/workflows/pre-commit.yml/badge.svg)](https://github.com/katanemo/arch/actions/workflows/pre-commit.yml)
 [![rust tests (prompt and llm gateway)](https://github.com/katanemo/arch/actions/workflows/rust_tests.yml/badge.svg)](https://github.com/katanemo/arch/actions/workflows/rust_tests.yml)
 [![e2e tests](https://github.com/katanemo/arch/actions/workflows/e2e_tests.yml/badge.svg)](https://github.com/katanemo/arch/actions/workflows/e2e_tests.yml)
 [![Build and Deploy Documentation](https://github.com/katanemo/arch/actions/workflows/static.yml/badge.svg)](https://github.com/katanemo/arch/actions/workflows/static.yml)
+</div>

-Arch is engineered with purpose-built LLMs to handle critical but undifferentiated tasks related to the handling and processing of prompts. This includes detecting and rejecting [jailbreak](https://github.com/verazuo/jailbreak_llms) attempts, intelligent task routing for improved accuracy, mapping user request into "backend" functions, and managing the observability of prompts and LLM API calls in a centralized way.
+# Overview
+<a href="https://www.producthunt.com/posts/arch-3?embed=true&utm_source=badge-top-post-badge&utm_medium=badge&utm_souce=badge-arch&#0045;3" target="_blank"><img src="https://api.producthunt.com/widgets/embed-image/v1/top-post-badge.svg?post_id=565761&theme=light&period=daily" alt="Arch - Build&#0032;fast&#0044;&#0032;hyper&#0045;personalized&#0032;agents&#0032;with&#0032;intelligent&#0032;infra | Product Hunt" style="width: 250px; height: 54px;" width="250" height="54" /></a>


+Arch Gateway was built by the contributors of [Envoy Proxy](https://www.envoyproxy.io/) with the belief that:
+
+>Prompts are nuanced and opaque user requests, which require the same capabilities as traditional HTTP requests including secure handling, intelligent routing, robust observability, and integration with backend (API) systems for personalization – outside core business logic.
+
+
+Arch is engineered with purpose-built LLMs to handle critical but pesky tasks related to the handling and processing of prompts. This includes detecting and rejecting [jailbreak](https://github.com/verazuo/jailbreak_llms) attempts, intent-based routing for improved task accuracy, mapping user requests into "backend" functions, and managing the observability of prompts and LLM API calls in a centralized way.
+
 **Core Features**:
-  - Built on [Envoy](https://envoyproxy.io): Arch runs alongside application servers as a separate containerized process, and builds on top of Envoy's proven HTTP management and scalability features to handle ingress and egress traffic related to prompts and LLMs.
-  - Task Routing & Fast Function Calling. Engineered with purpose-built [LLMs](https://huggingface.co/collections/katanemo/arch-function-66f209a693ea8df14317ad68) to handle fast, cost-effective, and accurate prompt-based tasks like function/API calling, and parameter extraction from prompts to build more task-accurate agentic applications.
-  - Prompt [Guard](https://huggingface.co/collections/katanemo/arch-guard-6702bdc08b889e4bce8f446d): Arch centralizes guardrails to prevent jailbreak attempts and ensure safe user interactions without writing a single line of code.
-  - Routing & Traffic Management: Arch centralizes calls to LLMs used by your applications, offering smart retries, automatic cutover, and resilient upstream connections for continuous availability.
-  - Observability: Arch uses the W3C Trace Context standard to enable complete request tracing across applications, ensuring compatibility with observability tools, and provides metrics to monitor latency, token usage, and error rates, helping optimize AI application performance.
+
+  - **Intent-based prompt routing & fast ⚡ function-calling via APIs**. Engineered with purpose-built [LLMs](https://huggingface.co/collections/katanemo/arch-function-66f209a693ea8df14317ad68) to handle fast, cost-effective, and accurate prompt-based tasks like function/API calling, and parameter extraction from prompts to build more task-accurate agentic applications.
+  - **Prompt [Guard](https://huggingface.co/collections/katanemo/arch-guard-6702bdc08b889e4bce8f446d)**: Arch centralizes guardrails to prevent jailbreak attempts and ensure safe user interactions without writing a single line of code.
+  - **LLM Routing & Traffic Management**: Arch centralizes calls to LLMs used by your applications, offering smart retries, automatic cutover, and resilient upstream connections for continuous availability.
+  - **Observability**: Arch uses the W3C Trace Context standard to enable complete request tracing across applications, ensuring compatibility with observability tools, and provides metrics to monitor latency, token usage, and error rates, helping optimize AI application performance.
+  - **Built on [Envoy](https://envoyproxy.io)**: Arch runs alongside application servers as a separate containerized process, and builds on top of Envoy's proven HTTP management and scalability features to handle ingress and egress traffic related to prompts and LLMs.

 **High-Level Sequence Diagram**:
 ![alt text](docs/source/_static/img/arch_network_diagram_high_level.png)
@ -38,9 +49,9 @@ Arch is engineered with purpose-built LLMs to handle critical but undifferentiat
 To get in touch with us, please join our [discord server](https://discord.gg/pGZf2gcwEc). We will be monitoring that actively and offering support there.

 ## Demos
-* [Weather Forecast](demos/weather_forecast/README.md) - Walk through of the core function calling capabilities of arch gateway using weather forecasting service
-* [Insurance Agent](demos/insurance_agent/README.md) - Build a full insurance agent with Arch
-* [Network Agent](demos/network_agent/README.md) - Build a networking co-pilot/agent agent with Arch
+* [Sample App: Weather Forecast Agent](demos/samples_python/weather_forecast/README.md) - A sample agentic weather forecasting app that highlights core function calling capabilities of Arch.
+* [Sample App: Network Operator Agent](demos/samples_python/network_switch_operator_agent/README.md) - A simple network device switch operator agent that can retrieve device statistics and reboot them.
+* [Use Case: Connecting to SaaS APIs](demos/use_cases/spotify_bearer_auth) - Connect 3rd party SaaS APIs to your agentic chat experience.

 ## Quickstart

@ -62,7 +73,7 @@ Arch's CLI allows you to manage and interact with the Arch gateway efficiently.
 ```console
 $ python -m venv venv
 $ source venv/bin/activate   # On Windows, use: venv\Scripts\activate
-$ pip install archgw==0.2.0
+$ pip install archgw==0.2.2
 ```

 ### Build AI Agent with Arch Gateway
--- a/arch/Dockerfile
+++ b/arch/Dockerfile
@ -8,7 +8,7 @@ RUN cd prompt_gateway && cargo build --release --target wasm32-wasip1
 RUN cd llm_gateway && cargo build --release --target wasm32-wasip1

 # copy built filter into envoy image
-FROM envoyproxy/envoy:v1.32-latest as envoy
+FROM docker.io/envoyproxy/envoy:v1.32-latest as envoy

 #Build config generator, so that we have a single build image for both Rust and Python
 FROM python:3.12-slim as arch
--- a/arch/arch_config_schema.yaml
+++ b/arch/arch_config_schema.yaml
@ -3,21 +3,38 @@ type: object
 properties:
  version:
    type: string
-  listener:
+  listeners:
    type: object
-    properties:
-      address:
-        type: string
-      port:
-        type: integer
-      message_format:
-        type: string
-      connect_timeout:
-        type: string
    additionalProperties: false
-    required:
-      - address
-      - port
+    properties:
+      ingress_traffic:
+        type: object
+        properties:
+          address:
+            type: string
+          port:
+            type: integer
+          message_format:
+            type: string
+            enum:
+              - openai
+          timeout:
+            type: string
+        additionalProperties: false
+      egress_traffic:
+        type: object
+        properties:
+          address:
+            type: string
+          port:
+            type: integer
+          message_format:
+            type: string
+            enum:
+              - openai
+          timeout:
+            type: string
+        additionalProperties: false
  endpoints:
    type: object
    patternProperties:
@ -79,6 +96,8 @@ properties:
    properties:
      prompt_target_intent_matching_threshold:
        type: number
+      optimize_context_window:
+        type: boolean
  system_prompt:
    type: string
  prompt_targets:
@ -105,7 +124,10 @@ properties:
              required:
                type: boolean
              default:
-                type: string
+                anyOf:
+                  - type: string
+                  - type: integer
+                  - type: boolean
              description:
                type: string
              type:
@ -113,7 +135,10 @@ properties:
              enum:
                type: array
                items:
-                  type: string
+                  anyOf:
+                    - type: string
+                    - type: integer
+                    - type: boolean
              in_path:
                type: boolean
              format:
@ -135,6 +160,10 @@ properties:
              enum:
                - GET
                - POST
+            http_headers:
+              type: object
+              additionalProperties:
+                type: string
          additionalProperties: false
          required:
            - name
@ -218,5 +247,4 @@ properties:
 additionalProperties: false
 required:
  - version
-  - listener
  - llm_providers
--- a/arch/docker-compose.dev.yaml
+++ b/arch/docker-compose.dev.yaml
@ -8,7 +8,7 @@ services:
      - "12000:12000"
      - "19901:9901"
    volumes:
-      - ${ARCH_CONFIG_FILE:-../demos/weather_forecast/arch_config.yaml}:/app/arch_config.yaml
+      - ${ARCH_CONFIG_FILE:-../demos/samples_python/weather_forecast/arch_config.yaml}:/app/arch_config.yaml
      - /etc/ssl/cert.pem:/etc/ssl/cert.pem
      - ./envoy.template.yaml:/app/envoy.template.yaml
      - ./arch_config_schema.yaml:/app/arch_config_schema.yaml
--- a/arch/envoy.template.yaml
+++ b/arch/envoy.template.yaml
@ -29,11 +29,11 @@ stats_config:
      - 180000
 static_resources:
  listeners:
-    - name: arch_listener_http
+    - name: ingress_traffic
      address:
        socket_address:
-          address: 0.0.0.0
-          port_value: 10000
+          address: {{ prompt_gateway_listener.address }}
+          port_value: {{ prompt_gateway_listener.port }}
      traffic_direction: INBOUND
      filter_chains:
        - filters:
@ -55,7 +55,7 @@ static_resources:
                  random_sampling:
                    value: {{ arch_tracing.random_sampling }}
                {% endif %}
-                stat_prefix: arch_listener_http
+                stat_prefix: ingress_traffic
                codec_type: AUTO
                scheme_header_transformation:
                  scheme_to_overwrite: https
@ -76,13 +76,13 @@ static_resources:
                          route:
                            auto_host_rewrite: true
                            cluster: arch_prompt_gateway_listener
-                            timeout: 60s
+                            timeout: {{ prompt_gateway_listener.timeout }}
                http_filters:
                  - name: envoy.filters.http.router
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router

-    - name: arch_prompt_gateway_listener
+    - name: ingress_traffic_prompt
      address:
        socket_address:
          address: 0.0.0.0
@ -104,11 +104,11 @@ static_resources:
                        envoy_grpc:
                          cluster_name: opentelemetry_collector
                        timeout: 0.250s
-                      service_name: prompt_processor
+                      service_name: ingress_traffic
                  random_sampling:
                    value: {{ arch_tracing.random_sampling }}
                {% endif %}
-                stat_prefix: arch_prompt_gateway_listener
+                stat_prefix: ingress_traffic
                codec_type: AUTO
                scheme_header_transformation:
                  scheme_to_overwrite: https
@ -201,7 +201,7 @@ static_resources:
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router

-    - name: arch_internal
+    - name: egress_api_traffic
      address:
        socket_address:
          address: 0.0.0.0
@ -223,11 +223,11 @@ static_resources:
                        envoy_grpc:
                          cluster_name: opentelemetry_collector
                        timeout: 0.250s
-                      service_name: prompt_processor
+                      service_name: egress_api_traffic
                  random_sampling:
                    value: {{ arch_tracing.random_sampling }}
                {% endif %}
-                stat_prefix: arch_internal
+                stat_prefix: egress_api_traffic
                codec_type: AUTO
                scheme_header_transformation:
                  scheme_to_overwrite: https
@ -273,13 +273,12 @@ static_resources:
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router

-
-    - name: arch_listener_http_llm
+    - name: egress_traffic
      address:
        socket_address:
-          address: 0.0.0.0
-          port_value: 12000
-      traffic_direction: INBOUND
+          address: {{ llm_gateway_listener.address }}
+          port_value: {{ llm_gateway_listener.port }}
+      traffic_direction: OUTBOUND
      filter_chains:
        - filters:
            - name: envoy.filters.network.http_connection_manager
@ -300,7 +299,7 @@ static_resources:
                  random_sampling:
                    value: {{ arch_tracing.random_sampling }}
                {% endif %}
-                stat_prefix: arch_listener_http
+                stat_prefix: egress_traffic
                codec_type: AUTO
                scheme_header_transformation:
                  scheme_to_overwrite: https
@ -321,14 +320,13 @@ static_resources:
                          route:
                            auto_host_rewrite: true
                            cluster: arch_listener_llm
-                            timeout: 60s
+                            timeout: {{ llm_gateway_listener.timeout }}
                http_filters:
                  - name: envoy.filters.http.router
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router

-
-    - name: arch_listener_llm
+    - name: egress_traffic_llm
      address:
        socket_address:
          address: 0.0.0.0
@ -349,11 +347,11 @@ static_resources:
                        envoy_grpc:
                          cluster_name: opentelemetry_collector
                        timeout: 0.250s
-                      service_name: llm_gateway
+                      service_name: egress_traffic_llm
                  random_sampling:
                    value: {{ arch_tracing.random_sampling }}
                {% endif %}
-                stat_prefix: arch_listener_http
+                stat_prefix: egress_traffic
                codec_type: AUTO
                scheme_header_transformation:
                  scheme_to_overwrite: https
@ -443,7 +441,7 @@ static_resources:

  clusters:
    - name: openai
-      connect_timeout: 5s
+      connect_timeout: 0.5s
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
@ -467,7 +465,7 @@ static_resources:
              tls_minimum_protocol_version: TLSv1_2
              tls_maximum_protocol_version: TLSv1_3
    - name: mistral
-      connect_timeout: 5s
+      connect_timeout: 0.5s
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
@ -486,14 +484,14 @@ static_resources:
        typed_config:
          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
          sni: api.mistral.ai
-    {% for internal_clustrer in ["arch_fc", "model_server"] %}
-    - name: {{ internal_clustrer }}
-      connect_timeout: 5s
+    {% for internal_cluster in ["arch_fc", "model_server"] %}
+    - name: {{ internal_cluster }}
+      connect_timeout: 0.5s
      type: STRICT_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
-        cluster_name: {{ internal_clustrer }}
+        cluster_name: {{ internal_cluster }}
        endpoints:
          - lb_endpoints:
              - endpoint:
@ -501,10 +499,10 @@ static_resources:
                    socket_address:
                      address: host.docker.internal
                      port_value: $MODEL_SERVER_PORT
-                  hostname: {{ internal_clustrer }}
+                  hostname: {{ internal_cluster }}
    {% endfor %}
    - name: mistral_7b_instruct
-      connect_timeout: 5s
+      connect_timeout: 0.5s
      type: STRICT_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
@ -523,7 +521,7 @@ static_resources:
      {% if cluster.connect_timeout -%}
      connect_timeout: {{ cluster.connect_timeout }}
      {% else -%}
-      connect_timeout: 5s
+      connect_timeout: 0.5s
      {% endif -%}
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
@ -557,7 +555,7 @@ static_resources:

 {% for local_llm_provider in local_llms %}
    - name: {{ local_llm_provider.name }}
-      connect_timeout: 5s
+      connect_timeout: 0.5s
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
@ -589,7 +587,7 @@ static_resources:

 {% endfor %}
    - name: arch_internal
-      connect_timeout: 5s
+      connect_timeout: 0.5s
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
@ -605,7 +603,7 @@ static_resources:
                  hostname: arch_internal

    - name: arch_prompt_gateway_listener
-      connect_timeout: 5s
+      connect_timeout: 0.5s
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
@ -621,7 +619,7 @@ static_resources:
                  hostname: arch_prompt_gateway_listener

    - name: arch_listener_llm
-      connect_timeout: 5s
+      connect_timeout: 0.5s
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
--- a/arch/tools/README.md
+++ b/arch/tools/README.md
@ -19,7 +19,7 @@ source venv/bin/activate

 ### Step 3: Run the build script
 ```bash
-pip install archgw==0.2.0
+pip install archgw==0.2.2
 ```

 ## Uninstall Instructions: archgw CLI
--- a/arch/tools/cli/config_generator.py
+++ b/arch/tools/cli/config_generator.py
@ -104,7 +104,27 @@ def validate_and_render_schema():
    arch_config_string = yaml.dump(config_yaml)
    arch_llm_config_string = yaml.dump(config_yaml)

+    prompt_gateway_listener = config_yaml.get("listeners", {}).get(
+        "ingress_traffic", {}
+    )
+    if prompt_gateway_listener.get("port") == None:
+        prompt_gateway_listener["port"] = 10000  # default port for prompt gateway
+    if prompt_gateway_listener.get("address") == None:
+        prompt_gateway_listener["address"] = "127.0.0.1"
+    if prompt_gateway_listener.get("timeout") == None:
+        prompt_gateway_listener["timeout"] = "10s"
+
+    llm_gateway_listener = config_yaml.get("listeners", {}).get("egress_traffic", {})
+    if llm_gateway_listener.get("port") == None:
+        llm_gateway_listener["port"] = 12000  # default port for llm gateway
+    if llm_gateway_listener.get("address") == None:
+        llm_gateway_listener["address"] = "127.0.0.1"
+    if llm_gateway_listener.get("timeout") == None:
+        llm_gateway_listener["timeout"] = "10s"
+
    data = {
+        "prompt_gateway_listener": prompt_gateway_listener,
+        "llm_gateway_listener": llm_gateway_listener,
        "arch_config": arch_config_string,
        "arch_llm_config": arch_llm_config_string,
        "arch_clusters": inferred_clusters,
--- a/arch/tools/cli/core.py
+++ b/arch/tools/cli/core.py
@ -2,102 +2,49 @@ import subprocess
 import os
 import time
 import sys
-import glob
-import docker
-from docker.errors import DockerException
-from cli.utils import getLogger, update_docker_host_env
+
+import yaml
+from cli.utils import getLogger
 from cli.consts import (
-    ARCHGW_DOCKER_IMAGE,
    ARCHGW_DOCKER_NAME,
    KATANEMO_LOCAL_MODEL_LIST,
-    MODEL_SERVER_LOG_FILE,
-    ACCESS_LOG_FILES,
 )
 from huggingface_hub import snapshot_download
-from dotenv import dotenv_values
+import subprocess
+from cli.docker_cli import (
+    docker_container_status,
+    docker_remove_container,
+    docker_start_archgw_detached,
+    docker_stop_container,
+    health_check_endpoint,
+    stream_gateway_logs,
+)


 log = getLogger(__name__)


-def start_archgw_docker(client, arch_config_file, env):
-    logs_path = "~/archgw_logs"
-    logs_path_abs = os.path.expanduser(logs_path)
+def _get_gateway_ports(arch_config_file: str) -> tuple:
+    PROMPT_GATEWAY_DEFAULT_PORT = 10000
+    LLM_GATEWAY_DEFAULT_PORT = 12000

-    return client.containers.run(
-        name=ARCHGW_DOCKER_NAME,
-        image=ARCHGW_DOCKER_IMAGE,
-        detach=True,  # Run in detached mode
-        ports={
-            "10000/tcp": 10000,
-            "10001/tcp": 10001,
-            "11000/tcp": 11000,
-            "12000/tcp": 12000,
-            "9901/tcp": 19901,
-        },
-        volumes={
-            f"{arch_config_file}": {
-                "bind": "/app/arch_config.yaml",
-                "mode": "ro",
-            },
-            "/etc/ssl/cert.pem": {"bind": "/etc/ssl/cert.pem", "mode": "ro"},
-            logs_path_abs: {"bind": "/var/log"},
-        },
-        environment={
-            "OTEL_TRACING_HTTP_ENDPOINT": "http://host.docker.internal:4318/v1/traces",
-            "MODEL_SERVER_PORT": os.getenv("MODEL_SERVER_PORT", "51000"),
-            **env,
-        },
-        extra_hosts={"host.docker.internal": "host-gateway"},
-        healthcheck={
-            "test": ["CMD", "curl", "-f", "http://localhost:10000/healthz"],
-            "interval": 5000000000,  # 5 seconds
-            "timeout": 1000000000,  # 1 seconds
-            "retries": 3,
-        },
+    # parse the arch_config_file YAML and read the prompt and LLM gateway ports
+    arch_config_dict = {}
+    with open(arch_config_file) as f:
+        arch_config_dict = yaml.safe_load(f)
+
+    prompt_gateway_port = (
+        arch_config_dict.get("listeners", {})
+        .get("ingress_traffic", {})
+        .get("port", PROMPT_GATEWAY_DEFAULT_PORT)
+    )
+    llm_gateway_port = (
+        arch_config_dict.get("listeners", {})
+        .get("egress_traffic", {})
+        .get("port", LLM_GATEWAY_DEFAULT_PORT)
    )

-
-def stream_gateway_logs(follow):
-    """
-    Stream logs from the arch gateway service.
-    """
-    log.info("Logs from arch gateway service.")
-
-    options = ["docker", "logs", "archgw"]
-    if follow:
-        options.append("-f")
-    try:
-        # Run `docker-compose logs` to stream logs from the gateway service
-        subprocess.run(
-            options,
-            check=True,
-            stdout=sys.stdout,
-            stderr=sys.stderr,
-        )
-
-    except subprocess.CalledProcessError as e:
-        log.info(f"Failed to stream logs: {str(e)}")
-
-
-def stream_access_logs(follow):
-    """
-    Get the archgw access logs
-    """
-    log_file_pattern_expanded = os.path.expanduser(ACCESS_LOG_FILES)
-    log_files = glob.glob(log_file_pattern_expanded)
-
-    stream_command = ["tail"]
-    if follow:
-        stream_command.append("-f")
-
-    stream_command.extend(log_files)
-    subprocess.run(
-        stream_command,
-        check=True,
-        stdout=sys.stdout,
-        stderr=sys.stderr,
-    )
+    return prompt_gateway_port, llm_gateway_port


 def start_arch(arch_config_file, env, log_timeout=120, foreground=False):
@ -111,55 +58,58 @@ def start_arch(arch_config_file, env, log_timeout=120, foreground=False):
    log.info("Starting arch gateway")

    try:
-        try:
-            client = docker.from_env()
-        except DockerException as e:
-            # try setting up the docker host environment variable and retry
-            update_docker_host_env()
-            client = docker.from_env()
+        archgw_container_status = docker_container_status(ARCHGW_DOCKER_NAME)
+        if archgw_container_status != "not found":
+            log.info("archgw found in docker, stopping and removing it")
+            docker_stop_container(ARCHGW_DOCKER_NAME)
+            docker_remove_container(ARCHGW_DOCKER_NAME)

-        try:
-            container = client.containers.get("archgw")
-            log.info("archgw container found in docker, stopping and removing it")
-            # ensure that previous docker container is stopped and removed
-            container.stop()
-            container.remove()
-            log.info("Stopped and removed archgw container")
-        except docker.errors.NotFound as e:
-            pass
+        prompt_gateway_port, llm_gateway_port = _get_gateway_ports(arch_config_file)

-        container = start_archgw_docker(client, arch_config_file, env)
+        return_code, _, archgw_stderr = docker_start_archgw_detached(
+            arch_config_file,
+            os.path.expanduser("~/archgw_logs"),
+            env,
+            prompt_gateway_port,
+            llm_gateway_port,
+        )
+        if return_code != 0:
+            log.info("Failed to start arch gateway: " + str(return_code))
+            log.info("stderr: " + archgw_stderr)
+            sys.exit(1)

        start_time = time.time()
-
        while True:
-            container = client.containers.get(container.id)
+            prompt_gateway_health_check_status = health_check_endpoint(
+                f"http://localhost:{prompt_gateway_port}/healthz"
+            )
+
+            llm_gateway_health_check_status = health_check_endpoint(
+                f"http://localhost:{llm_gateway_port}/healthz"
+            )
+
+            archgw_status = docker_container_status(ARCHGW_DOCKER_NAME)
            current_time = time.time()
            elapsed_time = current_time - start_time

            # Check if timeout is reached
            if elapsed_time > log_timeout:
-                log.info(f"Stopping log monitoring after {log_timeout} seconds.")
+                log.info(f"stopping log monitoring after {log_timeout} seconds.")
                break

-            container_status = container.attrs["State"]["Health"]["Status"]
-
-            if container_status == "healthy":
-                log.info("Container is healthy!")
+            if prompt_gateway_health_check_status or llm_gateway_health_check_status:
+                log.info("archgw is running and is healthy!")
                break
            else:
-                log.info(f"Container health status: {container_status}")
+                log.info(f"archgw status: {archgw_status}, health status: starting")
                time.sleep(1)

        if foreground:
-            for line in container.logs(stream=True):
-                print(line.decode("utf-8").strip("\n"))
+            stream_gateway_logs(follow=True)

    except KeyboardInterrupt:
        log.info("Keyboard interrupt received, stopping arch gateway service.")
        stop_arch()
-    except docker.errors.APIError as e:
-        log.info(f"Failed to start Arch: {str(e)}")


 def stop_arch():
@ -173,10 +123,10 @@ def stop_arch():

    try:
        subprocess.run(
-            ["docker", "stop", "archgw"],
+            ["docker", "stop", ARCHGW_DOCKER_NAME],
        )
        subprocess.run(
-            ["docker", "remove", "archgw"],
+            ["docker", "rm", ARCHGW_DOCKER_NAME],
        )

        log.info("Successfully shut down arch gateway service.")
--- a/arch/tools/cli/docker_cli.py
+++ b/arch/tools/cli/docker_cli.py
@ -0,0 +1,132 @@
+import subprocess
+import json
+import sys
+import requests
+
+from cli.consts import ARCHGW_DOCKER_IMAGE, ARCHGW_DOCKER_NAME
+from cli.utils import getLogger
+
+log = getLogger(__name__)
+
+
+def docker_container_status(container: str) -> str:
+    """
+    Report the state of a docker container by name.
+
+    Runs `docker inspect --type=container <container>` and returns the
+    `State.Status` field of the first inspected object. Returns the literal
+    string "not found" when the inspect command exits non-zero, and an empty
+    string when the status field is absent from the inspect output.
+    """
+    inspect_cmd = ["docker", "inspect", "--type=container", container]
+    proc = subprocess.run(inspect_cmd, capture_output=True, text=True, check=False)
+
+    if proc.returncode == 0:
+        # `docker inspect` prints a JSON array; only one name was requested.
+        details = json.loads(proc.stdout)[0]
+        return details.get("State", {}).get("Status", "")
+
+    return "not found"
+
+
+def docker_stop_container(container: str) -> int:
+    """
+    Stop a docker container by name using `docker stop`.
+
+    Output of the command is captured and discarded. No exception is raised
+    when the command fails (check=False); callers inspect the return value.
+
+    Returns:
+        The exit code of the `docker stop` command (0 on success).
+    """
+    result = subprocess.run(
+        ["docker", "stop", container], capture_output=True, text=True, check=False
+    )
+    return result.returncode
+
+
+def docker_remove_container(container: str) -> int:
+    """
+    Remove a docker container by name using `docker rm`.
+
+    Output of the command is captured and discarded. No exception is raised
+    when the command fails (check=False); callers inspect the return value.
+
+    Returns:
+        The exit code of the `docker rm` command (0 on success).
+    """
+    result = subprocess.run(
+        ["docker", "rm", container], capture_output=True, text=True, check=False
+    )
+    return result.returncode
+
+
+def docker_start_archgw_detached(
+    arch_config_file: str,
+    logs_path_abs: str,
+    env: dict,
+    prompt_gateway_port,
+    llm_gateway_port,
+) -> tuple:
+    """
+    Start the archgw container in detached mode via `docker run -d`.
+
+    Args:
+        arch_config_file: host path of the arch config, bind-mounted read-only
+            at /app/arch_config.yaml inside the container.
+        logs_path_abs: host directory bind-mounted read-write at /var/log.
+        env: environment variables, each passed as `-e KEY=VALUE`.
+        prompt_gateway_port: published on the same port number on the host.
+        llm_gateway_port: published on the same port number on the host.
+
+    Returns:
+        A `(returncode, stdout, stderr)` tuple from the `docker run` command.
+        No exception is raised on a non-zero exit code (check=False).
+    """
+    # Flatten {"K": "V"} into ["-e", "K=V", ...].
+    env_args = [item for key, value in env.items() for item in ["-e", f"{key}={value}"]]
+
+    port_mappings = [
+        f"{prompt_gateway_port}:{prompt_gateway_port}",
+        f"{llm_gateway_port}:{llm_gateway_port}",
+        # host port 9901 -> container port 19901
+        "9901:19901",
+    ]
+    port_mappings_args = [item for port in port_mappings for item in ("-p", port)]
+
+    volume_mappings = [
+        f"{logs_path_abs}:/var/log:rw",
+        f"{arch_config_file}:/app/arch_config.yaml:ro",
+    ]
+    volume_mappings_args = [
+        item for volume in volume_mappings for item in ("-v", volume)
+    ]
+
+    options = [
+        "docker",
+        "run",
+        "-d",
+        "--name",
+        ARCHGW_DOCKER_NAME,
+        *port_mappings_args,
+        *volume_mappings_args,
+        *env_args,
+        # Map host.docker.internal to the docker host gateway address.
+        "--add-host",
+        "host.docker.internal:host-gateway",
+        ARCHGW_DOCKER_IMAGE,
+    ]
+
+    result = subprocess.run(options, capture_output=True, text=True, check=False)
+    return result.returncode, result.stdout, result.stderr
+
+
+def health_check_endpoint(endpoint: str, timeout: float = 5.0) -> bool:
+    """
+    Probe an HTTP endpoint and report whether it answered with status 200.
+
+    Args:
+        endpoint: URL to send a GET request to.
+        timeout: seconds to wait for the request before giving up. Without a
+            timeout a stalled connection would block the caller indefinitely.
+
+    Returns:
+        True when the response status code is 200; False for any other status
+        or when the request fails (connection error, timeout, ...).
+    """
+    try:
+        response = requests.get(endpoint, timeout=timeout)
+    except requests.RequestException:
+        # Best-effort probe: an unreachable endpoint just means "not healthy".
+        return False
+    return response.status_code == 200
+
+
+def stream_gateway_logs(follow):
+    """
+    Print the arch gateway container logs to this process's stdout/stderr.
+
+    When `follow` is truthy, `-f` is passed to `docker logs` so the call keeps
+    streaming new output. A non-zero exit from `docker logs` is logged rather
+    than raised.
+    """
+    log.info("Logs from arch gateway service.")
+
+    options = ["docker", "logs", "-f"] if follow else ["docker", "logs"]
+    options = [*options, ARCHGW_DOCKER_NAME]
+
+    try:
+        # `docker logs` writes directly to our stdout/stderr.
+        subprocess.run(options, check=True, stdout=sys.stdout, stderr=sys.stderr)
+    except subprocess.CalledProcessError as e:
+        log.info(f"Failed to stream logs: {str(e)}")
+
+
+def docker_validate_archgw_schema(arch_config_file):
+    """
+    Run `config_generator.py` inside a throwaway archgw container.
+
+    The config file is bind-mounted read-only at /app/arch_config.yaml, the
+    image entrypoint is overridden with `python`, and the container is removed
+    on exit (`--rm`).
+
+    Returns:
+        A `(returncode, stdout, stderr)` tuple from the `docker run` command.
+        No exception is raised on a non-zero exit code.
+    """
+    config_mount = f"{arch_config_file}:/app/arch_config.yaml:ro"
+    validate_cmd = ["docker", "run", "--rm", "-v", config_mount]
+    validate_cmd += ["--entrypoint", "python", ARCHGW_DOCKER_IMAGE]
+    validate_cmd.append("config_generator.py")
+
+    proc = subprocess.run(validate_cmd, capture_output=True, text=True, check=False)
+    return proc.returncode, proc.stdout, proc.stderr
--- a/arch/tools/cli/main.py
+++ b/arch/tools/cli/main.py
@ -5,11 +5,12 @@ import subprocess
 import multiprocessing
 import importlib.metadata
 from cli import targets
+from cli.docker_cli import docker_validate_archgw_schema, stream_gateway_logs
 from cli.utils import (
    getLogger,
    get_llm_provider_access_keys,
    load_env_file_to_dict,
-    validate_schema,
+    stream_access_logs,
 )
 from cli.core import (
    start_arch_modelserver,
@ -17,12 +18,9 @@ from cli.core import (
    start_arch,
    stop_arch,
    download_models_from_hf,
-    stream_access_logs,
-    stream_gateway_logs,
 )
 from cli.consts import (
    KATANEMO_DOCKERHUB_REPO,
-    KATANEMO_LOCAL_MODEL_LIST,
    SERVICE_NAME_ARCHGW,
    SERVICE_NAME_MODEL_SERVER,
    SERVICE_ALL,
@ -174,17 +172,24 @@ def up(file, path, service, foreground):

    log.info(f"Validating {arch_config_file}")

-    try:
-        validate_schema(arch_config_file)
-    except Exception as e:
-        log.info(f"Exiting archgw up: validation failed")
-        log.info(f"Error: {str(e)}")
+    (
+        validation_return_code,
+        validation_stdout,
+        validation_stderr,
+    ) = docker_validate_archgw_schema(arch_config_file)
+    if validation_return_code != 0:
+        log.info(f"Error: Validation failed. Exiting")
+        log.info(f"Validation stdout: {validation_stdout}")
+        log.info(f"Validation stderr: {validation_stderr}")
        sys.exit(1)

    log.info("Starting arch model server and arch gateway")

    # Set the ARCH_CONFIG_FILE environment variable
-    env_stage = {}
+    env_stage = {
+        "OTEL_TRACING_HTTP_ENDPOINT": "http://host.docker.internal:4318/v1/traces",
+        "MODEL_SERVER_PORT": os.getenv("MODEL_SERVER_PORT", "51000"),
+    }
    env = os.environ.copy()
    # check if access_keys are present in the config file
    access_keys = get_llm_provider_access_keys(arch_config_file=arch_config_file)
--- a/arch/tools/cli/targets.py
+++ b/arch/tools/cli/targets.py
@ -2,7 +2,6 @@ import ast
 import sys
 import yaml
 from typing import Any
-from pydantic import BaseModel

 FLASK_ROUTE_DECORATORS = ["route", "get", "post", "put", "delete", "patch"]
 FASTAPI_ROUTE_DECORATORS = ["get", "post", "put", "delete", "patch"]
--- a/arch/tools/cli/utils.py
+++ b/arch/tools/cli/utils.py
@ -1,10 +1,11 @@
+import glob
 import os
+import subprocess
+import sys
 import yaml
 import logging
-import docker
-from docker.errors import DockerException

-from cli.consts import ARCHGW_DOCKER_IMAGE, ARCHGW_DOCKER_NAME
+from cli.consts import ACCESS_LOG_FILES

 logging.basicConfig(
    level=logging.INFO,
@ -21,63 +22,6 @@ def getLogger(name="cli"):
 log = getLogger(__name__)


-def update_docker_host_env():
-    """
-    Update DOCKER_HOST environment variable to use the local Docker socket
-    """
-    if os.getenv("DOCKER_HOST"):
-        return
-
-    default_docker_socket = os.getenv("DEFAULT_DOCKER_SOCKET", "/var/run/docker.sock")
-    if not os.path.exists(default_docker_socket):
-        home_dir = os.getenv("HOME")
-        docker_host = f"unix://{home_dir}/.docker/run/docker.sock"
-        log.info(
-            f"Default docker socket {default_docker_socket} not found, using {docker_host}"
-        )
-        os.environ["DOCKER_HOST"] = docker_host
-
-
-def validate_schema(arch_config_file: str) -> None:
-    try:
-        try:
-            client = docker.from_env()
-        except DockerException as e:
-            # try setting up the docker host environment variable and retry
-            update_docker_host_env()
-            client = docker.from_env()
-
-        container = client.containers.run(
-            image=ARCHGW_DOCKER_IMAGE,
-            volumes={
-                f"{arch_config_file}": {
-                    "bind": "/app/arch_config.yaml",
-                    "mode": "ro",
-                },
-            },
-            entrypoint=["python", "config_generator.py"],
-            detach=True,
-        )
-
-        # Wait for the container to finish and get the exit code
-        exit_code = container.wait()
-
-        # Check exit code for validation success
-        if exit_code["StatusCode"] != 0:
-            # Validation failed (non-zero exit code)
-            logs = container.logs().decode()  # Get container logs for debugging
-            raise ValueError(
-                f"Validation failed. Container exited with code {exit_code}.\nLogs:\n{logs}"
-            )
-
-        # Successful validation (exit code 0)
-        log.info("Schema validation successful!")
-
-    except docker.errors.APIError as e:
-        # Handle container creation error
-        raise ValueError(f"Failed to create container: {e}")
-
-
 def get_llm_provider_access_keys(arch_config_file):
    with open(arch_config_file, "r") as file:
        arch_config = file.read()
@ -89,6 +33,18 @@ def get_llm_provider_access_keys(arch_config_file):
        if acess_key is not None:
            access_key_list.append(acess_key)

+    for prompt_target in arch_config_yaml.get("prompt_targets", []):
+        for k, v in prompt_target.get("endpoint", {}).get("http_headers", {}).items():
+            if k.lower() == "authorization":
+                print(
+                    f"found auth header: {k} for prompt_target: {prompt_target.get('name')}/{prompt_target.get('endpoint').get('name')}"
+                )
+                auth_tokens = v.split(" ")
+                if len(auth_tokens) > 1:
+                    access_key_list.append(auth_tokens[1])
+                else:
+                    access_key_list.append(v)
+
    return access_key_list


@ -115,3 +71,23 @@ def load_env_file_to_dict(file_path):
                env_dict[key] = value

    return env_dict
+
+
+def stream_access_logs(follow):
+    """
+    Print the archgw access logs using `tail`.
+
+    The ACCESS_LOG_FILES pattern is expanded for `~` and globbed; every
+    matching file is handed to `tail` (`tail -f` when `follow` is truthy),
+    with output going to this process's stdout/stderr.
+
+    Raises:
+        subprocess.CalledProcessError: if `tail` exits with a non-zero status.
+    """
+    log_file_pattern_expanded = os.path.expanduser(ACCESS_LOG_FILES)
+    log_files = glob.glob(log_file_pattern_expanded)
+
+    if not log_files:
+        # With no file operands `tail` would read stdin and appear to hang.
+        log.info(f"No access log files found matching {log_file_pattern_expanded}")
+        return
+
+    stream_command = ["tail"]
+    if follow:
+        stream_command.append("-f")
+
+    stream_command.extend(log_files)
+    subprocess.run(
+        stream_command,
+        check=True,
+        stdout=sys.stdout,
+        stderr=sys.stderr,
+    )
--- a/arch/tools/poetry.lock
+++ b/arch/tools/poetry.lock
@ -2,7 +2,7 @@

 [[package]]
 name = "archgw_modelserver"
-version = "0.2.0"
+version = "0.2.2"
 description = "A model server for serving models"
 optional = false
 python-versions = "*"
@ -15,13 +15,13 @@ url = "../../model_server"

 [[package]]
 name = "attrs"
-version = "24.3.0"
+version = "25.1.0"
 description = "Classes Without Boilerplate"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "attrs-24.3.0-py3-none-any.whl", hash = "sha256:ac96cd038792094f438ad1f6ff80837353805ac950cd2aa0e0625ef19850c308"},
-    {file = "attrs-24.3.0.tar.gz", hash = "sha256:8f5c07333d543103541ba7be0e2ce16eeee8130cb0b3f9238ab904ce1e85baff"},
+    {file = "attrs-25.1.0-py3-none-any.whl", hash = "sha256:c75a69e28a550a7e93789579c22aa26b0f5b83b75dc4e08fe092980051e1090a"},
+    {file = "attrs-25.1.0.tar.gz", hash = "sha256:1c97078a80c814273a76b2a298a932eb681c87415c11dee0a6921de7f1b02c3e"},
 ]

 [package.extras]
@ -32,118 +32,6 @@ docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphi
 tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
 tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"]

-[[package]]
-name = "certifi"
-version = "2024.12.14"
-description = "Python package for providing Mozilla's CA Bundle."
-optional = false
-python-versions = ">=3.6"
-files = [
-    {file = "certifi-2024.12.14-py3-none-any.whl", hash = "sha256:1275f7a45be9464efc1173084eaa30f866fe2e47d389406136d332ed4967ec56"},
-    {file = "certifi-2024.12.14.tar.gz", hash = "sha256:b650d30f370c2b724812bee08008be0c4163b163ddaec3f2546c1caf65f191db"},
-]
-
-[[package]]
-name = "charset-normalizer"
-version = "3.4.1"
-description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
-optional = false
-python-versions = ">=3.7"
-files = [
-    {file = "charset_normalizer-3.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de"},
-    {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176"},
-    {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e218488cd232553829be0664c2292d3af2eeeb94b32bea483cf79ac6a694e037"},
-    {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:80ed5e856eb7f30115aaf94e4a08114ccc8813e6ed1b5efa74f9f82e8509858f"},
-    {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b010a7a4fd316c3c484d482922d13044979e78d1861f0e0650423144c616a46a"},
-    {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4532bff1b8421fd0a320463030c7520f56a79c9024a4e88f01c537316019005a"},
-    {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d973f03c0cb71c5ed99037b870f2be986c3c05e63622c017ea9816881d2dd247"},
-    {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3a3bd0dcd373514dcec91c411ddb9632c0d7d92aed7093b8c3bbb6d69ca74408"},
-    {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:d9c3cdf5390dcd29aa8056d13e8e99526cda0305acc038b96b30352aff5ff2bb"},
-    {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:2bdfe3ac2e1bbe5b59a1a63721eb3b95fc9b6817ae4a46debbb4e11f6232428d"},
-    {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:eab677309cdb30d047996b36d34caeda1dc91149e4fdca0b1a039b3f79d9a807"},
-    {file = "charset_normalizer-3.4.1-cp310-cp310-win32.whl", hash = "sha256:c0429126cf75e16c4f0ad00ee0eae4242dc652290f940152ca8c75c3a4b6ee8f"},
-    {file = "charset_normalizer-3.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:9f0b8b1c6d84c8034a44893aba5e767bf9c7a211e313a9605d9c617d7083829f"},
-    {file = "charset_normalizer-3.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8bfa33f4f2672964266e940dd22a195989ba31669bd84629f05fab3ef4e2d125"},
-    {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28bf57629c75e810b6ae989f03c0828d64d6b26a5e205535585f96093e405ed1"},
-    {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f08ff5e948271dc7e18a35641d2f11a4cd8dfd5634f55228b691e62b37125eb3"},
-    {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:234ac59ea147c59ee4da87a0c0f098e9c8d169f4dc2a159ef720f1a61bbe27cd"},
-    {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd4ec41f914fa74ad1b8304bbc634b3de73d2a0889bd32076342a573e0779e00"},
-    {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eea6ee1db730b3483adf394ea72f808b6e18cf3cb6454b4d86e04fa8c4327a12"},
-    {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c96836c97b1238e9c9e3fe90844c947d5afbf4f4c92762679acfe19927d81d77"},
-    {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:4d86f7aff21ee58f26dcf5ae81a9addbd914115cdebcbb2217e4f0ed8982e146"},
-    {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:09b5e6733cbd160dcc09589227187e242a30a49ca5cefa5a7edd3f9d19ed53fd"},
-    {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:5777ee0881f9499ed0f71cc82cf873d9a0ca8af166dfa0af8ec4e675b7df48e6"},
-    {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:237bdbe6159cff53b4f24f397d43c6336c6b0b42affbe857970cefbb620911c8"},
-    {file = "charset_normalizer-3.4.1-cp311-cp311-win32.whl", hash = "sha256:8417cb1f36cc0bc7eaba8ccb0e04d55f0ee52df06df3ad55259b9a323555fc8b"},
-    {file = "charset_normalizer-3.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:d7f50a1f8c450f3925cb367d011448c39239bb3eb4117c36a6d354794de4ce76"},
-    {file = "charset_normalizer-3.4.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:73d94b58ec7fecbc7366247d3b0b10a21681004153238750bb67bd9012414545"},
-    {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dad3e487649f498dd991eeb901125411559b22e8d7ab25d3aeb1af367df5efd7"},
-    {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c30197aa96e8eed02200a83fba2657b4c3acd0f0aa4bdc9f6c1af8e8962e0757"},
-    {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2369eea1ee4a7610a860d88f268eb39b95cb588acd7235e02fd5a5601773d4fa"},
-    {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc2722592d8998c870fa4e290c2eec2c1569b87fe58618e67d38b4665dfa680d"},
-    {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffc9202a29ab3920fa812879e95a9e78b2465fd10be7fcbd042899695d75e616"},
-    {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:804a4d582ba6e5b747c625bf1255e6b1507465494a40a2130978bda7b932c90b"},
-    {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0f55e69f030f7163dffe9fd0752b32f070566451afe180f99dbeeb81f511ad8d"},
-    {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c4c3e6da02df6fa1410a7680bd3f63d4f710232d3139089536310d027950696a"},
-    {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:5df196eb874dae23dcfb968c83d4f8fdccb333330fe1fc278ac5ceeb101003a9"},
-    {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e358e64305fe12299a08e08978f51fc21fac060dcfcddd95453eabe5b93ed0e1"},
-    {file = "charset_normalizer-3.4.1-cp312-cp312-win32.whl", hash = "sha256:9b23ca7ef998bc739bf6ffc077c2116917eabcc901f88da1b9856b210ef63f35"},
-    {file = "charset_normalizer-3.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:6ff8a4a60c227ad87030d76e99cd1698345d4491638dfa6673027c48b3cd395f"},
-    {file = "charset_normalizer-3.4.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:aabfa34badd18f1da5ec1bc2715cadc8dca465868a4e73a0173466b688f29dda"},
-    {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22e14b5d70560b8dd51ec22863f370d1e595ac3d024cb8ad7d308b4cd95f8313"},
-    {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8436c508b408b82d87dc5f62496973a1805cd46727c34440b0d29d8a2f50a6c9"},
-    {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d074908e1aecee37a7635990b2c6d504cd4766c7bc9fc86d63f9c09af3fa11b"},
-    {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:955f8851919303c92343d2f66165294848d57e9bba6cf6e3625485a70a038d11"},
-    {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:44ecbf16649486d4aebafeaa7ec4c9fed8b88101f4dd612dcaf65d5e815f837f"},
-    {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0924e81d3d5e70f8126529951dac65c1010cdf117bb75eb02dd12339b57749dd"},
-    {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2967f74ad52c3b98de4c3b32e1a44e32975e008a9cd2a8cc8966d6a5218c5cb2"},
-    {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c75cb2a3e389853835e84a2d8fb2b81a10645b503eca9bcb98df6b5a43eb8886"},
-    {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:09b26ae6b1abf0d27570633b2b078a2a20419c99d66fb2823173d73f188ce601"},
-    {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd"},
-    {file = "charset_normalizer-3.4.1-cp313-cp313-win32.whl", hash = "sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407"},
-    {file = "charset_normalizer-3.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971"},
-    {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f30bf9fd9be89ecb2360c7d94a711f00c09b976258846efe40db3d05828e8089"},
-    {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:97f68b8d6831127e4787ad15e6757232e14e12060bec17091b85eb1486b91d8d"},
-    {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7974a0b5ecd505609e3b19742b60cee7aa2aa2fb3151bc917e6e2646d7667dcf"},
-    {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc54db6c8593ef7d4b2a331b58653356cf04f67c960f584edb7c3d8c97e8f39e"},
-    {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:311f30128d7d333eebd7896965bfcfbd0065f1716ec92bd5638d7748eb6f936a"},
-    {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:7d053096f67cd1241601111b698f5cad775f97ab25d81567d3f59219b5f1adbd"},
-    {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:807f52c1f798eef6cf26beb819eeb8819b1622ddfeef9d0977a8502d4db6d534"},
-    {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_ppc64le.whl", hash = "sha256:dccbe65bd2f7f7ec22c4ff99ed56faa1e9f785482b9bbd7c717e26fd723a1d1e"},
-    {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_s390x.whl", hash = "sha256:2fb9bd477fdea8684f78791a6de97a953c51831ee2981f8e4f583ff3b9d9687e"},
-    {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:01732659ba9b5b873fc117534143e4feefecf3b2078b0a6a2e925271bb6f4cfa"},
-    {file = "charset_normalizer-3.4.1-cp37-cp37m-win32.whl", hash = "sha256:7a4f97a081603d2050bfaffdefa5b02a9ec823f8348a572e39032caa8404a487"},
-    {file = "charset_normalizer-3.4.1-cp37-cp37m-win_amd64.whl", hash = "sha256:7b1bef6280950ee6c177b326508f86cad7ad4dff12454483b51d8b7d673a2c5d"},
-    {file = "charset_normalizer-3.4.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ecddf25bee22fe4fe3737a399d0d177d72bc22be6913acfab364b40bce1ba83c"},
-    {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c60ca7339acd497a55b0ea5d506b2a2612afb2826560416f6894e8b5770d4a9"},
-    {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b7b2d86dd06bfc2ade3312a83a5c364c7ec2e3498f8734282c6c3d4b07b346b8"},
-    {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dd78cfcda14a1ef52584dbb008f7ac81c1328c0f58184bf9a84c49c605002da6"},
-    {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e27f48bcd0957c6d4cb9d6fa6b61d192d0b13d5ef563e5f2ae35feafc0d179c"},
-    {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:01ad647cdd609225c5350561d084b42ddf732f4eeefe6e678765636791e78b9a"},
-    {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:619a609aa74ae43d90ed2e89bdd784765de0a25ca761b93e196d938b8fd1dbbd"},
-    {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:89149166622f4db9b4b6a449256291dc87a99ee53151c74cbd82a53c8c2f6ccd"},
-    {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:7709f51f5f7c853f0fb938bcd3bc59cdfdc5203635ffd18bf354f6967ea0f824"},
-    {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:345b0426edd4e18138d6528aed636de7a9ed169b4aaf9d61a8c19e39d26838ca"},
-    {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:0907f11d019260cdc3f94fbdb23ff9125f6b5d1039b76003b5b0ac9d6a6c9d5b"},
-    {file = "charset_normalizer-3.4.1-cp38-cp38-win32.whl", hash = "sha256:ea0d8d539afa5eb2728aa1932a988a9a7af94f18582ffae4bc10b3fbdad0626e"},
-    {file = "charset_normalizer-3.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:329ce159e82018d646c7ac45b01a430369d526569ec08516081727a20e9e4af4"},
-    {file = "charset_normalizer-3.4.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b97e690a2118911e39b4042088092771b4ae3fc3aa86518f84b8cf6888dbdb41"},
-    {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78baa6d91634dfb69ec52a463534bc0df05dbd546209b79a3880a34487f4b84f"},
-    {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1a2bc9f351a75ef49d664206d51f8e5ede9da246602dc2d2726837620ea034b2"},
-    {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:75832c08354f595c760a804588b9357d34ec00ba1c940c15e31e96d902093770"},
-    {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0af291f4fe114be0280cdd29d533696a77b5b49cfde5467176ecab32353395c4"},
-    {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0167ddc8ab6508fe81860a57dd472b2ef4060e8d378f0cc555707126830f2537"},
-    {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:2a75d49014d118e4198bcee5ee0a6f25856b29b12dbf7cd012791f8a6cc5c496"},
-    {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:363e2f92b0f0174b2f8238240a1a30142e3db7b957a5dd5689b0e75fb717cc78"},
-    {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:ab36c8eb7e454e34e60eb55ca5d241a5d18b2c6244f6827a30e451c42410b5f7"},
-    {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:4c0907b1928a36d5a998d72d64d8eaa7244989f7aaaf947500d3a800c83a3fd6"},
-    {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:04432ad9479fa40ec0f387795ddad4437a2b50417c69fa275e212933519ff294"},
-    {file = "charset_normalizer-3.4.1-cp39-cp39-win32.whl", hash = "sha256:3bed14e9c89dcb10e8f3a29f9ccac4955aebe93c71ae803af79265c9ca5644c5"},
-    {file = "charset_normalizer-3.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:49402233c892a461407c512a19435d1ce275543138294f7ef013f0b63d5d3765"},
-    {file = "charset_normalizer-3.4.1-py3-none-any.whl", hash = "sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85"},
-    {file = "charset_normalizer-3.4.1.tar.gz", hash = "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3"},
-]
-
 [[package]]
 name = "click"
 version = "8.1.8"
@ -169,42 +57,6 @@ files = [
    {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
 ]

-[[package]]
-name = "docker"
-version = "7.1.0"
-description = "A Python library for the Docker Engine API."
-optional = false
-python-versions = ">=3.8"
-files = [
-    {file = "docker-7.1.0-py3-none-any.whl", hash = "sha256:c96b93b7f0a746f9e77d325bcfb87422a3d8bd4f03136ae8a85b37f1898d5fc0"},
-    {file = "docker-7.1.0.tar.gz", hash = "sha256:ad8c70e6e3f8926cb8a92619b832b4ea5299e2831c14284663184e200546fa6c"},
-]
-
-[package.dependencies]
-pywin32 = {version = ">=304", markers = "sys_platform == \"win32\""}
-requests = ">=2.26.0"
-urllib3 = ">=1.26.0"
-
-[package.extras]
-dev = ["coverage (==7.2.7)", "pytest (==7.4.2)", "pytest-cov (==4.1.0)", "pytest-timeout (==2.1.0)", "ruff (==0.1.8)"]
-docs = ["myst-parser (==0.18.0)", "sphinx (==5.1.1)"]
-ssh = ["paramiko (>=2.4.3)"]
-websockets = ["websocket-client (>=1.3.0)"]
-
-[[package]]
-name = "idna"
-version = "3.10"
-description = "Internationalized Domain Names in Applications (IDNA)"
-optional = false
-python-versions = ">=3.6"
-files = [
-    {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"},
-    {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"},
-]
-
-[package.extras]
-all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"]
-
 [[package]]
 name = "jinja2"
 version = "3.1.5"
@ -328,55 +180,76 @@ files = [
 ]

 [[package]]
-name = "python-dotenv"
-version = "1.0.1"
-description = "Read key-value pairs from a .env file and set them as environment variables"
+name = "pyyaml"
+version = "6.0.2"
+description = "YAML parser and emitter for Python"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"},
-    {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"},
-]
-
-[package.extras]
-cli = ["click (>=5.0)"]
-
-[[package]]
-name = "pywin32"
-version = "308"
-description = "Python for Window Extensions"
-optional = false
-python-versions = "*"
-files = [
-    {file = "pywin32-308-cp310-cp310-win32.whl", hash = "sha256:796ff4426437896550d2981b9c2ac0ffd75238ad9ea2d3bfa67a1abd546d262e"},
-    {file = "pywin32-308-cp310-cp310-win_amd64.whl", hash = "sha256:4fc888c59b3c0bef905ce7eb7e2106a07712015ea1c8234b703a088d46110e8e"},
-    {file = "pywin32-308-cp310-cp310-win_arm64.whl", hash = "sha256:a5ab5381813b40f264fa3495b98af850098f814a25a63589a8e9eb12560f450c"},
-    {file = "pywin32-308-cp311-cp311-win32.whl", hash = "sha256:5d8c8015b24a7d6855b1550d8e660d8daa09983c80e5daf89a273e5c6fb5095a"},
-    {file = "pywin32-308-cp311-cp311-win_amd64.whl", hash = "sha256:575621b90f0dc2695fec346b2d6302faebd4f0f45c05ea29404cefe35d89442b"},
-    {file = "pywin32-308-cp311-cp311-win_arm64.whl", hash = "sha256:100a5442b7332070983c4cd03f2e906a5648a5104b8a7f50175f7906efd16bb6"},
-    {file = "pywin32-308-cp312-cp312-win32.whl", hash = "sha256:587f3e19696f4bf96fde9d8a57cec74a57021ad5f204c9e627e15c33ff568897"},
-    {file = "pywin32-308-cp312-cp312-win_amd64.whl", hash = "sha256:00b3e11ef09ede56c6a43c71f2d31857cf7c54b0ab6e78ac659497abd2834f47"},
-    {file = "pywin32-308-cp312-cp312-win_arm64.whl", hash = "sha256:9b4de86c8d909aed15b7011182c8cab38c8850de36e6afb1f0db22b8959e3091"},
-    {file = "pywin32-308-cp313-cp313-win32.whl", hash = "sha256:1c44539a37a5b7b21d02ab34e6a4d314e0788f1690d65b48e9b0b89f31abbbed"},
-    {file = "pywin32-308-cp313-cp313-win_amd64.whl", hash = "sha256:fd380990e792eaf6827fcb7e187b2b4b1cede0585e3d0c9e84201ec27b9905e4"},
-    {file = "pywin32-308-cp313-cp313-win_arm64.whl", hash = "sha256:ef313c46d4c18dfb82a2431e3051ac8f112ccee1a34f29c263c583c568db63cd"},
-    {file = "pywin32-308-cp37-cp37m-win32.whl", hash = "sha256:1f696ab352a2ddd63bd07430080dd598e6369152ea13a25ebcdd2f503a38f1ff"},
-    {file = "pywin32-308-cp37-cp37m-win_amd64.whl", hash = "sha256:13dcb914ed4347019fbec6697a01a0aec61019c1046c2b905410d197856326a6"},
-    {file = "pywin32-308-cp38-cp38-win32.whl", hash = "sha256:5794e764ebcabf4ff08c555b31bd348c9025929371763b2183172ff4708152f0"},
-    {file = "pywin32-308-cp38-cp38-win_amd64.whl", hash = "sha256:3b92622e29d651c6b783e368ba7d6722b1634b8e70bd376fd7610fe1992e19de"},
-    {file = "pywin32-308-cp39-cp39-win32.whl", hash = "sha256:7873ca4dc60ab3287919881a7d4f88baee4a6e639aa6962de25a98ba6b193341"},
-    {file = "pywin32-308-cp39-cp39-win_amd64.whl", hash = "sha256:71b3322d949b4cc20776436a9c9ba0eeedcbc9c650daa536df63f0ff111bb920"},
+    {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"},
+    {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"},
+    {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237"},
+    {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b"},
+    {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed"},
+    {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180"},
+    {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68"},
+    {file = "PyYAML-6.0.2-cp310-cp310-win32.whl", hash = "sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99"},
+    {file = "PyYAML-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e"},
+    {file = "PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774"},
+    {file = "PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee"},
+    {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c"},
+    {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317"},
+    {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85"},
+    {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4"},
+    {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e"},
+    {file = "PyYAML-6.0.2-cp311-cp311-win32.whl", hash = "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5"},
+    {file = "PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44"},
+    {file = "PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab"},
+    {file = "PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725"},
+    {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5"},
+    {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425"},
+    {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476"},
+    {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48"},
+    {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b"},
+    {file = "PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4"},
+    {file = "PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8"},
+    {file = "PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba"},
+    {file = "PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1"},
+    {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133"},
+    {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484"},
+    {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5"},
+    {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc"},
+    {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652"},
+    {file = "PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183"},
+    {file = "PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563"},
+    {file = "PyYAML-6.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:24471b829b3bf607e04e88d79542a9d48bb037c2267d7927a874e6c205ca7e9a"},
+    {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7fded462629cfa4b685c5416b949ebad6cec74af5e2d42905d41e257e0869f5"},
+    {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d84a1718ee396f54f3a086ea0a66d8e552b2ab2017ef8b420e92edbc841c352d"},
+    {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9056c1ecd25795207ad294bcf39f2db3d845767be0ea6e6a34d856f006006083"},
+    {file = "PyYAML-6.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:82d09873e40955485746739bcb8b4586983670466c23382c19cffecbf1fd8706"},
+    {file = "PyYAML-6.0.2-cp38-cp38-win32.whl", hash = "sha256:43fa96a3ca0d6b1812e01ced1044a003533c47f6ee8aca31724f78e93ccc089a"},
+    {file = "PyYAML-6.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff"},
+    {file = "PyYAML-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:688ba32a1cffef67fd2e9398a2efebaea461578b0923624778664cc1c914db5d"},
+    {file = "PyYAML-6.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8786accb172bd8afb8be14490a16625cbc387036876ab6ba70912730faf8e1f"},
+    {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290"},
+    {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f753120cb8181e736c57ef7636e83f31b9c0d1722c516f7e86cf15b7aa57ff12"},
+    {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b1fdb9dc17f5a7677423d508ab4f243a726dea51fa5e70992e59a7411c89d19"},
+    {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0b69e4ce7a131fe56b7e4d770c67429700908fc0752af059838b1cfb41960e4e"},
+    {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a9f8c2e67970f13b16084e04f134610fd1d374bf477b17ec1599185cf611d725"},
+    {file = "PyYAML-6.0.2-cp39-cp39-win32.whl", hash = "sha256:6395c297d42274772abc367baaa79683958044e5d3835486c16da75d2a694631"},
+    {file = "PyYAML-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8"},
+    {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"},
 ]

 [[package]]
 name = "referencing"
-version = "0.36.1"
+version = "0.36.2"
 description = "JSON Referencing + Python"
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "referencing-0.36.1-py3-none-any.whl", hash = "sha256:363d9c65f080d0d70bc41c721dce3c7f3e77fc09f269cd5c8813da18069a6794"},
-    {file = "referencing-0.36.1.tar.gz", hash = "sha256:ca2e6492769e3602957e9b831b94211599d2aade9477f5d44110d2530cf9aade"},
+    {file = "referencing-0.36.2-py3-none-any.whl", hash = "sha256:e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0"},
+    {file = "referencing-0.36.2.tar.gz", hash = "sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa"},
 ]

 [package.dependencies]
@ -384,27 +257,6 @@ attrs = ">=22.2.0"
 rpds-py = ">=0.7.0"
 typing-extensions = {version = ">=4.4.0", markers = "python_version < \"3.13\""}

-[[package]]
-name = "requests"
-version = "2.32.3"
-description = "Python HTTP for Humans."
-optional = false
-python-versions = ">=3.8"
-files = [
-    {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"},
-    {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"},
-]
-
-[package.dependencies]
-certifi = ">=2017.4.17"
-charset-normalizer = ">=2,<4"
-idna = ">=2.5,<4"
-urllib3 = ">=1.21.1,<3"
-
-[package.extras]
-socks = ["PySocks (>=1.5.6,!=1.5.7)"]
-use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
-
 [[package]]
 name = "rpds-py"
 version = "0.22.3"
@ -548,24 +400,7 @@ files = [
    {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
 ]

-[[package]]
-name = "urllib3"
-version = "2.3.0"
-description = "HTTP library with thread-safe connection pooling, file post, and more."
-optional = false
-python-versions = ">=3.9"
-files = [
-    {file = "urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df"},
-    {file = "urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d"},
-]
-
-[package.extras]
-brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"]
-h2 = ["h2 (>=4,<5)"]
-socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"]
-zstd = ["zstandard (>=0.18.0)"]
-
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "59543baf4d462d4830e7228ba9eda8ae865416fdabd8ede129492ac45f1926f2"
+content-hash = "42553cae81e184f512ab48272a7d5f22efc9711b293c476fc736c1263d42f6dc"
--- a/arch/tools/pyproject.toml
+++ b/arch/tools/pyproject.toml
@ -1,6 +1,6 @@
 [tool.poetry]
 name = "archgw"
-version = "0.2.0"
+version = "0.2.2"
 description = "Python-based CLI tool to manage Arch Gateway."
 authors = ["Katanemo Labs, Inc."]
 packages = [
@ -10,13 +10,12 @@ readme = "README.md"

 [tool.poetry.dependencies]
 python = "^3.10"
-archgw_modelserver = "^0.2.0"
+archgw_modelserver = "^0.2.2"
 click = "^8.1.7"
 jinja2 = "^3.1.4"
 jsonschema = "^4.23.0"
 setuptools = "75.5.0"
-docker = "^7.1.0"
-python-dotenv = "^1.0.1"
+pyyaml = "^6.0.2"

 [tool.poetry.scripts]
 archgw = "cli.main:main"
--- a/arch/validate_arch_config.sh
+++ b/arch/validate_arch_config.sh
@ -0,0 +1,20 @@
+#!/bin/bash
+# Validate each arch_config file found below the current directory by running config_generator.py in the archgw image.
+failed_files=()
+# find output is NUL-delimited and read on fd 3 so paths containing whitespace survive; only the container's stdout is discarded.
+while IFS= read -r -d '' file <&3; do
+  echo "Validating $file..."
+  if ! docker run --rm -v "$(pwd)/$file:/app/arch_config.yaml:ro" --entrypoint /bin/sh katanemo/archgw:latest -c "python config_generator.py" 2>&1 > /dev/null ; then
+    echo "Validation failed for $file"
+    failed_files+=("$file")
+  fi
+done 3< <(find . \( -name arch_config.yaml -o -name arch_config_full_reference.yaml \) -print0)
+
+# Print summary of failed files; exit non-zero if any file failed validation
+if [ ${#failed_files[@]} -ne 0 ]; then
+  echo -e "\nValidation failed for the following files:"
+  printf '%s\n' "${failed_files[@]}"
+  exit 1
+else
+  echo -e "\nAll files validated successfully!"
+fi
--- a/crates/Cargo.lock
+++ b/crates/Cargo.lock
@ -234,6 +234,8 @@ dependencies = [
 "serde_yaml",
 "thiserror",
 "tiktoken-rs",
+ "url",
+ "urlencoding",
 ]

 [[package]]
@ -477,6 +479,17 @@ dependencies = [
 "winapi",
 ]

+[[package]]
+name = "displaydoc"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.79",
+]
+
 [[package]]
 name = "duration-string"
 version = "0.3.0"
@ -557,6 +570,15 @@ version = "0.1.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f81ec6369c545a7d40e4589b5597581fa1c441fe1cce96dd1de43159910a36a2"

+[[package]]
+name = "form_urlencoded"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456"
+dependencies = [
+ "percent-encoding",
+]
+
 [[package]]
 name = "futures"
 version = "0.3.31"
@ -782,12 +804,151 @@ dependencies = [
 "itoa",
 ]

+[[package]]
+name = "icu_collections"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526"
+dependencies = [
+ "displaydoc",
+ "yoke",
+ "zerofrom",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_locid"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637"
+dependencies = [
+ "displaydoc",
+ "litemap",
+ "tinystr",
+ "writeable",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_locid_transform"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e"
+dependencies = [
+ "displaydoc",
+ "icu_locid",
+ "icu_locid_transform_data",
+ "icu_provider",
+ "tinystr",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_locid_transform_data"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e"
+
+[[package]]
+name = "icu_normalizer"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f"
+dependencies = [
+ "displaydoc",
+ "icu_collections",
+ "icu_normalizer_data",
+ "icu_properties",
+ "icu_provider",
+ "smallvec",
+ "utf16_iter",
+ "utf8_iter",
+ "write16",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_normalizer_data"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516"
+
+[[package]]
+name = "icu_properties"
+version = "1.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5"
+dependencies = [
+ "displaydoc",
+ "icu_collections",
+ "icu_locid_transform",
+ "icu_properties_data",
+ "icu_provider",
+ "tinystr",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_properties_data"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569"
+
+[[package]]
+name = "icu_provider"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9"
+dependencies = [
+ "displaydoc",
+ "icu_locid",
+ "icu_provider_macros",
+ "stable_deref_trait",
+ "tinystr",
+ "writeable",
+ "yoke",
+ "zerofrom",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_provider_macros"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.79",
+]
+
 [[package]]
 name = "id-arena"
 version = "2.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "25a2bc672d1148e28034f176e01fffebb08b35768468cc954630da77a1449005"

+[[package]]
+name = "idna"
+version = "1.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e"
+dependencies = [
+ "idna_adapter",
+ "smallvec",
+ "utf8_iter",
+]
+
+[[package]]
+name = "idna_adapter"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71"
+dependencies = [
+ "icu_normalizer",
+ "icu_properties",
+]
+
 [[package]]
 name = "indexmap"
 version = "2.6.0"
@ -883,6 +1044,12 @@ version = "0.4.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89"

+[[package]]
+name = "litemap"
+version = "0.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104"
+
 [[package]]
 name = "llm_gateway"
 version = "0.1.0"
@ -1028,6 +1195,12 @@ version = "1.0.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"

+[[package]]
+name = "percent-encoding"
+version = "2.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
+
 [[package]]
 name = "pin-project-lite"
 version = "0.2.14"
@ -1547,6 +1720,17 @@ dependencies = [
 "unicode-ident",
 ]

+[[package]]
+name = "synstructure"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.79",
+]
+
 [[package]]
 name = "target-lexicon"
 version = "0.12.16"
@ -1606,6 +1790,16 @@ dependencies = [
 "rustc-hash",
 ]

+[[package]]
+name = "tinystr"
+version = "0.7.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f"
+dependencies = [
+ "displaydoc",
+ "zerovec",
+]
+
 [[package]]
 name = "toml"
 version = "0.8.19"
@ -1676,6 +1870,35 @@ version = "0.2.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861"

+[[package]]
+name = "url"
+version = "2.5.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60"
+dependencies = [
+ "form_urlencoded",
+ "idna",
+ "percent-encoding",
+]
+
+[[package]]
+name = "urlencoding"
+version = "2.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da"
+
+[[package]]
+name = "utf16_iter"
+version = "1.0.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246"
+
+[[package]]
+name = "utf8_iter"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
+
 [[package]]
 name = "uuid"
 version = "1.11.0"
@ -2189,12 +2412,48 @@ dependencies = [
 "wasmparser 0.212.0",
 ]

+[[package]]
+name = "write16"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936"
+
+[[package]]
+name = "writeable"
+version = "0.5.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51"
+
 [[package]]
 name = "yansi"
 version = "1.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049"

+[[package]]
+name = "yoke"
+version = "0.7.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40"
+dependencies = [
+ "serde",
+ "stable_deref_trait",
+ "yoke-derive",
+ "zerofrom",
+]
+
+[[package]]
+name = "yoke-derive"
+version = "0.7.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.79",
+ "synstructure",
+]
+
 [[package]]
 name = "zerocopy"
 version = "0.7.35"
@ -2216,6 +2475,49 @@ dependencies = [
 "syn 2.0.79",
 ]

+[[package]]
+name = "zerofrom"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e"
+dependencies = [
+ "zerofrom-derive",
+]
+
+[[package]]
+name = "zerofrom-derive"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.79",
+ "synstructure",
+]
+
+[[package]]
+name = "zerovec"
+version = "0.10.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079"
+dependencies = [
+ "yoke",
+ "zerofrom",
+ "zerovec-derive",
+]
+
+[[package]]
+name = "zerovec-derive"
+version = "0.10.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.79",
+]
+
 [[package]]
 name = "zstd"
 version = "0.13.2"
--- a/crates/common/Cargo.toml
+++ b/crates/common/Cargo.toml
@ -16,6 +16,8 @@ tiktoken-rs = "0.5.9"
 rand = "0.8.5"
 serde_json = "1.0"
 hex = "0.4.3"
+urlencoding = "2.1.3"
+url = "2.5.4"

 [dev-dependencies]
 pretty_assertions = "1.4.1"
--- a/crates/common/src/api/open_ai.rs
+++ b/crates/common/src/api/open_ai.rs
@ -135,7 +135,10 @@ impl From<String> for ParameterType {
            "array" => ParameterType::List,
            "dict" => ParameterType::Dict,
            "dictionary" => ParameterType::Dict,
-            _ => ParameterType::String,
+            _ => {
+                log::warn!("Unknown parameter type: {}, assuming type str", s);
+                ParameterType::String
+            },
        }
    }
 }
--- a/crates/common/src/configuration.rs
+++ b/crates/common/src/configuration.rs
@ -9,7 +9,6 @@ use crate::api::open_ai::{
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct Configuration {
    pub version: String,
-    pub listener: Listener,
    pub endpoints: Option<HashMap<String, Endpoint>>,
    pub llm_providers: Vec<LlmProvider>,
    pub overrides: Option<Overrides>,
@ -25,6 +24,7 @@ pub struct Configuration {
 #[derive(Debug, Clone, Serialize, Deserialize, Default)]
 pub struct Overrides {
    pub prompt_target_intent_matching_threshold: Option<f64>,
+    pub optimize_context_window: Option<bool>,
 }

 #[derive(Debug, Clone, Serialize, Deserialize, Default)]
@ -47,32 +47,6 @@ pub struct ErrorTargetDetail {
    pub endpoint: Option<EndpointDetails>,
 }

-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct Listener {
-    pub address: String,
-    pub port: u16,
-    pub message_format: MessageFormat,
-    // pub connect_timeout: Option<DurationString>,
-}
-
-impl Default for Listener {
-    fn default() -> Self {
-        Listener {
-            address: "".to_string(),
-            port: 0,
-            message_format: MessageFormat::default(),
-            // connect_timeout: None,
-        }
-    }
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize, Default)]
-pub enum MessageFormat {
-    #[serde(rename = "huggingface")]
-    #[default]
-    Huggingface,
-}
-
 #[derive(Debug, Clone, Serialize, Deserialize, Default)]
 pub struct PromptGuards {
    pub input_guards: HashMap<GuardType, GuardOptions>,
@ -242,6 +216,7 @@ pub struct EndpointDetails {
    pub path: Option<String>,
    #[serde(rename = "http_method")]
    pub method: Option<HttpMethod>,
+    pub http_headers: Option<HashMap<String, String>>,
 }

 #[derive(Debug, Clone, Serialize, Deserialize)]
@ -351,16 +326,6 @@ mod test {
            Some("/agent/summary".to_string())
        );

-        let error_target = config.error_target.as_ref().unwrap();
-        assert_eq!(
-            error_target.endpoint.as_ref().unwrap().name,
-            "error_target_1".to_string()
-        );
-        assert_eq!(
-            error_target.endpoint.as_ref().unwrap().path,
-            Some("/error".to_string())
-        );
-
        let tracing = config.tracing.as_ref().unwrap();
        assert_eq!(tracing.sampling_rate.unwrap(), 0.1);

--- a/crates/common/src/consts.rs
+++ b/crates/common/src/consts.rs
@ -3,7 +3,10 @@ pub const SYSTEM_ROLE: &str = "system";
 pub const USER_ROLE: &str = "user";
 pub const TOOL_ROLE: &str = "tool";
 pub const ASSISTANT_ROLE: &str = "assistant";
-pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
+pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
+pub const DEFAULT_TARGET_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
+pub const API_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
+pub const MODEL_SERVER_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
 pub const MODEL_SERVER_NAME: &str = "model_server";
 pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";
 pub const MESSAGES_KEY: &str = "messages";
--- a/crates/common/src/path.rs
+++ b/crates/common/src/path.rs
@ -1,21 +1,30 @@
-use std::collections::HashMap;
+use std::collections::{HashMap, HashSet};
+use url::Url;
+use urlencoding;
+
+use crate::configuration::Parameter;

 pub fn replace_params_in_path(
    path: &str,
-    params: &HashMap<String, String>,
-) -> Result<String, String> {
-    let mut result = String::new();
-    let mut in_param = false;
+    tool_params: &HashMap<String, String>,
+    prompt_target_params: &[Parameter],
+) -> Result<(String, String, HashMap<String, String>), String> {
+    let mut query_string_replaced = String::new();
    let mut current_param = String::new();
+    let mut vars_replaced = HashSet::new();
+    let mut params: HashMap<String, String> = HashMap::new();

+    let mut in_param = false;
    for c in path.chars() {
        if c == '{' {
            in_param = true;
        } else if c == '}' {
            in_param = false;
            let param_name = current_param.clone();
-            if let Some(value) = params.get(&param_name) {
-                result.push_str(value);
+            if let Some(value) = tool_params.get(&param_name) {
+                let value = urlencoding::encode(value);
+                query_string_replaced.push_str(value.into_owned().as_str());
+                vars_replaced.insert(param_name.clone());
            } else {
                return Err(format!("Missing value for parameter `{}`", param_name));
            }
@ -23,31 +32,106 @@ pub fn replace_params_in_path(
        } else if in_param {
            current_param.push(c);
        } else {
-            result.push(c);
+            query_string_replaced.push(c);
        }
    }

-    Ok(result)
+    // add the remaining params in path
+    for (param_name, value) in tool_params.iter() {
+        let value = urlencoding::encode(value).into_owned();
+        if !vars_replaced.contains(param_name) {
+            vars_replaced.insert(param_name.clone());
+            params.insert(param_name.clone(), value.clone());
+            if query_string_replaced.contains("?") {
+                query_string_replaced.push_str(&format!("&{}={}", param_name, value));
+            } else {
+                query_string_replaced.push_str(&format!("?{}={}", param_name, value));
+            }
+        }
+    }
+
+    // add default values
+    for param in prompt_target_params.iter() {
+        if !vars_replaced.contains(&param.name) && param.default.is_some() {
+            params.insert(param.name.clone(), param.default.clone().unwrap());
+            if query_string_replaced.contains("?") {
+                query_string_replaced.push_str(&format!(
+                    "&{}={}",
+                    param.name,
+                    param.default.as_ref().unwrap()
+                ));
+            } else {
+                query_string_replaced.push_str(&format!(
+                    "?{}={}",
+                    param.name,
+                    param.default.as_ref().unwrap()
+                ));
+            }
+        }
+    }
+
+    let parsed_uri = Url::parse("http://dummy.com").unwrap();
+    let parsed_uri = parsed_uri
+        .join(&query_string_replaced)
+        .map_err(|e| e.to_string())?;
+    let query_string = parsed_uri.query().unwrap_or("");
+    let path_uri = parsed_uri.path();
+
+    Ok((path_uri.to_string(), query_string.to_string(), params))
 }

 #[cfg(test)]
 mod test {
+    use std::collections::HashMap;
+
+    use crate::configuration::Parameter;
+
    #[test]
    fn test_replace_path() {
        let path = "/cluster.open-cluster-management.io/v1/managedclusters/{cluster_name}";
-        let params = vec![("cluster_name".to_string(), "test1".to_string())]
-            .into_iter()
-            .collect();
+        let params = vec![
+            ("cluster_name".to_string(), "test1".to_string()),
+            ("hello".to_string(), "hello world".to_string()),
+        ]
+        .into_iter()
+        .collect();
+        let prompt_target_params = vec![Parameter {
+            name: "country".to_string(),
+            parameter_type: None,
+            description: "test target".to_string(),
+            required: None,
+            enum_values: None,
+            default: Some("US".to_string()),
+            in_path: None,
+            format: None,
+        }];
+
+        let out_params: HashMap<String, String> = vec![
+            ("country".to_string(), "US".to_string()),
+            ("hello".to_string(), "hello%20world".to_string()),
+        ]
+        .into_iter()
+        .collect();
        assert_eq!(
-            super::replace_params_in_path(path, &params),
-            Ok("/cluster.open-cluster-management.io/v1/managedclusters/test1".to_string())
+            super::replace_params_in_path(path, &params, &prompt_target_params),
+            Ok((
+                "/cluster.open-cluster-management.io/v1/managedclusters/test1".to_string(),
+                "hello=hello%20world&country=US".to_string(),
+                out_params.clone()
+            ))
        );

+        let out_params = HashMap::new();
+        let prompt_target_params = vec![];
        let path = "/cluster.open-cluster-management.io/v1/managedclusters";
        let params = vec![].into_iter().collect();
        assert_eq!(
-            super::replace_params_in_path(path, &params),
-            Ok("/cluster.open-cluster-management.io/v1/managedclusters".to_string())
+            super::replace_params_in_path(path, &params, &prompt_target_params),
+            Ok((
+                "/cluster.open-cluster-management.io/v1/managedclusters".to_string(),
+                "".to_string(),
+                out_params
+            ))
        );

        let path = "/foo/{bar}/baz";
@ -55,8 +139,8 @@ mod test {
            .into_iter()
            .collect();
        assert_eq!(
-            super::replace_params_in_path(path, &params),
-            Ok("/foo/qux/baz".to_string())
+            super::replace_params_in_path(path, &params, &prompt_target_params),
+            Ok(("/foo/qux/baz".to_string(), "".to_string(), HashMap::new()))
        );

        let path = "/foo/{bar}/baz/{qux}";
@ -67,8 +151,45 @@ mod test {
        .into_iter()
        .collect();
        assert_eq!(
-            super::replace_params_in_path(path, &params),
-            Ok("/foo/qux/baz/quux".to_string())
+            super::replace_params_in_path(path, &params, &prompt_target_params),
+            Ok((
+                "/foo/qux/baz/quux".to_string(),
+                "".to_string(),
+                HashMap::new()
+            ))
+        );
+
+        let path = "/foo/{bar}/baz/{qux}?hello=world";
+        let params = vec![
+            ("bar".to_string(), "qux".to_string()),
+            ("qux".to_string(), "quux".to_string()),
+        ]
+        .into_iter()
+        .collect();
+        assert_eq!(
+            super::replace_params_in_path(path, &params, &prompt_target_params),
+            Ok((
+                "/foo/qux/baz/quux".to_string(),
+                "hello=world".to_string(),
+                HashMap::new()
+            ))
+        );
+
+        let path = "/foo/{bar}/baz/{qux}?hello={hello}";
+        let params = vec![
+            ("bar".to_string(), "qux".to_string()),
+            ("qux".to_string(), "quux".to_string()),
+            ("hello".to_string(), "hello world".to_string()),
+        ]
+        .into_iter()
+        .collect();
+        assert_eq!(
+            super::replace_params_in_path(path, &params, &prompt_target_params),
+            Ok((
+                "/foo/qux/baz/quux".to_string(),
+                "hello=hello%20world".to_string(),
+                HashMap::new()
+            ))
        );

        let path = "/foo/{bar}/baz/{qux}";
@ -76,7 +197,7 @@ mod test {
            .into_iter()
            .collect();
        assert_eq!(
-            super::replace_params_in_path(path, &params),
+            super::replace_params_in_path(path, &params, &prompt_target_params),
            Err("Missing value for parameter `qux`".to_string())
        );
    }
--- a/crates/common/src/tracing.rs
+++ b/crates/common/src/tracing.rs
@ -166,7 +166,7 @@ impl TraceData {
                attributes: vec![Attribute {
                    key: "service.name".to_string(),
                    value: AttributeValue {
-                        string_value: Some("upstream-llm".to_string()),
+                        string_value: Some("egress_llm_traffic".to_string()),
                    },
                }],
            };
--- a/crates/llm_gateway/Cargo.lock
+++ b/crates/llm_gateway/Cargo.lock
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@ -5,7 +5,7 @@ use common::api::open_ai::{
 };
 use common::configuration::LlmProvider;
 use common::consts::{
-    ARCH_PROVIDER_HINT_HEADER, ARCH_ROUTING_HEADER, CHAT_COMPLETIONS_PATH,
+    ARCH_PROVIDER_HINT_HEADER, ARCH_ROUTING_HEADER, CHAT_COMPLETIONS_PATH, HEALTHZ_PATH,
    RATELIMIT_SELECTOR_HEADER_KEY, REQUEST_ID_HEADER, TRACE_PARENT_HEADER,
 };
 use common::errors::ServerError;
@ -176,6 +176,12 @@ impl HttpContext for StreamContext {
    // Envoy's HTTP model is event driven. The WASM ABI has given implementors events to hook onto
    // the lifecycle of the http request and response.
    fn on_http_request_headers(&mut self, _num_headers: usize, _end_of_stream: bool) -> Action {
+        let request_path = self.get_http_request_header(":path").unwrap_or_default();
+        if request_path == HEALTHZ_PATH {
+            self.send_http_response(200, vec![], None);
+            return Action::Continue;
+        }
+
        self.select_llm_provider();

        // if endpoint is not set then use provider name as routing header so envoy can resolve the cluster name
@ -381,7 +387,7 @@ impl HttpContext for StreamContext {
                    Ok(traceparent) => {
                        let mut trace_data = common::tracing::TraceData::new();
                        let mut llm_span = Span::new(
-                            "upstream_llm_time".to_string(),
+                            "egress_traffic".to_string(),
                            Some(traceparent.trace_id),
                            Some(traceparent.parent_id),
                            self.request_body_sent_time.unwrap(),
--- a/crates/llm_gateway/tests/integration.rs
+++ b/crates/llm_gateway/tests/integration.rs
@ -18,6 +18,8 @@ fn wasm_module() -> String {
 fn request_headers_expectations(module: &mut Tester, http_context: i32) {
    module
        .call_proxy_on_request_headers(http_context, 0, false)
+        .expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some(":path"))
+        .returning(Some("/v1/chat/completions"))
        .expect_get_header_map_value(
            Some(MapType::HttpRequestHeaders),
            Some("x-arch-llm-provider-hint"),
--- a/crates/prompt_gateway/Cargo.lock
+++ b/crates/prompt_gateway/Cargo.lock
--- a/crates/prompt_gateway/src/http_context.rs
+++ b/crates/prompt_gateway/src/http_context.rs
@ -6,7 +6,8 @@ use common::{
    consts::{
        ARCH_FC_MODEL_NAME, ARCH_INTERNAL_CLUSTER_NAME, ARCH_STATE_HEADER,
        ARCH_UPSTREAM_HOST_HEADER, ASSISTANT_ROLE, CHAT_COMPLETIONS_PATH, HEALTHZ_PATH,
-        MODEL_SERVER_NAME, REQUEST_ID_HEADER, TOOL_ROLE, TRACE_PARENT_HEADER, USER_ROLE,
+        MODEL_SERVER_NAME, MODEL_SERVER_REQUEST_TIMEOUT_MS, REQUEST_ID_HEADER, TOOL_ROLE,
+        TRACE_PARENT_HEADER, USER_ROLE,
    },
    errors::ServerError,
    http::{CallArgs, Client},
@ -137,9 +138,23 @@ impl HttpContext for StreamContext {
            .map(|(_, pt)| pt.into())
            .collect();

+        let mut metadata = deserialized_body.metadata.clone();
+
+        if let Some(overrides) = self.overrides.as_ref() {
+            if overrides.optimize_context_window.unwrap_or_default() {
+                if metadata.is_none() {
+                    metadata = Some(HashMap::new());
+                }
+                metadata
+                    .as_mut()
+                    .unwrap()
+                    .insert("optimize_context_window".to_string(), "true".to_string());
+            }
+        }
+
        let arch_fc_chat_completion_request = ChatCompletionsRequest {
            messages: deserialized_body.messages.clone(),
-            metadata: deserialized_body.metadata.clone(),
+            metadata,
            stream: deserialized_body.stream,
            model: "--".to_string(),
            stream_options: deserialized_body.stream_options.clone(),
@ -159,12 +174,15 @@ impl HttpContext for StreamContext {
        debug!("sending request to model server");
        trace!("request body: {}", json_data);

+        let timeout_str = MODEL_SERVER_REQUEST_TIMEOUT_MS.to_string();
+
        let mut headers = vec![
            (ARCH_UPSTREAM_HOST_HEADER, MODEL_SERVER_NAME),
            (":method", "POST"),
            (":path", "/function_calling"),
            ("content-type", "application/json"),
            (":authority", MODEL_SERVER_NAME),
+            ("x-envoy-upstream-rq-timeout-ms", timeout_str.as_str()),
        ];

        if self.request_id.is_some() {
--- a/crates/prompt_gateway/src/lib.rs
+++ b/crates/prompt_gateway/src/lib.rs
@ -7,6 +7,7 @@ mod filter_context;
 mod http_context;
 mod metrics;
 mod stream_context;
+mod tools;

 proxy_wasm::main! {{
    proxy_wasm::set_log_level(LogLevel::Trace);
--- a/crates/prompt_gateway/src/stream_context.rs
+++ b/crates/prompt_gateway/src/stream_context.rs
@ -1,13 +1,14 @@
 use crate::metrics::Metrics;
+use crate::tools::compute_request_path_body;
 use common::api::open_ai::{
    to_server_events, ArchState, ChatCompletionStreamResponse, ChatCompletionsRequest,
    ChatCompletionsResponse, Message, ModelServerResponse, ToolCall,
 };
 use common::configuration::{Overrides, PromptTarget, Tracing};
 use common::consts::{
-    ARCH_FC_MODEL_NAME, ARCH_FC_REQUEST_TIMEOUT_MS, ARCH_INTERNAL_CLUSTER_NAME,
-    ARCH_UPSTREAM_HOST_HEADER, ASSISTANT_ROLE, MESSAGES_KEY, REQUEST_ID_HEADER, SYSTEM_ROLE,
-    TOOL_ROLE, TRACE_PARENT_HEADER, USER_ROLE,
+    API_REQUEST_TIMEOUT_MS, ARCH_FC_MODEL_NAME, ARCH_INTERNAL_CLUSTER_NAME,
+    ARCH_UPSTREAM_HOST_HEADER, ASSISTANT_ROLE, DEFAULT_TARGET_REQUEST_TIMEOUT_MS, MESSAGES_KEY,
+    REQUEST_ID_HEADER, SYSTEM_ROLE, TOOL_ROLE, TRACE_PARENT_HEADER, USER_ROLE,
 };
 use common::errors::ServerError;
 use common::http::{CallArgs, Client};
@ -16,7 +17,6 @@ use derivative::Derivative;
 use http::StatusCode;
 use log::{debug, trace, warn};
 use proxy_wasm::traits::*;
-use serde_yaml::Value;
 use std::cell::RefCell;
 use std::collections::HashMap;
 use std::rc::Rc;
@ -46,7 +46,7 @@ pub struct StreamCallContext {
 pub struct StreamContext {
    system_prompt: Rc<Option<String>>,
    pub prompt_targets: Rc<HashMap<String, PromptTarget>>,
-    _overrides: Rc<Option<Overrides>>,
+    pub overrides: Rc<Option<Overrides>>,
    pub metrics: Rc<Metrics>,
    pub callouts: RefCell<HashMap<u32, StreamCallContext>>,
    pub context_id: u32,
@ -89,7 +89,7 @@ impl StreamContext {
            streaming_response: false,
            user_prompt: None,
            is_chat_completions_request: false,
-            _overrides: overrides,
+            overrides,
            request_id: None,
            traceparent: None,
            _tracing: tracing,
@ -160,7 +160,7 @@ impl StreamContext {
                            callout_context.request_body.messages.clone(),
                        );
                        let arch_messages_json = serde_json::to_string(&params).unwrap();
-                        let timeout_str = ARCH_FC_REQUEST_TIMEOUT_MS.to_string();
+                        let timeout_str = DEFAULT_TARGET_REQUEST_TIMEOUT_MS.to_string();

                        let mut headers = vec![
                            (":method", "POST"),
@ -273,84 +273,79 @@ impl StreamContext {

    fn schedule_api_call_request(&mut self, mut callout_context: StreamCallContext) {
        let tools_call_name = self.tool_calls.as_ref().unwrap()[0].function.name.clone();
+        let prompt_target = self.prompt_targets.get(&tools_call_name).unwrap();
+        let tool_params = &self.tool_calls.as_ref().unwrap()[0].function.arguments;
+        let endpoint_details = prompt_target.endpoint.as_ref().unwrap();
+        let endpoint_path: String = endpoint_details
+            .path
+            .as_ref()
+            .unwrap_or(&String::from("/"))
+            .to_string();

-        let prompt_target = self.prompt_targets.get(&tools_call_name).unwrap().clone();
+        let http_method = endpoint_details.method.clone().unwrap_or_default();
+        let prompt_target_params = prompt_target.parameters.clone().unwrap_or_default();

-        let mut tool_params = self.tool_calls.as_ref().unwrap()[0]
-            .function
-            .arguments
-            .clone();
-        tool_params.insert(
-            String::from(MESSAGES_KEY),
-            serde_yaml::to_value(&callout_context.request_body.messages).unwrap(),
-        );
-
-        let tool_params_json_str = serde_json::to_string(&tool_params).unwrap();
-
-        let endpoint = prompt_target.endpoint.unwrap();
-        let path: String = endpoint.path.unwrap_or(String::from("/"));
-
-        // only add params that are of string, number and bool type
-        let url_params = tool_params
-            .iter()
-            .filter(|(_, value)| value.is_number() || value.is_string() || value.is_bool())
-            .map(|(key, value)| match value {
-                Value::Number(n) => (key.clone(), n.to_string()),
-                Value::String(s) => (key.clone(), s.clone()),
-                Value::Bool(b) => (key.clone(), b.to_string()),
-                Value::Null => todo!(),
-                Value::Sequence(_) => todo!(),
-                Value::Mapping(_) => todo!(),
-                Value::Tagged(_) => todo!(),
-            })
-            .collect::<HashMap<String, String>>();
-
-        let path = match common::path::replace_params_in_path(&path, &url_params) {
-            Ok(path) => path,
+        let (path, body) = match compute_request_path_body(
+            &endpoint_path,
+            tool_params,
+            &prompt_target_params,
+            &http_method,
+        ) {
+            Ok((path, body)) => (path, body),
            Err(e) => {
                return self.send_server_error(
                    ServerError::BadRequest {
-                        why: format!("error replacing params in path: {}", e),
+                        why: format!("error computing api request path or body: {}", e),
                    },
                    Some(StatusCode::BAD_REQUEST),
                );
            }
        };

-        let http_method = endpoint.method.unwrap_or_default().to_string();
-        let mut headers = vec![
-            (ARCH_UPSTREAM_HOST_HEADER, endpoint.name.as_str()),
-            (":method", &http_method),
+        let timeout_str = API_REQUEST_TIMEOUT_MS.to_string();
+
+        let http_method_str = http_method.to_string();
+        let mut headers: HashMap<_, _> = [
+            (ARCH_UPSTREAM_HOST_HEADER, endpoint_details.name.as_str()),
+            (":method", &http_method_str),
            (":path", &path),
-            (":authority", endpoint.name.as_str()),
+            (":authority", endpoint_details.name.as_str()),
            ("content-type", "application/json"),
            ("x-envoy-max-retries", "3"),
-        ];
+            ("x-envoy-upstream-rq-timeout-ms", timeout_str.as_str()),
+        ]
+        .into_iter()
+        .collect();

        if self.request_id.is_some() {
-            headers.push((REQUEST_ID_HEADER, self.request_id.as_ref().unwrap()));
+            headers.insert(REQUEST_ID_HEADER, self.request_id.as_ref().unwrap());
        }

        if self.traceparent.is_some() {
-            headers.push((TRACE_PARENT_HEADER, self.traceparent.as_ref().unwrap()));
+            headers.insert(TRACE_PARENT_HEADER, self.traceparent.as_ref().unwrap());
+        }
+
+        // override http headers that are set in the prompt target
+        let http_headers = endpoint_details.http_headers.clone().unwrap_or_default();
+        for (key, value) in http_headers.iter() {
+            headers.insert(key.as_str(), value.as_str());
        }

        let call_args = CallArgs::new(
            ARCH_INTERNAL_CLUSTER_NAME,
            &path,
-            headers,
-            Some(tool_params_json_str.as_bytes()),
+            headers.into_iter().collect(),
+            body.as_deref().map(|s| s.as_bytes()),
            vec![],
            Duration::from_secs(5),
        );

        debug!(
-            "dispatching api call to developer endpoint: {}, path: {}",
-            endpoint.name, path
+            "dispatching api call to developer endpoint: {}, path: {}, method: {}",
+            endpoint_details.name, path, http_method_str
        );
-        trace!("request body: {}", tool_params_json_str);

-        callout_context.upstream_cluster = Some(endpoint.name.to_owned());
+        callout_context.upstream_cluster = Some(endpoint_details.name.to_owned());
        callout_context.upstream_cluster_path = Some(path.to_owned());
        callout_context.response_handler_type = ResponseHandlerType::FunctionCall;

--- a/crates/prompt_gateway/src/tools.rs
+++ b/crates/prompt_gateway/src/tools.rs
@ -0,0 +1,157 @@
+use common::configuration::{HttpMethod, Parameter};
+use std::collections::HashMap;
+
+use serde_yaml::Value;
+
+/// Converts scalar tool-call arguments into their string representation.
+///
+/// Only string, number and bool values are kept; null, sequence, mapping and
+/// tagged values are dropped from the result.
+pub fn filter_tool_params(tool_params: &HashMap<String, Value>) -> HashMap<String, String> {
+    tool_params
+        .iter()
+        .filter_map(|(key, value)| {
+            let rendered = match value {
+                Value::Number(n) => n.to_string(),
+                Value::String(s) => s.clone(),
+                Value::Bool(b) => b.to_string(),
+                // non-scalar values cannot be rendered as a single url/body param, skip them
+                Value::Null | Value::Sequence(_) | Value::Mapping(_) | Value::Tagged(_) => {
+                    return None
+                }
+            };
+            Some((key.clone(), rendered))
+        })
+        .collect()
+}
+
+/// Builds the request path and optional JSON body for a prompt-target API call.
+///
+/// `{name}` placeholders in `endpoint_path` are substituted through
+/// `common::path::replace_params_in_path`, which also returns the query string and
+/// the params that were not consumed by a placeholder.
+///
+/// * `HttpMethod::Get`  - returns `path?query` (just `path` when the query string is
+///   empty) and no body.
+/// * `HttpMethod::Post` - returns the bare path and a JSON object body built from the
+///   extra params plus every `key=value` entry of the query string.
+///
+/// Returns `Err` when parameter substitution fails or the body cannot be serialized.
+pub fn compute_request_path_body(
+    endpoint_path: &str,
+    tool_params: &HashMap<String, Value>,
+    prompt_target_params: &[Parameter],
+    http_method: &HttpMethod,
+) -> Result<(String, Option<String>), String> {
+    let tool_url_params = filter_tool_params(tool_params);
+    let (path_with_params, query_string, mut additional_params) =
+        common::path::replace_params_in_path(
+            endpoint_path,
+            &tool_url_params,
+            prompt_target_params,
+        )?;
+
+    match http_method {
+        HttpMethod::Get => {
+            // avoid a dangling `?` when there is nothing to put in the query string
+            let path = if query_string.is_empty() {
+                path_with_params
+            } else {
+                format!("{}?{}", path_with_params, query_string)
+            };
+            Ok((path, None))
+        }
+        HttpMethod::Post => {
+            // fold query-string entries into the body; values are copied exactly as they
+            // appear in the query string (no percent-decoding is applied here)
+            for pair in query_string.split('&').filter(|pair| !pair.is_empty()) {
+                // split on the first `=` only so values containing `=` stay intact, and
+                // treat a bare key (no `=`) as an empty value instead of panicking
+                let (key, value) = pair.split_once('=').unwrap_or((pair, ""));
+                additional_params.insert(key.to_string(), value.to_string());
+            }
+            let body = serde_json::to_string(&additional_params).map_err(|e| e.to_string())?;
+            Ok((path_with_params, Some(body)))
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use common::configuration::{HttpMethod, Parameter};
+
+    /// Prompt-target parameter `country` with default value `US`, shared by the tests below.
+    fn country_param() -> Parameter {
+        Parameter {
+            name: "country".to_string(),
+            parameter_type: None,
+            description: "test target".to_string(),
+            required: None,
+            enum_values: None,
+            default: Some("US".to_string()),
+            in_path: None,
+            format: None,
+        }
+    }
+
+    /// GET: the path placeholder is filled in, while the leftover tool param and the
+    /// default value end up in the query string.
+    #[test]
+    fn test_compute_request_path_body() {
+        let endpoint_path = "/cluster.open-cluster-management.io/v1/managedclusters/{cluster_name}";
+        let tool_params = serde_yaml::from_str(
+            r#"
+      cluster_name: test1
+      hello: hello world
+      "#,
+        )
+        .unwrap();
+        let prompt_target_params = vec![country_param()];
+        let http_method = HttpMethod::Get;
+        let (path, body) = super::compute_request_path_body(
+            endpoint_path,
+            &tool_params,
+            &prompt_target_params,
+            &http_method,
+        )
+        .unwrap();
+        assert_eq!(
+            path,
+            "/cluster.open-cluster-management.io/v1/managedclusters/test1?hello=hello%20world&country=US"
+        );
+        assert_eq!(body, None);
+    }
+
+    /// GET: with no tool params, only the default value is added to the query string.
+    #[test]
+    fn test_compute_request_path_body_empty_params() {
+        let endpoint_path = "/cluster.open-cluster-management.io/v1/managedclusters/";
+        let tool_params = serde_yaml::from_str(r#"{}"#).unwrap();
+        let prompt_target_params = vec![country_param()];
+        let http_method = HttpMethod::Get;
+        let (path, body) = super::compute_request_path_body(
+            endpoint_path,
+            &tool_params,
+            &prompt_target_params,
+            &http_method,
+        )
+        .unwrap();
+        assert_eq!(
+            path,
+            "/cluster.open-cluster-management.io/v1/managedclusters/?country=US"
+        );
+        assert_eq!(body, None);
+    }
+
+    /// GET: a value supplied by the tool call takes precedence over the default.
+    #[test]
+    fn test_compute_request_path_body_override_default_val() {
+        let endpoint_path = "/cluster.open-cluster-management.io/v1/managedclusters/";
+        let tool_params = serde_yaml::from_str(
+            r#"
+      country: UK
+      "#,
+        )
+        .unwrap();
+        let prompt_target_params = vec![country_param()];
+        let http_method = HttpMethod::Get;
+        let (path, body) = super::compute_request_path_body(
+            endpoint_path,
+            &tool_params,
+            &prompt_target_params,
+            &http_method,
+        )
+        .unwrap();
+        assert_eq!(
+            path,
+            "/cluster.open-cluster-management.io/v1/managedclusters/?country=UK"
+        );
+        assert_eq!(body, None);
+    }
+
+    /// POST: leftover tool params are sent as a JSON body and kept out of the path.
+    #[test]
+    fn test_compute_request_path_body_post() {
+        let endpoint_path = "/weather";
+        let tool_params = serde_yaml::from_str(r#"city: seattle"#).unwrap();
+        let prompt_target_params: Vec<Parameter> = Vec::new();
+        let http_method = HttpMethod::Post;
+        let (path, body) = super::compute_request_path_body(
+            endpoint_path,
+            &tool_params,
+            &prompt_target_params,
+            &http_method,
+        )
+        .unwrap();
+        assert_eq!(path, "/weather");
+        assert_eq!(body, Some("{\"city\":\"seattle\"}".to_string()));
+    }
+}
--- a/crates/prompt_gateway/tests/integration.rs
+++ b/crates/prompt_gateway/tests/integration.rs
@ -81,10 +81,11 @@ fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) {
                (":path", "/function_calling"),
                ("content-type", "application/json"),
                (":authority", "model_server"),
+                ("x-envoy-upstream-rq-timeout-ms", "30000"),
            ]),
            None,
            None,
-            None,
+            Some(5000),
        )
        .returning(Some(1))
        .expect_log(Some(LogLevel::Trace), None)
@ -365,6 +366,7 @@ fn prompt_gateway_request_to_llm_gateway() {
        metadata: None,
    };

+    let expected_body = "{\"city\":\"seattle\"}";
    let arch_fc_resp_str = serde_json::to_string(&arch_fc_resp).unwrap();
    module
        .call_proxy_on_http_call_response(http_context, 1, 0, arch_fc_resp_str.len() as i32, 0)
@ -377,20 +379,20 @@ fn prompt_gateway_request_to_llm_gateway() {
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Trace), None)
        .expect_log(Some(LogLevel::Trace), None)
-        .expect_log(Some(LogLevel::Trace), None)
        .expect_http_call(
            Some("arch_internal"),
            Some(vec![
-                ("x-arch-upstream", "api_server"),
                (":method", "POST"),
-                (":path", "/weather"),
-                (":authority", "api_server"),
                ("content-type", "application/json"),
+                ("x-arch-upstream", "api_server"),
+                (":authority", "api_server"),
                ("x-envoy-max-retries", "3"),
+                (":path", "/weather"),
+                ("x-envoy-upstream-rq-timeout-ms", "30000"),
            ]),
+            Some(expected_body),
            None,
-            None,
-            None,
+            Some(5000),
        )
        .returning(Some(2))
        .expect_metric_increment("active_http_calls", 1)
--- a/demos/insurance_agent/Dockerfile
+++ b/demos/insurance_agent/Dockerfile
@ -1,19 +0,0 @@
-FROM python:3 AS base
-
-FROM base AS builder
-
-WORKDIR /src
-
-COPY requirements.txt /src/
-RUN pip install --prefix=/runtime --force-reinstall -r requirements.txt
-
-COPY . /src
-
-FROM python:3-slim AS output
-
-COPY --from=builder /runtime /usr/local
-
-COPY . /app
-WORKDIR /app
-
-CMD ["uvicorn", "insurance_agent_main:app", "--host", "0.0.0.0", "--port", "80", "--log-level", "info"]
--- a/demos/insurance_agent/README.md
+++ b/demos/insurance_agent/README.md
@ -1,58 +0,0 @@
-# Insurance Agent Demo
-
-This demo showcases how the **Arch** can be used to manage insurance-related tasks such as policy inquiries, initiating policies, and updating claims or deductibles. In this demo, the assistant provides factual information related to insurance policies (e.g., car, boat, house, motorcycle).
-
-The system can perform a variety of tasks, such as answering insurance-related questions, retrieving policy coverage details, initiating policies, and updating claims or deductibles.
-
-## Available Functions:
-
- **Policy Q/A**: Handles general Q&A related to insurance policies.
-  - **Endpoint**: `/policy/qa`
-  - This function answers general inquiries related to insurance, such as coverage details or policy types. It is the default target for insurance-related queries.
-
- **Get Policy Coverage**: Retrieves the coverage details for a given policy type (car, boat, house, motorcycle).
-  - **Endpoint**: `/policy/coverage`
-  - Parameters:
-    - `policy_type` (required): The type of policy. Available options: `car`, `boat`, `house`, `motorcycle`. Defaults to `car`.
-
- **Initiate Policy**: Starts a policy coverage for car, boat, motorcycle, or house.
-  - **Endpoint**: `/policy/initiate`
-  - Parameters:
-    - `policy_type` (required): The type of policy. Available options: `car`, `boat`, `house`, `motorcycle`. Defaults to `car`.
-    - `deductible` (required): The deductible amount set for the policy.
-
- **Update Claim**: Updates the notes on a specific insurance claim.
-  - **Endpoint**: `/policy/claim`
-  - Parameters:
-    - `claim_id` (required): The claim number.
-    - `notes` (optional): Notes about the claim number for the adjustor to see.
-
- **Update Deductible**: Updates the deductible amount for a specific policy coverage.
-  - **Endpoint**: `/policy/deductible`
-  - Parameters:
-    - `policy_id` (required): The ID of the policy.
-    - `deductible` (required): The deductible amount to be set for the policy.
-
-**Arch** is designed to intelligently routes prompts to the appropriate functions based on the target, allowing for seamless interaction with various insurance-related services.
-
-# Starting the demo
-1. Please make sure the [pre-requisites](https://github.com/katanemo/arch/?tab=readme-ov-file#prerequisites) are installed correctly
-2. Start Arch
-   ```sh
-   sh run_demo.sh
-   ```
-3. Navigate to http://localhost:18080/
-4. Tell me what can you do for me?"
-
-# Observability
-Arch gateway publishes stats endpoint at http://localhost:19901/stats. In this demo we are using prometheus to pull stats from arch and we are using grafana to visalize the stats in dashboard. To see grafana dashboard follow instructions below,
-
-1. Start grafana and prometheus using following command
-   ```yaml
-   docker compose --profile monitoring up
-   ```
-1. Navigate to http://localhost:3000/ to open grafana UI (use admin/grafana as credentials)
-1. From grafana left nav click on dashboards and select "Intelligent Gateway Overview" to view arch gateway stats
-
-Here is sample interaction,
-<img width="575" alt="image" src="https://github.com/user-attachments/assets/25d40f46-616e-41ea-be8e-1623055c84ec">
--- a/demos/insurance_agent/arch_config.yaml
+++ b/demos/insurance_agent/arch_config.yaml
@ -1,105 +0,0 @@
-version: v0.1
-# Address and port on which the gateway listens for incoming prompts.
-listener:
-  address: 127.0.0.1
-  port: 8080  # If you configure port 443, you'll need to update the listener with tls_certificates
-  message_format: huggingface
-
-# System prompt for the insurance assistant.
-system_prompt: |
-  You are an insurance assistant that just offers guidance related to car, boat, rental and home insurance only. Please be precise and summarize based on the context provided.
-
-llm_providers:
-  - name: OpenAI
-    provider_interface: openai
-    # read from the environment; never commit the key itself
-    access_key: $OPENAI_API_KEY
-    model: gpt-4o
-    default: true
-
-# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
-endpoints:
-  app_server:
-    # value could be ip address or a hostname with port
-    # this could also be a list of endpoints for load balancing
-    # for example endpoint: [ ip1:port, ip2:port ]
-    endpoint: host.docker.internal:18083
-    # max time to wait for a connection to be established (0.05s = 50 ms)
-    connect_timeout: 0.05s
-
-# Each prompt target maps a user intent to one POST route on app_server.
-prompt_targets:
-  # Default target: general insurance questions.
-  - name: policy_qa
-    endpoint:
-      name: app_server
-      path: /policy/qa
-      http_method: POST
-    description: Handle general Q/A related to insurance.
-    default: true
-
-  - name: get_policy_coverage
-    description: Retrieve the coverage details for an insurance policy.
-    endpoint:
-      name: app_server
-      path: /policy/coverage
-      http_method: POST
-    parameters:
-    - name: policy_type
-      type: str
-      description: The type of policy
-      default: car
-      required: true
-
-  - name: initiate_policy
-    endpoint:
-      name: app_server
-      path: /policy/initiate
-      http_method: POST
-    description: Start a policy coverage for an insurance policy
-    parameters:
-    - name: policy_type
-      type: str
-      description: The type of policy
-      default: car
-      required: true
-    - name: deductible
-      type: float
-      description: the deductible amount set of the policy
-      required: true
-
-  - name: update_claim
-    endpoint:
-      name: app_server
-      path: /policy/claim
-      http_method: POST
-    description: Update the notes on the claim
-    parameters:
-    - name: claim_id
-      type: str
-      description: the claim number
-      required: true
-    - name: notes
-      type: str
-      description: notes about the claim number for your adjustor to see
-      required: false
-
-  - name: update_deductible
-    endpoint:
-      name: app_server
-      path: /policy/deductible
-      http_method: POST
-    description: Update the deductible amount for a specific insurance policy coverage.
-    parameters:
-    - name: policy_id
-      type: str
-      description: The id of the insurance policy
-      required: true
-    - name: deductible
-      type: float
-      description: the deductible amount set of the policy
-      required: true
-
-# NOTE(review): this limit names model gpt-4 while the only configured provider uses gpt-4o;
-# confirm which model the limit is meant to apply to.
-ratelimits:
-  - model: gpt-4
-    selector:
-      key: selector-key
-      value: selector-value
-    limit:
-      tokens: 1
-      unit: minute
--- a/demos/insurance_agent/insurance_agent_main.py
+++ b/demos/insurance_agent/insurance_agent_main.py
@ -1,140 +0,0 @@
-from fastapi import FastAPI, HTTPException
-from pydantic import BaseModel, Field
-
-app = FastAPI()
-
-
-class Conversation(BaseModel):
-    """Request body accepted by the `/policy/qa` route."""
-
-    # untyped list of messages; element shape is not validated here
-    arch_messages: list
-
-
-class PolicyCoverageRequest(BaseModel):
-    """Request body naming the policy type to act on (also the base of PolicyInitiateRequest)."""
-
-    policy_type: str = Field(
-        ...,
-        description="The type of policy held by the customer, e.g. car, boat, house, motorcycle",
-    )
-
-
-class PolicyInitiateRequest(PolicyCoverageRequest):
-    """Request body for `/policy/initiate`: the inherited `policy_type` plus a required deductible."""
-
-    deductible: float = Field(
-        ..., description="The deductible amount set of the policy"
-    )
-
-
-class ClaimUpdate(BaseModel):
-    """Request body for `/policy/claim`."""
-
-    claim_id: str
-    # Optional: arch_config.yaml declares `notes` with `required: false`, so a request
-    # without notes must validate; omitted notes default to an empty string.
-    notes: str = ""  # Status or details of the claim
-
-
-class DeductibleUpdate(BaseModel):
-    """Request body for `/policy/deductible`."""
-
-    policy_id: str  # policy whose deductible is being changed
-    deductible: float  # new deductible amount
-
-
-class CoverageResponse(BaseModel):
-    """Response model of `/policy/coverage`."""
-
-    policy_type: str  # echoed from the request
-    coverage: str  # Description of coverage
-    premium: float  # The premium cost
-
-
-# Look up coverage and premium for a policy type
-@app.post("/policy/coverage", response_model=CoverageResponse)
-async def get_policy_coverage(req: PolicyCoverageRequest):
-    """
-    Return the coverage description and premium for ``req.policy_type``.
-
-    Known policy types are car, boat, house and motorcycle; any other value
-    is answered with HTTP 404 ("Policy type not found").
-    """
-    # policy type -> (coverage description, premium)
-    catalog = {
-        "car": ("Full car coverage with collision, liability", 500.0),
-        "boat": ("Full boat coverage including theft and storm damage", 700.0),
-        "house": ("Full house coverage including fire, theft, flood", 1000.0),
-        "motorcycle": ("Full motorcycle coverage with liability", 400.0),
-    }
-
-    entry = catalog.get(req.policy_type)
-    if entry is None:
-        raise HTTPException(status_code=404, detail="Policy type not found")
-
-    coverage_text, premium_amount = entry
-    return CoverageResponse(
-        policy_type=req.policy_type,
-        coverage=coverage_text,
-        premium=premium_amount,
-    )
-
-
-# Start coverage for a policy
-@app.post("/policy/initiate")
-async def initiate_policy(policy_request: PolicyInitiateRequest):
-    """
-    Acknowledge a new policy for a car, boat, house, or motorcycle.
-
-    Any other policy type is rejected with HTTP 400 ("Invalid policy type").
-    The response echoes the requested deductible.
-    """
-    supported_types = ("car", "boat", "house", "motorcycle")
-    requested_type = policy_request.policy_type
-    if requested_type not in supported_types:
-        raise HTTPException(status_code=400, detail="Invalid policy type")
-
-    confirmation = f"Policy initiated for {requested_type}"
-    return {"message": confirmation, "deductible": policy_request.deductible}
-
-
-# Update claim details
-@app.post("/policy/claim")
-async def update_claim(req: ClaimUpdate):
-    """
-    Update the status or details of a claim.
-
-    Mock implementation: nothing is stored; the response echoes the claim id
-    and the submitted notes.
-    """
-    # The body is bound to `req`; the previous code read an undefined
-    # `claim_update` name and reported the claim id as if it were a policy id.
-    return {
-        "message": f"Claim {req.claim_id} has been updated",
-        "update": req.notes,
-    }
-
-
-# Change the deductible on a policy
-@app.post("/policy/deductible")
-async def update_deductible(deductible_update: DeductibleUpdate):
-    """
-    Acknowledge a deductible change for one policy.
-
-    Mock implementation: the response only echoes the policy id and the new amount.
-    """
-    policy_id = deductible_update.policy_id
-    confirmation = f"Deductible for policy {policy_id} has been updated"
-    return {"message": confirmation, "new_deductible": deductible_update.deductible}
-
-
-# Canned answer for general policy Q/A
-@app.post("/policy/qa")
-async def policy_qa(conversation: Conversation):
-    """
-    Answer general insurance Q/A with a fixed reply.
-
-    The incoming conversation is not inspected; the same chat-completion-shaped
-    payload (one assistant choice, model "insurance_agent") is returned every time.
-    """
-    reply = {
-        "role": "assistant",
-        "content": "I am a helpful insurance agent, and can only help with insurance things",
-    }
-    only_choice = {"message": reply, "finish_reason": "completed", "index": 0}
-    return {
-        "choices": [only_choice],
-        "model": "insurance_agent",
-        "usage": {"completion_tokens": 0},
-    }
-
-
-# Run the app using:
-# uvicorn insurance_agent_main:app --reload
--- a/demos/insurance_agent/requirements.txt
+++ b/demos/insurance_agent/requirements.txt
@ -1,4 +0,0 @@
-fastapi
-uvicorn
-pydantic
-openai
--- a/demos/network_agent/grafana/dashboard.yaml
+++ b/demos/network_agent/grafana/dashboard.yaml
@ -1,12 +0,0 @@
-# Grafana dashboard provider configuration.
-apiVersion: 1
-
-providers:
-  # Single file-based provider.
-  - name: "Dashboard provider"
-    orgId: 1
-    type: file
-    disableDeletion: false
-    updateIntervalSeconds: 10
-    allowUiUpdates: false
-    options:
-      # Directory the provider reads dashboards from
-      # (presumably a path inside the Grafana container; TODO confirm the mount).
-      path: /var/lib/grafana/dashboards
-      foldersFromFilesStructure: true
--- a/demos/network_agent/grafana/dashboards/envoy_overview.json
+++ b/demos/network_agent/grafana/dashboards/envoy_overview.json
@ -1,355 +0,0 @@
-{
-  "annotations": {
-    "list": [
-      {
-        "builtIn": 1,
-        "datasource": {
-          "type": "grafana",
-          "uid": "-- Grafana --"
-        },
-        "enable": true,
-        "hide": true,
-        "iconColor": "rgba(0, 211, 255, 1)",
-        "name": "Annotations & Alerts",
-        "type": "dashboard"
-      }
-    ]
-  },
-  "editable": true,
-  "fiscalYearStartMonth": 0,
-  "graphTooltip": 1,
-  "links": [],
-  "panels": [
-    {
-      "datasource": {
-        "type": "prometheus",
-        "uid": "PBFA97CFB590B2093"
-      },
-      "fieldConfig": {
-        "defaults": {
-          "color": {
-            "mode": "palette-classic"
-          },
-          "custom": {
-            "axisBorderShow": false,
-            "axisCenteredZero": false,
-            "axisColorMode": "text",
-            "axisLabel": "",
-            "axisPlacement": "auto",
-            "barAlignment": 0,
-            "drawStyle": "line",
-            "fillOpacity": 0,
-            "gradientMode": "none",
-            "hideFrom": {
-              "legend": false,
-              "tooltip": false,
-              "viz": false
-            },
-            "insertNulls": false,
-            "lineInterpolation": "linear",
-            "lineWidth": 1,
-            "pointSize": 5,
-            "scaleDistribution": {
-              "type": "linear"
-            },
-            "showPoints": "auto",
-            "spanNulls": false,
-            "stacking": {
-              "group": "A",
-              "mode": "none"
-            },
-            "thresholdsStyle": {
-              "mode": "off"
-            }
-          },
-          "mappings": [],
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [
-              {
-                "color": "green",
-                "value": null
-              },
-              {
-                "color": "red",
-                "value": 80
-              }
-            ]
-          }
-        },
-        "overrides": []
-      },
-      "gridPos": {
-        "h": 8,
-        "w": 12,
-        "x": 0,
-        "y": 0
-      },
-      "id": 2,
-      "options": {
-        "legend": {
-          "calcs": [],
-          "displayMode": "list",
-          "placement": "bottom",
-          "showLegend": true
-        },
-        "tooltip": {
-          "mode": "single",
-          "sort": "none"
-        }
-      },
-      "targets": [
-        {
-          "datasource": {
-            "type": "prometheus",
-            "uid": "PBFA97CFB590B2093"
-          },
-          "disableTextWrap": false,
-          "editorMode": "code",
-          "expr": "avg(rate(envoy_cluster_internal_upstream_rq_time_sum[1m]) / rate(envoy_cluster_internal_upstream_rq_time_count[1m])) by (envoy_cluster_name)",
-          "fullMetaSearch": false,
-          "hide": false,
-          "includeNullMetadata": true,
-          "instant": false,
-          "legendFormat": "__auto",
-          "range": true,
-          "refId": "A",
-          "useBackend": false
-        }
-      ],
-      "title": "request latency - internal (ms)",
-      "type": "timeseries"
-    },
-    {
-      "datasource": {
-        "type": "prometheus",
-        "uid": "PBFA97CFB590B2093"
-      },
-      "fieldConfig": {
-        "defaults": {
-          "color": {
-            "mode": "palette-classic"
-          },
-          "custom": {
-            "axisBorderShow": false,
-            "axisCenteredZero": false,
-            "axisColorMode": "text",
-            "axisLabel": "",
-            "axisPlacement": "auto",
-            "barAlignment": 0,
-            "drawStyle": "line",
-            "fillOpacity": 0,
-            "gradientMode": "none",
-            "hideFrom": {
-              "legend": false,
-              "tooltip": false,
-              "viz": false
-            },
-            "insertNulls": false,
-            "lineInterpolation": "linear",
-            "lineWidth": 1,
-            "pointSize": 5,
-            "scaleDistribution": {
-              "type": "linear"
-            },
-            "showPoints": "auto",
-            "spanNulls": false,
-            "stacking": {
-              "group": "A",
-              "mode": "none"
-            },
-            "thresholdsStyle": {
-              "mode": "off"
-            }
-          },
-          "mappings": [],
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [
-              {
-                "color": "green",
-                "value": null
-              },
-              {
-                "color": "red",
-                "value": 80
-              }
-            ]
-          }
-        },
-        "overrides": []
-      },
-      "gridPos": {
-        "h": 8,
-        "w": 12,
-        "x": 12,
-        "y": 0
-      },
-      "id": 1,
-      "options": {
-        "legend": {
-          "calcs": [],
-          "displayMode": "list",
-          "placement": "bottom",
-          "showLegend": true
-        },
-        "tooltip": {
-          "mode": "single",
-          "sort": "none"
-        }
-      },
-      "targets": [
-        {
-          "datasource": {
-            "type": "prometheus",
-            "uid": "PBFA97CFB590B2093"
-          },
-          "disableTextWrap": false,
-          "editorMode": "code",
-          "expr": "avg(rate(envoy_cluster_external_upstream_rq_time_sum[1m]) / rate(envoy_cluster_external_upstream_rq_time_count[1m])) by (envoy_cluster_name)",
-          "fullMetaSearch": false,
-          "hide": false,
-          "includeNullMetadata": true,
-          "instant": false,
-          "legendFormat": "__auto",
-          "range": true,
-          "refId": "A",
-          "useBackend": false
-        }
-      ],
-      "title": "request latency - external (ms)",
-      "type": "timeseries"
-    },
-    {
-      "datasource": {
-        "type": "prometheus",
-        "uid": "PBFA97CFB590B2093"
-      },
-      "fieldConfig": {
-        "defaults": {
-          "color": {
-            "mode": "palette-classic"
-          },
-          "custom": {
-            "axisBorderShow": false,
-            "axisCenteredZero": false,
-            "axisColorMode": "text",
-            "axisLabel": "",
-            "axisPlacement": "auto",
-            "barAlignment": 0,
-            "drawStyle": "line",
-            "fillOpacity": 0,
-            "gradientMode": "none",
-            "hideFrom": {
-              "legend": false,
-              "tooltip": false,
-              "viz": false
-            },
-            "insertNulls": false,
-            "lineInterpolation": "linear",
-            "lineWidth": 1,
-            "pointSize": 5,
-            "scaleDistribution": {
-              "type": "linear"
-            },
-            "showPoints": "auto",
-            "spanNulls": false,
-            "stacking": {
-              "group": "A",
-              "mode": "none"
-            },
-            "thresholdsStyle": {
-              "mode": "off"
-            }
-          },
-          "mappings": [],
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [
-              {
-                "color": "green",
-                "value": null
-              },
-              {
-                "color": "red",
-                "value": 80
-              }
-            ]
-          }
-        },
-        "overrides": []
-      },
-      "gridPos": {
-        "h": 8,
-        "w": 12,
-        "x": 0,
-        "y": 8
-      },
-      "id": 3,
-      "options": {
-        "legend": {
-          "calcs": [],
-          "displayMode": "list",
-          "placement": "bottom",
-          "showLegend": true
-        },
-        "tooltip": {
-          "mode": "single",
-          "sort": "none"
-        }
-      },
-      "targets": [
-        {
-          "datasource": {
-            "type": "prometheus",
-            "uid": "PBFA97CFB590B2093"
-          },
-          "disableTextWrap": false,
-          "editorMode": "code",
-          "expr": "avg(rate(envoy_cluster_internal_upstream_rq_completed[1m])) by (envoy_cluster_name)",
-          "fullMetaSearch": false,
-          "includeNullMetadata": true,
-          "instant": false,
-          "legendFormat": "__auto",
-          "range": true,
-          "refId": "A",
-          "useBackend": false
-        },
-        {
-          "datasource": {
-            "type": "prometheus",
-            "uid": "PBFA97CFB590B2093"
-          },
-          "disableTextWrap": false,
-          "editorMode": "code",
-          "expr": "avg(rate(envoy_cluster_external_upstream_rq_completed[1m])) by (envoy_cluster_name)",
-          "fullMetaSearch": false,
-          "hide": false,
-          "includeNullMetadata": true,
-          "instant": false,
-          "legendFormat": "__auto",
-          "range": true,
-          "refId": "B",
-          "useBackend": false
-        }
-      ],
-      "title": "Upstream request count",
-      "type": "timeseries"
-    }
-  ],
-  "schemaVersion": 39,
-  "tags": [],
-  "templating": {
-    "list": []
-  },
-  "time": {
-    "from": "now-15m",
-    "to": "now"
-  },
-  "timepicker": {},
-  "timezone": "browser",
-  "title": "Intelligent Gateway Overview",
-  "uid": "adt6uhx5lk8aob",
-  "version": 3,
-  "weekStart": ""
-}
--- a/demos/network_agent/grafana/datasource.yaml
+++ b/demos/network_agent/grafana/datasource.yaml
@ -1,9 +0,0 @@
-apiVersion: 1
-
-# Data sources defined for Grafana; `datasources` is a list, so each entry starts with "- ".
-datasources:
-- name: Prometheus
-  type: prometheus
-  url: http://prometheus:9090
-  isDefault: true
-  access: proxy
-  editable: true
--- a/demos/network_agent/utils.py
+++ b/demos/network_agent/utils.py
@ -1,253 +0,0 @@
-import logging
-import random
-import re
-import sqlite3
-from datetime import datetime, timedelta, timezone
-
-import pandas as pd
-from dateparser import parse
-
-# Module logging: INFO and above, formatted as "<time> - <level> - <message>".
-logging.basicConfig(
-    format="%(asctime)s - %(levelname)s - %(message)s",
-    level=logging.INFO,
-)
-logger = logging.getLogger(__name__)
-
-
-def loadsql():
-    """
-    Build an in-memory SQLite database filled with synthetic data.
-
-    Creates the device table first, then the interface-stats and flow tables,
-    which are generated from the device rows. Returns the open connection.
-    """
-    connection = sqlite3.connect(":memory:")
-
-    # the device frame is passed to both of the other generators
-    devices = generate_device_data(connection)
-    generate_interface_stats_data(connection, devices)
-    generate_flow_data(connection, devices)
-
-    return connection
-
-
-# Rewrite a free-form duration such as "past 5 hrs" as "<N> <unit> ago"
-def convert_to_ago_format(expression):
-    """
-    Find the first "<digits> <unit>" phrase in ``expression`` and return it as
-    "<digits> <unit> ago".
-
-    Unit spellings are tried in the order listed below and the first one that
-    matches wins. Returns None when no spelling matches.
-    """
-    # (spelling searched for, unit written to the result)
-    unit_spellings = (
-        (r"seconds", "seconds"),
-        (r"minutes", "minutes"),
-        (r"mins", "mins"),
-        (r"hrs", "hrs"),
-        (r"hours", "hours"),
-        (r"hour", "hour"),
-        (r"hr", "hour"),
-        (r"days", "days"),
-        (r"day", "day"),
-        (r"weeks", "weeks"),
-        (r"week", "week"),
-        (r"months", "months"),
-        (r"month", "month"),
-        (r"years", "years"),
-        (r"yrs", "years"),
-        (r"year", "year"),
-        (r"yr", "year"),
-    )
-
-    for spelling, unit in unit_spellings:
-        found = re.search(rf"(\d+) {spelling}", expression)
-        if found is not None:
-            return f"{found.group(1)} {unit} ago"
-
-    # nothing recognisable in the expression
-    return None
-
-
-# Function to generate random MAC addresses
-def random_mac():
-    """
-    Return a random MAC address string of six colon-separated hex octets.
-
-    The first four octets are fixed (AA:BB:CC:DD) and the last two are random,
-    so the result always has exactly six octets.
-    """
-    return "AA:BB:CC:DD:" + ":".join(
-        f"{random.randint(0, 255):02X}" for _ in range(2)
-    )
-
-
-# Function to generate random IP addresses
-def random_ip():
-    """
-    Return a random dotted-quad IPv4 string with every octet in 1..255.
-
-    The address is built on a single line so it contains no newlines or padding.
-    """
-    return ".".join(str(random.randint(1, 255)) for _ in range(4))
-
-
-# Generate synthetic data for the device table
-def generate_device_data(
-    conn,
-    n=1000,
-):
-    """
-    Create ``n`` synthetic device rows, write them to the ``device`` table on
-    ``conn`` and return them as a DataFrame.
-
-    Columns: switchip, hwsku, hostname, osversion, layer (L2/L3 alternating),
-    region (US/EU/ASIA), uptime ("<d> days <h>:<m>:<s>") and device_mac_address.
-    """
-    device_data = {
-        "switchip": [random_ip() for _ in range(n)],
-        "hwsku": [f"HW{i+1}" for i in range(n)],
-        "hostname": [f"switch{i+1}" for i in range(n)],
-        "osversion": [f"v{i+1}" for i in range(n)],
-        "layer": ["L2" if i % 2 == 0 else "L3" for i in range(n)],
-        "region": [random.choice(["US", "EU", "ASIA"]) for _ in range(n)],
-        # built on one logical line so the value carries no newline or indentation
-        "uptime": [
-            f"{random.randint(0, 10)} days "
-            f"{random.randint(0, 23)}:{random.randint(0, 59)}:{random.randint(0, 59)}"
-            for _ in range(n)
-        ],
-        "device_mac_address": [random_mac() for _ in range(n)],
-    }
-    df = pd.DataFrame(device_data)
-    df.to_sql("device", conn, index=False)
-    return df
-
-
-# Fill the interfacestats table with synthetic counters
-def generate_interface_stats_data(conn, device_df, n=1000):
-    """
-    Write ``n`` synthetic interface-counter rows to the ``interfacestats`` table on ``conn``.
-
-    Each row picks a MAC from ``device_df["device_mac_address"]``, one of eth0..eth3,
-    a UTC timestamp within the past five days, and random discard/error/octet counters.
-    Returns nothing.
-    """
-    mac_column = device_df["device_mac_address"]
-    interface_names = ["eth0", "eth1", "eth2", "eth3"]
-
-    # values are drawn in key order, one row per iteration
-    rows = [
-        {
-            "device_mac_address": random.choice(mac_column),
-            "ifname": random.choice(interface_names),
-            # random timestamps in the past 5 days
-            "time": datetime.now(timezone.utc)
-            - timedelta(minutes=random.randint(0, 1440 * 5)),
-            "in_discards": random.randint(0, 1000),
-            "in_errors": random.randint(0, 500),
-            "out_discards": random.randint(0, 800),
-            "out_errors": random.randint(0, 400),
-            "in_octets": random.randint(1000, 100000),
-            "out_octets": random.randint(1000, 100000),
-        }
-        for _ in range(n)
-    ]
-    pd.DataFrame(rows).to_sql("interfacestats", conn, index=False)
-
-
-# Generate synthetic data for the ts_flow table
-def generate_flow_data(conn, device_df, n=1000):
-    """
-    Write ``n`` synthetic flow records to the ``ts_flow`` table on ``conn``.
-
-    Each record picks a sampler address from ``device_df["switchip"]``, a protocol
-    (TCP/UDP), random source/destination addresses and ports, interface indexes,
-    byte and packet counts, and a UTC record time within the past five days.
-    ``flow_start``/``flow_end`` are epoch seconds with ``flow_end >= flow_start``.
-    Returns nothing.
-    """
-    flow_data = []
-    for _ in range(n):
-        sampler_address = random.choice(device_df["switchip"])
-        proto = random.choice(["TCP", "UDP"])
-        src_addr = random_ip()
-        dst_addr = random_ip()
-        src_port = random.randint(1024, 65535)
-        dst_port = random.randint(1024, 65535)
-        in_if = random.randint(1, 10)
-        out_if = random.randint(1, 10)
-        flow_start = int(
-            (datetime.now() - timedelta(days=random.randint(1, 30))).timestamp()
-        )
-        # Derive the end from the start so a flow never ends before it begins.
-        # Flows start at least one day ago and last at most one day, so the end
-        # is never in the future either.
-        flow_end = flow_start + random.randint(0, 24 * 60 * 60)
-        bytes_transferred = random.randint(1000, 100000)
-        packets = random.randint(1, 1000)
-        flow_time = datetime.now(timezone.utc) - timedelta(
-            minutes=random.randint(0, 1440 * 5)
-        )  # random flow time
-
-        flow_data.append(
-            {
-                "sampler_address": sampler_address,
-                "proto": proto,
-                "src_addr": src_addr,
-                "dst_addr": dst_addr,
-                "src_port": src_port,
-                "dst_port": dst_port,
-                "in_if": in_if,
-                "out_if": out_if,
-                "flow_start": flow_start,
-                "flow_end": flow_end,
-                "bytes": bytes_transferred,
-                "packets": packets,
-                "time": flow_time,
-            }
-        )
-    df = pd.DataFrame(flow_data)
-    df.to_sql("ts_flow", conn, index=False)
-
-
-def load_params(req):
-    """
-    Turn a request into SQL bind parameters and WHERE-clause fragments.
-
-    ``req.from_time`` (natural language, optional) is parsed into a datetime;
-    when absent, the default is seven days before now. The optional filters
-    ``ifname``, ``region`` and the ``min_*``/``max_*`` error and discard bounds
-    each add one fragment and one bind parameter.
-
-    Returns ``(params, filters)`` on success. When ``from_time`` cannot be
-    parsed, returns a dict with a single ``"error"`` key instead, so callers
-    must check which shape they received.
-    """
-    # Step 1: Convert the from_time natural language string to a timestamp if provided
-    if req.from_time:
-        # Use `dateparser` to parse natural language timeframes
-        logger.info("%s\n\nCaptured from time: %s\n\n", "* " * 50, req.from_time)
-        parsed_time = parse(req.from_time, settings={"RELATIVE_BASE": datetime.now()})
-        if not parsed_time:
-            # fall back to rewriting the phrase as "<N> <unit> ago" and parsing that
-            conv_time = convert_to_ago_format(req.from_time)
-            if conv_time:
-                parsed_time = parse(
-                    conv_time, settings={"RELATIVE_BASE": datetime.now()}
-                )
-        if not parsed_time:
-            # covers both "no recognisable unit" and "fallback phrase still unparseable"
-            return {
-                "error": """Invalid from_time format. Please provide a valid time description
-                    such as 'past 7 days' or 'since last month'."""
-            }
-        logger.info("\n\nConverted from time: %s\n\n%s\n\n", parsed_time, "* " * 50)
-        from_time = parsed_time
-        # %s, not %f: from_time is a datetime and cannot be formatted as a float
-        logger.info("Using parsed from_time: %s", from_time)
-    else:
-        # If no from_time is provided, use a default value (e.g., the past 7 days)
-        from_time = datetime.now() - timedelta(days=7)
-        logger.info("Using default from_time: %s", from_time)
-
-    # Step 2: Build the dynamic SQL query based on the optional filters
-    filters = []
-    params = {"from_time": from_time}
-
-    if req.ifname:
-        filters.append("i.ifname = :ifname")
-        params["ifname"] = req.ifname
-
-    if req.region:
-        filters.append("d.region = :region")
-        params["region"] = req.region
-
-    # min_*/max_* bounds: a bound of 0 is meaningful, so test against None
-    for column in ("in_errors", "out_errors", "in_discards", "out_discards"):
-        for bound, operator in (("min", ">="), ("max", "<=")):
-            key = f"{bound}_{column}"
-            value = getattr(req, key)
-            if value is not None:
-                filters.append(f"i.{column} {operator} :{key}")
-                params[key] = value
-
-    return params, filters
--- a/demos/samples_java/weather_forcecast_service/Dockerfile
+++ b/demos/samples_java/weather_forcecast_service/Dockerfile
@ -0,0 +1,18 @@
+# Stage 1: Build the application using Maven
+# NOTE(review): this stage compiles with JDK 18 while the runtime stage below uses JDK 17;
+# confirm the compiler target configured by the build is not newer than 17.
+FROM maven:3.8.7-openjdk-18-slim AS build
+WORKDIR /app
+# Copy pom.xml and download dependencies first, so this layer is reused while pom.xml is unchanged
+COPY pom.xml .
+RUN mvn dependency:go-offline
+# Copy the source code and build the application (tests are skipped)
+COPY src ./src
+RUN mvn clean package -DskipTests
+
+# Stage 2: Run the application using a slim JDK image
+FROM openjdk:17-jdk-slim
+WORKDIR /app
+# Copy the built jar from the previous stage; the file name follows artifactId and version in pom.xml
+COPY --from=build /app/target/weather-forecast-service-0.0.1-SNAPSHOT.jar app.jar
+# Container port for the service; docker-compose.yaml publishes it as 18081:8081.
+# NOTE(review): Spring Boot's default port is 8080, so 8081 is presumably set in the
+# application's own configuration; confirm.
+EXPOSE 8081
+ENTRYPOINT ["java", "-jar", "app.jar"]
--- a/demos/samples_java/weather_forcecast_service/arch_config.yaml
+++ b/demos/samples_java/weather_forcecast_service/arch_config.yaml
@ -0,0 +1,47 @@
+version: v0.1
+# Listener on which the gateway accepts incoming prompts.
+listeners:
+  ingress_traffic:
+    address: 0.0.0.0
+    port: 10000
+    message_format: openai
+    timeout: 30s
+
+# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
+llm_providers:
+  - name: OpenAI
+    provider_interface: openai
+    # read from the environment; never commit the key itself
+    access_key: $OPENAI_API_KEY
+    model: gpt-4o-mini
+    default: true
+
+# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
+endpoints:
+  weather_forecast_service:
+    # value could be ip address or a hostname with port
+    # this could also be a list of endpoints for load balancing
+    # for example endpoint: [ ip1:port, ip2:port ]
+    # 18081 is the host port docker-compose.yaml publishes for the weather service
+    endpoint: host.docker.internal:18081
+    # max time to wait for a connection to be established (0.005s = 5 ms)
+    connect_timeout: 0.005s
+
+# default system prompt used by all prompt targets
+system_prompt: |
+  You are a helpful weather assistant.
+
+prompt_targets:
+  # NOTE(review): WeatherController also reads `units` from the request body, but no
+  # `units` parameter is declared here; confirm the service tolerates it being absent.
+  - name: weather_forecast
+    description: get the weather forecast
+    parameters:
+      - name: location
+        description: the location for which to get the weather forecast
+        required: true
+        type: string
+        format: City, State
+      - name: days
+        description: the number of days for the forecast
+        required: true
+        type: int
+    endpoint:
+      name: weather_forecast_service
+      path: /weather
+      http_method: POST
--- a/demos/samples_java/weather_forcecast_service/docker-compose.yaml
+++ b/demos/samples_java/weather_forcecast_service/docker-compose.yaml
@ -1,18 +1,14 @@
 services:
-  api_server:
+  weather_forecast_service:
    build:
      context: .
      dockerfile: Dockerfile
    ports:
-      - "18083:80"
-    healthcheck:
-        test: ["CMD", "curl" ,"http://localhost:80/healthz"]
-        interval: 5s
-        retries: 20
+      - "18081:8081"

  chatbot_ui:
    build:
-      context: ../shared/chatbot_ui
+      context: ../../shared/chatbot_ui
      dockerfile: Dockerfile
    ports:
      - "18080:8080"
--- a/demos/samples_java/weather_forcecast_service/pom.xml
+++ b/demos/samples_java/weather_forcecast_service/pom.xml
@ -0,0 +1,40 @@
+<!-- Maven build for the weather forecast demo service; packaged as
+     weather-forecast-service-0.0.1-SNAPSHOT.jar, the file the Dockerfile copies. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
+         https://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>weather</groupId>
+    <artifactId>weather-forecast-service</artifactId>
+    <version>0.0.1-SNAPSHOT</version>
+    <packaging>jar</packaging>
+
+    <!-- Dependency and plugin versions below are omitted and inherited from this parent -->
+    <parent>
+        <groupId>org.springframework.boot</groupId>
+        <artifactId>spring-boot-starter-parent</artifactId>
+        <version>2.7.10</version>
+        <relativePath/>
+    </parent>
+
+    <dependencies>
+        <!-- Spring Boot Starter Web -->
+        <dependency>
+            <groupId>org.springframework.boot</groupId>
+            <artifactId>spring-boot-starter-web</artifactId>
+        </dependency>
+        <!-- NOTE(review): spring-boot-starter-web presumably already brings in
+             spring-boot-starter; confirm this explicit entry is needed -->
+        <dependency>
+            <groupId>org.springframework.boot</groupId>
+            <artifactId>spring-boot-starter</artifactId>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <!-- Spring Boot Maven Plugin -->
+            <plugin>
+                <groupId>org.springframework.boot</groupId>
+                <artifactId>spring-boot-maven-plugin</artifactId>
+            </plugin>
+        </plugins>
+    </build>
+</project>
--- a/demos/samples_java/weather_forcecast_service/run_demo.sh
+++ b/demos/samples_java/weather_forcecast_service/run_demo.sh
--- a/demos/samples_java/weather_forcecast_service/src/main/java/weather/WeatherForecastApplication.java
+++ b/demos/samples_java/weather_forcecast_service/src/main/java/weather/WeatherForecastApplication.java
@ -0,0 +1,12 @@
+// File: src/main/java/weather/WeatherForecastApplication.java
+package weather;
+
+import org.springframework.boot.SpringApplication;
+import org.springframework.boot.autoconfigure.SpringBootApplication;
+
+/**
+ * Entry point of the weather forecast demo service.
+ */
+@SpringBootApplication
+public class WeatherForecastApplication {
+
+    /**
+     * Starts the application through {@link SpringApplication#run}, passing the
+     * command-line arguments through unchanged.
+     *
+     * @param args command-line arguments
+     */
+    public static void main(final String[] args) {
+        SpringApplication.run(WeatherForecastApplication.class, args);
+    }
+}
--- a/demos/samples_java/weather_forcecast_service/src/main/java/weather/controller/WeatherController.java
+++ b/demos/samples_java/weather_forcecast_service/src/main/java/weather/controller/WeatherController.java
@ -0,0 +1,54 @@
+package weather.controller;
+
+import weather.model.DayForecast;
+import weather.model.WeatherForecastResponse;
+import weather.model.WeatherRequest;
+import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.RequestBody;
+import org.springframework.web.bind.annotation.RestController;
+
+import java.time.Instant;
+import java.time.LocalDate;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+/**
+ * REST controller that answers weather requests with randomly generated forecasts.
+ */
+@RestController
+public class WeatherController {
+
+    // Source of the pseudo-random temperatures; assigned once and never replaced.
+    private final Random random = new Random();
+
+    /**
+     * Handles {@code POST /weather}: builds one random forecast per requested day,
+     * dated from today onwards.
+     *
+     * <p>Temperatures are generated in Fahrenheit and converted to Celsius only when the
+     * requested units are {@code "celsius"} or {@code "c"} (case-insensitive). Any other
+     * value, including {@code null}, leaves the Fahrenheit numbers untouched. The units
+     * string is echoed back exactly as received.
+     *
+     * @param req request body carrying the location, the number of days and the units
+     * @return the location and units from the request plus the generated daily forecasts;
+     *         the list is empty when {@code days} is zero or negative
+     */
+    @PostMapping("/weather")
+    public WeatherForecastResponse getRandomWeatherForecast(@RequestBody WeatherRequest req) {
+        String units = req.getUnits();
+        // Constant-first comparison is null-safe: a body with "units": null no longer
+        // triggers a NullPointerException. Evaluated once because it does not vary per day.
+        boolean celsius = "celsius".equalsIgnoreCase(units) || "c".equalsIgnoreCase(units);
+        // Read the clock once so the dates stay consecutive even if the request
+        // straddles midnight.
+        LocalDate today = LocalDate.now();
+
+        WeatherForecastResponse response = new WeatherForecastResponse();
+        response.setLocation(req.getLocation());
+        response.setUnits(units);
+
+        // NOTE(review): days comes straight from the client and is not capped here.
+        List<DayForecast> forecasts = new ArrayList<>();
+        for (int i = 0; i < req.getDays(); i++) {
+            // Random min temperature between 50 and 89 (inclusive)
+            int minTemp = random.nextInt(90 - 50) + 50;
+            // Max temperature between (minTemp + 5) and (minTemp + 19)
+            int maxTemp = random.nextInt(15) + (minTemp + 5);
+
+            DayForecast dayForecast = new DayForecast();
+            dayForecast.setDate(today.plusDays(i).toString());
+            dayForecast.setMin(celsius ? toCelsius(minTemp) : minTemp);
+            dayForecast.setMax(celsius ? toCelsius(maxTemp) : maxTemp);
+            dayForecast.setUnits(units);
+
+            forecasts.add(dayForecast);
+        }
+        response.setDailyForecast(forecasts);
+        return response;
+    }
+
+    /** Converts a whole-degree Fahrenheit temperature to Celsius. */
+    private static double toCelsius(int fahrenheit) {
+        return (fahrenheit - 32) * 5.0 / 9.0;
+    }
+}
--- a/demos/samples_java/weather_forcecast_service/src/main/java/weather/model/DayForecast.java
+++ b/demos/samples_java/weather_forcecast_service/src/main/java/weather/model/DayForecast.java
@ -0,0 +1,40 @@
+package weather.model;
+
+/**
+ * Forecast for a single day: its date, the temperature units, and the minimum and
+ * maximum temperatures. Plain mutable bean with a no-argument constructor.
+ */
+public class DayForecast {
+    private String date;
+    private String units;
+    private double min;
+    private double max;
+
+    /** Creates an empty forecast; every value is supplied through the setters. */
+    public DayForecast() {
+    }
+
+    /** @return the date of this forecast, or {@code null} if never set */
+    public String getDate() {
+        return this.date;
+    }
+
+    /** @param date the date of this forecast */
+    public void setDate(final String date) {
+        this.date = date;
+    }
+
+    /** @return the units label for the temperatures, or {@code null} if never set */
+    public String getUnits() {
+        return this.units;
+    }
+
+    /** @param units the units label for the temperatures */
+    public void setUnits(final String units) {
+        this.units = units;
+    }
+
+    /** @return the minimum temperature ({@code 0.0} until set) */
+    public double getMin() {
+        return this.min;
+    }
+
+    /** @param min the minimum temperature */
+    public void setMin(final double min) {
+        this.min = min;
+    }
+
+    /** @return the maximum temperature ({@code 0.0} until set) */
+    public double getMax() {
+        return this.max;
+    }
+
+    /** @param max the maximum temperature */
+    public void setMax(final double max) {
+        this.max = max;
+    }
+}
--- a/demos/samples_java/weather_forcecast_service/src/main/java/weather/model/WeatherForecastResponse.java
+++ b/demos/samples_java/weather_forcecast_service/src/main/java/weather/model/WeatherForecastResponse.java
@ -0,0 +1,37 @@
+package weather.model;
+
+import java.util.List;
+
+/**
+ * Response payload of the weather endpoint: the requested location and units plus the
+ * list of per-day forecasts. Plain mutable bean with a no-argument constructor.
+ */
+public class WeatherForecastResponse {
+    private String location;
+    private String units;
+    private List<DayForecast> forecast;
+
+    /** Creates an empty response; every value is supplied through the setters. */
+    public WeatherForecastResponse() {
+    }
+
+    /** @return the location this forecast is for, or {@code null} if never set */
+    public String getLocation() {
+        return this.location;
+    }
+
+    /** @param location the location this forecast is for */
+    public void setLocation(final String location) {
+        this.location = location;
+    }
+
+    /** @return the units label for the temperatures, or {@code null} if never set */
+    public String getUnits() {
+        return this.units;
+    }
+
+    /** @param units the units label for the temperatures */
+    public void setUnits(final String units) {
+        this.units = units;
+    }
+
+    /**
+     * Note the accessor is named {@code dailyForecast} although the backing field is
+     * {@code forecast}.
+     *
+     * @return the per-day forecasts, or {@code null} if never set
+     */
+    public List<DayForecast> getDailyForecast() {
+        return this.forecast;
+    }
+
+    /** @param forecast the per-day forecasts; the list is stored as given, not copied */
+    public void setDailyForecast(final List<DayForecast> forecast) {
+        this.forecast = forecast;
+    }
+}
--- a/demos/samples_java/weather_forcecast_service/src/main/java/weather/model/WeatherRequest.java
+++ b/demos/samples_java/weather_forcecast_service/src/main/java/weather/model/WeatherRequest.java
@ -0,0 +1,29 @@
+package weather.model;
+
+/**
+ * Request payload of the weather endpoint. A field that is never set keeps the
+ * initializer value declared below.
+ */
+public class WeatherRequest {
+    // Location to forecast; no default.
+    private String location;
+    // Number of days to forecast when the caller does not say otherwise.
+    private int days = 7;
+    // Default units label (spelling corrected from "Farenheit").
+    private String units = "Fahrenheit";
+
+    /** Creates a request holding the default {@code days} and {@code units}. */
+    public WeatherRequest() {}
+
+    // Getters and setters
+
+    /** @return the requested location, or {@code null} if never set */
+    public String getLocation() {
+        return location;
+    }
+
+    /** @param location the location to forecast */
+    public void setLocation(String location) {
+        this.location = location;
+    }
+
+    /** @return the number of days to forecast (defaults to 7) */
+    public int getDays() {
+        return days;
+    }
+
+    /** @param days the number of days to forecast */
+    public void setDays(int days) {
+        this.days = days;
+    }
+
+    /** @return the requested units label (defaults to {@code "Fahrenheit"}) */
+    public String getUnits() {
+        return units;
+    }
+
+    /** @param units the requested units label; stored as given, may be {@code null} */
+    public void setUnits(String units) {
+        this.units = units;
+    }
+}
--- a/demos/samples_java/weather_forcecast_service/src/main/resources/application.properties
+++ b/demos/samples_java/weather_forcecast_service/src/main/resources/application.properties
@ -0,0 +1 @@
+server.port=8081
--- a/demos/samples_python/currency_exchange/README.md
+++ b/demos/samples_python/currency_exchange/README.md
--- a/demos/samples_python/currency_exchange/arch_config.yaml
+++ b/demos/samples_python/currency_exchange/arch_config.yaml
@ -1,10 +1,11 @@
 version: v0.1

-listener:
-  address: 0.0.0.0
-  port: 10000
-  message_format: huggingface
-  connect_timeout: 0.005s
+listeners:
+  ingress_traffic:
+    address: 0.0.0.0
+    port: 10000
+    message_format: openai
+    timeout: 30s

 llm_providers:
  - name: gpt-4o
@ -12,6 +13,11 @@ llm_providers:
    provider_interface: openai
    model: gpt-4o

+endpoints:
+  frankfurther_api:
+    endpoint: api.frankfurter.dev
+    protocol: https
+
 system_prompt: |
  You are a helpful assistant.

@ -26,7 +32,7 @@ prompt_targets:
    description: Get currency exchange rate from USD to other currencies
    parameters:
      - name: currency_symbol
-        description: the currency that needs conversion
+        description: currency symbol to convert from USD
        required: true
        type: str
        in_path: true
@ -42,11 +48,6 @@ prompt_targets:
      name: frankfurther_api
      path: /v1/currencies

-endpoints:
-  frankfurther_api:
-    endpoint: api.frankfurter.dev
-    protocol: https
-
 tracing:
  random_sampling: 100
  trace_arch_internal: true
--- a/demos/samples_python/currency_exchange/docker-compose.yaml
+++ b/demos/samples_python/currency_exchange/docker-compose.yaml
@ -1,7 +1,7 @@
 services:
  chatbot_ui:
    build:
-      context: ../shared/chatbot_ui
+      context: ../../shared/chatbot_ui
    ports:
      - "18080:8080"
    environment:
@ -14,7 +14,7 @@ services:

  jaeger:
    build:
-      context: ../shared/jaeger
+      context: ../../shared/jaeger
    ports:
      - "16686:16686"
      - "4317:4317"
--- a/demos/samples_python/currency_exchange/run_demo.sh
+++ b/demos/samples_python/currency_exchange/run_demo.sh
--- a/demos/samples_python/currency_exchange/test_data.yaml
+++ b/demos/samples_python/currency_exchange/test_data.yaml
--- a/demos/samples_python/human_resources_agent/Dockerfile
+++ b/demos/samples_python/human_resources_agent/Dockerfile
--- a/demos/samples_python/human_resources_agent/README.md
+++ b/demos/samples_python/human_resources_agent/README.md
--- a/demos/samples_python/human_resources_agent/arch_config.yaml
+++ b/demos/samples_python/human_resources_agent/arch_config.yaml
@ -1,8 +1,10 @@
 version: v0.1
-listener:
-  address: 127.0.0.1
-  port: 8080 #If you configure port 443, you'll need to update the listener with tls_certificates
-  message_format: huggingface
+listeners:
+  ingress_traffic:
+    address: 0.0.0.0
+    port: 10000
+    message_format: openai
+    timeout: 30s

 # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
 llm_providers:
--- a/demos/samples_python/human_resources_agent/docker-compose.yaml
+++ b/demos/samples_python/human_resources_agent/docker-compose.yaml
@ -8,7 +8,6 @@ services:
      - CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1
    volumes:
      - ./arch_config.yaml:/app/arch_config.yaml
-      - ../shared/chatbot_ui/common.py:/app/common.py
    ports:
      - "18080:80"
    healthcheck:
@ -18,7 +17,7 @@ services:

  chatbot_ui:
    build:
-      context: ../shared/chatbot_ui
+      context: ../../shared/chatbot_ui
      dockerfile: Dockerfile
    ports:
      - "18080:8080"
--- a/demos/samples_python/human_resources_agent/image.png
+++ b/demos/samples_python/human_resources_agent/image.png
--- a/demos/samples_python/human_resources_agent/main.py
+++ b/demos/samples_python/human_resources_agent/main.py
--- a/demos/samples_python/human_resources_agent/requirements.txt
+++ b/demos/samples_python/human_resources_agent/requirements.txt
--- a/demos/samples_python/human_resources_agent/run_demo.sh
+++ b/demos/samples_python/human_resources_agent/run_demo.sh
--- a/demos/samples_python/human_resources_agent/test_data.yaml
+++ b/demos/samples_python/human_resources_agent/test_data.yaml
--- a/demos/samples_python/human_resources_agent/workforce_data.json
+++ b/demos/samples_python/human_resources_agent/workforce_data.json
--- a/demos/samples_python/multi_turn_rag_agent/Dockerfile
+++ b/demos/samples_python/multi_turn_rag_agent/Dockerfile
--- a/demos/samples_python/multi_turn_rag_agent/README.md
+++ b/demos/samples_python/multi_turn_rag_agent/README.md
--- a/demos/samples_python/multi_turn_rag_agent/arch_config.yaml
+++ b/demos/samples_python/multi_turn_rag_agent/arch_config.yaml
@ -1,10 +1,11 @@
 version: v0.1

-listener:
-  address: 127.0.0.1
-  port: 10000
-  message_format: huggingface
-  connect_timeout: 0.005s
+listeners:
+  ingress_traffic:
+    address: 0.0.0.0
+    port: 10000
+    message_format: openai
+    timeout: 30s

 endpoints:
  rag_energy_source_agent:
--- a/demos/samples_python/multi_turn_rag_agent/docker-compose.yaml
+++ b/demos/samples_python/multi_turn_rag_agent/docker-compose.yaml
@ -12,7 +12,7 @@ services:

  chatbot_ui:
    build:
-      context: ../shared/chatbot_ui
+      context: ../../shared/chatbot_ui
      dockerfile: Dockerfile
    ports:
      - "18080:8080"
--- a/demos/samples_python/multi_turn_rag_agent/main.py
+++ b/demos/samples_python/multi_turn_rag_agent/main.py
--- a/demos/samples_python/multi_turn_rag_agent/mutli-turn-example.png
+++ b/demos/samples_python/multi_turn_rag_agent/mutli-turn-example.png
--- a/demos/samples_python/multi_turn_rag_agent/requirements.txt
+++ b/demos/samples_python/multi_turn_rag_agent/requirements.txt
--- a/demos/samples_python/multi_turn_rag_agent/run_demo.sh
+++ b/demos/samples_python/multi_turn_rag_agent/run_demo.sh
--- a/demos/samples_python/network_switch_operator_agent/Dockerfile
+++ b/demos/samples_python/network_switch_operator_agent/Dockerfile
@ -7,13 +7,13 @@ WORKDIR /src
 COPY requirements.txt /src/
 RUN pip install --prefix=/runtime --force-reinstall -r requirements.txt

-COPY . /src
+COPY ../. /src

 FROM python:3.12-slim AS output

 COPY --from=builder /runtime /usr/local

-COPY . /app
+COPY ../. /app
 WORKDIR /app

 CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80", "--log-level", "info"]
--- a/demos/samples_python/network_switch_operator_agent/README.md
+++ b/demos/samples_python/network_switch_operator_agent/README.md
--- a/demos/samples_python/network_switch_operator_agent/arch_config.yaml
+++ b/demos/samples_python/network_switch_operator_agent/arch_config.yaml
@ -1,8 +1,10 @@
 version: v0.1
-listener:
-  address: 127.0.0.1
-  port: 8080 #If you configure port 443, you'll need to update the listener with tls_certificates
-  message_format: huggingface
+listeners:
+  ingress_traffic:
+    address: 0.0.0.0
+    port: 10000
+    message_format: openai
+    timeout: 30s

 # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
 llm_providers:
--- a/demos/samples_python/network_switch_operator_agent/docker-compose.yaml
+++ b/demos/samples_python/network_switch_operator_agent/docker-compose.yaml
@ -8,7 +8,7 @@ services:

  chatbot_ui:
    build:
-      context: ../shared/chatbot_ui
+      context: ../../shared/chatbot_ui
      dockerfile: Dockerfile
    ports:
      - "18080:8080"
--- a/demos/samples_python/network_switch_operator_agent/image.png
+++ b/demos/samples_python/network_switch_operator_agent/image.png
--- a/demos/samples_python/network_switch_operator_agent/main.py
+++ b/demos/samples_python/network_switch_operator_agent/main.py
--- a/demos/samples_python/network_switch_operator_agent/requirements.txt
+++ b/demos/samples_python/network_switch_operator_agent/requirements.txt
--- a/demos/samples_python/network_switch_operator_agent/run_demo.sh
+++ b/demos/samples_python/network_switch_operator_agent/run_demo.sh
@ -24,6 +24,7 @@ start_demo() {

  # Step 4: Start Network Agent
  echo "Starting Network Agent using Docker Compose..."
+  cd build
  docker compose up -d  # Run in detached mode
 }

--- a/demos/samples_python/stock_quote/README.md
+++ b/demos/samples_python/stock_quote/README.md
@ -0,0 +1,9 @@
+This demo shows how you can use a publicly hosted REST API that is protected by an access key.
+
+Before you start the demo make sure you set `OPENAI_API_KEY` and `TWELVEDATA_API_KEY`.
+
+To get `TWELVEDATA_API_KEY` please head over to https://twelvedata.com/.
+
+The following screenshot shows an interaction with the stock quote demo:
+
+![Screenshot of an interaction with the stock quote demo](stock_quote_demo.png)
--- a/demos/samples_python/stock_quote/arch_config.yaml
+++ b/demos/samples_python/stock_quote/arch_config.yaml
@ -0,0 +1,70 @@
+# Gateway configuration for the stock quote demo.
+version: v0.1
+
+# Listener for incoming chat traffic. It binds to all interfaces on port 10000, the
+# port the demo's docker-compose.yaml points CHAT_COMPLETION_ENDPOINT at.
+listeners:
+  ingress_traffic:
+    address: 0.0.0.0
+    port: 10000
+    message_format: openai
+    timeout: 30s
+
+# LLM provider entry. The key is referenced by name ($OPENAI_API_KEY) instead of being
+# stored in this file; the README asks for OPENAI_API_KEY to be set before starting.
+llm_providers:
+  - name: gpt-4o
+    access_key: $OPENAI_API_KEY
+    provider_interface: openai
+    model: gpt-4o
+
+# Upstream API, referenced by name (twelvedata_api) from the prompt targets in this file.
+endpoints:
+  twelvedata_api:
+    endpoint: api.twelvedata.com
+    protocol: https
+
+# Top-level system prompt; each prompt target in this file also defines its own.
+system_prompt: |
+  You are a helpful assistant.
+
+# Message configured for the jailbreak input guard's on_exception case. It must name
+# this demo's domain (stock quotes), not the currency exchange demo it was copied from.
+prompt_guards:
+  input_guards:
+    jailbreak:
+      on_exception:
+        message: Looks like you're curious about my abilities, but I can only provide assistance for stock quotes.
+
+# Prompt targets: each entry names an operation, the parameters it takes, the upstream
+# endpoint and path to call, and the system prompt used for that target.
+prompt_targets:
+  # Current quote for one symbol -> twelvedata_api /quote
+  - name: stock_quote
+    description: get current stock exchange rate for a given symbol
+    parameters:
+      - name: symbol
+        description: Stock symbol
+        required: true
+        type: str
+    endpoint:
+      name: twelvedata_api
+      path: /quote
+      # The API key is sent in the Authorization header as "apikey <key>".
+      http_headers:
+        Authorization: "apikey $TWELVEDATA_API_KEY"
+    system_prompt: |
+      You are a helpful stock exchange assistant. You are given stock symbol along with its exchange rate in json format. Your task is to parse the data and present it in a human-readable format. Keep the details to highlevel and be concise.
+
+  # Historical series for one symbol -> twelvedata_api /time_series
+  - name: stock_quote_time_series
+    description: get historical stock exchange rate for a given symbol
+    parameters:
+      - name: symbol
+        description: Stock symbol
+        required: true
+        type: str
+      # No `required` flag here; the value is limited to the enum below and defaults to 1day.
+      - name: interval
+        description: Time interval
+        default: 1day
+        enum:
+          - 1h
+          - 1day
+        type: str
+    endpoint:
+      name: twelvedata_api
+      path: /time_series
+      # Same header-based API key as the stock_quote target.
+      http_headers:
+        Authorization: "apikey $TWELVEDATA_API_KEY"
+    system_prompt: |
+      You are a helpful stock exchange assistant. You are given stock symbol along with its historical data in json format. Your task is to parse the data and present it in a human-readable format. Keep the details to highlevel only and be concise.
+
+# Tracing settings. NOTE(review): random_sampling of 100 presumably means every request
+# is sampled — confirm against the gateway's documentation.
+tracing:
+  random_sampling: 100
+  trace_arch_internal: true
--- a/demos/samples_python/stock_quote/docker-compose.yaml
+++ b/demos/samples_python/stock_quote/docker-compose.yaml
@ -0,0 +1,21 @@
+# Supporting containers for the stock quote demo: the chat UI and Jaeger.
+services:
+  # Chat front end, built from the shared chatbot_ui directory; host port 18080 maps to
+  # container port 8080.
+  chatbot_ui:
+    build: ../../shared/chatbot_ui
+    ports:
+      - "18080:8080"
+    environment:
+      # this is only because we are running the sample app in the same docker container environment as archgw
+      CHAT_COMPLETION_ENDPOINT: "http://host.docker.internal:10000/v1"
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    volumes:
+      - ./arch_config.yaml:/app/arch_config.yaml
+
+  # Jaeger, built from the shared jaeger directory; 16686, 4317 and 4318 are published
+  # on the same host ports.
+  jaeger:
+    build: ../../shared/jaeger
+    ports:
+      - "16686:16686"
+      - "4317:4317"
+      - "4318:4318"
--- a/demos/samples_python/stock_quote/run_demo.sh
+++ b/demos/samples_python/stock_quote/run_demo.sh
@ -22,7 +22,7 @@ start_demo() {
  echo "Starting Arch with arch_config.yaml..."
  archgw up arch_config.yaml

-  # Step 4: Start Network Agent
+  # Step 4: Start developer services
  echo "Starting Network Agent using Docker Compose..."
  docker compose up -d  # Run in detached mode
 }
--- a/demos/samples_python/stock_quote/stock_quote_demo.png
+++ b/demos/samples_python/stock_quote/stock_quote_demo.png
--- a/demos/samples_python/weather_forecast/Dockerfile
+++ b/demos/samples_python/weather_forecast/Dockerfile
--- a/demos/samples_python/weather_forecast/README.md
+++ b/demos/samples_python/weather_forecast/README.md
--- a/demos/samples_python/weather_forecast/arch_config.yaml
+++ b/demos/samples_python/weather_forecast/arch_config.yaml
@ -1,10 +1,11 @@
 version: "0.1-beta"

-listener:
-  address: 0.0.0.0
-  port: 10000
-  message_format: huggingface
-  connect_timeout: 0.005s
+listeners:
+  ingress_traffic:
+    address: 0.0.0.0
+    port: 10000
+    message_format: openai
+    timeout: 30s

 endpoints:
  weather_forecast_service:
--- a/demos/samples_python/weather_forecast/docker-compose-honeycomb.yaml
+++ b/demos/samples_python/weather_forecast/docker-compose-honeycomb.yaml
@ -11,7 +11,7 @@ services:

  chatbot_ui:
    build:
-      context: ../shared/chatbot_ui
+      context: ../../shared/chatbot_ui
    ports:
      - "18080:8080"
    environment:
@ -24,12 +24,12 @@ services:

  otel-collector:
    build:
-      context: ../shared/honeycomb/
+      context: ../../shared/honeycomb/
    ports:
      - "4317:4317"
      - "4318:4318"
    volumes:
-      - ../shared/honeycomb/otel-collector-config.yaml:/etc/otel-collector-config.yaml
+      - ../../shared/honeycomb/otel-collector-config.yaml:/etc/otel-collector-config.yaml
    env_file:
      - .env
    environment:
@ -37,10 +37,10 @@ services:

  prometheus:
    build:
-      context: ../shared/prometheus
+      context: ../../shared/prometheus

  grafana:
    build:
-      context: ../shared/grafana
+      context: ../../shared/grafana
    ports:
      - "3000:3000"
--- a/demos/samples_python/weather_forecast/docker-compose-jaeger.yaml
+++ b/demos/samples_python/weather_forecast/docker-compose-jaeger.yaml
@ -11,7 +11,7 @@ services:

  chatbot_ui:
    build:
-      context: ../shared/chatbot_ui
+      context: ../../shared/chatbot_ui
    ports:
      - "18080:8080"
    environment:
@ -24,7 +24,7 @@ services:

  jaeger:
    build:
-      context: ../shared/jaeger
+      context: ../../shared/jaeger
    ports:
      - "16686:16686"
      - "4317:4317"
@ -32,10 +32,10 @@ services:

  prometheus:
    build:
-      context: ../shared/prometheus
+      context: ../../shared/prometheus

  grafana:
    build:
-      context: ../shared/grafana
+      context: ../../shared/grafana
    ports:
      - "3000:3000"
--- a/demos/samples_python/weather_forecast/docker-compose-logfire.yaml
+++ b/demos/samples_python/weather_forecast/docker-compose-logfire.yaml
@ -11,7 +11,7 @@ services:

  chatbot_ui:
    build:
-      context: ../shared/chatbot_ui
+      context: ../../shared/chatbot_ui
    ports:
      - "18080:8080"
    environment:
@ -24,12 +24,12 @@ services:

  otel-collector:
    build:
-      context: ../shared/logfire/
+      context: ../../shared/logfire/
    ports:
      - "4317:4317"
      - "4318:4318"
    volumes:
-      - ../shared/logfire/otel-collector-config.yaml:/etc/otel-collector-config.yaml
+      - ../../shared/logfire/otel-collector-config.yaml:/etc/otel-collector-config.yaml
    env_file:
      - .env
    environment:
@ -37,10 +37,10 @@ services:

  prometheus:
    build:
-      context: ../shared/prometheus
+      context: ../../shared/prometheus

  grafana:
    build:
-      context: ../shared/grafana
+      context: ../../shared/grafana
    ports:
      - "3000:3000"
--- a/Show more
+++ b/Show more