Merge branch 'main' into adil/add_acm_demo

2026-06-26 15:39:40 +02:00 · 2025-01-28 13:33:42 -08:00 · 2025-01-28 13:33:42 -08:00 · fc217fc5e8
commit fc217fc5e8
parent 5d4cd624c1 a7feb6bffb
25 changed files with 163 additions and 31 deletions
--- a/demos/currency_exchange/arch_config.yaml
+++ b/demos/currency_exchange/arch_config.yaml
@ -9,7 +9,7 @@ listener:
 llm_providers:
  - name: gpt-4o
    access_key: $OPENAI_API_KEY
-    provider: openai
+    provider_interface: openai
    model: gpt-4o

 system_prompt: |
--- a/demos/currency_exchange_ollama/arch_config.yaml
+++ b/demos/currency_exchange_ollama/arch_config.yaml
@ -7,6 +7,7 @@ listener:
  connect_timeout: 0.005s

 llm_providers:
+
  - name: local-llama
    provider_interface: openai
    model: llama3.2
--- a/demos/hr_agent/arch_config.yaml
+++ b/demos/hr_agent/arch_config.yaml
@ -7,7 +7,7 @@ listener:
 # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
 llm_providers:
  - name: OpenAI
-    provider: openai
+    provider_interface: openai
    access_key: $OPENAI_API_KEY
    model: gpt-4o-mini
    default: true
--- a/demos/insurance_agent/arch_config.yaml
+++ b/demos/insurance_agent/arch_config.yaml
@ -9,7 +9,7 @@ system_prompt: |

 llm_providers:
  - name: OpenAI
-    provider: openai
+    provider_interface: openai
    access_key: $OPENAI_API_KEY
    model: gpt-4o
    default: true
--- a/demos/llm_routing/README.md
+++ b/demos/llm_routing/README.md
@ -1,5 +1,5 @@
 # LLM Routing
-This demo shows how you can arch gateway to manage keys and route to appropricate LLM.
+This demo shows how you can use arch gateway to manage keys and route to the appropriate LLM.

 # Starting the demo
 1. Please make sure the [pre-requisites](https://github.com/katanemo/arch/?tab=readme-ov-file#prerequisites) are installed correctly
@ -9,11 +9,50 @@ This demo shows how you can arch gateway to manage keys and route to appropricat
   ```
 1. Navigate to http://localhost:18080/

+The following screenshot shows an example interaction with arch gateway using dynamic routing. You can select between different LLMs using the "override model" option in the chat UI.
+
+![LLM Routing Demo](llm_routing_demo.png)
+
+You can also pass a header to override the model when sending a prompt. The following example shows how to use the `x-arch-llm-provider-hint` header to override model selection:
+
+```bash
+
+$ curl --header 'Content-Type: application/json' \
+  --header 'x-arch-llm-provider-hint: ministral-3b' \
+  --data '{"messages": [{"role": "user","content": "hello"}]}' \
+  http://localhost:12000/v1/chat/completions 2> /dev/null | jq .
+{
+  "id": "xxx",
+  "object": "chat.completion",
+  "created": 1737760394,
+  "model": "ministral-3b-latest",
+  "choices": [
+    {
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "tool_calls": null,
+        "content": "Hello! How can I assist you today? Let's chat about anything you'd like. 😊"
+      },
+      "finish_reason": "stop"
+    }
+  ],
+  "usage": {
+    "prompt_tokens": 4,
+    "total_tokens": 25,
+    "completion_tokens": 21
+  }
+}
+
+```
+
 # Observability
-Arch gateway publishes stats endpoint at http://localhost:19901/stats. In this demo we are using prometheus to pull stats from arch and we are using grafana to visalize the stats in dashboard. To see grafana dashboard follow instructions below,
+Arch gateway publishes a stats endpoint at http://localhost:19901/stats. In this demo we use prometheus to pull stats from arch and grafana to visualize the stats in a dashboard. To see the grafana dashboard, follow the instructions below:

 1. Navigate to http://localhost:3000/ to open grafana UI (use admin/grafana as credentials)
 1. From grafana left nav click on dashboards and select "Intelligent Gateway Overview" to view arch gateway stats
+1. For tracing you can head over to http://localhost:16686/ to view recent traces.

-# Selecting different LLM
-You can pick different LLM based on header `x-arch-llm-provider-hint` to override default LLM.
+The following screenshot of the tracing UI shows a call received by arch gateway and the upstream call it makes to the LLM:
+
+![Jaeger Tracing](jaeger_tracing_llm_routing.png)
--- a/demos/llm_routing/arch_config.yaml
+++ b/demos/llm_routing/arch_config.yaml
@ -9,23 +9,23 @@ listener:
 llm_providers:
  - name: gpt-4o-mini
    access_key: $OPENAI_API_KEY
-    provider: openai
+    provider_interface: openai
    model: gpt-4o-mini
    default: true

  - name: gpt-3.5-turbo-0125
    access_key: $OPENAI_API_KEY
-    provider: openai
+    provider_interface: openai
    model: gpt-3.5-turbo-0125

  - name: gpt-4o
    access_key: $OPENAI_API_KEY
-    provider: openai
+    provider_interface: openai
    model: gpt-4o

  - name: ministral-3b
    access_key: $MISTRAL_API_KEY
-    provider: mistral
+    provider_interface: mistral
    model: ministral-3b-latest

 tracing:
--- a/demos/llm_routing/jaeger_tracing_llm_routing.png
+++ b/demos/llm_routing/jaeger_tracing_llm_routing.png
--- a/demos/llm_routing/llm_routing_demo.png
+++ b/demos/llm_routing/llm_routing_demo.png
--- a/demos/llm_routing/run_demo.sh
+++ b/demos/llm_routing/run_demo.sh
@ -0,0 +1,47 @@
+#!/bin/bash
+set -e
+
+# Start the demo: make sure a .env file with the API keys exists, then bring
+# up the Arch gateway followed by the Docker Compose services.
+#
+# Reads OPENAI_API_KEY (required when .env must be created) and
+# MISTRAL_API_KEY (optional) from the environment. Exits with status 1 when
+# .env is missing and OPENAI_API_KEY is not set.
+start_demo() {
+  # Step 1: Check if .env file exists
+  if [ -f ".env" ]; then
+    echo ".env file already exists. Skipping creation."
+  else
+    # Step 2: Create `.env` file and set the provider keys
+    if [ -z "$OPENAI_API_KEY" ]; then
+      echo "Error: OPENAI_API_KEY environment variable is not set for the demo." >&2
+      exit 1
+    fi
+
+    echo "Creating .env file..."
+    echo "OPENAI_API_KEY=$OPENAI_API_KEY" > .env
+
+    # arch_config.yaml for this demo also references $MISTRAL_API_KEY (for the
+    # ministral-3b provider), so persist it alongside the OpenAI key when it
+    # is available. It stays optional so OpenAI-only setups keep working.
+    if [ -n "$MISTRAL_API_KEY" ]; then
+      echo "MISTRAL_API_KEY=$MISTRAL_API_KEY" >> .env
+      echo ".env file created with OPENAI_API_KEY and MISTRAL_API_KEY."
+    else
+      echo ".env file created with OPENAI_API_KEY."
+      echo "Warning: MISTRAL_API_KEY is not set; arch_config.yaml references it for the ministral-3b provider." >&2
+    fi
+  fi
+
+  # Step 3: Start Arch
+  echo "Starting Arch with arch_config.yaml..."
+  archgw up arch_config.yaml
+
+  # Step 4: Start LLM Routing
+  echo "Starting LLM Routing using Docker Compose..."
+  docker compose up -d  # Run in detached mode
+}
+
+# Tear the demo down in the reverse order of start_demo: the Docker Compose
+# services go first, then the Arch gateway.
+stop_demo() {
+  # Compose services
+  printf '%s\n' "Stopping LLM Routing using Docker Compose..."
+  docker compose down
+
+  # Arch gateway
+  printf '%s\n' "Stopping Arch..."
+  archgw down
+}
+
+# Entry point: "down" as the first argument stops the demo; any other
+# argument (or none at all) brings the demo up.
+case "$1" in
+  down)
+    stop_demo
+    ;;
+  *)
+    start_demo
+    ;;
+esac
--- a/demos/multi_turn_rag_agent/arch_config.yaml
+++ b/demos/multi_turn_rag_agent/arch_config.yaml
@ -14,7 +14,7 @@ endpoints:
 llm_providers:
  - name: gpt-4o-mini
    access_key: $OPENAI_API_KEY
-    provider: openai
+    provider_interface: openai
    model: gpt-4o-mini
    default: true

--- a/demos/network_agent/arch_config.yaml
+++ b/demos/network_agent/arch_config.yaml
@ -7,7 +7,7 @@ listener:
 # Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
 llm_providers:
  - name: OpenAI
-    provider: openai
+    provider_interface: openai
    access_key: $OPENAI_API_KEY
    model: gpt-3.5-turbo
    default: true
--- a/demos/weather_forecast/arch_config.yaml
+++ b/demos/weather_forecast/arch_config.yaml
@ -18,18 +18,18 @@ overrides:
 llm_providers:
  - name: gpt-4o-mini
    access_key: $OPENAI_API_KEY
-    provider: openai
+    provider_interface: openai
    model: gpt-4o-mini
    default: true

  - name: gpt-3.5-turbo-0125
    access_key: $OPENAI_API_KEY
-    provider: openai
+    provider_interface: openai
    model: gpt-3.5-turbo-0125

  - name: gpt-4o
    access_key: $OPENAI_API_KEY
-    provider: openai
+    provider_interface: openai
    model: gpt-4o

 system_prompt: |