Merge branch 'main' into adil/add_acm_demo

This commit is contained in:
Adil Hafeez 2025-01-28 13:33:42 -08:00
commit fc217fc5e8
No known key found for this signature in database
GPG key ID: 9B18EF7691369645
25 changed files with 163 additions and 31 deletions

View file

@ -9,7 +9,7 @@ listener:
llm_providers:
- name: gpt-4o
access_key: $OPENAI_API_KEY
provider: openai
provider_interface: openai
model: gpt-4o
system_prompt: |

View file

@ -7,6 +7,7 @@ listener:
connect_timeout: 0.005s
llm_providers:
- name: local-llama
provider_interface: openai
model: llama3.2

View file

@ -7,7 +7,7 @@ listener:
# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
llm_providers:
- name: OpenAI
provider: openai
provider_interface: openai
access_key: $OPENAI_API_KEY
model: gpt-4o-mini
default: true

View file

@ -9,7 +9,7 @@ system_prompt: |
llm_providers:
- name: OpenAI
provider: openai
provider_interface: openai
access_key: $OPENAI_API_KEY
model: gpt-4o
default: true

View file

@ -1,5 +1,5 @@
# LLM Routing
This demo shows how you can arch gateway to manage keys and route to appropricate LLM.
This demo shows how you can use arch gateway to manage keys and route to the appropriate LLM.
# Starting the demo
1. Please make sure the [pre-requisites](https://github.com/katanemo/arch/?tab=readme-ov-file#prerequisites) are installed correctly
@ -9,11 +9,50 @@ This demo shows how you can arch gateway to manage keys and route to appropricat
```
1. Navigate to http://localhost:18080/
The following screenshot shows an example interaction with arch gateway demonstrating dynamic routing. You can select between different LLMs using the "override model" option in the chat UI.
![LLM Routing Demo](llm_routing_demo.png)
You can also pass in a header to override the model when sending a prompt. The following example shows how you can use the `x-arch-llm-provider-hint` header to override model selection:
```bash
$ curl --header 'Content-Type: application/json' \
--header 'x-arch-llm-provider-hint: ministral-3b' \
--data '{"messages": [{"role": "user","content": "hello"}]}' \
http://localhost:12000/v1/chat/completions 2> /dev/null | jq .
{
"id": "xxx",
"object": "chat.completion",
"created": 1737760394,
"model": "ministral-3b-latest",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"tool_calls": null,
"content": "Hello! How can I assist you today? Let's chat about anything you'd like. 😊"
},
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 4,
"total_tokens": 25,
"completion_tokens": 21
}
}
```
# Observability
Arch gateway publishes stats endpoint at http://localhost:19901/stats. In this demo we are using prometheus to pull stats from arch and we are using grafana to visalize the stats in dashboard. To see grafana dashboard follow instructions below,
Arch gateway publishes a stats endpoint at http://localhost:19901/stats. In this demo we use prometheus to pull stats from arch and grafana to visualize the stats in a dashboard. To see the grafana dashboard, follow the instructions below:
1. Navigate to http://localhost:3000/ to open grafana UI (use admin/grafana as credentials)
1. From grafana left nav click on dashboards and select "Intelligent Gateway Overview" to view arch gateway stats
1. For tracing you can head over to http://localhost:16686/ to view recent traces.
# Selecting different LLM
You can pick different LLM based on header `x-arch-llm-provider-hint` to override default LLM.
The following is a screenshot of the tracing UI showing a call received by arch gateway and the upstream call it makes to the LLM:
![Jaeger Tracing](jaeger_tracing_llm_routing.png)

View file

@ -9,23 +9,23 @@ listener:
llm_providers:
- name: gpt-4o-mini
access_key: $OPENAI_API_KEY
provider: openai
provider_interface: openai
model: gpt-4o-mini
default: true
- name: gpt-3.5-turbo-0125
access_key: $OPENAI_API_KEY
provider: openai
provider_interface: openai
model: gpt-3.5-turbo-0125
- name: gpt-4o
access_key: $OPENAI_API_KEY
provider: openai
provider_interface: openai
model: gpt-4o
- name: ministral-3b
access_key: $MISTRAL_API_KEY
provider: mistral
provider_interface: mistral
model: ministral-3b-latest
tracing:

Binary file not shown.

After

Width:  |  Height:  |  Size: 273 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 284 KiB

View file

@ -0,0 +1,47 @@
#!/bin/bash
# Demo launcher. With no argument (or any argument other than "down") the demo
# is started; with "down" as the first argument it is stopped.
# Abort as soon as any command exits with a non-zero status.
set -e
# Function to start the demo
# Ensures a .env file holding the OpenAI key exists in the current directory,
# then starts Arch and the Docker Compose services.
# Globals:   OPENAI_API_KEY (read; only required when .env does not exist yet)
# Outputs:   progress messages on stdout, error messages on stderr
# Exits:     with status 1 when .env is absent and OPENAI_API_KEY is empty/unset
start_demo() {
  # Step 1: Check if .env file exists
  if [ -f ".env" ]; then
    echo ".env file already exists. Skipping creation."
  else
    # Step 2: Create `.env` file and set OpenAI key
    if [ -z "${OPENAI_API_KEY:-}" ]; then
      # Diagnostics go to stderr so they are not swallowed when stdout is piped.
      echo "Error: OPENAI_API_KEY environment variable is not set for the demo." >&2
      exit 1
    fi

    echo "Creating .env file..."
    # The file stores a secret, so create it owner-readable only. The umask is
    # changed inside a subshell to leave the caller's umask untouched.
    (
      umask 077
      printf 'OPENAI_API_KEY=%s\n' "$OPENAI_API_KEY" > .env
    )
    echo ".env file created with OPENAI_API_KEY."
  fi

  # Step 3: Start Arch
  echo "Starting Arch with arch_config.yaml..."
  archgw up arch_config.yaml

  # Step 4: Start LLM Routing
  echo "Starting LLM Routing using Docker Compose..."
  docker compose up -d # Run in detached mode
}
# Function to stop the demo
# Stops the Docker Compose services and then Arch. Both steps are always
# attempted, even when the first one fails, so a failing
# `docker compose down` cannot leave Arch running.
# Outputs:   progress messages on stdout
# Returns:   0 when both steps succeed, otherwise the status of the first
#            step that failed
stop_demo() {
  local compose_status=0
  local arch_status=0

  # Step 1: Stop Docker Compose services
  echo "Stopping LLM Routing using Docker Compose..."
  # `|| ...` captures the failure instead of letting `set -e` abort here.
  docker compose down || compose_status=$?

  # Step 2: Stop Arch
  echo "Stopping Arch..."
  archgw down || arch_status=$?

  if [ "$compose_status" -ne 0 ]; then
    return "$compose_status"
  fi
  return "$arch_status"
}
# Main script logic
# The first positional argument selects the action: "down" tears the demo
# down; anything else (including no argument at all) brings it up.
case "$1" in
  down)
    stop_demo
    ;;
  *)
    # Default action is to bring the demo up
    start_demo
    ;;
esac

View file

@ -14,7 +14,7 @@ endpoints:
llm_providers:
- name: gpt-4o-mini
access_key: $OPENAI_API_KEY
provider: openai
provider_interface: openai
model: gpt-4o-mini
default: true

View file

@ -7,7 +7,7 @@ listener:
# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
llm_providers:
- name: OpenAI
provider: openai
provider_interface: openai
access_key: $OPENAI_API_KEY
model: gpt-3.5-turbo
default: true

View file

@ -18,18 +18,18 @@ overrides:
llm_providers:
- name: gpt-4o-mini
access_key: $OPENAI_API_KEY
provider: openai
provider_interface: openai
model: gpt-4o-mini
default: true
- name: gpt-3.5-turbo-0125
access_key: $OPENAI_API_KEY
provider: openai
provider_interface: openai
model: gpt-3.5-turbo-0125
- name: gpt-4o
access_key: $OPENAI_API_KEY
provider: openai
provider_interface: openai
model: gpt-4o
system_prompt: |