mirror of
https://github.com/katanemo/plano.git
synced 2026-06-26 15:39:40 +02:00
initial commit
This commit is contained in:
parent
1f95fac4af
commit
1d19f0c2f7
36 changed files with 3003 additions and 109 deletions
58
demos/use_cases/preference_based_routing/README.md
Normal file
58
demos/use_cases/preference_based_routing/README.md
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
# LLM Routing
|
||||
This demo shows how you can use arch gateway to manage keys and route requests to upstream LLMs.
|
||||
|
||||
# Starting the demo
|
||||
1. Please make sure the [pre-requisites](https://github.com/katanemo/arch/?tab=readme-ov-file#prerequisites) are installed correctly
|
||||
1. Start Arch
|
||||
```sh
|
||||
sh run_demo.sh
|
||||
```
|
||||
1. Navigate to http://localhost:18080/
|
||||
|
||||
The following screenshot shows an example interaction with arch gateway demonstrating dynamic routing. You can select between different LLMs using the "override model" option in the chat UI.
|
||||
|
||||

|
||||
|
||||
You can also pass in a header to override the model when sending a prompt. The following example shows how you can use the `x-arch-llm-provider-hint` header to override model selection:
|
||||
|
||||
```bash
|
||||
|
||||
$ curl --header 'Content-Type: application/json' \
|
||||
--header 'x-arch-llm-provider-hint: ministral-3b' \
|
||||
--data '{"messages": [{"role": "user","content": "hello"}]}' \
|
||||
http://localhost:12000/v1/chat/completions 2> /dev/null | jq .
|
||||
{
|
||||
"id": "xxx",
|
||||
"object": "chat.completion",
|
||||
"created": 1737760394,
|
||||
"model": "ministral-3b-latest",
|
||||
"choices": [
|
||||
{
|
||||
"index": 0,
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"tool_calls": null,
|
||||
"content": "Hello! How can I assist you today? Let's chat about anything you'd like. 😊"
|
||||
},
|
||||
"finish_reason": "stop"
|
||||
}
|
||||
],
|
||||
"usage": {
|
||||
"prompt_tokens": 4,
|
||||
"total_tokens": 25,
|
||||
"completion_tokens": 21
|
||||
}
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
# Observability
|
||||
Arch gateway publishes a stats endpoint at http://localhost:19901/stats. In this demo we use prometheus to pull stats from arch and grafana to visualize them in a dashboard. To see the grafana dashboard, follow the instructions below:
|
||||
|
||||
1. Navigate to http://localhost:3000/ to open grafana UI (use admin/grafana as credentials)
|
||||
1. From grafana left nav click on dashboards and select "Intelligent Gateway Overview" to view arch gateway stats
|
||||
1. For tracing you can head over to http://localhost:16686/ to view recent traces.
|
||||
|
||||
The following is a screenshot of the tracing UI showing a call received by arch gateway and the upstream call it makes to the LLM:
|
||||
|
||||

|
||||
33
demos/use_cases/preference_based_routing/arch_config.yaml
Normal file
33
demos/use_cases/preference_based_routing/arch_config.yaml
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
version: "0.1-beta"
|
||||
|
||||
endpoints:
|
||||
gcp_hosted_outer_llm:
|
||||
endpoint: 34.46.85.85:8000
|
||||
http_host: 34.46.85.85
|
||||
# endpoint: host.docker.internal:11223
|
||||
|
||||
listeners:
|
||||
egress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 12000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
llm_providers:
|
||||
|
||||
- name: gpt-4o
|
||||
provider_interface: openai
|
||||
access_key: $OPENAI_API_KEY
|
||||
model: gpt-4o
|
||||
usage: |
|
||||
complex reasoning problem, require multi step answer
|
||||
|
||||
- name: o4-mini
|
||||
provider_interface: openai
|
||||
access_key: $OPENAI_API_KEY
|
||||
model: o4-mini
|
||||
usage: |
|
||||
simple requests, basic fact retrieval, easy to answer
|
||||
|
||||
tracing:
|
||||
random_sampling: 100
|
||||
|
|
@ -0,0 +1,62 @@
|
|||
import json
|
||||
import yaml
|
||||
|
||||
system_prompt = """
|
||||
You are an advanced Routing Assistant designed to select the optimal route based on user requests.
|
||||
Your task is to analyze conversations and match them to the most appropriate predefined route.
|
||||
Review the available routes config:
|
||||
|
||||
# ROUTES CONFIG START
|
||||
{routes}
|
||||
# ROUTES CONFIG END
|
||||
|
||||
Examine the following conversation between a user and an assistant:
|
||||
|
||||
# CONVERSATION START
|
||||
{conversation}
|
||||
# CONVERSATION END
|
||||
|
||||
Your goal is to identify the most appropriate route that matches the user's LATEST intent. Follow these steps:
|
||||
|
||||
1. Carefully read and analyze the provided conversation, focusing on the user's latest request and the conversation scenario.
|
||||
2. Check if the user's request and scenario matches any of the routes in the routing configuration (focus on the description).
|
||||
3. Find the route that best matches.
|
||||
4. Use context clues from the entire conversation to determine the best fit.
|
||||
5. Return the best match possible. You only response the name of the route that best matches the user's request, use the exact name in the routes config.
|
||||
6. If no route relatively close to matches the user's latest intent or user last message is thank you or greeting, return an empty route ''.
|
||||
"""
|
||||
|
||||
output_format = """
|
||||
# OUTPUT FORMAT
|
||||
Your final output must follow this JSON format:
|
||||
{
|
||||
"route": "route_name" # The matched route name, or empty string '' if no match
|
||||
}
|
||||
|
||||
Based on your analysis, provide only the JSON object as your final output with no additional text, explanations, or whitespace.
|
||||
"""
|
||||
|
||||
|
||||
# Load the gateway config and render every LLM provider as a route entry
# (name followed by "()", plus its JSON-encoded usage text) for the routing prompt.
with open("arch_config.yaml", "r", encoding="utf-8") as file:
    # safe_load returns None for an empty document; fall back to an empty
    # mapping so the lookup below does not raise AttributeError.
    data = yaml.safe_load(file) or {}

llm_provider_routes = "".join(
    f"- name: {llm_provider.get('name')}()\n"
    f" description: {json.dumps(llm_provider.get('usage'))}\n"
    # `or []` also covers an explicit `llm_providers: null` entry.
    for llm_provider in data.get("llm_providers") or []
)
|
||||
|
||||
|
||||
conversation = """
|
||||
user: Hello
|
||||
assistant: Hi! How can I assist you today?
|
||||
user: I want to know how far is sun from earth.
|
||||
"""
|
||||
|
||||
# Substitute the rendered routes and the sample conversation into the template.
system_prompt_formatted = system_prompt.format(
    conversation=conversation,
    routes=llm_provider_routes,
)

# Append the output-format instructions after the filled-in template,
# separated by a single newline.
system_prompt_2 = "\n".join((system_prompt_formatted, output_format))

# Show the prompt as plain text, then JSON-encoded (quotes and newlines escaped).
print(system_prompt_2)
print(json.dumps(system_prompt_2, indent=2))
|
||||
32
demos/use_cases/preference_based_routing/docker-compose.yaml
Normal file
32
demos/use_cases/preference_based_routing/docker-compose.yaml
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
services:
|
||||
|
||||
chatbot_ui:
|
||||
build:
|
||||
context: ../../shared/chatbot_ui
|
||||
dockerfile: Dockerfile
|
||||
ports:
|
||||
- "18080:8080"
|
||||
environment:
|
||||
- CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:12000/v1
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
volumes:
|
||||
- ./arch_config.yaml:/app/arch_config.yaml
|
||||
|
||||
jaeger:
|
||||
build:
|
||||
context: ../../shared/jaeger
|
||||
ports:
|
||||
- "16686:16686"
|
||||
- "4317:4317"
|
||||
- "4318:4318"
|
||||
|
||||
prometheus:
|
||||
build:
|
||||
context: ../../shared/prometheus
|
||||
|
||||
grafana:
|
||||
build:
|
||||
context: ../../shared/grafana
|
||||
ports:
|
||||
- "3000:3000"
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 273 KiB |
BIN
demos/use_cases/preference_based_routing/llm_routing_demo.png
Normal file
BIN
demos/use_cases/preference_based_routing/llm_routing_demo.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 284 KiB |
47
demos/use_cases/preference_based_routing/run_demo.sh
Normal file
47
demos/use_cases/preference_based_routing/run_demo.sh
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
# Function to start the demo
|
||||
start_demo() {
  # Make sure a .env file holding the OpenAI key exists before launching anything.
  if [ ! -f ".env" ]; then
    # No .env yet: it can only be generated from the caller's environment,
    # so abort when the key is missing.
    if [ -z "$OPENAI_API_KEY" ]; then
      echo "Error: OPENAI_API_KEY environment variable is not set for the demo."
      exit 1
    fi

    echo "Creating .env file..."
    echo "OPENAI_API_KEY=$OPENAI_API_KEY" > .env
    echo ".env file created with OPENAI_API_KEY."
  else
    echo ".env file already exists. Skipping creation."
  fi

  # Bring up Arch with the demo configuration.
  echo "Starting Arch with arch_config.yaml..."
  archgw up arch_config.yaml

  # Launch the Docker Compose services in detached mode.
  echo "Starting LLM Routing using Docker Compose..."
  docker compose up -d
}
|
||||
|
||||
# Function to stop the demo
|
||||
stop_demo() {
  # Tear down the Docker Compose services first.
  printf '%s\n' "Stopping LLM Routing using Docker Compose..."
  docker compose down

  # Then shut down Arch.
  printf '%s\n' "Stopping Arch..."
  archgw down
}
|
||||
|
||||
# Main script logic
|
||||
# Dispatch on the first argument: "down" stops the demo, anything else
# (including no argument) starts it.
# NOTE: `=` is the POSIX string-equality operator for `[`. The bash-only `==`
# errors out under a plain `sh` such as dash, which made `sh run_demo.sh down`
# fall through to the else branch and start the demo instead of stopping it.
if [ "$1" = "down" ]; then
  stop_demo
else
  # Default action is to bring the demo up
  start_demo
fi
|
||||
12
demos/use_cases/preference_based_routing/staff_req.json
Normal file
12
demos/use_cases/preference_based_routing/staff_req.json
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
{
|
||||
"model": "cotran2/llama-1b-4-26",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What is the capital of France?"
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"llm_providers": "[]"
|
||||
}
|
||||
}
|
||||
24
demos/use_cases/preference_based_routing/test.rest
Normal file
24
demos/use_cases/preference_based_routing/test.rest
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
@arch_llm_router_endpoint = http://34.30.16.38:8000
|
||||
|
||||
POST {{arch_llm_router_endpoint}}/v1/chat/completions HTTP/1.1
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "cotran2/llama-1b-4-26",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "You are an advanced Routing Assistant designed to select the optimal route based on user requests. \nYour task is to analyze conversations and match them to the most appropriate predefined route.\nReview the available routes config:\n\n# ROUTES CONFIG START\n- name: gpt-4o()\n description: \"complex reasoning problem, require multi step answer\\n\"\n- name: o4-mini()\n description: \"simple requests, basic fact retrieval, easy to answer\\n\"\n\n# ROUTES CONFIG END\n\nExamine the following conversation between a user and an assistant:\n\n# CONVERSATION START\n\nuser: Hello\nassistant: Hi! How can I assist you today?\nuser: List us presidents who are born in odd years and are still alive. Order them by their age and I also know what is their home city they were born. And what year they became president. Also give me summary of which president was the best for economy of the US.\n\n# CONVERSATION END\n\nYour goal is to identify the most appropriate route that matches the user's LATEST intent. Follow these steps:\n\n1. Carefully read and analyze the provided conversation, focusing on the user's latest request and the conversation scenario.\n2. Check if the user's request and scenario matches any of the routes in the routing configuration (focus on the description).\n3. Find the route that best matches.\n4. Use context clues from the entire conversation to determine the best fit.\n5. Return the best match possible. You only response the name of the route that best matches the user's request, use the exact name in the routes config.\n6. If no route relatively close to matches the user's latest intent or user last message is thank you or greeting, return an empty route ''. \n\n\n# OUTPUT FORMAT\nYour final output must follow this JSON format:\n{\n \"route\": \"route_name\" # The matched route name, or empty string '' if no match\n}\n\nBased on your analysis, provide only the JSON object as your final output with no additional text, explanations, or whitespace."
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
### test 2
|
||||
|
||||
POST {{arch_llm_router_endpoint}}/v1/chat/completions HTTP/1.1
|
||||
Content-Type: application/json
|
||||
|
||||
{"model":"cotran2/llama-1b-4-26","messages":[{"role":"user","content":"\nYou are an advanced Routing Assistant designed to select the optimal route based on user requests. \nYour task is to analyze conversations and match them to the most appropriate predefined route.\nReview the available routes config:\n\n# ROUTES CONFIG START\n- name: gpt-4o\n description: simple requests, basic fact retrieval, easy to answer\n- name: o4-mini()\n description: complex reasoning problem, require multi step answer\n# ROUTES CONFIG END\n\nExamine the following conversation between a user and an assistant:\n\n# CONVERSATION START\n[{\"role\":\"user\",\"content\":\"What is the capital of France?\"}]\n# CONVERSATION END\n\nYour goal is to identify the most appropriate route that matches the user's LATEST intent. Follow these steps:\n\n1. Carefully read and analyze the provided conversation, focusing on the user's latest request and the conversation scenario.\n2. Check if the user's request and scenario matches any of the routes in the routing configuration (focus on the description).\n3. Find the route that best matches.\n4. Use context clues from the entire conversation to determine the best fit.\n5. Return the best match possible. You only response the name of the route that best matches the user's request, use the exact name in the routes config.\n6. If no route relatively close to matches the user's latest intent or user last message is thank you or greeting, return an empty route ''. \n\n# OUTPUT FORMAT\nYour final output must follow this JSON format:\n{\n \"route\": \"route_name\" # The matched route name, or empty string '' if no match\n}\n\nBased on your analysis, provide only the JSON object as your final output with no additional text, explanations, or whitespace.\n"}],"stream":false}
|
||||
|
||||
### get model list
|
||||
GET http://34.46.85.85:8000/v1/models HTTP/1.1
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
You are an advanced Routing Assistant designed to select the optimal route based on user requests.
|
||||
Your task is to analyze conversations and match them to the most appropriate predefined route.
|
||||
Review the available routes config:
|
||||
|
||||
# ROUTES CONFIG START
|
||||
{}
|
||||
# ROUTES CONFIG END
|
||||
|
||||
Examine the following conversation between a user and an assistant:
|
||||
|
||||
# CONVERSATION START
|
||||
{}
|
||||
# CONVERSATION END
|
||||
|
||||
Your goal is to identify the most appropriate route that matches the user's LATEST intent. Follow these steps:
|
||||
|
||||
1. Carefully read and analyze the provided conversation, focusing on the user's latest request and the conversation scenario.
|
||||
2. Check if the user's request and scenario matches any of the routes in the routing configuration (focus on the description).
|
||||
3. Find the route that best matches.
|
||||
4. Use context clues from the entire conversation to determine the best fit.
|
||||
5. Return the best match possible. You only response the name of the route that best matches the user's request, use the exact name in the routes config.
|
||||
6. If no route relatively close to matches the user's latest intent or user last message is thank you or greeting, return an empty route ''.
|
||||
"""
|
||||
output_prompt = """
|
||||
# OUTPUT FORMAT
|
||||
Your final output must follow this JSON format:
|
||||
{
|
||||
"route": "route_name" # The matched route name, or empty string '' if no match
|
||||
}
|
||||
|
||||
Based on your analysis, provide only the JSON object as your final output with no additional text, explanations, or whitespace.
|
||||
Loading…
Add table
Add a link
Reference in a new issue