mirror of
https://github.com/katanemo/plano.git
synced 2026-06-29 15:49:40 +02:00
Merge branch 'main' into adil/agent_format
This commit is contained in:
commit
c1757bec88
26 changed files with 864 additions and 188 deletions
148
demos/use_cases/model_alias_routing/README.md
Normal file
148
demos/use_cases/model_alias_routing/README.md
Normal file
|
|
@ -0,0 +1,148 @@
|
|||
# Model Alias Demo Suite
|
||||
|
||||
This directory contains demos for the model alias feature in archgw.
|
||||
|
||||
## Overview
|
||||
|
||||
Model aliases allow clients to use friendly, semantic names instead of provider-specific model names. For example:
|
||||
- `arch.summarize.v1` → `gpt-4o-mini` (fast, cheap model for summaries)
|
||||
- `arch.reasoning.v1` → `gpt-4o` (capable model for complex reasoning)
|
||||
- `creative-model` → `claude-3-5-sonnet-20241022` (creative tasks)
|
||||
|
||||
## Configuration
|
||||
|
||||
The `arch_config_with_aliases.yaml` file defines several aliases:
|
||||
|
||||
```yaml
|
||||
# Model aliases - friendly names that map to actual provider names
|
||||
model_aliases:
|
||||
# Alias for summarization tasks -> fast/cheap model
|
||||
arch.summarize.v1:
|
||||
target: gpt-4o-mini
|
||||
|
||||
# Alias for general purpose tasks -> latest model
|
||||
arch.v1:
|
||||
target: o3
|
||||
|
||||
# Alias for reasoning tasks -> capable model
|
||||
arch.reasoning.v1:
|
||||
target: gpt-4o
|
||||
|
||||
# Alias for creative tasks -> Claude model
|
||||
arch.creative.v1:
|
||||
target: claude-3-5-sonnet-20241022
|
||||
|
||||
# Alias for quick responses -> fast model
|
||||
arch.fast.v1:
|
||||
target: claude-3-haiku-20240307
|
||||
|
||||
# Semantic aliases
|
||||
summary-model:
|
||||
target: gpt-4o-mini
|
||||
|
||||
chat-model:
|
||||
target: gpt-4o
|
||||
|
||||
creative-model:
|
||||
target: claude-3-5-sonnet-20241022
|
||||
```
|
||||
|
||||
## Prerequisites
|
||||
- Install all dependencies as described in the main Arch README ([link](https://github.com/katanemo/arch/?tab=readme-ov-file#prerequisites))
|
||||
- Set your API keys in your environment:
|
||||
- `export OPENAI_API_KEY=your-openai-key`
|
||||
- `export ANTHROPIC_API_KEY=your-anthropic-key` (optional, but recommended for Anthropic tests)
|
||||
|
||||
## How to Run
|
||||
|
||||
1. Start the demo:
|
||||
```sh
|
||||
bash run_demo.sh
|
||||
```
|
||||
- This will create a `.env` file with your API keys (if not present).
|
||||
- It then starts Arch Gateway with the model alias config (`arch_config_with_aliases.yaml`).
|
||||
|
||||
2. To stop the demo:
|
||||
```sh
|
||||
bash run_demo.sh down
|
||||
```
|
||||
- This will stop Arch Gateway and any related services.
|
||||
|
||||
## Example Requests
|
||||
|
||||
### OpenAI client with alias `arch.summarize.v1`
|
||||
```sh
|
||||
curl -sS -X POST "http://localhost:12000/v1/chat/completions" \
|
||||
-H "Authorization: Bearer test-key" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "arch.summarize.v1",
|
||||
"max_tokens": 50,
|
||||
"messages": [
|
||||
{ "role": "user",
|
||||
"content": "Hello, please respond with exactly: Hello from alias arch.summarize.v1!"
|
||||
}
|
||||
]
|
||||
}' | jq .
|
||||
```
|
||||
|
||||
### OpenAI client with alias `arch.v1`
|
||||
```sh
|
||||
curl -sS -X POST "http://localhost:12000/v1/chat/completions" \
|
||||
-H "Authorization: Bearer test-key" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "arch.v1",
|
||||
"max_tokens": 50,
|
||||
"messages": [
|
||||
{ "role": "user",
|
||||
"content": "Hello, please respond with exactly: Hello from alias arch.v1!"
|
||||
}
|
||||
]
|
||||
}' | jq .
|
||||
```
|
||||
|
||||
### Anthropic client with alias `arch.summarize.v1`
|
||||
```sh
|
||||
curl -sS -X POST "http://localhost:12000/v1/messages" \
|
||||
-H "x-api-key: test-key" \
|
||||
-H "anthropic-version: 2023-06-01" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "arch.summarize.v1",
|
||||
"max_tokens": 50,
|
||||
"messages": [
|
||||
{ "role": "user",
|
||||
"content": "Hello, please respond with exactly: Hello from alias arch.summarize.v1 via Anthropic!"
|
||||
}
|
||||
]
|
||||
}' | jq .
|
||||
```
|
||||
|
||||
### Anthropic client with alias `arch.v1`
|
||||
```sh
|
||||
curl -sS -X POST "http://localhost:12000/v1/messages" \
|
||||
-H "x-api-key: test-key" \
|
||||
-H "anthropic-version: 2023-06-01" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "arch.v1",
|
||||
"max_tokens": 50,
|
||||
"messages": [
|
||||
{ "role": "user",
|
||||
"content": "Hello, please respond with exactly: Hello from alias arch.v1 via Anthropic!"
|
||||
}
|
||||
]
|
||||
}' | jq .
|
||||
```
|
||||
|
||||
## Notes
|
||||
- The `.env` file will be created automatically if missing, with your API keys.
|
||||
- If `ANTHROPIC_API_KEY` is not set, Anthropic requests will not work.
|
||||
- You can add more aliases in `arch_config_with_aliases.yaml`.
|
||||
- All curl examples use `jq .` for pretty-printing JSON responses.
|
||||
|
||||
## Troubleshooting
|
||||
- Ensure your API keys are set in your environment before running the demo.
|
||||
- If you see errors about missing keys, set them and re-run the script.
|
||||
- For more details, see the main Arch documentation.
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
version: v0.1
|
||||
|
||||
listeners:
|
||||
egress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 12000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
llm_providers:
|
||||
# OpenAI Models
|
||||
- model: openai/gpt-4o-mini
|
||||
access_key: $OPENAI_API_KEY
|
||||
default: true
|
||||
|
||||
- model: openai/o3
|
||||
access_key: $OPENAI_API_KEY
|
||||
|
||||
- model: openai/gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
|
||||
# Anthropic Models
|
||||
- model: anthropic/claude-3-5-sonnet-20241022
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
- model: anthropic/claude-3-haiku-20240307
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
# Model aliases - friendly names that map to actual provider names
|
||||
model_aliases:
|
||||
# Alias for summarization tasks -> fast/cheap model
|
||||
arch.summarize.v1:
|
||||
target: gpt-4o-mini
|
||||
|
||||
# Alias for general purpose tasks -> latest model
|
||||
arch.v1:
|
||||
target: o3
|
||||
|
||||
# Alias for reasoning tasks -> capable model
|
||||
arch.reasoning.v1:
|
||||
target: gpt-4o
|
||||
|
||||
# Alias for creative tasks -> Claude model
|
||||
arch.creative.v1:
|
||||
target: claude-3-5-sonnet-20241022
|
||||
|
||||
# Alias for quick responses -> fast model
|
||||
arch.fast.v1:
|
||||
target: claude-3-haiku-20240307
|
||||
|
||||
# Semantic aliases
|
||||
summary-model:
|
||||
target: gpt-4o-mini
|
||||
|
||||
chat-model:
|
||||
target: gpt-4o
|
||||
|
||||
creative-model:
|
||||
target: claude-3-5-sonnet-20241022
|
||||
60
demos/use_cases/model_alias_routing/run_demo.sh
Normal file
60
demos/use_cases/model_alias_routing/run_demo.sh
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
# Function to start the demo
#
# Creates a `.env` file holding the API keys (unless one already exists in the
# current directory), starts Arch with arch_config_with_aliases.yaml, and
# prints a sample curl request for testing model alias routing.
#
# Globals:   OPENAI_API_KEY    (read; required when .env has to be created)
#            ANTHROPIC_API_KEY (read; optional)
# Outputs:   progress messages and the sample curl command on stdout;
#            the missing-key error/warning on stderr
# Exits:     with status 1 when .env is missing and OPENAI_API_KEY is empty
#
# Written without bashisms so it behaves the same under `sh` and `bash`.
start_demo() {
  # Step 1: Check if .env file exists
  if [ -f ".env" ]; then
    echo ".env file already exists. Skipping creation."
  else
    # Step 2: Create `.env` file and set API keys
    if [ -z "${OPENAI_API_KEY:-}" ]; then
      echo "Error: OPENAI_API_KEY environment variable is not set for the demo." >&2
      exit 1
    fi
    if [ -z "${ANTHROPIC_API_KEY:-}" ]; then
      echo "Warning: ANTHROPIC_API_KEY environment variable is not set. Anthropic features may not work." >&2
    fi

    echo "Creating .env file..."
    # The file contains secrets: create it readable by the owner only. The
    # subshell keeps the tightened umask from leaking into the rest of the run.
    (
      umask 077
      printf 'OPENAI_API_KEY=%s\n' "$OPENAI_API_KEY" > .env
      if [ -n "${ANTHROPIC_API_KEY:-}" ]; then
        printf 'ANTHROPIC_API_KEY=%s\n' "$ANTHROPIC_API_KEY" >> .env
      fi
    )
    echo ".env file created with API keys."
  fi

  # Step 3: Start Arch
  echo "Starting Arch with arch_config_with_aliases.yaml..."
  archgw up arch_config_with_aliases.yaml

  # printf instead of echo: bash's echo prints "\n" literally unless given -e.
  printf '\n\nArch started successfully.\n'
  printf '%s\n\n' "Please run the following CURL command to test model alias routing. Additional instructions are in the README.md file."
  # Quoted heredoc delimiter: the command is printed verbatim, keeping the
  # trailing backslashes so it can be copy-pasted as a multi-line command.
  cat <<'EOF'
curl -sS -X POST "http://localhost:12000/v1/chat/completions" \
  -H "Authorization: Bearer test-key" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "arch.summarize.v1",
    "max_tokens": 50,
    "messages": [
      { "role": "user",
        "content": "Hello, please respond with exactly: Hello from alias arch.summarize.v1!"
      }
    ]
  }' | jq .
EOF
}
|
||||
|
||||
# Function to stop the demo
#
# Announces the shutdown on stdout and then runs `archgw down`.
# Returns: the exit status of `archgw down`.
stop_demo() {
  printf '%s\n' "Stopping Arch..."
  archgw down
}
|
||||
|
||||
# Main script logic
#
# `down` as the first argument stops the demo; anything else (including no
# argument) brings it up.
#
# A `case` match is used instead of `[ "$1" == "down" ]`: `==` inside `[ ]` is
# a bashism, and when the script is launched as `sh run_demo.sh down` on a
# system whose sh is dash the test errors out and falls through to starting
# the demo.
case "${1:-}" in
  down)
    stop_demo
    ;;
  *)
    # Default action is to bring the demo up
    start_demo
    ;;
esac
|
||||
|
|
@ -14,9 +14,9 @@ Make sure your machine is up to date with [latest version of archgw]([url](https
|
|||
2. start archgw in the foreground
|
||||
```bash
|
||||
(venv) $ archgw up --service archgw --foreground
|
||||
2025-05-30 18:00:09,953 - cli.main - INFO - Starting archgw cli version: 0.3.11
|
||||
2025-05-30 18:00:09,953 - cli.main - INFO - Starting archgw cli version: 0.3.12
|
||||
2025-05-30 18:00:09,953 - cli.main - INFO - Validating /Users/adilhafeez/src/intelligent-prompt-gateway/demos/use_cases/preference_based_routing/arch_config.yaml
|
||||
2025-05-30 18:00:10,422 - cli.core - INFO - Starting arch gateway, image name: archgw, tag: katanemo/archgw:0.3.11
|
||||
2025-05-30 18:00:10,422 - cli.core - INFO - Starting arch gateway, image name: archgw, tag: katanemo/archgw:0.3.12
|
||||
2025-05-30 18:00:10,662 - cli.core - INFO - archgw status: running, health status: starting
|
||||
2025-05-30 18:00:11,712 - cli.core - INFO - archgw status: running, health status: starting
|
||||
2025-05-30 18:00:12,761 - cli.core - INFO - archgw is running and is healthy!
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue