mirror of
https://github.com/katanemo/plano.git
synced 2026-05-21 13:55:15 +02:00
Merge remote-tracking branch 'origin/main' into adilhafeez/model-listener-filter-chain
This commit is contained in:
commit
9e5c908306
36 changed files with 642 additions and 347 deletions
|
|
@ -18,22 +18,24 @@ start_demo() {
|
||||||
echo ".env file created with OPENAI_API_KEY."
|
echo ".env file created with OPENAI_API_KEY."
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Step 3: Start Plano
|
# Step 3: Optionally start UI services (AnythingLLM, Jaeger)
|
||||||
|
# Jaeger must start before Plano so it can bind the OTEL port (4317)
|
||||||
|
if [ "$1" == "--with-ui" ]; then
|
||||||
|
echo "Starting UI services (AnythingLLM, Jaeger)..."
|
||||||
|
docker compose up -d
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Step 4: Start Plano
|
||||||
echo "Starting Plano with config.yaml..."
|
echo "Starting Plano with config.yaml..."
|
||||||
planoai up config.yaml
|
planoai up config.yaml
|
||||||
|
|
||||||
# Step 4: Start developer services
|
|
||||||
echo "Starting Network Agent using Docker Compose..."
|
|
||||||
docker compose up -d # Run in detached mode
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Function to stop the demo
|
# Function to stop the demo
|
||||||
stop_demo() {
|
stop_demo() {
|
||||||
# Step 1: Stop Docker Compose services
|
# Stop Docker Compose services if running
|
||||||
echo "Stopping Network Agent using Docker Compose..."
|
docker compose down 2>/dev/null || true
|
||||||
docker compose down
|
|
||||||
|
|
||||||
# Step 2: Stop Plano
|
# Stop Plano
|
||||||
echo "Stopping Plano..."
|
echo "Stopping Plano..."
|
||||||
planoai down
|
planoai down
|
||||||
}
|
}
|
||||||
|
|
@ -42,6 +44,5 @@ stop_demo() {
|
||||||
if [ "$1" == "down" ]; then
|
if [ "$1" == "down" ]; then
|
||||||
stop_demo
|
stop_demo
|
||||||
else
|
else
|
||||||
# Default action is to bring the demo up
|
start_demo "$1"
|
||||||
start_demo
|
|
||||||
fi
|
fi
|
||||||
|
|
|
||||||
|
|
@ -1,15 +1,4 @@
|
||||||
services:
|
services:
|
||||||
rag_energy_source_agent:
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
dockerfile: Dockerfile
|
|
||||||
ports:
|
|
||||||
- "18083:80"
|
|
||||||
healthcheck:
|
|
||||||
test: ["CMD", "curl" ,"http://localhost:80/healthz"]
|
|
||||||
interval: 5s
|
|
||||||
retries: 20
|
|
||||||
|
|
||||||
anythingllm:
|
anythingllm:
|
||||||
image: mintplexlabs/anythingllm
|
image: mintplexlabs/anythingllm
|
||||||
restart: always
|
restart: always
|
||||||
|
|
|
||||||
12
demos/advanced/multi_turn_rag/pyproject.toml
Normal file
12
demos/advanced/multi_turn_rag/pyproject.toml
Normal file
|
|
@ -0,0 +1,12 @@
|
||||||
|
[project]
|
||||||
|
name = "multi-turn-rag"
|
||||||
|
version = "0.1.0"
|
||||||
|
requires-python = ">=3.12"
|
||||||
|
dependencies = [
|
||||||
|
"fastapi",
|
||||||
|
"uvicorn",
|
||||||
|
"pydantic>=2.8",
|
||||||
|
"httpx>=0.27",
|
||||||
|
"openai>=1.51",
|
||||||
|
"python-dotenv>=1.0",
|
||||||
|
]
|
||||||
|
|
@ -18,22 +18,32 @@ start_demo() {
|
||||||
echo ".env file created with OPENAI_API_KEY."
|
echo ".env file created with OPENAI_API_KEY."
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Step 3: Start Plano
|
# Step 3: Optionally start UI services (AnythingLLM)
|
||||||
|
# UI services must start before Plano to avoid OTEL port conflicts
|
||||||
|
if [ "$1" == "--with-ui" ]; then
|
||||||
|
echo "Starting UI services (AnythingLLM)..."
|
||||||
|
docker compose up -d
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Step 4: Start Plano
|
||||||
echo "Starting Plano with config.yaml..."
|
echo "Starting Plano with config.yaml..."
|
||||||
planoai up config.yaml
|
planoai up config.yaml
|
||||||
|
|
||||||
# Step 4: Start Network Agent
|
# Step 5: Start agents natively
|
||||||
echo "Starting HR Agent using Docker Compose..."
|
echo "Starting agents..."
|
||||||
docker compose up -d # Run in detached mode
|
bash start_agents.sh &
|
||||||
}
|
}
|
||||||
|
|
||||||
# Function to stop the demo
|
# Function to stop the demo
|
||||||
stop_demo() {
|
stop_demo() {
|
||||||
# Step 1: Stop Docker Compose services
|
# Stop agents
|
||||||
echo "Stopping HR Agent using Docker Compose..."
|
echo "Stopping agents..."
|
||||||
docker compose down -v
|
pkill -f start_agents.sh 2>/dev/null || true
|
||||||
|
|
||||||
# Step 2: Stop Plano
|
# Stop Docker Compose services if running
|
||||||
|
docker compose down 2>/dev/null || true
|
||||||
|
|
||||||
|
# Stop Plano
|
||||||
echo "Stopping Plano..."
|
echo "Stopping Plano..."
|
||||||
planoai down
|
planoai down
|
||||||
}
|
}
|
||||||
|
|
@ -42,6 +52,5 @@ stop_demo() {
|
||||||
if [ "$1" == "down" ]; then
|
if [ "$1" == "down" ]; then
|
||||||
stop_demo
|
stop_demo
|
||||||
else
|
else
|
||||||
# Default action is to bring the demo up
|
start_demo "$1"
|
||||||
start_demo
|
|
||||||
fi
|
fi
|
||||||
|
|
|
||||||
24
demos/advanced/multi_turn_rag/start_agents.sh
Executable file
24
demos/advanced/multi_turn_rag/start_agents.sh
Executable file
|
|
@ -0,0 +1,24 @@
|
||||||
|
#!/bin/bash
|
||||||
|
set -e
|
||||||
|
|
||||||
|
PIDS=()
|
||||||
|
|
||||||
|
log() { echo "$(date '+%F %T') - $*"; }
|
||||||
|
|
||||||
|
cleanup() {
|
||||||
|
log "Stopping agents..."
|
||||||
|
for PID in "${PIDS[@]}"; do
|
||||||
|
kill $PID 2>/dev/null && log "Stopped process $PID"
|
||||||
|
done
|
||||||
|
exit 0
|
||||||
|
}
|
||||||
|
|
||||||
|
trap cleanup EXIT INT TERM
|
||||||
|
|
||||||
|
log "Starting rag_energy_source_agent on port 18083..."
|
||||||
|
uv run uvicorn main:app --host 0.0.0.0 --port 18083 &
|
||||||
|
PIDS+=($!)
|
||||||
|
|
||||||
|
for PID in "${PIDS[@]}"; do
|
||||||
|
wait "$PID"
|
||||||
|
done
|
||||||
|
|
@ -18,22 +18,24 @@ start_demo() {
|
||||||
echo ".env file created with OPENAI_API_KEY."
|
echo ".env file created with OPENAI_API_KEY."
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Step 3: Start Plano
|
# Step 3: Optionally start UI services (AnythingLLM, Jaeger)
|
||||||
|
# Jaeger must start before Plano so it can bind the OTEL port (4317)
|
||||||
|
if [ "$1" == "--with-ui" ]; then
|
||||||
|
echo "Starting UI services (AnythingLLM, Jaeger)..."
|
||||||
|
docker compose up -d
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Step 4: Start Plano
|
||||||
echo "Starting Plano with config.yaml..."
|
echo "Starting Plano with config.yaml..."
|
||||||
planoai up config.yaml
|
planoai up config.yaml
|
||||||
|
|
||||||
# Step 4: Start developer services
|
|
||||||
echo "Starting Network Agent using Docker Compose..."
|
|
||||||
docker compose up -d # Run in detached mode
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Function to stop the demo
|
# Function to stop the demo
|
||||||
stop_demo() {
|
stop_demo() {
|
||||||
# Step 1: Stop Docker Compose services
|
# Stop Docker Compose services if running
|
||||||
echo "Stopping Network Agent using Docker Compose..."
|
docker compose down 2>/dev/null || true
|
||||||
docker compose down
|
|
||||||
|
|
||||||
# Step 2: Stop Plano
|
# Stop Plano
|
||||||
echo "Stopping Plano..."
|
echo "Stopping Plano..."
|
||||||
planoai down
|
planoai down
|
||||||
}
|
}
|
||||||
|
|
@ -42,6 +44,5 @@ stop_demo() {
|
||||||
if [ "$1" == "down" ]; then
|
if [ "$1" == "down" ]; then
|
||||||
stop_demo
|
stop_demo
|
||||||
else
|
else
|
||||||
# Default action is to bring the demo up
|
start_demo "$1"
|
||||||
start_demo
|
|
||||||
fi
|
fi
|
||||||
|
|
|
||||||
|
|
@ -41,21 +41,36 @@ cd demos/agent_orchestration/multi_agent_crewai_langchain
|
||||||
./run_demo.sh
|
./run_demo.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
This starts Plano natively and brings up via Docker Compose:
|
This starts Plano natively and runs agents as local processes:
|
||||||
- **CrewAI Flight Agent** (port 10520) - flight search
|
- **CrewAI Flight Agent** (port 10520) - flight search
|
||||||
- **LangChain Weather Agent** (port 10510) - weather forecasts
|
- **LangChain Weather Agent** (port 10510) - weather forecasts
|
||||||
- **AnythingLLM** (port 3001) - chat interface
|
|
||||||
- **Jaeger** (port 16686) - distributed tracing
|
|
||||||
|
|
||||||
Plano runs natively on the host (ports 12000, 8001).
|
Plano runs natively on the host (ports 12000, 8001).
|
||||||
|
|
||||||
|
To also start AnythingLLM (chat UI), Jaeger (tracing), and other optional services:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./run_demo.sh --with-ui
|
||||||
|
```
|
||||||
|
|
||||||
|
This additionally starts:
|
||||||
|
- **AnythingLLM** (port 3001) - chat interface
|
||||||
|
- **Jaeger** (port 16686) - distributed tracing
|
||||||
|
|
||||||
### Try It Out
|
### Try It Out
|
||||||
|
|
||||||
1. **Open the Chat Interface**
|
1. **Using curl**
|
||||||
|
```bash
|
||||||
|
curl -X POST http://localhost:8001/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"model": "gpt-4o", "messages": [{"role": "user", "content": "What is the weather in San Francisco?"}]}'
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Using AnythingLLM (requires `--with-ui`)**
|
||||||
- Navigate to [http://localhost:3001](http://localhost:3001)
|
- Navigate to [http://localhost:3001](http://localhost:3001)
|
||||||
- Create an account (stored locally)
|
- Create an account (stored locally)
|
||||||
|
|
||||||
2. **Ask Multi-Agent Questions**
|
3. **Ask Multi-Agent Questions**
|
||||||
```
|
```
|
||||||
"What's the weather in San Francisco and can you find flights from Seattle to San Francisco?"
|
"What's the weather in San Francisco and can you find flights from Seattle to San Francisco?"
|
||||||
```
|
```
|
||||||
|
|
@ -65,7 +80,7 @@ Plano runs natively on the host (ports 12000, 8001).
|
||||||
- Routes the flight part to the CrewAI agent
|
- Routes the flight part to the CrewAI agent
|
||||||
- Combines responses seamlessly
|
- Combines responses seamlessly
|
||||||
|
|
||||||
3. **View Distributed Traces**
|
4. **View Distributed Traces (requires `--with-ui`)**
|
||||||
- Open [http://localhost:16686](http://localhost:16686) (Jaeger UI)
|
- Open [http://localhost:16686](http://localhost:16686) (Jaeger UI)
|
||||||
- See how requests flow through both agents
|
- See how requests flow through both agents
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,9 +2,9 @@ version: v0.3.0
|
||||||
|
|
||||||
agents:
|
agents:
|
||||||
- id: weather_agent
|
- id: weather_agent
|
||||||
url: http://langchain-weather-agent:10510
|
url: http://localhost:10510
|
||||||
- id: flight_agent
|
- id: flight_agent
|
||||||
url: http://crewai-flight-agent:10520
|
url: http://localhost:10520
|
||||||
|
|
||||||
model_providers:
|
model_providers:
|
||||||
- model: openai/gpt-4o
|
- model: openai/gpt-4o
|
||||||
|
|
|
||||||
|
|
@ -1,27 +1,5 @@
|
||||||
|
|
||||||
services:
|
services:
|
||||||
crewai-flight-agent:
|
|
||||||
build:
|
|
||||||
dockerfile: Dockerfile
|
|
||||||
restart: always
|
|
||||||
ports:
|
|
||||||
- "10520:10520"
|
|
||||||
environment:
|
|
||||||
- LLM_GATEWAY_ENDPOINT=http://host.docker.internal:12000/v1
|
|
||||||
- AEROAPI_KEY=${AEROAPI_KEY:?AEROAPI_KEY environment variable is required but not set}
|
|
||||||
- PYTHONUNBUFFERED=1
|
|
||||||
command: ["python", "-u", "crewai/flight_agent.py"]
|
|
||||||
|
|
||||||
langchain-weather-agent:
|
|
||||||
build:
|
|
||||||
dockerfile: Dockerfile
|
|
||||||
restart: always
|
|
||||||
ports:
|
|
||||||
- "10510:10510"
|
|
||||||
environment:
|
|
||||||
- LLM_GATEWAY_ENDPOINT=http://host.docker.internal:12000/v1
|
|
||||||
command: ["python", "-u", "langchain/weather_agent.py"]
|
|
||||||
|
|
||||||
anythingllm:
|
anythingllm:
|
||||||
image: mintplexlabs/anythingllm
|
image: mintplexlabs/anythingllm
|
||||||
restart: always
|
restart: always
|
||||||
|
|
@ -36,6 +14,8 @@ services:
|
||||||
- GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
|
- GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
|
||||||
- GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
|
- GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
|
||||||
- GENERIC_OPEN_AI_API_KEY=sk-placeholder
|
- GENERIC_OPEN_AI_API_KEY=sk-placeholder
|
||||||
|
extra_hosts:
|
||||||
|
- "host.docker.internal:host-gateway"
|
||||||
|
|
||||||
jaeger:
|
jaeger:
|
||||||
build:
|
build:
|
||||||
|
|
@ -44,3 +24,4 @@ services:
|
||||||
ports:
|
ports:
|
||||||
- "16686:16686" # Jaeger UI
|
- "16686:16686" # Jaeger UI
|
||||||
- "4317:4317" # OTLP gRPC receiver
|
- "4317:4317" # OTLP gRPC receiver
|
||||||
|
- "4318:4318" # OTLP HTTP receiver
|
||||||
|
|
|
||||||
|
|
@ -12,33 +12,38 @@ start_demo() {
|
||||||
echo "Error: OPENAI_API_KEY environment variable is not set for the demo."
|
echo "Error: OPENAI_API_KEY environment variable is not set for the demo."
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
if [ -z "$AEROAPI_KEY" ]; then
|
|
||||||
echo "Error: AEROAPI_KEY environment variable is not set for the demo."
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "Creating .env file..."
|
echo "Creating .env file..."
|
||||||
echo "OPENAI_API_KEY=$OPENAI_API_KEY" > .env
|
echo "OPENAI_API_KEY=$OPENAI_API_KEY" > .env
|
||||||
echo "AEROAPI_KEY=$AEROAPI_KEY" >> .env
|
|
||||||
echo ".env file created with API keys."
|
echo ".env file created with API keys."
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Step 3: Start Plano
|
# Step 3: Optionally start UI services (AnythingLLM, Jaeger)
|
||||||
|
# Jaeger must start before Plano so it can bind the OTEL port (4317)
|
||||||
|
if [ "$1" == "--with-ui" ]; then
|
||||||
|
echo "Starting UI services (AnythingLLM, Jaeger)..."
|
||||||
|
docker compose up -d
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Step 4: Start Plano
|
||||||
echo "Starting Plano with config.yaml..."
|
echo "Starting Plano with config.yaml..."
|
||||||
planoai up config.yaml
|
planoai up config.yaml
|
||||||
|
|
||||||
# Step 4: Start agents and services
|
# Step 5: Start agents natively
|
||||||
echo "Starting agents using Docker Compose..."
|
echo "Starting agents..."
|
||||||
docker compose up -d
|
bash start_agents.sh &
|
||||||
}
|
}
|
||||||
|
|
||||||
# Function to stop the demo
|
# Function to stop the demo
|
||||||
stop_demo() {
|
stop_demo() {
|
||||||
# Step 1: Stop Docker Compose services
|
# Stop agents
|
||||||
echo "Stopping Docker Compose services..."
|
echo "Stopping agents..."
|
||||||
docker compose down
|
pkill -f start_agents.sh 2>/dev/null || true
|
||||||
|
|
||||||
# Step 2: Stop Plano
|
# Stop Docker Compose services if running
|
||||||
|
docker compose down 2>/dev/null || true
|
||||||
|
|
||||||
|
# Stop Plano
|
||||||
echo "Stopping Plano..."
|
echo "Stopping Plano..."
|
||||||
planoai down
|
planoai down
|
||||||
}
|
}
|
||||||
|
|
@ -47,5 +52,5 @@ stop_demo() {
|
||||||
if [ "$1" == "down" ]; then
|
if [ "$1" == "down" ]; then
|
||||||
stop_demo
|
stop_demo
|
||||||
else
|
else
|
||||||
start_demo
|
start_demo "$1"
|
||||||
fi
|
fi
|
||||||
|
|
|
||||||
30
demos/agent_orchestration/multi_agent_crewai_langchain/start_agents.sh
Executable file
30
demos/agent_orchestration/multi_agent_crewai_langchain/start_agents.sh
Executable file
|
|
@ -0,0 +1,30 @@
|
||||||
|
#!/bin/bash
|
||||||
|
set -e
|
||||||
|
|
||||||
|
PIDS=()
|
||||||
|
|
||||||
|
log() { echo "$(date '+%F %T') - $*"; }
|
||||||
|
|
||||||
|
cleanup() {
|
||||||
|
log "Stopping agents..."
|
||||||
|
for PID in "${PIDS[@]}"; do
|
||||||
|
kill $PID 2>/dev/null && log "Stopped process $PID"
|
||||||
|
done
|
||||||
|
exit 0
|
||||||
|
}
|
||||||
|
|
||||||
|
trap cleanup EXIT INT TERM
|
||||||
|
|
||||||
|
export LLM_GATEWAY_ENDPOINT=http://localhost:12000/v1
|
||||||
|
|
||||||
|
log "Starting langchain weather_agent on port 10510..."
|
||||||
|
uv run python langchain/weather_agent.py &
|
||||||
|
PIDS+=($!)
|
||||||
|
|
||||||
|
log "Starting crewai flight_agent on port 10520..."
|
||||||
|
uv run python crewai/flight_agent.py &
|
||||||
|
PIDS+=($!)
|
||||||
|
|
||||||
|
for PID in "${PIDS[@]}"; do
|
||||||
|
wait "$PID"
|
||||||
|
done
|
||||||
|
|
@ -23,9 +23,10 @@ All agents use Plano's agent orchestration LLM to intelligently route user reque
|
||||||
## Prerequisites
|
## Prerequisites
|
||||||
|
|
||||||
- [Plano CLI](https://docs.planoai.dev/get_started/quickstart.html#prerequisites) installed (`pip install planoai`)
|
- [Plano CLI](https://docs.planoai.dev/get_started/quickstart.html#prerequisites) installed (`pip install planoai`)
|
||||||
- Docker and Docker Compose (for agent services)
|
- [uv](https://docs.astral.sh/uv/) installed (for running agents natively)
|
||||||
- [OpenAI API key](https://platform.openai.com/api-keys)
|
- [OpenAI API key](https://platform.openai.com/api-keys)
|
||||||
- [FlightAware AeroAPI key](https://www.flightaware.com/aeroapi/portal)
|
- [FlightAware AeroAPI key](https://www.flightaware.com/aeroapi/portal)
|
||||||
|
- Docker and Docker Compose (optional, only needed for `--with-ui`)
|
||||||
|
|
||||||
> **Note:** You'll need to obtain a FlightAware AeroAPI key for live flight data. Visit [https://www.flightaware.com/aeroapi/portal](https://www.flightaware.com/aeroapi/portal) to get your API key.
|
> **Note:** You'll need to obtain a FlightAware AeroAPI key for live flight data. Visit [https://www.flightaware.com/aeroapi/portal](https://www.flightaware.com/aeroapi/portal) to get your API key.
|
||||||
|
|
||||||
|
|
@ -46,16 +47,34 @@ export OPENAI_API_KEY="your OpenAI api key"
|
||||||
./run_demo.sh
|
./run_demo.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
This starts Plano natively and brings up via Docker Compose:
|
This starts Plano natively and runs agents as local processes:
|
||||||
- Weather Agent on port 10510
|
- Weather Agent on port 10510
|
||||||
- Flight Agent on port 10520
|
- Flight Agent on port 10520
|
||||||
- Open WebUI on port 8080
|
|
||||||
|
|
||||||
Plano runs natively on the host (port 8001).
|
Plano runs natively on the host (port 8001).
|
||||||
|
|
||||||
|
To also start Open WebUI, Jaeger tracing, and other optional services, pass `--with-ui`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./run_demo.sh --with-ui
|
||||||
|
```
|
||||||
|
|
||||||
|
This additionally starts:
|
||||||
|
- Open WebUI on port 8080
|
||||||
|
- Jaeger tracing UI on port 16686
|
||||||
|
|
||||||
### 4. Test the System
|
### 4. Test the System
|
||||||
|
|
||||||
Use Open WebUI at http://localhost:8080
|
**Option A: Using curl**
|
||||||
|
```bash
|
||||||
|
curl -X POST http://localhost:8001/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"model": "gpt-5.2", "messages": [{"role": "user", "content": "What is the weather in Istanbul?"}]}'
|
||||||
|
```
|
||||||
|
|
||||||
|
**Option B: Using Open WebUI (requires `--with-ui`)**
|
||||||
|
|
||||||
|
Navigate to http://localhost:8080
|
||||||
|
|
||||||
> **Note:** The Open WebUI may take a few minutes to start up and be fully ready. Please wait for the container to finish initializing before accessing the interface. Once ready, make sure to select the **gpt-5.2** model from the model dropdown menu in the UI.
|
> **Note:** The Open WebUI may take a few minutes to start up and be fully ready. Please wait for the container to finish initializing before accessing the interface. Once ready, make sure to select the **gpt-5.2** model from the model dropdown menu in the UI.
|
||||||
|
|
||||||
|
|
@ -102,7 +121,7 @@ Each agent:
|
||||||
3. Generates response using GPT-5.2
|
3. Generates response using GPT-5.2
|
||||||
4. Streams response back to user
|
4. Streams response back to user
|
||||||
|
|
||||||
Both agents run as Docker containers and communicate with Plano running natively on the host.
|
Both agents run as native local processes and communicate with Plano running natively on the host.
|
||||||
|
|
||||||
## Observability
|
## Observability
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,32 +1,5 @@
|
||||||
|
|
||||||
services:
|
services:
|
||||||
weather-agent:
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
dockerfile: Dockerfile
|
|
||||||
container_name: weather-agent
|
|
||||||
restart: always
|
|
||||||
ports:
|
|
||||||
- "10510:10510"
|
|
||||||
environment:
|
|
||||||
- LLM_GATEWAY_ENDPOINT=http://host.docker.internal:12000/v1
|
|
||||||
command: ["uv", "run", "python", "src/travel_agents/weather_agent.py"]
|
|
||||||
extra_hosts:
|
|
||||||
- "host.docker.internal:host-gateway"
|
|
||||||
flight-agent:
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
dockerfile: Dockerfile
|
|
||||||
container_name: flight-agent
|
|
||||||
restart: always
|
|
||||||
ports:
|
|
||||||
- "10520:10520"
|
|
||||||
environment:
|
|
||||||
- LLM_GATEWAY_ENDPOINT=http://host.docker.internal:12000/v1
|
|
||||||
- AEROAPI_KEY=${AEROAPI_KEY:? AEROAPI_KEY environment variable is required but not set}
|
|
||||||
command: ["uv", "run", "python", "src/travel_agents/flight_agent.py"]
|
|
||||||
extra_hosts:
|
|
||||||
- "host.docker.internal:host-gateway"
|
|
||||||
open-web-ui:
|
open-web-ui:
|
||||||
image: dyrnq/open-webui:main
|
image: dyrnq/open-webui:main
|
||||||
restart: always
|
restart: always
|
||||||
|
|
@ -40,9 +13,8 @@ services:
|
||||||
- ENABLE_TITLE_GENERATION=false
|
- ENABLE_TITLE_GENERATION=false
|
||||||
- ENABLE_TAGS_GENERATION=false
|
- ENABLE_TAGS_GENERATION=false
|
||||||
- ENABLE_AUTOCOMPLETE_GENERATION=false
|
- ENABLE_AUTOCOMPLETE_GENERATION=false
|
||||||
depends_on:
|
extra_hosts:
|
||||||
- weather-agent
|
- "host.docker.internal:host-gateway"
|
||||||
- flight-agent
|
|
||||||
jaeger:
|
jaeger:
|
||||||
build:
|
build:
|
||||||
context: ../../shared/jaeger
|
context: ../../shared/jaeger
|
||||||
|
|
|
||||||
|
|
@ -23,22 +23,32 @@ start_demo() {
|
||||||
echo ".env file created with API keys."
|
echo ".env file created with API keys."
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Step 3: Start Plano
|
# Step 3: Optionally start UI services (Open WebUI, Jaeger)
|
||||||
|
# Jaeger must start before Plano so it can bind the OTEL port (4317)
|
||||||
|
if [ "$1" == "--with-ui" ]; then
|
||||||
|
echo "Starting UI services (Open WebUI, Jaeger)..."
|
||||||
|
docker compose up -d
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Step 4: Start Plano
|
||||||
echo "Starting Plano with config.yaml..."
|
echo "Starting Plano with config.yaml..."
|
||||||
planoai up config.yaml
|
planoai up config.yaml
|
||||||
|
|
||||||
# Step 4: Start agents and services
|
# Step 5: Start agents natively
|
||||||
echo "Starting agents using Docker Compose..."
|
echo "Starting agents..."
|
||||||
docker compose up -d
|
bash start_agents.sh &
|
||||||
}
|
}
|
||||||
|
|
||||||
# Function to stop the demo
|
# Function to stop the demo
|
||||||
stop_demo() {
|
stop_demo() {
|
||||||
# Step 1: Stop Docker Compose services
|
# Stop agents
|
||||||
echo "Stopping Docker Compose services..."
|
echo "Stopping agents..."
|
||||||
docker compose down
|
pkill -f start_agents.sh 2>/dev/null || true
|
||||||
|
|
||||||
# Step 2: Stop Plano
|
# Stop Docker Compose services if running
|
||||||
|
docker compose down 2>/dev/null || true
|
||||||
|
|
||||||
|
# Stop Plano
|
||||||
echo "Stopping Plano..."
|
echo "Stopping Plano..."
|
||||||
planoai down
|
planoai down
|
||||||
}
|
}
|
||||||
|
|
@ -47,5 +57,5 @@ stop_demo() {
|
||||||
if [ "$1" == "down" ]; then
|
if [ "$1" == "down" ]; then
|
||||||
stop_demo
|
stop_demo
|
||||||
else
|
else
|
||||||
start_demo
|
start_demo "$1"
|
||||||
fi
|
fi
|
||||||
|
|
|
||||||
30
demos/agent_orchestration/travel_agents/start_agents.sh
Executable file
30
demos/agent_orchestration/travel_agents/start_agents.sh
Executable file
|
|
@ -0,0 +1,30 @@
|
||||||
|
#!/bin/bash
|
||||||
|
set -e
|
||||||
|
|
||||||
|
PIDS=()
|
||||||
|
|
||||||
|
log() { echo "$(date '+%F %T') - $*"; }
|
||||||
|
|
||||||
|
cleanup() {
|
||||||
|
log "Stopping agents..."
|
||||||
|
for PID in "${PIDS[@]}"; do
|
||||||
|
kill $PID 2>/dev/null && log "Stopped process $PID"
|
||||||
|
done
|
||||||
|
exit 0
|
||||||
|
}
|
||||||
|
|
||||||
|
trap cleanup EXIT INT TERM
|
||||||
|
|
||||||
|
export LLM_GATEWAY_ENDPOINT=http://localhost:12000/v1
|
||||||
|
|
||||||
|
log "Starting weather_agent on port 10510..."
|
||||||
|
uv run python src/travel_agents/weather_agent.py &
|
||||||
|
PIDS+=($!)
|
||||||
|
|
||||||
|
log "Starting flight_agent on port 10520..."
|
||||||
|
uv run python src/travel_agents/flight_agent.py &
|
||||||
|
PIDS+=($!)
|
||||||
|
|
||||||
|
for PID in "${PIDS[@]}"; do
|
||||||
|
wait "$PID"
|
||||||
|
done
|
||||||
|
|
@ -41,23 +41,27 @@ export OPENAI_API_KEY="your-key"
|
||||||
./run_demo.sh
|
./run_demo.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
This starts Plano natively and brings up via Docker Compose:
|
This starts Plano natively and runs agents as local processes:
|
||||||
- Input Guards MCP server on port 10500
|
- Input Guards HTTP server on port 10500
|
||||||
- Query Rewriter MCP server on port 10501
|
- Query Rewriter HTTP server on port 10501
|
||||||
- Context Builder MCP server on port 10502
|
- Context Builder HTTP server on port 10502
|
||||||
- RAG Agent REST server on port 10505
|
- RAG Agent REST server on port 10505
|
||||||
- Jaeger UI for viewing traces at http://localhost:16686
|
|
||||||
- AnythingLLM at http://localhost:3001 for interactive queries
|
|
||||||
|
|
||||||
Plano runs natively on the host (port 8001 and 12000).
|
Plano runs natively on the host (port 8001 and 12000).
|
||||||
|
|
||||||
|
To also start AnythingLLM (chat UI) and Jaeger (tracing):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./run_demo.sh --with-ui
|
||||||
|
```
|
||||||
|
|
||||||
|
This additionally starts:
|
||||||
|
- Jaeger UI for viewing traces at http://localhost:16686
|
||||||
|
- AnythingLLM at http://localhost:3001 for interactive queries
|
||||||
|
|
||||||
### 2. Test the system
|
### 2. Test the system
|
||||||
|
|
||||||
**Option A: Using AnythingLLM (recommended)**
|
**Option A: Using curl (recommended)**
|
||||||
|
|
||||||
Navigate to http://localhost:3001 and send queries through the chat interface.
|
|
||||||
|
|
||||||
**Option B: Using curl**
|
|
||||||
```bash
|
```bash
|
||||||
curl -X POST http://localhost:8001/v1/chat/completions \
|
curl -X POST http://localhost:8001/v1/chat/completions \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
|
|
|
||||||
|
|
@ -2,23 +2,23 @@ version: v0.3.0
|
||||||
|
|
||||||
agents:
|
agents:
|
||||||
- id: rag_agent
|
- id: rag_agent
|
||||||
url: http://rag-agents:10505
|
url: http://localhost:10505
|
||||||
|
|
||||||
filters:
|
filters:
|
||||||
- id: input_guards
|
- id: input_guards
|
||||||
url: http://rag-agents:10500
|
url: http://localhost:10500
|
||||||
type: http
|
type: http
|
||||||
# type: mcp (default)
|
# type: mcp (default)
|
||||||
# transport: streamable-http (default)
|
# transport: streamable-http (default)
|
||||||
# tool: input_guards (default - same as filter id)
|
# tool: input_guards (default - same as filter id)
|
||||||
- id: query_rewriter
|
- id: query_rewriter
|
||||||
url: http://rag-agents:10501
|
url: http://localhost:10501
|
||||||
type: http
|
type: http
|
||||||
# type: mcp (default)
|
# type: mcp (default)
|
||||||
# transport: streamable-http (default)
|
# transport: streamable-http (default)
|
||||||
# tool: query_rewriter (default - same as filter id)
|
# tool: query_rewriter (default - same as filter id)
|
||||||
- id: context_builder
|
- id: context_builder
|
||||||
url: http://rag-agents:10502
|
url: http://localhost:10502
|
||||||
type: http
|
type: http
|
||||||
|
|
||||||
model_providers:
|
model_providers:
|
||||||
|
|
|
||||||
|
|
@ -1,16 +1,4 @@
|
||||||
services:
|
services:
|
||||||
rag-agents:
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
dockerfile: Dockerfile
|
|
||||||
ports:
|
|
||||||
- "10500:10500"
|
|
||||||
- "10501:10501"
|
|
||||||
- "10502:10502"
|
|
||||||
- "10505:10505"
|
|
||||||
environment:
|
|
||||||
- LLM_GATEWAY_ENDPOINT=${LLM_GATEWAY_ENDPOINT:-http://host.docker.internal:12000/v1}
|
|
||||||
- OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set}
|
|
||||||
jaeger:
|
jaeger:
|
||||||
build:
|
build:
|
||||||
context: ../../shared/jaeger
|
context: ../../shared/jaeger
|
||||||
|
|
@ -32,3 +20,5 @@ services:
|
||||||
- GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
|
- GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
|
||||||
- GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
|
- GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
|
||||||
- GENERIC_OPEN_AI_API_KEY=sk-placeholder
|
- GENERIC_OPEN_AI_API_KEY=sk-placeholder
|
||||||
|
extra_hosts:
|
||||||
|
- "host.docker.internal:host-gateway"
|
||||||
|
|
|
||||||
|
|
@ -18,22 +18,32 @@ start_demo() {
|
||||||
echo ".env file created with OPENAI_API_KEY."
|
echo ".env file created with OPENAI_API_KEY."
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Step 3: Start Plano
|
# Step 3: Optionally start UI services (AnythingLLM, Jaeger)
|
||||||
|
# Jaeger must start before Plano so it can bind the OTEL port (4317)
|
||||||
|
if [ "$1" == "--with-ui" ]; then
|
||||||
|
echo "Starting UI services (AnythingLLM, Jaeger)..."
|
||||||
|
docker compose up -d
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Step 4: Start Plano
|
||||||
echo "Starting Plano with config.yaml..."
|
echo "Starting Plano with config.yaml..."
|
||||||
planoai up config.yaml
|
planoai up config.yaml
|
||||||
|
|
||||||
# Step 4: Start services
|
# Step 5: Start agents natively
|
||||||
echo "Starting services using Docker Compose..."
|
echo "Starting agents..."
|
||||||
docker compose up -d
|
bash start_agents.sh &
|
||||||
}
|
}
|
||||||
|
|
||||||
# Function to stop the demo
|
# Function to stop the demo
|
||||||
stop_demo() {
|
stop_demo() {
|
||||||
# Step 1: Stop Docker Compose services
|
# Stop agents
|
||||||
echo "Stopping Docker Compose services..."
|
echo "Stopping agents..."
|
||||||
docker compose down
|
pkill -f start_agents.sh 2>/dev/null || true
|
||||||
|
|
||||||
# Step 2: Stop Plano
|
# Stop Docker Compose services if running
|
||||||
|
docker compose down 2>/dev/null || true
|
||||||
|
|
||||||
|
# Stop Plano
|
||||||
echo "Stopping Plano..."
|
echo "Stopping Plano..."
|
||||||
planoai down
|
planoai down
|
||||||
}
|
}
|
||||||
|
|
@ -42,5 +52,5 @@ stop_demo() {
|
||||||
if [ "$1" == "down" ]; then
|
if [ "$1" == "down" ]; then
|
||||||
stop_demo
|
stop_demo
|
||||||
else
|
else
|
||||||
start_demo
|
start_demo "$1"
|
||||||
fi
|
fi
|
||||||
|
|
|
||||||
84
demos/filter_chains/http_filter/start_agents.sh
Normal file → Executable file
84
demos/filter_chains/http_filter/start_agents.sh
Normal file → Executable file
|
|
@ -1,78 +1,38 @@
|
||||||
# #!/bin/bash
|
|
||||||
# set -e
|
|
||||||
|
|
||||||
# WAIT_FOR_PIDS=()
|
|
||||||
|
|
||||||
# log() {
|
|
||||||
# timestamp=$(python3 -c 'from datetime import datetime; print(datetime.now().strftime("%Y-%m-%d %H:%M:%S,%f")[:23])')
|
|
||||||
# message="$*"
|
|
||||||
# echo "$timestamp - $message"
|
|
||||||
# }
|
|
||||||
|
|
||||||
# cleanup() {
|
|
||||||
# log "Caught signal, terminating all user processes ..."
|
|
||||||
# for PID in "${WAIT_FOR_PIDS[@]}"; do
|
|
||||||
# if kill $PID 2> /dev/null; then
|
|
||||||
# log "killed process: $PID"
|
|
||||||
# fi
|
|
||||||
# done
|
|
||||||
# exit 1
|
|
||||||
# }
|
|
||||||
|
|
||||||
# trap cleanup EXIT
|
|
||||||
|
|
||||||
# log "Starting input_guards agent on port 10500/mcp..."
|
|
||||||
# uv run python -m rag_agent --rest-server --host 0.0.0.0 --rest-port 10500 --agent input_guards &
|
|
||||||
# WAIT_FOR_PIDS+=($!)
|
|
||||||
|
|
||||||
# log "Starting query_rewriter agent on port 10501/mcp..."
|
|
||||||
# uv run python -m rag_agent --rest-server --host 0.0.0.0 --rest-port 10501 --agent query_rewriter &
|
|
||||||
# WAIT_FOR_PIDS+=($!)
|
|
||||||
|
|
||||||
# log "Starting context_builder agent on port 10502/mcp..."
|
|
||||||
# uv run python -m rag_agent --rest-server --host 0.0.0.0 --rest-port 10502 --agent context_builder &
|
|
||||||
# WAIT_FOR_PIDS+=($!)
|
|
||||||
|
|
||||||
# # log "Starting response_generator agent on port 10400..."
|
|
||||||
# # uv run python -m rag_agent --host 0.0.0.0 --port 10400 --agent response_generator &
|
|
||||||
# # WAIT_FOR_PIDS+=($!)
|
|
||||||
|
|
||||||
# log "Starting response_generator agent on port 10505..."
|
|
||||||
# uv run python -m rag_agent --rest-server --host 0.0.0.0 --rest-port 10505 --agent response_generator &
|
|
||||||
# WAIT_FOR_PIDS+=($!)
|
|
||||||
|
|
||||||
# for PID in "${WAIT_FOR_PIDS[@]}"; do
|
|
||||||
# wait "$PID"
|
|
||||||
# done
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
export PYTHONPATH=/app/src
|
PIDS=()
|
||||||
|
|
||||||
pids=()
|
|
||||||
|
|
||||||
log() { echo "$(date '+%F %T') - $*"; }
|
log() { echo "$(date '+%F %T') - $*"; }
|
||||||
|
|
||||||
log "Starting input_guards HTTP server on :10500"
|
cleanup() {
|
||||||
|
log "Stopping agents..."
|
||||||
|
for PID in "${PIDS[@]}"; do
|
||||||
|
kill $PID 2>/dev/null && log "Stopped process $PID"
|
||||||
|
done
|
||||||
|
exit 0
|
||||||
|
}
|
||||||
|
|
||||||
|
trap cleanup EXIT INT TERM
|
||||||
|
|
||||||
|
export PYTHONPATH=./src
|
||||||
|
|
||||||
|
log "Starting input_guards HTTP server on port 10500..."
|
||||||
uv run uvicorn rag_agent.input_guards:app --host 0.0.0.0 --port 10500 &
|
uv run uvicorn rag_agent.input_guards:app --host 0.0.0.0 --port 10500 &
|
||||||
pids+=($!)
|
PIDS+=($!)
|
||||||
|
|
||||||
log "Starting query_rewriter HTTP server on :10501"
|
log "Starting query_rewriter HTTP server on port 10501..."
|
||||||
uv run uvicorn rag_agent.query_rewriter:app --host 0.0.0.0 --port 10501 &
|
uv run uvicorn rag_agent.query_rewriter:app --host 0.0.0.0 --port 10501 &
|
||||||
pids+=($!)
|
PIDS+=($!)
|
||||||
|
|
||||||
log "Starting context_builder HTTP server on :10502"
|
log "Starting context_builder HTTP server on port 10502..."
|
||||||
uv run uvicorn rag_agent.context_builder:app --host 0.0.0.0 --port 10502 &
|
uv run uvicorn rag_agent.context_builder:app --host 0.0.0.0 --port 10502 &
|
||||||
pids+=($!)
|
PIDS+=($!)
|
||||||
|
|
||||||
log "Starting response_generator (OpenAI-compatible) on :10505"
|
log "Starting response_generator (OpenAI-compatible) on port 10505..."
|
||||||
uv run uvicorn rag_agent.rag_agent:app --host 0.0.0.0 --port 10505 &
|
uv run uvicorn rag_agent.rag_agent:app --host 0.0.0.0 --port 10505 &
|
||||||
pids+=($!)
|
PIDS+=($!)
|
||||||
|
|
||||||
for PID in "${pids[@]}"; do
|
for PID in "${PIDS[@]}"; do
|
||||||
wait "$PID"
|
wait "$PID"
|
||||||
done
|
done
|
||||||
|
|
|
||||||
|
|
@ -41,23 +41,27 @@ export OPENAI_API_KEY="your-key"
|
||||||
./run_demo.sh
|
./run_demo.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
This starts Plano natively and brings up via Docker Compose:
|
This starts Plano natively and runs agents as local processes:
|
||||||
- Input Guards MCP server on port 10500
|
- Input Guards MCP server on port 10500
|
||||||
- Query Rewriter MCP server on port 10501
|
- Query Rewriter MCP server on port 10501
|
||||||
- Context Builder MCP server on port 10502
|
- Context Builder MCP server on port 10502
|
||||||
- RAG Agent REST server on port 10505
|
- RAG Agent REST server on port 10505
|
||||||
- Jaeger UI for viewing traces at http://localhost:16686
|
|
||||||
- AnythingLLM at http://localhost:3001 for interactive queries
|
|
||||||
|
|
||||||
Plano runs natively on the host (port 8001 and 12000).
|
Plano runs natively on the host (port 8001 and 12000).
|
||||||
|
|
||||||
|
To also start AnythingLLM (chat UI) and Jaeger (tracing):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./run_demo.sh --with-ui
|
||||||
|
```
|
||||||
|
|
||||||
|
This additionally starts:
|
||||||
|
- Jaeger UI for viewing traces at http://localhost:16686
|
||||||
|
- AnythingLLM at http://localhost:3001 for interactive queries
|
||||||
|
|
||||||
### 2. Test the system
|
### 2. Test the system
|
||||||
|
|
||||||
**Option A: Using AnythingLLM (recommended)**
|
**Option A: Using curl (recommended)**
|
||||||
|
|
||||||
Navigate to http://localhost:3001 and send queries through the chat interface.
|
|
||||||
|
|
||||||
**Option B: Using curl**
|
|
||||||
```bash
|
```bash
|
||||||
curl -X POST http://localhost:8001/v1/chat/completions \
|
curl -X POST http://localhost:8001/v1/chat/completions \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
|
|
|
||||||
|
|
@ -1,16 +1,4 @@
|
||||||
services:
|
services:
|
||||||
rag-agents:
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
dockerfile: Dockerfile
|
|
||||||
ports:
|
|
||||||
- "10500:10500"
|
|
||||||
- "10501:10501"
|
|
||||||
- "10502:10502"
|
|
||||||
- "10505:10505"
|
|
||||||
environment:
|
|
||||||
- LLM_GATEWAY_ENDPOINT=${LLM_GATEWAY_ENDPOINT:-http://host.docker.internal:12000/v1}
|
|
||||||
- OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set}
|
|
||||||
jaeger:
|
jaeger:
|
||||||
build:
|
build:
|
||||||
context: ../../shared/jaeger
|
context: ../../shared/jaeger
|
||||||
|
|
@ -32,3 +20,5 @@ services:
|
||||||
- GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
|
- GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
|
||||||
- GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
|
- GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
|
||||||
- GENERIC_OPEN_AI_API_KEY=sk-placeholder
|
- GENERIC_OPEN_AI_API_KEY=sk-placeholder
|
||||||
|
extra_hosts:
|
||||||
|
- "host.docker.internal:host-gateway"
|
||||||
|
|
|
||||||
|
|
@ -18,22 +18,32 @@ start_demo() {
|
||||||
echo ".env file created with OPENAI_API_KEY."
|
echo ".env file created with OPENAI_API_KEY."
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Step 3: Start Plano
|
# Step 3: Optionally start UI services (AnythingLLM, Jaeger)
|
||||||
|
# Jaeger must start before Plano so it can bind the OTEL port (4317)
|
||||||
|
if [ "$1" == "--with-ui" ]; then
|
||||||
|
echo "Starting UI services (AnythingLLM, Jaeger)..."
|
||||||
|
docker compose up -d
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Step 4: Start Plano
|
||||||
echo "Starting Plano with config.yaml..."
|
echo "Starting Plano with config.yaml..."
|
||||||
planoai up config.yaml
|
planoai up config.yaml
|
||||||
|
|
||||||
# Step 4: Start services
|
# Step 5: Start agents natively
|
||||||
echo "Starting services using Docker Compose..."
|
echo "Starting agents..."
|
||||||
docker compose up -d
|
bash start_agents.sh &
|
||||||
}
|
}
|
||||||
|
|
||||||
# Function to stop the demo
|
# Function to stop the demo
|
||||||
stop_demo() {
|
stop_demo() {
|
||||||
# Step 1: Stop Docker Compose services
|
# Stop agents
|
||||||
echo "Stopping Docker Compose services..."
|
echo "Stopping agents..."
|
||||||
docker compose down
|
pkill -f start_agents.sh 2>/dev/null || true
|
||||||
|
|
||||||
# Step 2: Stop Plano
|
# Stop Docker Compose services if running
|
||||||
|
docker compose down 2>/dev/null || true
|
||||||
|
|
||||||
|
# Stop Plano
|
||||||
echo "Stopping Plano..."
|
echo "Stopping Plano..."
|
||||||
planoai down
|
planoai down
|
||||||
}
|
}
|
||||||
|
|
@ -42,5 +52,5 @@ stop_demo() {
|
||||||
if [ "$1" == "down" ]; then
|
if [ "$1" == "down" ]; then
|
||||||
stop_demo
|
stop_demo
|
||||||
else
|
else
|
||||||
start_demo
|
start_demo "$1"
|
||||||
fi
|
fi
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,14 @@ This demo shows how you can use Plano gateway to manage keys and route to upstre
|
||||||
```sh
|
```sh
|
||||||
sh run_demo.sh
|
sh run_demo.sh
|
||||||
```
|
```
|
||||||
1. Navigate to http://localhost:3001/
|
1. Test with curl (see example below)
|
||||||
|
|
||||||
|
To also start the AnythingLLM chat UI and Jaeger tracing, pass `--with-ui`:
|
||||||
|
```sh
|
||||||
|
sh run_demo.sh --with-ui
|
||||||
|
```
|
||||||
|
|
||||||
|
Then navigate to http://localhost:3001/ for AnythingLLM.
|
||||||
|
|
||||||
Following screen shows an example of interaction with Plano gateway showing dynamic routing. You can select between different LLMs using "override model" option in the chat UI.
|
Following screen shows an example of interaction with Plano gateway showing dynamic routing. You can select between different LLMs using "override model" option in the chat UI.
|
||||||
|
|
||||||
|
|
@ -47,7 +54,7 @@ $ curl --header 'Content-Type: application/json' \
|
||||||
```
|
```
|
||||||
|
|
||||||
# Observability
|
# Observability
|
||||||
For tracing you can head over to http://localhost:16686/ to view recent traces.
|
For tracing, start with `--with-ui` and head over to http://localhost:16686/ to view recent traces.
|
||||||
|
|
||||||
Following is a screenshot of tracing UI showing call received by Plano gateway and making upstream call to LLM,
|
Following is a screenshot of tracing UI showing call received by Plano gateway and making upstream call to LLM,
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -18,22 +18,24 @@ start_demo() {
|
||||||
echo ".env file created with OPENAI_API_KEY."
|
echo ".env file created with OPENAI_API_KEY."
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Step 3: Start Plano
|
# Step 3: Optionally start UI services (AnythingLLM, Jaeger)
|
||||||
|
# Jaeger must start before Plano so it can bind the OTEL port (4317)
|
||||||
|
if [ "$1" == "--with-ui" ]; then
|
||||||
|
echo "Starting UI services (AnythingLLM, Jaeger)..."
|
||||||
|
docker compose up -d
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Step 4: Start Plano
|
||||||
echo "Starting Plano with config.yaml..."
|
echo "Starting Plano with config.yaml..."
|
||||||
planoai up config.yaml
|
planoai up config.yaml
|
||||||
|
|
||||||
# Step 4: Start LLM Routing
|
|
||||||
echo "Starting LLM Routing using Docker Compose..."
|
|
||||||
docker compose up -d # Run in detached mode
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Function to stop the demo
|
# Function to stop the demo
|
||||||
stop_demo() {
|
stop_demo() {
|
||||||
# Step 1: Stop Docker Compose services
|
# Stop Docker Compose services if running
|
||||||
echo "Stopping LLM Routing using Docker Compose..."
|
docker compose down 2>/dev/null || true
|
||||||
docker compose down
|
|
||||||
|
|
||||||
# Step 2: Stop Plano
|
# Stop Plano
|
||||||
echo "Stopping Plano..."
|
echo "Stopping Plano..."
|
||||||
planoai down
|
planoai down
|
||||||
}
|
}
|
||||||
|
|
@ -42,6 +44,5 @@ stop_demo() {
|
||||||
if [ "$1" == "down" ]; then
|
if [ "$1" == "down" ]; then
|
||||||
stop_demo
|
stop_demo
|
||||||
else
|
else
|
||||||
# Default action is to bring the demo up
|
start_demo "$1"
|
||||||
start_demo
|
|
||||||
fi
|
fi
|
||||||
|
|
|
||||||
|
|
@ -10,15 +10,26 @@ This demo shows how you can use Plano's core function calling capabilities.
|
||||||
3. ```sh
|
3. ```sh
|
||||||
sh run_demo.sh
|
sh run_demo.sh
|
||||||
```
|
```
|
||||||
4. Navigate to http://localhost:3001/
|
4. Test with curl:
|
||||||
5. You can type in queries like "how is the weather?"
|
```sh
|
||||||
|
curl http://localhost:10000/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"model": "gpt-4o", "messages": [{"role": "user", "content": "how is the weather in San Francisco?"}]}'
|
||||||
|
```
|
||||||
|
|
||||||
Here is a sample interaction,
|
Here is a sample interaction,
|
||||||
<img width="575" alt="image" src="https://github.com/user-attachments/assets/e0929490-3eb2-4130-ae87-a732aea4d059">
|
<img width="575" alt="image" src="https://github.com/user-attachments/assets/e0929490-3eb2-4130-ae87-a732aea4d059">
|
||||||
|
|
||||||
## Tracing
|
## Using the Chat UI and Tracing (optional)
|
||||||
|
|
||||||
To see a tracing dashboard, navigate to http://localhost:16686/ to open Jaeger UI.
|
To start AnythingLLM (chat UI) and other optional services, pass `--with-ui`:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
sh run_demo.sh --with-ui
|
||||||
|
```
|
||||||
|
|
||||||
|
- Navigate to http://localhost:3001/ for AnythingLLM
|
||||||
|
- Navigate to http://localhost:16686/ for Jaeger tracing UI
|
||||||
|
|
||||||
### Stopping Demo
|
### Stopping Demo
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,14 +1,4 @@
|
||||||
services:
|
services:
|
||||||
weather_forecast_service:
|
|
||||||
build:
|
|
||||||
context: ./
|
|
||||||
environment:
|
|
||||||
- OLTP_HOST=http://jaeger:4317
|
|
||||||
extra_hosts:
|
|
||||||
- "host.docker.internal:host-gateway"
|
|
||||||
ports:
|
|
||||||
- "18083:80"
|
|
||||||
|
|
||||||
anythingllm:
|
anythingllm:
|
||||||
image: mintplexlabs/anythingllm
|
image: mintplexlabs/anythingllm
|
||||||
restart: always
|
restart: always
|
||||||
|
|
|
||||||
|
|
@ -72,23 +72,32 @@ start_demo() {
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Step 4: Start Plano
|
# Step 4: Optionally start UI services (AnythingLLM, Jaeger, etc.)
|
||||||
|
# Jaeger must start before Plano so it can bind the OTEL port (4317)
|
||||||
|
if [ "$1" == "--with-ui" ] || [ "$2" == "--with-ui" ]; then
|
||||||
|
echo "Starting UI services with $COMPOSE_FILE..."
|
||||||
|
docker compose -f "$COMPOSE_FILE" up -d
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Step 5: Start Plano
|
||||||
echo "Starting Plano with config.yaml..."
|
echo "Starting Plano with config.yaml..."
|
||||||
planoai up config.yaml
|
planoai up config.yaml
|
||||||
|
|
||||||
# Step 5: Start Network Agent with the chosen Docker Compose file
|
# Step 6: Start agents natively
|
||||||
echo "Starting Network Agent with $COMPOSE_FILE..."
|
echo "Starting agents..."
|
||||||
docker compose -f "$COMPOSE_FILE" up -d # Run in detached mode
|
bash start_agents.sh &
|
||||||
}
|
}
|
||||||
|
|
||||||
# Function to stop the demo
|
# Function to stop the demo
|
||||||
stop_demo() {
|
stop_demo() {
|
||||||
echo "Stopping all Docker Compose services..."
|
# Stop agents
|
||||||
|
echo "Stopping agents..."
|
||||||
|
pkill -f start_agents.sh 2>/dev/null || true
|
||||||
|
|
||||||
# Stop all services by iterating through all configurations
|
# Stop all Docker Compose services if running
|
||||||
|
echo "Stopping Docker Compose services..."
|
||||||
for compose_file in ./docker-compose*.yaml; do
|
for compose_file in ./docker-compose*.yaml; do
|
||||||
echo "Stopping services in $compose_file..."
|
docker compose -f "$compose_file" down 2>/dev/null || true
|
||||||
docker compose -f "$compose_file" down
|
|
||||||
done
|
done
|
||||||
|
|
||||||
# Stop Plano
|
# Stop Plano
|
||||||
|
|
@ -101,6 +110,6 @@ if [ "$1" == "down" ]; then
|
||||||
# Call stop_demo with the second argument as the demo to stop
|
# Call stop_demo with the second argument as the demo to stop
|
||||||
stop_demo
|
stop_demo
|
||||||
else
|
else
|
||||||
# Use the argument (jaeger, logfire, signoz) to determine the compose file
|
# Use the argument (jaeger, logfire, signoz, --with-ui) to determine the compose file
|
||||||
start_demo "$1"
|
start_demo "$1" "$2"
|
||||||
fi
|
fi
|
||||||
|
|
|
||||||
24
demos/getting_started/weather_forecast/start_agents.sh
Executable file
24
demos/getting_started/weather_forecast/start_agents.sh
Executable file
|
|
@ -0,0 +1,24 @@
|
||||||
|
#!/bin/bash
|
||||||
|
set -e
|
||||||
|
|
||||||
|
PIDS=()
|
||||||
|
|
||||||
|
log() { echo "$(date '+%F %T') - $*"; }
|
||||||
|
|
||||||
|
cleanup() {
|
||||||
|
log "Stopping agents..."
|
||||||
|
for PID in "${PIDS[@]}"; do
|
||||||
|
kill $PID 2>/dev/null && log "Stopped process $PID"
|
||||||
|
done
|
||||||
|
exit 0
|
||||||
|
}
|
||||||
|
|
||||||
|
trap cleanup EXIT INT TERM
|
||||||
|
|
||||||
|
log "Starting weather_forecast_service on port 18083..."
|
||||||
|
uv run uvicorn main:app --host 0.0.0.0 --port 18083 &
|
||||||
|
PIDS+=($!)
|
||||||
|
|
||||||
|
for PID in "${PIDS[@]}"; do
|
||||||
|
wait "$PID"
|
||||||
|
done
|
||||||
|
|
@ -7,33 +7,58 @@ start_demo() {
|
||||||
if [ -f ".env" ]; then
|
if [ -f ".env" ]; then
|
||||||
echo ".env file already exists. Skipping creation."
|
echo ".env file already exists. Skipping creation."
|
||||||
else
|
else
|
||||||
# Step 2: Create `.env` file and set OpenAI key
|
# Step 2: Create `.env` file and set API keys
|
||||||
if [ -z "$OPENAI_API_KEY" ]; then
|
if [ -z "$OPENAI_API_KEY" ]; then
|
||||||
echo "Error: OPENAI_API_KEY environment variable is not set for the demo."
|
echo "Error: OPENAI_API_KEY environment variable is not set for the demo."
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
if [ -z "$ANTHROPIC_API_KEY" ]; then
|
||||||
|
echo "Warning: ANTHROPIC_API_KEY environment variable is not set. Anthropic features may not work."
|
||||||
|
fi
|
||||||
|
|
||||||
echo "Creating .env file..."
|
echo "Creating .env file..."
|
||||||
echo "OPENAI_API_KEY=$OPENAI_API_KEY" > .env
|
echo "OPENAI_API_KEY=$OPENAI_API_KEY" > .env
|
||||||
echo ".env file created with OPENAI_API_KEY."
|
if [ -n "$ANTHROPIC_API_KEY" ]; then
|
||||||
|
echo "ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY" >> .env
|
||||||
|
fi
|
||||||
|
echo ".env file created with API keys."
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Step 3: Start Plano
|
# Step 3: Optionally start UI services (AnythingLLM, Jaeger)
|
||||||
echo "Starting Plano with config.yaml..."
|
# Jaeger must start before Plano so it can bind the OTEL port (4317)
|
||||||
planoai up config.yaml
|
if [ "$1" == "--with-ui" ]; then
|
||||||
|
echo "Starting UI services (AnythingLLM, Jaeger)..."
|
||||||
|
docker compose up -d
|
||||||
|
fi
|
||||||
|
|
||||||
# Step 4: Start developer services
|
# Step 4: Start Plano
|
||||||
echo "Starting Network Agent using Docker Compose..."
|
echo "Starting Plano with arch_config_with_aliases.yaml..."
|
||||||
docker compose up -d # Run in detached mode
|
planoai up arch_config_with_aliases.yaml
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "Plano started successfully."
|
||||||
|
echo "Please run the following CURL command to test model alias routing. Additional instructions are in the README.md file."
|
||||||
|
echo ""
|
||||||
|
echo "curl -sS -X POST \"http://localhost:12000/v1/chat/completions\" \
|
||||||
|
-H \"Authorization: Bearer test-key\" \
|
||||||
|
-H \"Content-Type: application/json\" \
|
||||||
|
-d '{
|
||||||
|
\"model\": \"arch.summarize.v1\",
|
||||||
|
\"max_tokens\": 50,
|
||||||
|
\"messages\": [
|
||||||
|
{ \"role\": \"user\",
|
||||||
|
\"content\": \"Hello, please respond with exactly: Hello from alias arch.summarize.v1!\"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}' | jq ."
|
||||||
}
|
}
|
||||||
|
|
||||||
# Function to stop the demo
|
# Function to stop the demo
|
||||||
stop_demo() {
|
stop_demo() {
|
||||||
# Step 1: Stop Docker Compose services
|
# Stop Docker Compose services if running
|
||||||
echo "Stopping Network Agent using Docker Compose..."
|
docker compose down 2>/dev/null || true
|
||||||
docker compose down
|
|
||||||
|
|
||||||
# Step 2: Stop Plano
|
# Stop Plano
|
||||||
echo "Stopping Plano..."
|
echo "Stopping Plano..."
|
||||||
planoai down
|
planoai down
|
||||||
}
|
}
|
||||||
|
|
@ -42,6 +67,5 @@ stop_demo() {
|
||||||
if [ "$1" == "down" ]; then
|
if [ "$1" == "down" ]; then
|
||||||
stop_demo
|
stop_demo
|
||||||
else
|
else
|
||||||
# Default action is to bring the demo up
|
start_demo "$1"
|
||||||
start_demo
|
|
||||||
fi
|
fi
|
||||||
|
|
|
||||||
|
|
@ -18,22 +18,24 @@ start_demo() {
|
||||||
echo ".env file created with OPENAI_API_KEY."
|
echo ".env file created with OPENAI_API_KEY."
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Step 3: Start Plano
|
# Step 3: Optionally start UI services (AnythingLLM, Jaeger)
|
||||||
|
# Jaeger must start before Plano so it can bind the OTEL port (4317)
|
||||||
|
if [ "$1" == "--with-ui" ]; then
|
||||||
|
echo "Starting UI services (AnythingLLM, Jaeger)..."
|
||||||
|
docker compose up -d
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Step 4: Start Plano
|
||||||
echo "Starting Plano with config.yaml..."
|
echo "Starting Plano with config.yaml..."
|
||||||
planoai up config.yaml
|
planoai up config.yaml
|
||||||
|
|
||||||
# Step 4: Start developer services
|
|
||||||
echo "Starting Network Agent using Docker Compose..."
|
|
||||||
docker compose up -d # Run in detached mode
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Function to stop the demo
|
# Function to stop the demo
|
||||||
stop_demo() {
|
stop_demo() {
|
||||||
# Step 1: Stop Docker Compose services
|
# Stop Docker Compose services if running
|
||||||
echo "Stopping Network Agent using Docker Compose..."
|
docker compose down 2>/dev/null || true
|
||||||
docker compose down
|
|
||||||
|
|
||||||
# Step 2: Stop Plano
|
# Stop Plano
|
||||||
echo "Stopping Plano..."
|
echo "Stopping Plano..."
|
||||||
planoai down
|
planoai down
|
||||||
}
|
}
|
||||||
|
|
@ -42,6 +44,5 @@ stop_demo() {
|
||||||
if [ "$1" == "down" ]; then
|
if [ "$1" == "down" ]; then
|
||||||
stop_demo
|
stop_demo
|
||||||
else
|
else
|
||||||
# Default action is to bring the demo up
|
start_demo "$1"
|
||||||
start_demo
|
|
||||||
fi
|
fi
|
||||||
|
|
|
||||||
|
|
@ -10,19 +10,58 @@ cd demos/llm_routing/preference_based_routing
|
||||||
./run_demo.sh
|
./run_demo.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
Or manually:
|
To also start AnythingLLM (chat UI) and Jaeger (tracing):
|
||||||
|
|
||||||
1. Start Plano
|
|
||||||
```bash
|
```bash
|
||||||
planoai up config.yaml
|
./run_demo.sh --with-ui
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Start AnythingLLM
|
Then open AnythingLLM at http://localhost:3001/
|
||||||
|
|
||||||
|
Or start manually:
|
||||||
|
|
||||||
|
1. (Optional) Start AnythingLLM and Jaeger
|
||||||
```bash
|
```bash
|
||||||
docker compose up -d
|
docker compose up -d
|
||||||
```
|
```
|
||||||
|
|
||||||
3. open AnythingLLM http://localhost:3001/
|
2. Start Plano
|
||||||
|
```bash
|
||||||
|
planoai up config.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test with curl or open AnythingLLM http://localhost:3001/
|
||||||
|
|
||||||
|
## Running with local Arch-Router (via Ollama)
|
||||||
|
|
||||||
|
By default, Plano uses a hosted Arch-Router endpoint. To self-host Arch-Router locally using Ollama:
|
||||||
|
|
||||||
|
1. Install [Ollama](https://ollama.ai) and pull the model:
|
||||||
|
```bash
|
||||||
|
ollama pull hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Make sure Ollama is running (`ollama serve` or the macOS app).
|
||||||
|
|
||||||
|
3. Start Plano with the local config:
|
||||||
|
```bash
|
||||||
|
planoai up plano_config_local.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
4. Test routing:
|
||||||
|
```bash
|
||||||
|
curl -s "http://localhost:12000/routing/v1/messages" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"model": "gpt-4o-mini",
|
||||||
|
"max_tokens": 1024,
|
||||||
|
"messages": [
|
||||||
|
{"role": "user", "content": "Create a REST API endpoint in Rust using actix-web"}
|
||||||
|
]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
You should see the router select the appropriate model based on the routing preferences defined in `plano_config_local.yaml`.
|
||||||
|
|
||||||
# Testing out preference based routing
|
# Testing out preference based routing
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -24,22 +24,24 @@ start_demo() {
|
||||||
echo ".env file created with API keys."
|
echo ".env file created with API keys."
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Step 3: Start Plano
|
# Step 3: Optionally start UI services (AnythingLLM, Jaeger)
|
||||||
|
# Jaeger must start before Plano so it can bind the OTEL port (4317)
|
||||||
|
if [ "$1" == "--with-ui" ]; then
|
||||||
|
echo "Starting UI services (AnythingLLM, Jaeger)..."
|
||||||
|
docker compose up -d
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Step 4: Start Plano
|
||||||
echo "Starting Plano with config.yaml..."
|
echo "Starting Plano with config.yaml..."
|
||||||
planoai up config.yaml
|
planoai up config.yaml
|
||||||
|
|
||||||
# Step 4: Start services
|
|
||||||
echo "Starting services using Docker Compose..."
|
|
||||||
docker compose up -d
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Function to stop the demo
|
# Function to stop the demo
|
||||||
stop_demo() {
|
stop_demo() {
|
||||||
# Step 1: Stop Docker Compose services
|
# Stop Docker Compose services if running
|
||||||
echo "Stopping Docker Compose services..."
|
docker compose down 2>/dev/null || true
|
||||||
docker compose down
|
|
||||||
|
|
||||||
# Step 2: Stop Plano
|
# Stop Plano
|
||||||
echo "Stopping Plano..."
|
echo "Stopping Plano..."
|
||||||
planoai down
|
planoai down
|
||||||
}
|
}
|
||||||
|
|
@ -48,5 +50,5 @@ stop_demo() {
|
||||||
if [ "$1" == "down" ]; then
|
if [ "$1" == "down" ]; then
|
||||||
stop_demo
|
stop_demo
|
||||||
else
|
else
|
||||||
start_demo
|
start_demo "$1"
|
||||||
fi
|
fi
|
||||||
|
|
|
||||||
|
|
@ -228,6 +228,129 @@ In summary, Arch-Router demonstrates:
|
||||||
- **Production-Ready Performance**: Optimized for low-latency, high-throughput applications in multi-model environments.
|
- **Production-Ready Performance**: Optimized for low-latency, high-throughput applications in multi-model environments.
|
||||||
|
|
||||||
|
|
||||||
|
Self-hosting Arch-Router
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
By default, Plano uses a hosted Arch-Router endpoint. To run Arch-Router locally, you can serve the model yourself using either **Ollama** or **vLLM**.
|
||||||
|
|
||||||
|
Using Ollama (recommended for local development)
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
1. **Install Ollama**
|
||||||
|
|
||||||
|
Download and install from `ollama.ai <https://ollama.ai>`_.
|
||||||
|
|
||||||
|
2. **Pull and serve Arch-Router**
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
ollama pull hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M
|
||||||
|
ollama serve
|
||||||
|
|
||||||
|
This downloads the quantized GGUF model from HuggingFace and starts serving on ``http://localhost:11434``.
|
||||||
|
|
||||||
|
3. **Configure Plano to use local Arch-Router**
|
||||||
|
|
||||||
|
.. code-block:: yaml
|
||||||
|
|
||||||
|
routing:
|
||||||
|
model: Arch-Router
|
||||||
|
llm_provider: arch-router
|
||||||
|
|
||||||
|
model_providers:
|
||||||
|
- name: arch-router
|
||||||
|
model: arch/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M
|
||||||
|
base_url: http://localhost:11434
|
||||||
|
|
||||||
|
- model: openai/gpt-5.2
|
||||||
|
access_key: $OPENAI_API_KEY
|
||||||
|
default: true
|
||||||
|
|
||||||
|
- model: anthropic/claude-sonnet-4-5
|
||||||
|
access_key: $ANTHROPIC_API_KEY
|
||||||
|
routing_preferences:
|
||||||
|
- name: creative writing
|
||||||
|
description: creative content generation, storytelling, and writing assistance
|
||||||
|
|
||||||
|
4. **Verify the model is running**
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
curl http://localhost:11434/v1/models
|
||||||
|
|
||||||
|
You should see ``Arch-Router-1.5B`` listed in the response.
|
||||||
|
|
||||||
|
Using vLLM (recommended for production / EC2)
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
vLLM provides higher throughput and GPU optimizations suitable for production deployments.
|
||||||
|
|
||||||
|
1. **Install vLLM**
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
pip install vllm
|
||||||
|
|
||||||
|
2. **Download the model weights**
|
||||||
|
|
||||||
|
The GGUF weights are downloaded automatically from HuggingFace on first use. To pre-download:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
pip install huggingface_hub
|
||||||
|
huggingface-cli download katanemo/Arch-Router-1.5B.gguf
|
||||||
|
|
||||||
|
3. **Start the vLLM server**
|
||||||
|
|
||||||
|
After downloading, find the GGUF file and Jinja template in the HuggingFace cache:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
# Find the downloaded files
|
||||||
|
SNAPSHOT_DIR=$(ls -d ~/.cache/huggingface/hub/models--katanemo--Arch-Router-1.5B.gguf/snapshots/*/ | head -1)
|
||||||
|
|
||||||
|
vllm serve ${SNAPSHOT_DIR}Arch-Router-1.5B-Q4_K_M.gguf \
|
||||||
|
--host 0.0.0.0 \
|
||||||
|
--port 10000 \
|
||||||
|
--load-format gguf \
|
||||||
|
--chat-template ${SNAPSHOT_DIR}template.jinja \
|
||||||
|
--tokenizer katanemo/Arch-Router-1.5B \
|
||||||
|
--served-model-name Arch-Router \
|
||||||
|
--gpu-memory-utilization 0.3 \
|
||||||
|
--tensor-parallel-size 1 \
|
||||||
|
--enable-prefix-caching
|
||||||
|
|
||||||
|
4. **Configure Plano to use the vLLM endpoint**
|
||||||
|
|
||||||
|
.. code-block:: yaml
|
||||||
|
|
||||||
|
routing:
|
||||||
|
model: Arch-Router
|
||||||
|
llm_provider: arch-router
|
||||||
|
|
||||||
|
model_providers:
|
||||||
|
- name: arch-router
|
||||||
|
model: Arch-Router
|
||||||
|
base_url: http://<your-server-ip>:10000
|
||||||
|
|
||||||
|
- model: openai/gpt-5.2
|
||||||
|
access_key: $OPENAI_API_KEY
|
||||||
|
default: true
|
||||||
|
|
||||||
|
- model: anthropic/claude-sonnet-4-5
|
||||||
|
access_key: $ANTHROPIC_API_KEY
|
||||||
|
routing_preferences:
|
||||||
|
- name: creative writing
|
||||||
|
description: creative content generation, storytelling, and writing assistance
|
||||||
|
|
||||||
|
5. **Verify the server is running**
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
curl http://localhost:10000/health
|
||||||
|
curl http://localhost:10000/v1/models
|
||||||
|
|
||||||
|
|
||||||
Combining Routing Methods
|
Combining Routing Methods
|
||||||
-------------------------
|
-------------------------
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -21,10 +21,11 @@ trap 'print_debug' INT TERM ERR
|
||||||
|
|
||||||
log starting > ../build.log
|
log starting > ../build.log
|
||||||
|
|
||||||
log building and running function_calling demo
|
log starting weather_forecast agent natively
|
||||||
log ===========================================
|
log ===========================================
|
||||||
cd ../../demos/getting_started/weather_forecast/
|
cd ../../demos/getting_started/weather_forecast/
|
||||||
docker compose up weather_forecast_service --build -d
|
bash start_agents.sh &
|
||||||
|
AGENTS_PID=$!
|
||||||
cd -
|
cd -
|
||||||
|
|
||||||
log building and installing plano cli
|
log building and installing plano cli
|
||||||
|
|
@ -78,8 +79,6 @@ log running e2e tests for openai responses api client
|
||||||
log ========================================
|
log ========================================
|
||||||
uv run pytest test_openai_responses_api_client_with_state.py
|
uv run pytest test_openai_responses_api_client_with_state.py
|
||||||
|
|
||||||
log shutting down the weather_forecast demo
|
log shutting down the weather_forecast agent
|
||||||
log =======================================
|
log =======================================
|
||||||
cd ../../demos/getting_started/weather_forecast
|
kill $AGENTS_PID 2>/dev/null || true
|
||||||
docker compose down
|
|
||||||
cd -
|
|
||||||
|
|
|
||||||
|
|
@ -32,10 +32,11 @@ cd -
|
||||||
# Re-sync e2e deps
|
# Re-sync e2e deps
|
||||||
uv sync
|
uv sync
|
||||||
|
|
||||||
# Start weather_forecast service (needed for prompt_gateway tests)
|
# Start weather_forecast service natively (needed for prompt_gateway tests)
|
||||||
log "building and running weather_forecast service"
|
log "starting weather_forecast agent natively"
|
||||||
cd ../../demos/getting_started/weather_forecast/
|
cd ../../demos/getting_started/weather_forecast/
|
||||||
docker compose up weather_forecast_service --build -d
|
bash start_agents.sh &
|
||||||
|
AGENTS_PID=$!
|
||||||
cd -
|
cd -
|
||||||
|
|
||||||
# Start gateway with prompt_gateway config
|
# Start gateway with prompt_gateway config
|
||||||
|
|
@ -52,6 +53,4 @@ uv run pytest test_prompt_gateway.py
|
||||||
# Cleanup
|
# Cleanup
|
||||||
log "shutting down"
|
log "shutting down"
|
||||||
planoai down --docker || true
|
planoai down --docker || true
|
||||||
cd ../../demos/getting_started/weather_forecast
|
kill $AGENTS_PID 2>/dev/null || true
|
||||||
docker compose down
|
|
||||||
cd -
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue