diff --git a/demos/advanced/currency_exchange/run_demo.sh b/demos/advanced/currency_exchange/run_demo.sh
index 6623dee5..e430a1cd 100644
--- a/demos/advanced/currency_exchange/run_demo.sh
+++ b/demos/advanced/currency_exchange/run_demo.sh
@@ -18,22 +18,24 @@ start_demo() {
echo ".env file created with OPENAI_API_KEY."
fi
- # Step 3: Start Plano
+ # Step 3: Optionally start UI services (AnythingLLM, Jaeger)
+ # Jaeger must start before Plano so it can bind the OTEL port (4317)
+ if [ "$1" == "--with-ui" ]; then
+ echo "Starting UI services (AnythingLLM, Jaeger)..."
+ docker compose up -d
+ fi
+
+ # Step 4: Start Plano
echo "Starting Plano with config.yaml..."
planoai up config.yaml
-
- # Step 4: Start developer services
- echo "Starting Network Agent using Docker Compose..."
- docker compose up -d # Run in detached mode
}
# Function to stop the demo
stop_demo() {
- # Step 1: Stop Docker Compose services
- echo "Stopping Network Agent using Docker Compose..."
- docker compose down
+ # Stop Docker Compose services if running
+ docker compose down 2>/dev/null || true
- # Step 2: Stop Plano
+ # Stop Plano
echo "Stopping Plano..."
planoai down
}
@@ -42,6 +44,5 @@ stop_demo() {
if [ "$1" == "down" ]; then
stop_demo
else
- # Default action is to bring the demo up
- start_demo
+ start_demo "$1"
fi
diff --git a/demos/advanced/multi_turn_rag/docker-compose.yaml b/demos/advanced/multi_turn_rag/docker-compose.yaml
index 1c3ed73c..f36987e4 100644
--- a/demos/advanced/multi_turn_rag/docker-compose.yaml
+++ b/demos/advanced/multi_turn_rag/docker-compose.yaml
@@ -1,15 +1,4 @@
services:
- rag_energy_source_agent:
- build:
- context: .
- dockerfile: Dockerfile
- ports:
- - "18083:80"
- healthcheck:
- test: ["CMD", "curl" ,"http://localhost:80/healthz"]
- interval: 5s
- retries: 20
-
anythingllm:
image: mintplexlabs/anythingllm
restart: always
diff --git a/demos/advanced/multi_turn_rag/pyproject.toml b/demos/advanced/multi_turn_rag/pyproject.toml
new file mode 100644
index 00000000..05824bd6
--- /dev/null
+++ b/demos/advanced/multi_turn_rag/pyproject.toml
@@ -0,0 +1,12 @@
+[project]
+name = "multi-turn-rag"
+version = "0.1.0"
+requires-python = ">=3.12"
+dependencies = [
+ "fastapi",
+ "uvicorn",
+ "pydantic>=2.8",
+ "httpx>=0.27",
+ "openai>=1.51",
+ "python-dotenv>=1.0",
+]
diff --git a/demos/advanced/multi_turn_rag/run_demo.sh b/demos/advanced/multi_turn_rag/run_demo.sh
index f9434aa2..5bec6368 100644
--- a/demos/advanced/multi_turn_rag/run_demo.sh
+++ b/demos/advanced/multi_turn_rag/run_demo.sh
@@ -18,22 +18,32 @@ start_demo() {
echo ".env file created with OPENAI_API_KEY."
fi
- # Step 3: Start Plano
+ # Step 3: Optionally start UI services (AnythingLLM)
+ # UI services must start before Plano to avoid OTEL port conflicts
+ if [ "$1" == "--with-ui" ]; then
+ echo "Starting UI services (AnythingLLM)..."
+ docker compose up -d
+ fi
+
+ # Step 4: Start Plano
echo "Starting Plano with config.yaml..."
planoai up config.yaml
- # Step 4: Start Network Agent
- echo "Starting HR Agent using Docker Compose..."
- docker compose up -d # Run in detached mode
+ # Step 5: Start agents natively
+ echo "Starting agents..."
+ bash start_agents.sh &
}
# Function to stop the demo
stop_demo() {
- # Step 1: Stop Docker Compose services
- echo "Stopping HR Agent using Docker Compose..."
- docker compose down -v
+ # Stop agents
+ echo "Stopping agents..."
+ pkill -f start_agents.sh 2>/dev/null || true
- # Step 2: Stop Plano
+ # Stop Docker Compose services if running
+ docker compose down 2>/dev/null || true
+
+ # Stop Plano
echo "Stopping Plano..."
planoai down
}
@@ -42,6 +52,5 @@ stop_demo() {
if [ "$1" == "down" ]; then
stop_demo
else
- # Default action is to bring the demo up
- start_demo
+ start_demo "$1"
fi
diff --git a/demos/advanced/multi_turn_rag/start_agents.sh b/demos/advanced/multi_turn_rag/start_agents.sh
new file mode 100755
index 00000000..00b7f1b1
--- /dev/null
+++ b/demos/advanced/multi_turn_rag/start_agents.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+set -e
+
+PIDS=()
+
+log() { echo "$(date '+%F %T') - $*"; }
+
+cleanup() {
+    trap - EXIT INT TERM  # run once: otherwise the `exit` below re-fires the EXIT trap
+    log "Stopping agents..."
+    for PID in "${PIDS[@]}"; do
+        kill "$PID" 2>/dev/null && log "Stopped process $PID"
+    done
+    exit 0
+}
+trap cleanup EXIT INT TERM  # stop the agents on normal exit, Ctrl-C, or SIGTERM
+
+log "Starting rag_energy_source_agent on port 18083..."
+uv run uvicorn main:app --host 0.0.0.0 --port 18083 &
+PIDS+=($!)
+
+for PID in "${PIDS[@]}"; do
+ wait "$PID"
+done
diff --git a/demos/advanced/stock_quote/run_demo.sh b/demos/advanced/stock_quote/run_demo.sh
index 6623dee5..e430a1cd 100644
--- a/demos/advanced/stock_quote/run_demo.sh
+++ b/demos/advanced/stock_quote/run_demo.sh
@@ -18,22 +18,24 @@ start_demo() {
echo ".env file created with OPENAI_API_KEY."
fi
- # Step 3: Start Plano
+ # Step 3: Optionally start UI services (AnythingLLM, Jaeger)
+ # Jaeger must start before Plano so it can bind the OTEL port (4317)
+ if [ "$1" == "--with-ui" ]; then
+ echo "Starting UI services (AnythingLLM, Jaeger)..."
+ docker compose up -d
+ fi
+
+ # Step 4: Start Plano
echo "Starting Plano with config.yaml..."
planoai up config.yaml
-
- # Step 4: Start developer services
- echo "Starting Network Agent using Docker Compose..."
- docker compose up -d # Run in detached mode
}
# Function to stop the demo
stop_demo() {
- # Step 1: Stop Docker Compose services
- echo "Stopping Network Agent using Docker Compose..."
- docker compose down
+ # Stop Docker Compose services if running
+ docker compose down 2>/dev/null || true
- # Step 2: Stop Plano
+ # Stop Plano
echo "Stopping Plano..."
planoai down
}
@@ -42,6 +44,5 @@ stop_demo() {
if [ "$1" == "down" ]; then
stop_demo
else
- # Default action is to bring the demo up
- start_demo
+ start_demo "$1"
fi
diff --git a/demos/agent_orchestration/multi_agent_crewai_langchain/README.md b/demos/agent_orchestration/multi_agent_crewai_langchain/README.md
index e2fe23fb..97d71e7f 100644
--- a/demos/agent_orchestration/multi_agent_crewai_langchain/README.md
+++ b/demos/agent_orchestration/multi_agent_crewai_langchain/README.md
@@ -41,21 +41,36 @@ cd demos/agent_orchestration/multi_agent_crewai_langchain
./run_demo.sh
```
-This starts Plano natively and brings up via Docker Compose:
+This starts Plano natively and runs agents as local processes:
- **CrewAI Flight Agent** (port 10520) - flight search
- **LangChain Weather Agent** (port 10510) - weather forecasts
-- **AnythingLLM** (port 3001) - chat interface
-- **Jaeger** (port 16686) - distributed tracing
Plano runs natively on the host (ports 12000, 8001).
+To also start AnythingLLM (chat UI), Jaeger (tracing), and other optional services:
+
+```bash
+./run_demo.sh --with-ui
+```
+
+This additionally starts:
+- **AnythingLLM** (port 3001) - chat interface
+- **Jaeger** (port 16686) - distributed tracing
+
### Try It Out
-1. **Open the Chat Interface**
+1. **Using curl**
+ ```bash
+ curl -X POST http://localhost:8001/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -d '{"model": "gpt-4o", "messages": [{"role": "user", "content": "What is the weather in San Francisco?"}]}'
+ ```
+
+2. **Using AnythingLLM (requires `--with-ui`)**
- Navigate to [http://localhost:3001](http://localhost:3001)
- Create an account (stored locally)
-2. **Ask Multi-Agent Questions**
+3. **Ask Multi-Agent Questions**
```
"What's the weather in San Francisco and can you find flights from Seattle to San Francisco?"
```
@@ -65,7 +80,7 @@ Plano runs natively on the host (ports 12000, 8001).
- Routes the flight part to the CrewAI agent
- Combines responses seamlessly
-3. **View Distributed Traces**
+4. **View Distributed Traces (requires `--with-ui`)**
- Open [http://localhost:16686](http://localhost:16686) (Jaeger UI)
- See how requests flow through both agents
diff --git a/demos/agent_orchestration/multi_agent_crewai_langchain/config.yaml b/demos/agent_orchestration/multi_agent_crewai_langchain/config.yaml
index b3a204f3..ef522337 100644
--- a/demos/agent_orchestration/multi_agent_crewai_langchain/config.yaml
+++ b/demos/agent_orchestration/multi_agent_crewai_langchain/config.yaml
@@ -2,9 +2,9 @@ version: v0.3.0
agents:
- id: weather_agent
- url: http://langchain-weather-agent:10510
+ url: http://localhost:10510
- id: flight_agent
- url: http://crewai-flight-agent:10520
+ url: http://localhost:10520
model_providers:
- model: openai/gpt-4o
diff --git a/demos/agent_orchestration/multi_agent_crewai_langchain/docker-compose.yaml b/demos/agent_orchestration/multi_agent_crewai_langchain/docker-compose.yaml
index 2d9c180b..74954562 100644
--- a/demos/agent_orchestration/multi_agent_crewai_langchain/docker-compose.yaml
+++ b/demos/agent_orchestration/multi_agent_crewai_langchain/docker-compose.yaml
@@ -1,27 +1,5 @@
services:
- crewai-flight-agent:
- build:
- dockerfile: Dockerfile
- restart: always
- ports:
- - "10520:10520"
- environment:
- - LLM_GATEWAY_ENDPOINT=http://host.docker.internal:12000/v1
- - AEROAPI_KEY=${AEROAPI_KEY:?AEROAPI_KEY environment variable is required but not set}
- - PYTHONUNBUFFERED=1
- command: ["python", "-u", "crewai/flight_agent.py"]
-
- langchain-weather-agent:
- build:
- dockerfile: Dockerfile
- restart: always
- ports:
- - "10510:10510"
- environment:
- - LLM_GATEWAY_ENDPOINT=http://host.docker.internal:12000/v1
- command: ["python", "-u", "langchain/weather_agent.py"]
-
anythingllm:
image: mintplexlabs/anythingllm
restart: always
@@ -36,6 +14,8 @@ services:
- GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
- GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
- GENERIC_OPEN_AI_API_KEY=sk-placeholder
+ extra_hosts:
+ - "host.docker.internal:host-gateway"
jaeger:
build:
@@ -44,3 +24,4 @@ services:
ports:
- "16686:16686" # Jaeger UI
- "4317:4317" # OTLP gRPC receiver
+ - "4318:4318" # OTLP HTTP receiver
diff --git a/demos/agent_orchestration/multi_agent_crewai_langchain/run_demo.sh b/demos/agent_orchestration/multi_agent_crewai_langchain/run_demo.sh
index b7dc0fad..35bbbbdd 100755
--- a/demos/agent_orchestration/multi_agent_crewai_langchain/run_demo.sh
+++ b/demos/agent_orchestration/multi_agent_crewai_langchain/run_demo.sh
@@ -12,33 +12,38 @@ start_demo() {
echo "Error: OPENAI_API_KEY environment variable is not set for the demo."
exit 1
fi
- if [ -z "$AEROAPI_KEY" ]; then
- echo "Error: AEROAPI_KEY environment variable is not set for the demo."
- exit 1
- fi
echo "Creating .env file..."
echo "OPENAI_API_KEY=$OPENAI_API_KEY" > .env
- echo "AEROAPI_KEY=$AEROAPI_KEY" >> .env
echo ".env file created with API keys."
fi
- # Step 3: Start Plano
+ # Step 3: Optionally start UI services (AnythingLLM, Jaeger)
+ # Jaeger must start before Plano so it can bind the OTEL port (4317)
+ if [ "$1" == "--with-ui" ]; then
+ echo "Starting UI services (AnythingLLM, Jaeger)..."
+ docker compose up -d
+ fi
+
+ # Step 4: Start Plano
echo "Starting Plano with config.yaml..."
planoai up config.yaml
- # Step 4: Start agents and services
- echo "Starting agents using Docker Compose..."
- docker compose up -d
+ # Step 5: Start agents natively
+ echo "Starting agents..."
+ bash start_agents.sh &
}
# Function to stop the demo
stop_demo() {
- # Step 1: Stop Docker Compose services
- echo "Stopping Docker Compose services..."
- docker compose down
+ # Stop agents
+ echo "Stopping agents..."
+ pkill -f start_agents.sh 2>/dev/null || true
- # Step 2: Stop Plano
+ # Stop Docker Compose services if running
+ docker compose down 2>/dev/null || true
+
+ # Stop Plano
echo "Stopping Plano..."
planoai down
}
@@ -47,5 +52,5 @@ stop_demo() {
if [ "$1" == "down" ]; then
stop_demo
else
- start_demo
+ start_demo "$1"
fi
diff --git a/demos/agent_orchestration/multi_agent_crewai_langchain/start_agents.sh b/demos/agent_orchestration/multi_agent_crewai_langchain/start_agents.sh
new file mode 100755
index 00000000..78d2fecb
--- /dev/null
+++ b/demos/agent_orchestration/multi_agent_crewai_langchain/start_agents.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+set -e
+
+PIDS=()
+
+log() { echo "$(date '+%F %T') - $*"; }
+
+cleanup() {
+    trap - EXIT INT TERM  # run once: otherwise the `exit` below re-fires the EXIT trap
+    log "Stopping agents..."
+    for PID in "${PIDS[@]}"; do
+        kill "$PID" 2>/dev/null && log "Stopped process $PID"
+    done
+    exit 0
+}
+trap cleanup EXIT INT TERM  # stop the agents on normal exit, Ctrl-C, or SIGTERM
+
+export LLM_GATEWAY_ENDPOINT=http://localhost:12000/v1
+
+log "Starting langchain weather_agent on port 10510..."
+uv run python langchain/weather_agent.py &
+PIDS+=($!)
+
+log "Starting crewai flight_agent on port 10520..."
+uv run python crewai/flight_agent.py &
+PIDS+=($!)
+
+for PID in "${PIDS[@]}"; do
+ wait "$PID"
+done
diff --git a/demos/agent_orchestration/travel_agents/README.md b/demos/agent_orchestration/travel_agents/README.md
index d6468612..7886539d 100644
--- a/demos/agent_orchestration/travel_agents/README.md
+++ b/demos/agent_orchestration/travel_agents/README.md
@@ -23,9 +23,10 @@ All agents use Plano's agent orchestration LLM to intelligently route user reque
## Prerequisites
- [Plano CLI](https://docs.planoai.dev/get_started/quickstart.html#prerequisites) installed (`pip install planoai`)
-- Docker and Docker Compose (for agent services)
+- [uv](https://docs.astral.sh/uv/) installed (for running agents natively)
- [OpenAI API key](https://platform.openai.com/api-keys)
- [FlightAware AeroAPI key](https://www.flightaware.com/aeroapi/portal)
+- Docker and Docker Compose (optional, only needed for `--with-ui`)
> **Note:** You'll need to obtain a FlightAware AeroAPI key for live flight data. Visit [https://www.flightaware.com/aeroapi/portal](https://www.flightaware.com/aeroapi/portal) to get your API key.
@@ -46,16 +47,34 @@ export OPENAI_API_KEY="your OpenAI api key"
./run_demo.sh
```
-This starts Plano natively and brings up via Docker Compose:
+This starts Plano natively and runs agents as local processes:
- Weather Agent on port 10510
- Flight Agent on port 10520
-- Open WebUI on port 8080
Plano runs natively on the host (port 8001).
+To also start Open WebUI, Jaeger tracing, and other optional services, pass `--with-ui`:
+
+```bash
+./run_demo.sh --with-ui
+```
+
+This additionally starts:
+- Open WebUI on port 8080
+- Jaeger tracing UI on port 16686
+
### 4. Test the System
-Use Open WebUI at http://localhost:8080
+**Option A: Using curl**
+```bash
+curl -X POST http://localhost:8001/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -d '{"model": "gpt-5.2", "messages": [{"role": "user", "content": "What is the weather in Istanbul?"}]}'
+```
+
+**Option B: Using Open WebUI (requires `--with-ui`)**
+
+Navigate to http://localhost:8080
> **Note:** The Open WebUI may take a few minutes to start up and be fully ready. Please wait for the container to finish initializing before accessing the interface. Once ready, make sure to select the **gpt-5.2** model from the model dropdown menu in the UI.
@@ -102,7 +121,7 @@ Each agent:
3. Generates response using GPT-5.2
4. Streams response back to user
-Both agents run as Docker containers and communicate with Plano running natively on the host.
+Both agents run as native local processes and communicate with Plano running natively on the host.
## Observability
diff --git a/demos/agent_orchestration/travel_agents/docker-compose.yaml b/demos/agent_orchestration/travel_agents/docker-compose.yaml
index f0fb78e5..66edbdc3 100644
--- a/demos/agent_orchestration/travel_agents/docker-compose.yaml
+++ b/demos/agent_orchestration/travel_agents/docker-compose.yaml
@@ -1,32 +1,5 @@
services:
- weather-agent:
- build:
- context: .
- dockerfile: Dockerfile
- container_name: weather-agent
- restart: always
- ports:
- - "10510:10510"
- environment:
- - LLM_GATEWAY_ENDPOINT=http://host.docker.internal:12000/v1
- command: ["uv", "run", "python", "src/travel_agents/weather_agent.py"]
- extra_hosts:
- - "host.docker.internal:host-gateway"
- flight-agent:
- build:
- context: .
- dockerfile: Dockerfile
- container_name: flight-agent
- restart: always
- ports:
- - "10520:10520"
- environment:
- - LLM_GATEWAY_ENDPOINT=http://host.docker.internal:12000/v1
- - AEROAPI_KEY=${AEROAPI_KEY:? AEROAPI_KEY environment variable is required but not set}
- command: ["uv", "run", "python", "src/travel_agents/flight_agent.py"]
- extra_hosts:
- - "host.docker.internal:host-gateway"
open-web-ui:
image: dyrnq/open-webui:main
restart: always
@@ -40,9 +13,8 @@ services:
- ENABLE_TITLE_GENERATION=false
- ENABLE_TAGS_GENERATION=false
- ENABLE_AUTOCOMPLETE_GENERATION=false
- depends_on:
- - weather-agent
- - flight-agent
+ extra_hosts:
+ - "host.docker.internal:host-gateway"
jaeger:
build:
context: ../../shared/jaeger
diff --git a/demos/agent_orchestration/travel_agents/run_demo.sh b/demos/agent_orchestration/travel_agents/run_demo.sh
index b7dc0fad..643a0aa2 100755
--- a/demos/agent_orchestration/travel_agents/run_demo.sh
+++ b/demos/agent_orchestration/travel_agents/run_demo.sh
@@ -23,22 +23,32 @@ start_demo() {
echo ".env file created with API keys."
fi
- # Step 3: Start Plano
+ # Step 3: Optionally start UI services (Open WebUI, Jaeger)
+ # Jaeger must start before Plano so it can bind the OTEL port (4317)
+ if [ "$1" == "--with-ui" ]; then
+ echo "Starting UI services (Open WebUI, Jaeger)..."
+ docker compose up -d
+ fi
+
+ # Step 4: Start Plano
echo "Starting Plano with config.yaml..."
planoai up config.yaml
- # Step 4: Start agents and services
- echo "Starting agents using Docker Compose..."
- docker compose up -d
+ # Step 5: Start agents natively
+ echo "Starting agents..."
+ bash start_agents.sh &
}
# Function to stop the demo
stop_demo() {
- # Step 1: Stop Docker Compose services
- echo "Stopping Docker Compose services..."
- docker compose down
+ # Stop agents
+ echo "Stopping agents..."
+ pkill -f start_agents.sh 2>/dev/null || true
- # Step 2: Stop Plano
+ # Stop Docker Compose services if running
+ docker compose down 2>/dev/null || true
+
+ # Stop Plano
echo "Stopping Plano..."
planoai down
}
@@ -47,5 +57,5 @@ stop_demo() {
if [ "$1" == "down" ]; then
stop_demo
else
- start_demo
+ start_demo "$1"
fi
diff --git a/demos/agent_orchestration/travel_agents/start_agents.sh b/demos/agent_orchestration/travel_agents/start_agents.sh
new file mode 100755
index 00000000..4f2e32a7
--- /dev/null
+++ b/demos/agent_orchestration/travel_agents/start_agents.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+set -e
+
+PIDS=()
+
+log() { echo "$(date '+%F %T') - $*"; }
+
+cleanup() {
+    trap - EXIT INT TERM  # run once: otherwise the `exit` below re-fires the EXIT trap
+    log "Stopping agents..."
+    for PID in "${PIDS[@]}"; do
+        kill "$PID" 2>/dev/null && log "Stopped process $PID"
+    done
+    exit 0
+}
+trap cleanup EXIT INT TERM  # stop the agents on normal exit, Ctrl-C, or SIGTERM
+
+export LLM_GATEWAY_ENDPOINT=http://localhost:12000/v1
+
+log "Starting weather_agent on port 10510..."
+uv run python src/travel_agents/weather_agent.py &
+PIDS+=($!)
+
+log "Starting flight_agent on port 10520..."
+uv run python src/travel_agents/flight_agent.py &
+PIDS+=($!)
+
+for PID in "${PIDS[@]}"; do
+ wait "$PID"
+done
diff --git a/demos/filter_chains/http_filter/README.md b/demos/filter_chains/http_filter/README.md
index 5e675113..86748217 100644
--- a/demos/filter_chains/http_filter/README.md
+++ b/demos/filter_chains/http_filter/README.md
@@ -41,23 +41,27 @@ export OPENAI_API_KEY="your-key"
./run_demo.sh
```
-This starts Plano natively and brings up via Docker Compose:
-- Input Guards MCP server on port 10500
-- Query Rewriter MCP server on port 10501
-- Context Builder MCP server on port 10502
+This starts Plano natively and runs agents as local processes:
+- Input Guards HTTP server on port 10500
+- Query Rewriter HTTP server on port 10501
+- Context Builder HTTP server on port 10502
- RAG Agent REST server on port 10505
-- Jaeger UI for viewing traces at http://localhost:16686
-- AnythingLLM at http://localhost:3001 for interactive queries
Plano runs natively on the host (port 8001 and 12000).
+To also start AnythingLLM (chat UI) and Jaeger (tracing):
+
+```bash
+./run_demo.sh --with-ui
+```
+
+This additionally starts:
+- Jaeger UI for viewing traces at http://localhost:16686
+- AnythingLLM at http://localhost:3001 for interactive queries
+
### 2. Test the system
-**Option A: Using AnythingLLM (recommended)**
-
-Navigate to http://localhost:3001 and send queries through the chat interface.
-
-**Option B: Using curl**
+**Option A: Using curl (recommended)**
```bash
curl -X POST http://localhost:8001/v1/chat/completions \
-H "Content-Type: application/json" \
diff --git a/demos/filter_chains/http_filter/config.yaml b/demos/filter_chains/http_filter/config.yaml
index 117931e2..014a141a 100644
--- a/demos/filter_chains/http_filter/config.yaml
+++ b/demos/filter_chains/http_filter/config.yaml
@@ -2,23 +2,23 @@ version: v0.3.0
agents:
- id: rag_agent
- url: http://rag-agents:10505
+ url: http://localhost:10505
filters:
- id: input_guards
- url: http://rag-agents:10500
+ url: http://localhost:10500
type: http
# type: mcp (default)
# transport: streamable-http (default)
# tool: input_guards (default - same as filter id)
- id: query_rewriter
- url: http://rag-agents:10501
+ url: http://localhost:10501
type: http
# type: mcp (default)
# transport: streamable-http (default)
# tool: query_rewriter (default - same as filter id)
- id: context_builder
- url: http://rag-agents:10502
+ url: http://localhost:10502
type: http
model_providers:
diff --git a/demos/filter_chains/http_filter/docker-compose.yaml b/demos/filter_chains/http_filter/docker-compose.yaml
index 64962bce..0361926c 100644
--- a/demos/filter_chains/http_filter/docker-compose.yaml
+++ b/demos/filter_chains/http_filter/docker-compose.yaml
@@ -1,16 +1,4 @@
services:
- rag-agents:
- build:
- context: .
- dockerfile: Dockerfile
- ports:
- - "10500:10500"
- - "10501:10501"
- - "10502:10502"
- - "10505:10505"
- environment:
- - LLM_GATEWAY_ENDPOINT=${LLM_GATEWAY_ENDPOINT:-http://host.docker.internal:12000/v1}
- - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set}
jaeger:
build:
context: ../../shared/jaeger
@@ -32,3 +20,5 @@ services:
- GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
- GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
- GENERIC_OPEN_AI_API_KEY=sk-placeholder
+ extra_hosts:
+ - "host.docker.internal:host-gateway"
diff --git a/demos/filter_chains/http_filter/run_demo.sh b/demos/filter_chains/http_filter/run_demo.sh
index bed84f16..f203f5b1 100755
--- a/demos/filter_chains/http_filter/run_demo.sh
+++ b/demos/filter_chains/http_filter/run_demo.sh
@@ -18,22 +18,32 @@ start_demo() {
echo ".env file created with OPENAI_API_KEY."
fi
- # Step 3: Start Plano
+ # Step 3: Optionally start UI services (AnythingLLM, Jaeger)
+ # Jaeger must start before Plano so it can bind the OTEL port (4317)
+ if [ "$1" == "--with-ui" ]; then
+ echo "Starting UI services (AnythingLLM, Jaeger)..."
+ docker compose up -d
+ fi
+
+ # Step 4: Start Plano
echo "Starting Plano with config.yaml..."
planoai up config.yaml
- # Step 4: Start services
- echo "Starting services using Docker Compose..."
- docker compose up -d
+ # Step 5: Start agents natively
+ echo "Starting agents..."
+ bash start_agents.sh &
}
# Function to stop the demo
stop_demo() {
- # Step 1: Stop Docker Compose services
- echo "Stopping Docker Compose services..."
- docker compose down
+ # Stop agents
+ echo "Stopping agents..."
+ pkill -f start_agents.sh 2>/dev/null || true
- # Step 2: Stop Plano
+ # Stop Docker Compose services if running
+ docker compose down 2>/dev/null || true
+
+ # Stop Plano
echo "Stopping Plano..."
planoai down
}
@@ -42,5 +52,5 @@ stop_demo() {
if [ "$1" == "down" ]; then
stop_demo
else
- start_demo
+ start_demo "$1"
fi
diff --git a/demos/filter_chains/http_filter/start_agents.sh b/demos/filter_chains/http_filter/start_agents.sh
old mode 100644
new mode 100755
index 06cabeec..8dfdc0f4
--- a/demos/filter_chains/http_filter/start_agents.sh
+++ b/demos/filter_chains/http_filter/start_agents.sh
@@ -1,78 +1,38 @@
-# #!/bin/bash
-# set -e
-
-# WAIT_FOR_PIDS=()
-
-# log() {
-# timestamp=$(python3 -c 'from datetime import datetime; print(datetime.now().strftime("%Y-%m-%d %H:%M:%S,%f")[:23])')
-# message="$*"
-# echo "$timestamp - $message"
-# }
-
-# cleanup() {
-# log "Caught signal, terminating all user processes ..."
-# for PID in "${WAIT_FOR_PIDS[@]}"; do
-# if kill $PID 2> /dev/null; then
-# log "killed process: $PID"
-# fi
-# done
-# exit 1
-# }
-
-# trap cleanup EXIT
-
-# log "Starting input_guards agent on port 10500/mcp..."
-# uv run python -m rag_agent --rest-server --host 0.0.0.0 --rest-port 10500 --agent input_guards &
-# WAIT_FOR_PIDS+=($!)
-
-# log "Starting query_rewriter agent on port 10501/mcp..."
-# uv run python -m rag_agent --rest-server --host 0.0.0.0 --rest-port 10501 --agent query_rewriter &
-# WAIT_FOR_PIDS+=($!)
-
-# log "Starting context_builder agent on port 10502/mcp..."
-# uv run python -m rag_agent --rest-server --host 0.0.0.0 --rest-port 10502 --agent context_builder &
-# WAIT_FOR_PIDS+=($!)
-
-# # log "Starting response_generator agent on port 10400..."
-# # uv run python -m rag_agent --host 0.0.0.0 --port 10400 --agent response_generator &
-# # WAIT_FOR_PIDS+=($!)
-
-# log "Starting response_generator agent on port 10505..."
-# uv run python -m rag_agent --rest-server --host 0.0.0.0 --rest-port 10505 --agent response_generator &
-# WAIT_FOR_PIDS+=($!)
-
-# for PID in "${WAIT_FOR_PIDS[@]}"; do
-# wait "$PID"
-# done
-
-
-
-
#!/bin/bash
set -e
-export PYTHONPATH=/app/src
-
-pids=()
+PIDS=()
log() { echo "$(date '+%F %T') - $*"; }
-log "Starting input_guards HTTP server on :10500"
+cleanup() {
+    trap - EXIT INT TERM  # run once: otherwise the `exit` below re-fires the EXIT trap
+    log "Stopping agents..."
+    for PID in "${PIDS[@]}"; do
+        kill "$PID" 2>/dev/null && log "Stopped process $PID"
+    done
+    exit 0
+}
+trap cleanup EXIT INT TERM  # stop the agents on normal exit, Ctrl-C, or SIGTERM
+
+export PYTHONPATH=./src
+
+log "Starting input_guards HTTP server on port 10500..."
uv run uvicorn rag_agent.input_guards:app --host 0.0.0.0 --port 10500 &
-pids+=($!)
+PIDS+=($!)
-log "Starting query_rewriter HTTP server on :10501"
+log "Starting query_rewriter HTTP server on port 10501..."
uv run uvicorn rag_agent.query_rewriter:app --host 0.0.0.0 --port 10501 &
-pids+=($!)
+PIDS+=($!)
-log "Starting context_builder HTTP server on :10502"
+log "Starting context_builder HTTP server on port 10502..."
uv run uvicorn rag_agent.context_builder:app --host 0.0.0.0 --port 10502 &
-pids+=($!)
+PIDS+=($!)
-log "Starting response_generator (OpenAI-compatible) on :10505"
+log "Starting response_generator (OpenAI-compatible) on port 10505..."
uv run uvicorn rag_agent.rag_agent:app --host 0.0.0.0 --port 10505 &
-pids+=($!)
+PIDS+=($!)
-for PID in "${pids[@]}"; do
+for PID in "${PIDS[@]}"; do
wait "$PID"
done
diff --git a/demos/filter_chains/mcp_filter/README.md b/demos/filter_chains/mcp_filter/README.md
index 5e675113..798015e2 100644
--- a/demos/filter_chains/mcp_filter/README.md
+++ b/demos/filter_chains/mcp_filter/README.md
@@ -41,23 +41,27 @@ export OPENAI_API_KEY="your-key"
./run_demo.sh
```
-This starts Plano natively and brings up via Docker Compose:
+This starts Plano natively and runs agents as local processes:
- Input Guards MCP server on port 10500
- Query Rewriter MCP server on port 10501
- Context Builder MCP server on port 10502
- RAG Agent REST server on port 10505
-- Jaeger UI for viewing traces at http://localhost:16686
-- AnythingLLM at http://localhost:3001 for interactive queries
Plano runs natively on the host (port 8001 and 12000).
+To also start AnythingLLM (chat UI) and Jaeger (tracing):
+
+```bash
+./run_demo.sh --with-ui
+```
+
+This additionally starts:
+- Jaeger UI for viewing traces at http://localhost:16686
+- AnythingLLM at http://localhost:3001 for interactive queries
+
### 2. Test the system
-**Option A: Using AnythingLLM (recommended)**
-
-Navigate to http://localhost:3001 and send queries through the chat interface.
-
-**Option B: Using curl**
+**Option A: Using curl (recommended)**
```bash
curl -X POST http://localhost:8001/v1/chat/completions \
-H "Content-Type: application/json" \
diff --git a/demos/filter_chains/mcp_filter/docker-compose.yaml b/demos/filter_chains/mcp_filter/docker-compose.yaml
index 64962bce..0361926c 100644
--- a/demos/filter_chains/mcp_filter/docker-compose.yaml
+++ b/demos/filter_chains/mcp_filter/docker-compose.yaml
@@ -1,16 +1,4 @@
services:
- rag-agents:
- build:
- context: .
- dockerfile: Dockerfile
- ports:
- - "10500:10500"
- - "10501:10501"
- - "10502:10502"
- - "10505:10505"
- environment:
- - LLM_GATEWAY_ENDPOINT=${LLM_GATEWAY_ENDPOINT:-http://host.docker.internal:12000/v1}
- - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set}
jaeger:
build:
context: ../../shared/jaeger
@@ -32,3 +20,5 @@ services:
- GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
- GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
- GENERIC_OPEN_AI_API_KEY=sk-placeholder
+ extra_hosts:
+ - "host.docker.internal:host-gateway"
diff --git a/demos/filter_chains/mcp_filter/run_demo.sh b/demos/filter_chains/mcp_filter/run_demo.sh
index bed84f16..f203f5b1 100755
--- a/demos/filter_chains/mcp_filter/run_demo.sh
+++ b/demos/filter_chains/mcp_filter/run_demo.sh
@@ -18,22 +18,32 @@ start_demo() {
echo ".env file created with OPENAI_API_KEY."
fi
- # Step 3: Start Plano
+ # Step 3: Optionally start UI services (AnythingLLM, Jaeger)
+ # Jaeger must start before Plano so it can bind the OTEL port (4317)
+ if [ "$1" == "--with-ui" ]; then
+ echo "Starting UI services (AnythingLLM, Jaeger)..."
+ docker compose up -d
+ fi
+
+ # Step 4: Start Plano
echo "Starting Plano with config.yaml..."
planoai up config.yaml
- # Step 4: Start services
- echo "Starting services using Docker Compose..."
- docker compose up -d
+ # Step 5: Start agents natively
+ echo "Starting agents..."
+ bash start_agents.sh &
}
# Function to stop the demo
stop_demo() {
- # Step 1: Stop Docker Compose services
- echo "Stopping Docker Compose services..."
- docker compose down
+ # Stop agents
+ echo "Stopping agents..."
+ pkill -f start_agents.sh 2>/dev/null || true
- # Step 2: Stop Plano
+ # Stop Docker Compose services if running
+ docker compose down 2>/dev/null || true
+
+ # Stop Plano
echo "Stopping Plano..."
planoai down
}
@@ -42,5 +52,5 @@ stop_demo() {
if [ "$1" == "down" ]; then
stop_demo
else
- start_demo
+ start_demo "$1"
fi
diff --git a/demos/getting_started/llm_gateway/README.md b/demos/getting_started/llm_gateway/README.md
index e87467fc..b29397b6 100644
--- a/demos/getting_started/llm_gateway/README.md
+++ b/demos/getting_started/llm_gateway/README.md
@@ -7,7 +7,14 @@ This demo shows how you can use Plano gateway to manage keys and route to upstre
```sh
sh run_demo.sh
```
-1. Navigate to http://localhost:3001/
+1. Test with curl (see example below)
+
+To also start the AnythingLLM chat UI and Jaeger tracing, pass `--with-ui`:
+```sh
+sh run_demo.sh --with-ui
+```
+
+Then navigate to http://localhost:3001/ for AnythingLLM.
Following screen shows an example of interaction with Plano gateway showing dynamic routing. You can select between different LLMs using "override model" option in the chat UI.
@@ -47,7 +54,7 @@ $ curl --header 'Content-Type: application/json' \
```
# Observability
-For tracing you can head over to http://localhost:16686/ to view recent traces.
+For tracing, start with `--with-ui` and head over to http://localhost:16686/ to view recent traces.
Following is a screenshot of tracing UI showing call received by Plano gateway and making upstream call to LLM,
diff --git a/demos/getting_started/llm_gateway/run_demo.sh b/demos/getting_started/llm_gateway/run_demo.sh
index b049bf31..e430a1cd 100644
--- a/demos/getting_started/llm_gateway/run_demo.sh
+++ b/demos/getting_started/llm_gateway/run_demo.sh
@@ -18,22 +18,24 @@ start_demo() {
echo ".env file created with OPENAI_API_KEY."
fi
- # Step 3: Start Plano
+    # Step 3: Optionally start UI services (AnythingLLM, Jaeger)
+    # Jaeger must start before Plano so it can bind the OTEL port (4317)
+    if [ "$1" = "--with-ui" ]; then  # POSIX "=": the README runs this with sh, and dash rejects "=="
+        echo "Starting UI services (AnythingLLM, Jaeger)..."
+        docker compose up -d
+    fi
+
+ # Step 4: Start Plano
echo "Starting Plano with config.yaml..."
planoai up config.yaml
-
- # Step 4: Start LLM Routing
- echo "Starting LLM Routing using Docker Compose..."
- docker compose up -d # Run in detached mode
}
# Function to stop the demo
stop_demo() {
- # Step 1: Stop Docker Compose services
- echo "Stopping LLM Routing using Docker Compose..."
- docker compose down
+ # Stop Docker Compose services if running
+ docker compose down 2>/dev/null || true
- # Step 2: Stop Plano
+ # Stop Plano
echo "Stopping Plano..."
planoai down
}
@@ -42,6 +44,5 @@ stop_demo() {
if [ "$1" == "down" ]; then
stop_demo
else
- # Default action is to bring the demo up
- start_demo
+ start_demo "$1"
fi
diff --git a/demos/getting_started/weather_forecast/README.md b/demos/getting_started/weather_forecast/README.md
index 8a9eb6c0..91fa810f 100644
--- a/demos/getting_started/weather_forecast/README.md
+++ b/demos/getting_started/weather_forecast/README.md
@@ -10,15 +10,26 @@ This demo shows how you can use Plano's core function calling capabilities.
3. ```sh
sh run_demo.sh
```
-4. Navigate to http://localhost:3001/
-5. You can type in queries like "how is the weather?"
+4. Test with curl:
+ ```sh
+ curl http://localhost:10000/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -d '{"model": "gpt-4o", "messages": [{"role": "user", "content": "how is the weather in San Francisco?"}]}'
+ ```
Here is a sample interaction,
-## Tracing
+## Using the Chat UI and Tracing (optional)
-To see a tracing dashboard, navigate to http://localhost:16686/ to open Jaeger UI.
+To start AnythingLLM (chat UI) and other optional services, pass `--with-ui`:
+
+```sh
+sh run_demo.sh --with-ui
+```
+
+- Navigate to http://localhost:3001/ for AnythingLLM
+- Navigate to http://localhost:16686/ for Jaeger tracing UI
### Stopping Demo
diff --git a/demos/getting_started/weather_forecast/docker-compose.yaml b/demos/getting_started/weather_forecast/docker-compose.yaml
index 84074ab9..f36987e4 100644
--- a/demos/getting_started/weather_forecast/docker-compose.yaml
+++ b/demos/getting_started/weather_forecast/docker-compose.yaml
@@ -1,14 +1,4 @@
services:
- weather_forecast_service:
- build:
- context: ./
- environment:
- - OLTP_HOST=http://jaeger:4317
- extra_hosts:
- - "host.docker.internal:host-gateway"
- ports:
- - "18083:80"
-
anythingllm:
image: mintplexlabs/anythingllm
restart: always
diff --git a/demos/getting_started/weather_forecast/run_demo.sh b/demos/getting_started/weather_forecast/run_demo.sh
index c8eb96e5..c77f2d83 100644
--- a/demos/getting_started/weather_forecast/run_demo.sh
+++ b/demos/getting_started/weather_forecast/run_demo.sh
@@ -72,23 +72,32 @@ start_demo() {
exit 1
fi
- # Step 4: Start Plano
+ # Step 4: Optionally start UI services (AnythingLLM, Jaeger, etc.)
+ # Jaeger must start before Plano so it can bind the OTEL port (4317)
+ if [ "$1" == "--with-ui" ] || [ "$2" == "--with-ui" ]; then
+ echo "Starting UI services with $COMPOSE_FILE..."
+ docker compose -f "$COMPOSE_FILE" up -d
+ fi
+
+ # Step 5: Start Plano
echo "Starting Plano with config.yaml..."
planoai up config.yaml
- # Step 5: Start Network Agent with the chosen Docker Compose file
- echo "Starting Network Agent with $COMPOSE_FILE..."
- docker compose -f "$COMPOSE_FILE" up -d # Run in detached mode
+ # Step 6: Start agents natively
+ echo "Starting agents..."
+ bash start_agents.sh &
}
# Function to stop the demo
stop_demo() {
- echo "Stopping all Docker Compose services..."
+ # Stop agents
+ echo "Stopping agents..."
+ pkill -f start_agents.sh 2>/dev/null || true
- # Stop all services by iterating through all configurations
+ # Stop all Docker Compose services if running
+ echo "Stopping Docker Compose services..."
for compose_file in ./docker-compose*.yaml; do
- echo "Stopping services in $compose_file..."
- docker compose -f "$compose_file" down
+ docker compose -f "$compose_file" down 2>/dev/null || true
done
# Stop Plano
@@ -101,6 +110,6 @@ if [ "$1" == "down" ]; then
# Call stop_demo with the second argument as the demo to stop
stop_demo
else
- # Use the argument (jaeger, logfire, signoz) to determine the compose file
- start_demo "$1"
+ # Use the argument (jaeger, logfire, signoz, --with-ui) to determine the compose file
+ start_demo "$1" "$2"
fi
diff --git a/demos/getting_started/weather_forecast/start_agents.sh b/demos/getting_started/weather_forecast/start_agents.sh
new file mode 100755
index 00000000..548f2bf7
--- /dev/null
+++ b/demos/getting_started/weather_forecast/start_agents.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+set -e
+
+PIDS=()
+
+log() { echo "$(date '+%F %T') - $*"; }
+
+cleanup() {
+ log "Stopping agents..."
+ for PID in "${PIDS[@]}"; do
+ kill $PID 2>/dev/null && log "Stopped process $PID"
+ done
+ exit 0
+}
+
+trap cleanup EXIT INT TERM
+
+log "Starting weather_forecast_service on port 18083..."
+uv run uvicorn main:app --host 0.0.0.0 --port 18083 &
+PIDS+=($!)
+
+for PID in "${PIDS[@]}"; do
+ wait "$PID"
+done
diff --git a/demos/integrations/ollama/run_demo.sh b/demos/integrations/ollama/run_demo.sh
index 6623dee5..5bbf183b 100644
--- a/demos/integrations/ollama/run_demo.sh
+++ b/demos/integrations/ollama/run_demo.sh
@@ -7,33 +7,58 @@ start_demo() {
if [ -f ".env" ]; then
echo ".env file already exists. Skipping creation."
else
- # Step 2: Create `.env` file and set OpenAI key
+ # Step 2: Create `.env` file and set API keys
if [ -z "$OPENAI_API_KEY" ]; then
echo "Error: OPENAI_API_KEY environment variable is not set for the demo."
exit 1
fi
+ if [ -z "$ANTHROPIC_API_KEY" ]; then
+ echo "Warning: ANTHROPIC_API_KEY environment variable is not set. Anthropic features may not work."
+ fi
echo "Creating .env file..."
echo "OPENAI_API_KEY=$OPENAI_API_KEY" > .env
- echo ".env file created with OPENAI_API_KEY."
+ if [ -n "$ANTHROPIC_API_KEY" ]; then
+ echo "ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY" >> .env
+ fi
+ echo ".env file created with API keys."
fi
- # Step 3: Start Plano
- echo "Starting Plano with config.yaml..."
- planoai up config.yaml
+ # Step 3: Optionally start UI services (AnythingLLM, Jaeger)
+ # Jaeger must start before Plano so it can bind the OTEL port (4317)
+ if [ "$1" == "--with-ui" ]; then
+ echo "Starting UI services (AnythingLLM, Jaeger)..."
+ docker compose up -d
+ fi
- # Step 4: Start developer services
- echo "Starting Network Agent using Docker Compose..."
- docker compose up -d # Run in detached mode
+ # Step 4: Start Plano
+ echo "Starting Plano with arch_config_with_aliases.yaml..."
+ planoai up arch_config_with_aliases.yaml
+
+ echo ""
+ echo "Plano started successfully."
+ echo "Please run the following CURL command to test model alias routing. Additional instructions are in the README.md file."
+ echo ""
+ echo "curl -sS -X POST \"http://localhost:12000/v1/chat/completions\" \
+ -H \"Authorization: Bearer test-key\" \
+ -H \"Content-Type: application/json\" \
+ -d '{
+ \"model\": \"arch.summarize.v1\",
+ \"max_tokens\": 50,
+ \"messages\": [
+ { \"role\": \"user\",
+ \"content\": \"Hello, please respond with exactly: Hello from alias arch.summarize.v1!\"
+ }
+ ]
+ }' | jq ."
}
# Function to stop the demo
stop_demo() {
- # Step 1: Stop Docker Compose services
- echo "Stopping Network Agent using Docker Compose..."
- docker compose down
+ # Stop Docker Compose services if running
+ docker compose down 2>/dev/null || true
- # Step 2: Stop Plano
+ # Stop Plano
echo "Stopping Plano..."
planoai down
}
@@ -42,6 +67,5 @@ stop_demo() {
if [ "$1" == "down" ]; then
stop_demo
else
- # Default action is to bring the demo up
- start_demo
+ start_demo "$1"
fi
diff --git a/demos/integrations/spotify_bearer_auth/run_demo.sh b/demos/integrations/spotify_bearer_auth/run_demo.sh
index 6623dee5..e430a1cd 100644
--- a/demos/integrations/spotify_bearer_auth/run_demo.sh
+++ b/demos/integrations/spotify_bearer_auth/run_demo.sh
@@ -18,22 +18,24 @@ start_demo() {
echo ".env file created with OPENAI_API_KEY."
fi
- # Step 3: Start Plano
+ # Step 3: Optionally start UI services (AnythingLLM, Jaeger)
+ # Jaeger must start before Plano so it can bind the OTEL port (4317)
+ if [ "$1" == "--with-ui" ]; then
+ echo "Starting UI services (AnythingLLM, Jaeger)..."
+ docker compose up -d
+ fi
+
+ # Step 4: Start Plano
echo "Starting Plano with config.yaml..."
planoai up config.yaml
-
- # Step 4: Start developer services
- echo "Starting Network Agent using Docker Compose..."
- docker compose up -d # Run in detached mode
}
# Function to stop the demo
stop_demo() {
- # Step 1: Stop Docker Compose services
- echo "Stopping Network Agent using Docker Compose..."
- docker compose down
+ # Stop Docker Compose services if running
+ docker compose down 2>/dev/null || true
- # Step 2: Stop Plano
+ # Stop Plano
echo "Stopping Plano..."
planoai down
}
@@ -42,6 +44,5 @@ stop_demo() {
if [ "$1" == "down" ]; then
stop_demo
else
- # Default action is to bring the demo up
- start_demo
+ start_demo "$1"
fi
diff --git a/demos/llm_routing/preference_based_routing/README.md b/demos/llm_routing/preference_based_routing/README.md
index 03d28cee..009002fd 100644
--- a/demos/llm_routing/preference_based_routing/README.md
+++ b/demos/llm_routing/preference_based_routing/README.md
@@ -10,19 +10,58 @@ cd demos/llm_routing/preference_based_routing
./run_demo.sh
```
-Or manually:
+To also start AnythingLLM (chat UI) and Jaeger (tracing):
-1. Start Plano
```bash
-planoai up config.yaml
+./run_demo.sh --with-ui
```
-2. Start AnythingLLM
+Then open AnythingLLM at http://localhost:3001/
+
+Or start manually:
+
+1. (Optional) Start AnythingLLM and Jaeger
```bash
docker compose up -d
```
-3. open AnythingLLM http://localhost:3001/
+2. Start Plano
+```bash
+planoai up config.yaml
+```
+
+3. Test with curl, or open AnythingLLM at http://localhost:3001/ (only available if you started it in step 1)
+
+## Running with local Arch-Router (via Ollama)
+
+By default, Plano uses a hosted Arch-Router endpoint. To self-host Arch-Router locally using Ollama:
+
+1. Install [Ollama](https://ollama.ai) and pull the model:
+```bash
+ollama pull hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M
+```
+
+2. Make sure Ollama is running (`ollama serve` or the macOS app).
+
+3. Start Plano with the local config:
+```bash
+planoai up plano_config_local.yaml
+```
+
+4. Test routing:
+```bash
+curl -s "http://localhost:12000/routing/v1/messages" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "gpt-4o-mini",
+ "max_tokens": 1024,
+ "messages": [
+ {"role": "user", "content": "Create a REST API endpoint in Rust using actix-web"}
+ ]
+ }'
+```
+
+You should see the router select the appropriate model based on the routing preferences defined in `plano_config_local.yaml`.
# Testing out preference based routing
diff --git a/demos/llm_routing/preference_based_routing/run_demo.sh b/demos/llm_routing/preference_based_routing/run_demo.sh
index c9525c26..30e0c67b 100755
--- a/demos/llm_routing/preference_based_routing/run_demo.sh
+++ b/demos/llm_routing/preference_based_routing/run_demo.sh
@@ -24,22 +24,24 @@ start_demo() {
echo ".env file created with API keys."
fi
- # Step 3: Start Plano
+ # Step 3: Optionally start UI services (AnythingLLM, Jaeger)
+ # Jaeger must start before Plano so it can bind the OTEL port (4317)
+ if [ "$1" == "--with-ui" ]; then
+ echo "Starting UI services (AnythingLLM, Jaeger)..."
+ docker compose up -d
+ fi
+
+ # Step 4: Start Plano
echo "Starting Plano with config.yaml..."
planoai up config.yaml
-
- # Step 4: Start services
- echo "Starting services using Docker Compose..."
- docker compose up -d
}
# Function to stop the demo
stop_demo() {
- # Step 1: Stop Docker Compose services
- echo "Stopping Docker Compose services..."
- docker compose down
+ # Stop Docker Compose services if running
+ docker compose down 2>/dev/null || true
- # Step 2: Stop Plano
+ # Stop Plano
echo "Stopping Plano..."
planoai down
}
@@ -48,5 +50,5 @@ stop_demo() {
if [ "$1" == "down" ]; then
stop_demo
else
- start_demo
+ start_demo "$1"
fi
diff --git a/docs/source/guides/llm_router.rst b/docs/source/guides/llm_router.rst
index 188b1e30..41c51b4a 100644
--- a/docs/source/guides/llm_router.rst
+++ b/docs/source/guides/llm_router.rst
@@ -228,6 +228,129 @@ In summary, Arch-Router demonstrates:
- **Production-Ready Performance**: Optimized for low-latency, high-throughput applications in multi-model environments.
+Self-hosting Arch-Router
+------------------------
+
+By default, Plano uses a hosted Arch-Router endpoint. To run Arch-Router locally, you can serve the model yourself using either **Ollama** or **vLLM**.
+
+Using Ollama (recommended for local development)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+1. **Install Ollama**
+
+ Download and install from `ollama.ai <https://ollama.ai>`_.
+
+2. **Pull and serve Arch-Router**
+
+ .. code-block:: bash
+
+ ollama pull hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M
+ ollama serve
+
+ This downloads the quantized GGUF model from HuggingFace and starts serving on ``http://localhost:11434``.
+
+3. **Configure Plano to use local Arch-Router**
+
+ .. code-block:: yaml
+
+ routing:
+ model: Arch-Router
+ llm_provider: arch-router
+
+ model_providers:
+ - name: arch-router
+ model: arch/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M
+ base_url: http://localhost:11434
+
+ - model: openai/gpt-5.2
+ access_key: $OPENAI_API_KEY
+ default: true
+
+ - model: anthropic/claude-sonnet-4-5
+ access_key: $ANTHROPIC_API_KEY
+ routing_preferences:
+ - name: creative writing
+ description: creative content generation, storytelling, and writing assistance
+
+4. **Verify the model is running**
+
+ .. code-block:: bash
+
+ curl http://localhost:11434/v1/models
+
+ You should see ``Arch-Router-1.5B`` listed in the response.
+
+Using vLLM (recommended for production / EC2)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+vLLM provides higher throughput and GPU optimizations suitable for production deployments.
+
+1. **Install vLLM**
+
+ .. code-block:: bash
+
+ pip install vllm
+
+2. **Download the model weights**
+
+ The GGUF weights are downloaded automatically from HuggingFace on first use. To pre-download:
+
+ .. code-block:: bash
+
+ pip install huggingface_hub
+ huggingface-cli download katanemo/Arch-Router-1.5B.gguf
+
+3. **Start the vLLM server**
+
+ After downloading, find the GGUF file and Jinja template in the HuggingFace cache:
+
+ .. code-block:: bash
+
+ # Find the downloaded files
+ SNAPSHOT_DIR=$(ls -d ~/.cache/huggingface/hub/models--katanemo--Arch-Router-1.5B.gguf/snapshots/*/ | head -1)
+
+ vllm serve ${SNAPSHOT_DIR}Arch-Router-1.5B-Q4_K_M.gguf \
+ --host 0.0.0.0 \
+ --port 10000 \
+ --load-format gguf \
+ --chat-template ${SNAPSHOT_DIR}template.jinja \
+ --tokenizer katanemo/Arch-Router-1.5B \
+ --served-model-name Arch-Router \
+ --gpu-memory-utilization 0.3 \
+ --tensor-parallel-size 1 \
+ --enable-prefix-caching
+
+4. **Configure Plano to use the vLLM endpoint**
+
+ .. code-block:: yaml
+
+ routing:
+ model: Arch-Router
+ llm_provider: arch-router
+
+ model_providers:
+ - name: arch-router
+ model: Arch-Router
+ base_url: http://<your-server-ip>:10000
+
+ - model: openai/gpt-5.2
+ access_key: $OPENAI_API_KEY
+ default: true
+
+ - model: anthropic/claude-sonnet-4-5
+ access_key: $ANTHROPIC_API_KEY
+ routing_preferences:
+ - name: creative writing
+ description: creative content generation, storytelling, and writing assistance
+
+5. **Verify the server is running**
+
+ .. code-block:: bash
+
+ curl http://localhost:10000/health
+ curl http://localhost:10000/v1/models
+
+
Combining Routing Methods
-------------------------
diff --git a/tests/e2e/run_e2e_tests.sh b/tests/e2e/run_e2e_tests.sh
index c24931f4..a164b7f9 100644
--- a/tests/e2e/run_e2e_tests.sh
+++ b/tests/e2e/run_e2e_tests.sh
@@ -21,10 +21,11 @@ trap 'print_debug' INT TERM ERR
log starting > ../build.log
-log building and running function_calling demo
+log starting weather_forecast agent natively
log ===========================================
cd ../../demos/getting_started/weather_forecast/
-docker compose up weather_forecast_service --build -d
+bash start_agents.sh &
+AGENTS_PID=$!
cd -
log building and installing plano cli
@@ -78,8 +79,6 @@ log running e2e tests for openai responses api client
log ========================================
uv run pytest test_openai_responses_api_client_with_state.py
-log shutting down the weather_forecast demo
+log shutting down the weather_forecast agent
log =======================================
-cd ../../demos/getting_started/weather_forecast
-docker compose down
-cd -
+kill $AGENTS_PID 2>/dev/null || true
diff --git a/tests/e2e/run_prompt_gateway_tests.sh b/tests/e2e/run_prompt_gateway_tests.sh
index 58d850d8..1e947813 100755
--- a/tests/e2e/run_prompt_gateway_tests.sh
+++ b/tests/e2e/run_prompt_gateway_tests.sh
@@ -32,10 +32,11 @@ cd -
# Re-sync e2e deps
uv sync
-# Start weather_forecast service (needed for prompt_gateway tests)
-log "building and running weather_forecast service"
+# Start weather_forecast service natively (needed for prompt_gateway tests)
+log "starting weather_forecast agent natively"
cd ../../demos/getting_started/weather_forecast/
-docker compose up weather_forecast_service --build -d
+bash start_agents.sh &
+AGENTS_PID=$!
cd -
# Start gateway with prompt_gateway config
@@ -52,6 +53,4 @@ uv run pytest test_prompt_gateway.py
# Cleanup
log "shutting down"
planoai down --docker || true
-cd ../../demos/getting_started/weather_forecast
-docker compose down
-cd -
+kill $AGENTS_PID 2>/dev/null || true