Merge remote-tracking branch 'origin/main' into adilhafeez/model-listener-filter-chain

2026-05-21 13:55:15 +02:00 · 2026-03-12 14:55:05 -07:00 · 2026-03-12 14:55:05 -07:00 · 9e5c908306
commit 9e5c908306
parent efa677683a 5400b0a2fa
36 changed files with 642 additions and 347 deletions
--- a/demos/advanced/currency_exchange/run_demo.sh
+++ b/demos/advanced/currency_exchange/run_demo.sh
@ -18,22 +18,24 @@ start_demo() {
    echo ".env file created with OPENAI_API_KEY."
  fi
-  # Step 3: Start Plano
+  # Step 3: Optionally start UI services (AnythingLLM, Jaeger)
  # Jaeger must start before Plano so it can bind the OTEL port (4317)
  if [ "$1" == "--with-ui" ]; then
    echo "Starting UI services (AnythingLLM, Jaeger)..."
    docker compose up -d
  fi
  # Step 4: Start Plano
  echo "Starting Plano with config.yaml..."
  planoai up config.yaml
  # Step 4: Start developer services
  echo "Starting Network Agent using Docker Compose..."
  docker compose up -d  # Run in detached mode
 }
 # Function to stop the demo
 stop_demo() {
-  # Step 1: Stop Docker Compose services
+  # Stop Docker Compose services if running
-  echo "Stopping Network Agent using Docker Compose..."
+  docker compose down 2>/dev/null || true
  docker compose down
-  # Step 2: Stop Plano
+  # Stop Plano
  echo "Stopping Plano..."
  planoai down
 }
@ -42,6 +44,5 @@ stop_demo() {
 if [ "$1" == "down" ]; then
  stop_demo
 else
-  # Default action is to bring the demo up
+  start_demo "$1"
  start_demo
 fi
--- a/demos/advanced/multi_turn_rag/docker-compose.yaml
+++ b/demos/advanced/multi_turn_rag/docker-compose.yaml
@ -1,15 +1,4 @@
 services:
  rag_energy_source_agent:
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      - "18083:80"
    healthcheck:
        test: ["CMD", "curl" ,"http://localhost:80/healthz"]
        interval: 5s
        retries: 20
  anythingllm:
    image: mintplexlabs/anythingllm
    restart: always
--- a/demos/advanced/multi_turn_rag/pyproject.toml
+++ b/demos/advanced/multi_turn_rag/pyproject.toml
@ -0,0 +1,12 @@
 [project]
 name = "multi-turn-rag"
 version = "0.1.0"
 requires-python = ">=3.12"
 dependencies = [
    "fastapi",
    "uvicorn",
    "pydantic>=2.8",
    "httpx>=0.27",
    "openai>=1.51",
    "python-dotenv>=1.0",
 ]
--- a/demos/advanced/multi_turn_rag/run_demo.sh
+++ b/demos/advanced/multi_turn_rag/run_demo.sh
@ -18,22 +18,32 @@ start_demo() {
    echo ".env file created with OPENAI_API_KEY."
  fi
-  # Step 3: Start Plano
+  # Step 3: Optionally start UI services (AnythingLLM)
  # UI services must start before Plano to avoid OTEL port conflicts
  if [ "$1" == "--with-ui" ]; then
    echo "Starting UI services (AnythingLLM)..."
    docker compose up -d
  fi
  # Step 4: Start Plano
  echo "Starting Plano with config.yaml..."
  planoai up config.yaml
-  # Step 4: Start Network Agent
+  # Step 5: Start agents natively
-  echo "Starting HR Agent using Docker Compose..."
+  echo "Starting agents..."
-  docker compose up -d  # Run in detached mode
+  bash start_agents.sh &
 }
 # Function to stop the demo
 stop_demo() {
-  # Step 1: Stop Docker Compose services
+  # Stop agents
-  echo "Stopping HR Agent using Docker Compose..."
+  echo "Stopping agents..."
-  docker compose down -v
+  pkill -f start_agents.sh 2>/dev/null || true
-  # Step 2: Stop Plano
+  # Stop Docker Compose services if running
  docker compose down 2>/dev/null || true
  # Stop Plano
  echo "Stopping Plano..."
  planoai down
 }
@ -42,6 +52,5 @@ stop_demo() {
 if [ "$1" == "down" ]; then
  stop_demo
 else
-  # Default action is to bring the demo up
+  start_demo "$1"
  start_demo
 fi
--- a/demos/advanced/multi_turn_rag/start_agents.sh
+++ b/demos/advanced/multi_turn_rag/start_agents.sh
@ -0,0 +1,24 @@
 #!/bin/bash
 # Starts the rag_energy_source_agent as a background process and waits for it.
 # Abort as soon as any command fails.
 set -e
 # PIDs of the background agent processes started by this script.
 PIDS=()
 # Print a message to stdout, prefixed with the current date and time
 # (YYYY-MM-DD HH:MM:SS). All arguments are joined into one message.
 log() {
    printf '%s - %s\n' "$(date '+%F %T')" "$*"
 }
 # Stop every agent recorded in PIDS, then exit the script.
 #
 # Intended as the handler for the EXIT, INT and TERM traps. Because this
 # handler itself calls exit, a signal would otherwise run it a second time
 # through the EXIT trap, so the traps are disarmed on entry.
 # Globals:   PIDS (read) - PIDs of the background agent processes
 # Outputs:   progress messages via log
 # Returns:   never returns; always exits with status 0
 cleanup() {
    # Disarm EXIT so our own exit below does not re-enter this function, and
    # ignore further INT/TERM so every recorded agent still gets its signal.
    trap - EXIT
    trap '' INT TERM
    log "Stopping agents..."
    local pid
    for pid in "${PIDS[@]}"; do
        # kill fails for agents that have already gone away; that is expected.
        if kill "$pid" 2>/dev/null; then
            log "Stopped process $pid"
        fi
    done
    exit 0
 }
 # Run cleanup whenever the script exits or receives INT/TERM.
 trap cleanup EXIT INT TERM
 log "Starting rag_energy_source_agent on port 18083..."
 # Launch the agent in the background and record its PID for cleanup.
 uv run uvicorn main:app --host 0.0.0.0 --port 18083 &
 PIDS+=($!)
 # Block until every recorded agent process has exited.
 for PID in "${PIDS[@]}"; do
    wait "$PID"
 done
--- a/demos/advanced/stock_quote/run_demo.sh
+++ b/demos/advanced/stock_quote/run_demo.sh
@ -18,22 +18,24 @@ start_demo() {
    echo ".env file created with OPENAI_API_KEY."
  fi
-  # Step 3: Start Plano
+  # Step 3: Optionally start UI services (AnythingLLM, Jaeger)
  # Jaeger must start before Plano so it can bind the OTEL port (4317)
  if [ "$1" == "--with-ui" ]; then
    echo "Starting UI services (AnythingLLM, Jaeger)..."
    docker compose up -d
  fi
  # Step 4: Start Plano
  echo "Starting Plano with config.yaml..."
  planoai up config.yaml
  # Step 4: Start developer services
  echo "Starting Network Agent using Docker Compose..."
  docker compose up -d  # Run in detached mode
 }
 # Function to stop the demo
 stop_demo() {
-  # Step 1: Stop Docker Compose services
+  # Stop Docker Compose services if running
-  echo "Stopping Network Agent using Docker Compose..."
+  docker compose down 2>/dev/null || true
  docker compose down
-  # Step 2: Stop Plano
+  # Stop Plano
  echo "Stopping Plano..."
  planoai down
 }
@ -42,6 +44,5 @@ stop_demo() {
 if [ "$1" == "down" ]; then
  stop_demo
 else
-  # Default action is to bring the demo up
+  start_demo "$1"
  start_demo
 fi
--- a/demos/agent_orchestration/multi_agent_crewai_langchain/README.md
+++ b/demos/agent_orchestration/multi_agent_crewai_langchain/README.md
@ -41,21 +41,36 @@ cd demos/agent_orchestration/multi_agent_crewai_langchain
 ./run_demo.sh
 ```
-This starts Plano natively and brings up via Docker Compose:
+This starts Plano natively and runs agents as local processes:
 - **CrewAI Flight Agent** (port 10520) - flight search
 - **LangChain Weather Agent** (port 10510) - weather forecasts
 - **AnythingLLM** (port 3001) - chat interface
 - **Jaeger** (port 16686) - distributed tracing
 Plano runs natively on the host (ports 12000, 8001).
 To also start AnythingLLM (chat UI), Jaeger (tracing), and other optional services:
 ```bash
 ./run_demo.sh --with-ui
 ```
 This additionally starts:
 - **AnythingLLM** (port 3001) - chat interface
 - **Jaeger** (port 16686) - distributed tracing
 ### Try It Out
-1. **Open the Chat Interface**
+1. **Using curl**
   ```bash
   curl -X POST http://localhost:8001/v1/chat/completions \
     -H "Content-Type: application/json" \
     -d '{"model": "gpt-4o", "messages": [{"role": "user", "content": "What is the weather in San Francisco?"}]}'
   ```
 2. **Using AnythingLLM (requires `--with-ui`)**
   - Navigate to [http://localhost:3001](http://localhost:3001)
   - Create an account (stored locally)
-2. **Ask Multi-Agent Questions**
+3. **Ask Multi-Agent Questions**
   ```
   "What's the weather in San Francisco and can you find flights from Seattle to San Francisco?"
   ```
@ -65,7 +80,7 @@ Plano runs natively on the host (ports 12000, 8001).
   - Routes the flight part to the CrewAI agent
   - Combines responses seamlessly
-3. **View Distributed Traces**
+4. **View Distributed Traces (requires `--with-ui`)**
   - Open [http://localhost:16686](http://localhost:16686) (Jaeger UI)
   - See how requests flow through both agents
--- a/demos/agent_orchestration/multi_agent_crewai_langchain/config.yaml
+++ b/demos/agent_orchestration/multi_agent_crewai_langchain/config.yaml
@ -2,9 +2,9 @@ version: v0.3.0
 agents:
  - id: weather_agent
-    url: http://langchain-weather-agent:10510
+    url: http://localhost:10510
  - id: flight_agent
-    url: http://crewai-flight-agent:10520
+    url: http://localhost:10520
 model_providers:
  - model: openai/gpt-4o
--- a/demos/agent_orchestration/multi_agent_crewai_langchain/docker-compose.yaml
+++ b/demos/agent_orchestration/multi_agent_crewai_langchain/docker-compose.yaml
@ -1,27 +1,5 @@
 services:
  crewai-flight-agent:
    build:
      dockerfile: Dockerfile
    restart: always
    ports:
      - "10520:10520"
    environment:
      - LLM_GATEWAY_ENDPOINT=http://host.docker.internal:12000/v1
      - AEROAPI_KEY=${AEROAPI_KEY:?AEROAPI_KEY environment variable is required but not set}
      - PYTHONUNBUFFERED=1
    command: ["python", "-u", "crewai/flight_agent.py"]
  langchain-weather-agent:
    build:
      dockerfile: Dockerfile
    restart: always
    ports:
      - "10510:10510"
    environment:
      - LLM_GATEWAY_ENDPOINT=http://host.docker.internal:12000/v1
    command: ["python", "-u", "langchain/weather_agent.py"]
  anythingllm:
    image: mintplexlabs/anythingllm
    restart: always
@ -36,6 +14,8 @@ services:
      - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
      - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
      - GENERIC_OPEN_AI_API_KEY=sk-placeholder
    extra_hosts:
      - "host.docker.internal:host-gateway"
  jaeger:
    build:
@ -44,3 +24,4 @@ services:
    ports:
      - "16686:16686"  # Jaeger UI
      - "4317:4317"    # OTLP gRPC receiver
      - "4318:4318"    # OTLP HTTP receiver
--- a/demos/agent_orchestration/multi_agent_crewai_langchain/run_demo.sh
+++ b/demos/agent_orchestration/multi_agent_crewai_langchain/run_demo.sh
@ -12,33 +12,38 @@ start_demo() {
      echo "Error: OPENAI_API_KEY environment variable is not set for the demo."
      exit 1
    fi
    if [ -z "$AEROAPI_KEY" ]; then
      echo "Error: AEROAPI_KEY environment variable is not set for the demo."
      exit 1
    fi
    echo "Creating .env file..."
    echo "OPENAI_API_KEY=$OPENAI_API_KEY" > .env
    echo "AEROAPI_KEY=$AEROAPI_KEY" >> .env
    echo ".env file created with API keys."
  fi
-  # Step 3: Start Plano
+  # Step 3: Optionally start UI services (AnythingLLM, Jaeger)
  # Jaeger must start before Plano so it can bind the OTEL port (4317)
  if [ "$1" == "--with-ui" ]; then
    echo "Starting UI services (AnythingLLM, Jaeger)..."
    docker compose up -d
  fi
  # Step 4: Start Plano
  echo "Starting Plano with config.yaml..."
  planoai up config.yaml
-  # Step 4: Start agents and services
+  # Step 5: Start agents natively
-  echo "Starting agents using Docker Compose..."
+  echo "Starting agents..."
-  docker compose up -d
+  bash start_agents.sh &
 }
 # Function to stop the demo
 stop_demo() {
-  # Step 1: Stop Docker Compose services
+  # Stop agents
-  echo "Stopping Docker Compose services..."
+  echo "Stopping agents..."
-  docker compose down
+  pkill -f start_agents.sh 2>/dev/null || true
-  # Step 2: Stop Plano
+  # Stop Docker Compose services if running
  docker compose down 2>/dev/null || true
  # Stop Plano
  echo "Stopping Plano..."
  planoai down
 }
@ -47,5 +52,5 @@ stop_demo() {
 if [ "$1" == "down" ]; then
  stop_demo
 else
-  start_demo
+  start_demo "$1"
 fi
--- a/demos/agent_orchestration/multi_agent_crewai_langchain/start_agents.sh
+++ b/demos/agent_orchestration/multi_agent_crewai_langchain/start_agents.sh
@ -0,0 +1,30 @@
 #!/bin/bash
 # Starts the langchain weather agent and the crewai flight agent as
 # background processes and waits for them.
 # Abort as soon as any command fails.
 set -e
 # PIDs of the background agent processes started by this script.
 PIDS=()
 # Print a message to stdout, prefixed with the current date and time
 # (YYYY-MM-DD HH:MM:SS). All arguments are joined into one message.
 log() {
    printf '%s - %s\n' "$(date '+%F %T')" "$*"
 }
 # Stop every agent recorded in PIDS, then exit the script.
 #
 # Intended as the handler for the EXIT, INT and TERM traps. Because this
 # handler itself calls exit, a signal would otherwise run it a second time
 # through the EXIT trap, so the traps are disarmed on entry.
 # Globals:   PIDS (read) - PIDs of the background agent processes
 # Outputs:   progress messages via log
 # Returns:   never returns; always exits with status 0
 cleanup() {
    # Disarm EXIT so our own exit below does not re-enter this function, and
    # ignore further INT/TERM so every recorded agent still gets its signal.
    trap - EXIT
    trap '' INT TERM
    log "Stopping agents..."
    local pid
    for pid in "${PIDS[@]}"; do
        # kill fails for agents that have already gone away; that is expected.
        if kill "$pid" 2>/dev/null; then
            log "Stopped process $pid"
        fi
    done
    exit 0
 }
 # Run cleanup whenever the script exits or receives INT/TERM.
 trap cleanup EXIT INT TERM
 # Exported so the agent processes started below inherit it.
 export LLM_GATEWAY_ENDPOINT=http://localhost:12000/v1
 log "Starting langchain weather_agent on port 10510..."
 # Launch in the background and record the PID for cleanup.
 uv run python langchain/weather_agent.py &
 PIDS+=($!)
 log "Starting crewai flight_agent on port 10520..."
 # Launch in the background and record the PID for cleanup.
 uv run python crewai/flight_agent.py &
 PIDS+=($!)
 # Block until every recorded agent process has exited.
 for PID in "${PIDS[@]}"; do
    wait "$PID"
 done
--- a/demos/agent_orchestration/travel_agents/README.md
+++ b/demos/agent_orchestration/travel_agents/README.md
@ -23,9 +23,10 @@ All agents use Plano's agent orchestration LLM to intelligently route user reque
 ## Prerequisites
 - [Plano CLI](https://docs.planoai.dev/get_started/quickstart.html#prerequisites) installed (`pip install planoai`)
- Docker and Docker Compose (for agent services)
+- [uv](https://docs.astral.sh/uv/) installed (for running agents natively)
 - [OpenAI API key](https://platform.openai.com/api-keys)
 - [FlightAware AeroAPI key](https://www.flightaware.com/aeroapi/portal)
 - Docker and Docker Compose (optional, only needed for `--with-ui`)
 > **Note:** You'll need to obtain a FlightAware AeroAPI key for live flight data. Visit [https://www.flightaware.com/aeroapi/portal](https://www.flightaware.com/aeroapi/portal) to get your API key.
@ -46,16 +47,34 @@ export OPENAI_API_KEY="your OpenAI api key"
 ./run_demo.sh
 ```
-This starts Plano natively and brings up via Docker Compose:
+This starts Plano natively and runs agents as local processes:
 - Weather Agent on port 10510
 - Flight Agent on port 10520
 - Open WebUI on port 8080
 Plano runs natively on the host (port 8001).
 To also start Open WebUI, Jaeger tracing, and other optional services, pass `--with-ui`:
 ```bash
 ./run_demo.sh --with-ui
 ```
 This additionally starts:
 - Open WebUI on port 8080
 - Jaeger tracing UI on port 16686
 ### 4. Test the System
-Use Open WebUI at http://localhost:8080
+**Option A: Using curl**
 ```bash
 curl -X POST http://localhost:8001/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "gpt-5.2", "messages": [{"role": "user", "content": "What is the weather in Istanbul?"}]}'
 ```
 **Option B: Using Open WebUI (requires `--with-ui`)**
 Navigate to http://localhost:8080
 > **Note:** The Open WebUI may take a few minutes to start up and be fully ready. Please wait for the container to finish initializing before accessing the interface. Once ready, make sure to select the **gpt-5.2** model from the model dropdown menu in the UI.
@ -102,7 +121,7 @@ Each agent:
 3. Generates response using GPT-5.2
 4. Streams response back to user
-Both agents run as Docker containers and communicate with Plano running natively on the host.
+Both agents run as native local processes and communicate with Plano running natively on the host.
 ## Observability
--- a/demos/agent_orchestration/travel_agents/docker-compose.yaml
+++ b/demos/agent_orchestration/travel_agents/docker-compose.yaml
@ -1,32 +1,5 @@
 services:
  weather-agent:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: weather-agent
    restart: always
    ports:
      - "10510:10510"
    environment:
      - LLM_GATEWAY_ENDPOINT=http://host.docker.internal:12000/v1
    command: ["uv", "run", "python", "src/travel_agents/weather_agent.py"]
    extra_hosts:
      - "host.docker.internal:host-gateway"
  flight-agent:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: flight-agent
    restart: always
    ports:
      - "10520:10520"
    environment:
      - LLM_GATEWAY_ENDPOINT=http://host.docker.internal:12000/v1
      - AEROAPI_KEY=${AEROAPI_KEY:? AEROAPI_KEY environment variable is required but not set}
    command: ["uv", "run", "python", "src/travel_agents/flight_agent.py"]
    extra_hosts:
      - "host.docker.internal:host-gateway"
  open-web-ui:
    image: dyrnq/open-webui:main
    restart: always
@ -40,9 +13,8 @@ services:
      - ENABLE_TITLE_GENERATION=false
      - ENABLE_TAGS_GENERATION=false
      - ENABLE_AUTOCOMPLETE_GENERATION=false
-    depends_on:
+    extra_hosts:
-      - weather-agent
+      - "host.docker.internal:host-gateway"
      - flight-agent
  jaeger:
    build:
      context: ../../shared/jaeger
--- a/demos/agent_orchestration/travel_agents/run_demo.sh
+++ b/demos/agent_orchestration/travel_agents/run_demo.sh
@ -23,22 +23,32 @@ start_demo() {
    echo ".env file created with API keys."
  fi
-  # Step 3: Start Plano
+  # Step 3: Optionally start UI services (Open WebUI, Jaeger)
  # Jaeger must start before Plano so it can bind the OTEL port (4317)
  if [ "$1" == "--with-ui" ]; then
    echo "Starting UI services (Open WebUI, Jaeger)..."
    docker compose up -d
  fi
  # Step 4: Start Plano
  echo "Starting Plano with config.yaml..."
  planoai up config.yaml
-  # Step 4: Start agents and services
+  # Step 5: Start agents natively
-  echo "Starting agents using Docker Compose..."
+  echo "Starting agents..."
-  docker compose up -d
+  bash start_agents.sh &
 }
 # Function to stop the demo
 stop_demo() {
-  # Step 1: Stop Docker Compose services
+  # Stop agents
-  echo "Stopping Docker Compose services..."
+  echo "Stopping agents..."
-  docker compose down
+  pkill -f start_agents.sh 2>/dev/null || true
-  # Step 2: Stop Plano
+  # Stop Docker Compose services if running
  docker compose down 2>/dev/null || true
  # Stop Plano
  echo "Stopping Plano..."
  planoai down
 }
@ -47,5 +57,5 @@ stop_demo() {
 if [ "$1" == "down" ]; then
  stop_demo
 else
-  start_demo
+  start_demo "$1"
 fi
--- a/demos/agent_orchestration/travel_agents/start_agents.sh
+++ b/demos/agent_orchestration/travel_agents/start_agents.sh
@ -0,0 +1,30 @@
 #!/bin/bash
 # Starts the travel demo's weather and flight agents as background
 # processes and waits for them.
 # Abort as soon as any command fails.
 set -e
 # PIDs of the background agent processes started by this script.
 PIDS=()
 # Print a message to stdout, prefixed with the current date and time
 # (YYYY-MM-DD HH:MM:SS). All arguments are joined into one message.
 log() {
    printf '%s - %s\n' "$(date '+%F %T')" "$*"
 }
 # Stop every agent recorded in PIDS, then exit the script.
 #
 # Intended as the handler for the EXIT, INT and TERM traps. Because this
 # handler itself calls exit, a signal would otherwise run it a second time
 # through the EXIT trap, so the traps are disarmed on entry.
 # Globals:   PIDS (read) - PIDs of the background agent processes
 # Outputs:   progress messages via log
 # Returns:   never returns; always exits with status 0
 cleanup() {
    # Disarm EXIT so our own exit below does not re-enter this function, and
    # ignore further INT/TERM so every recorded agent still gets its signal.
    trap - EXIT
    trap '' INT TERM
    log "Stopping agents..."
    local pid
    for pid in "${PIDS[@]}"; do
        # kill fails for agents that have already gone away; that is expected.
        if kill "$pid" 2>/dev/null; then
            log "Stopped process $pid"
        fi
    done
    exit 0
 }
 # Run cleanup whenever the script exits or receives INT/TERM.
 trap cleanup EXIT INT TERM
 # Exported so the agent processes started below inherit it.
 export LLM_GATEWAY_ENDPOINT=http://localhost:12000/v1
 log "Starting weather_agent on port 10510..."
 # Launch in the background and record the PID for cleanup.
 uv run python src/travel_agents/weather_agent.py &
 PIDS+=($!)
 log "Starting flight_agent on port 10520..."
 # Launch in the background and record the PID for cleanup.
 uv run python src/travel_agents/flight_agent.py &
 PIDS+=($!)
 # Block until every recorded agent process has exited.
 for PID in "${PIDS[@]}"; do
    wait "$PID"
 done
--- a/demos/filter_chains/http_filter/README.md
+++ b/demos/filter_chains/http_filter/README.md
@ -41,23 +41,27 @@ export OPENAI_API_KEY="your-key"
 ./run_demo.sh
 ```
-This starts Plano natively and brings up via Docker Compose:
+This starts Plano natively and runs agents as local processes:
- Input Guards MCP server on port 10500
+- Input Guards HTTP server on port 10500
- Query Rewriter MCP server on port 10501
+- Query Rewriter HTTP server on port 10501
- Context Builder MCP server on port 10502
+- Context Builder HTTP server on port 10502
 - RAG Agent REST server on port 10505
 - Jaeger UI for viewing traces at http://localhost:16686
 - AnythingLLM at http://localhost:3001 for interactive queries
 Plano runs natively on the host (port 8001 and 12000).
 To also start AnythingLLM (chat UI) and Jaeger (tracing):
 ```bash
 ./run_demo.sh --with-ui
 ```
 This additionally starts:
 - Jaeger UI for viewing traces at http://localhost:16686
 - AnythingLLM at http://localhost:3001 for interactive queries
 ### 2. Test the system
-**Option A: Using AnythingLLM (recommended)**
+**Option A: Using curl (recommended)**
 Navigate to http://localhost:3001 and send queries through the chat interface.
 **Option B: Using curl**
 ```bash
 curl -X POST http://localhost:8001/v1/chat/completions \
  -H "Content-Type: application/json" \
--- a/demos/filter_chains/http_filter/config.yaml
+++ b/demos/filter_chains/http_filter/config.yaml
@ -2,23 +2,23 @@ version: v0.3.0
 agents:
  - id: rag_agent
-    url: http://rag-agents:10505
+    url: http://localhost:10505
 filters:
  - id: input_guards
-    url: http://rag-agents:10500
+    url: http://localhost:10500
    type: http
    # type: mcp (default)
    # transport: streamable-http (default)
    # tool: input_guards (default - same as filter id)
  - id: query_rewriter
-    url: http://rag-agents:10501
+    url: http://localhost:10501
    type: http
    # type: mcp (default)
    # transport: streamable-http (default)
    # tool: query_rewriter (default - same as filter id)
  - id: context_builder
-    url: http://rag-agents:10502
+    url: http://localhost:10502
    type: http
 model_providers:
--- a/demos/filter_chains/http_filter/docker-compose.yaml
+++ b/demos/filter_chains/http_filter/docker-compose.yaml
@ -1,16 +1,4 @@
 services:
  rag-agents:
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      - "10500:10500"
      - "10501:10501"
      - "10502:10502"
      - "10505:10505"
    environment:
      - LLM_GATEWAY_ENDPOINT=${LLM_GATEWAY_ENDPOINT:-http://host.docker.internal:12000/v1}
      - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set}
  jaeger:
    build:
      context: ../../shared/jaeger
@ -32,3 +20,5 @@ services:
      - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
      - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
      - GENERIC_OPEN_AI_API_KEY=sk-placeholder
    extra_hosts:
      - "host.docker.internal:host-gateway"
--- a/demos/filter_chains/http_filter/run_demo.sh
+++ b/demos/filter_chains/http_filter/run_demo.sh
@ -18,22 +18,32 @@ start_demo() {
    echo ".env file created with OPENAI_API_KEY."
  fi
-  # Step 3: Start Plano
+  # Step 3: Optionally start UI services (AnythingLLM, Jaeger)
  # Jaeger must start before Plano so it can bind the OTEL port (4317)
  if [ "$1" == "--with-ui" ]; then
    echo "Starting UI services (AnythingLLM, Jaeger)..."
    docker compose up -d
  fi
  # Step 4: Start Plano
  echo "Starting Plano with config.yaml..."
  planoai up config.yaml
-  # Step 4: Start services
+  # Step 5: Start agents natively
-  echo "Starting services using Docker Compose..."
+  echo "Starting agents..."
-  docker compose up -d
+  bash start_agents.sh &
 }
 # Function to stop the demo
 stop_demo() {
-  # Step 1: Stop Docker Compose services
+  # Stop agents
-  echo "Stopping Docker Compose services..."
+  echo "Stopping agents..."
-  docker compose down
+  pkill -f start_agents.sh 2>/dev/null || true
-  # Step 2: Stop Plano
+  # Stop Docker Compose services if running
  docker compose down 2>/dev/null || true
  # Stop Plano
  echo "Stopping Plano..."
  planoai down
 }
@ -42,5 +52,5 @@ stop_demo() {
 if [ "$1" == "down" ]; then
  stop_demo
 else
-  start_demo
+  start_demo "$1"
 fi
--- a/demos/filter_chains/http_filter/start_agents.sh
+++ b/demos/filter_chains/http_filter/start_agents.sh
@ -1,78 +1,38 @@
 # #!/bin/bash
 # set -e
 # WAIT_FOR_PIDS=()
 # log() {
 #   timestamp=$(python3 -c 'from datetime import datetime; print(datetime.now().strftime("%Y-%m-%d %H:%M:%S,%f")[:23])')
 #   message="$*"
 #   echo "$timestamp - $message"
 # }
 # cleanup() {
 #     log "Caught signal, terminating all user processes ..."
 #     for PID in "${WAIT_FOR_PIDS[@]}"; do
 #         if kill $PID 2> /dev/null; then
 #             log "killed process: $PID"
 #         fi
 #     done
 #     exit 1
 # }
 # trap cleanup EXIT
 # log "Starting input_guards agent on port 10500/mcp..."
 # uv run python -m rag_agent --rest-server --host 0.0.0.0 --rest-port 10500 --agent input_guards &
 # WAIT_FOR_PIDS+=($!)
 # log "Starting query_rewriter agent on port 10501/mcp..."
 # uv run python -m rag_agent --rest-server --host 0.0.0.0 --rest-port 10501 --agent query_rewriter &
 # WAIT_FOR_PIDS+=($!)
 # log "Starting context_builder agent on port 10502/mcp..."
 # uv run python -m rag_agent --rest-server --host 0.0.0.0 --rest-port 10502 --agent context_builder &
 # WAIT_FOR_PIDS+=($!)
 # # log "Starting response_generator agent on port 10400..."
 # # uv run python -m rag_agent --host 0.0.0.0 --port 10400 --agent response_generator &
 # # WAIT_FOR_PIDS+=($!)
 # log "Starting response_generator agent on port 10505..."
 # uv run python -m rag_agent --rest-server --host 0.0.0.0 --rest-port 10505 --agent response_generator &
 # WAIT_FOR_PIDS+=($!)
 # for PID in "${WAIT_FOR_PIDS[@]}"; do
 #     wait "$PID"
 # done
 #!/bin/bash
 set -e
-export PYTHONPATH=/app/src
+PIDS=()
 pids=()
 log() { echo "$(date '+%F %T') - $*"; }
-log "Starting input_guards HTTP server on :10500"
+cleanup() {
    log "Stopping agents..."
    for PID in "${PIDS[@]}"; do
        kill $PID 2>/dev/null && log "Stopped process $PID"
    done
    exit 0
 }
 trap cleanup EXIT INT TERM
 export PYTHONPATH=./src
 log "Starting input_guards HTTP server on port 10500..."
 uv run uvicorn rag_agent.input_guards:app --host 0.0.0.0 --port 10500 &
-pids+=($!)
+PIDS+=($!)
-log "Starting query_rewriter HTTP server on :10501"
+log "Starting query_rewriter HTTP server on port 10501..."
 uv run uvicorn rag_agent.query_rewriter:app --host 0.0.0.0 --port 10501 &
-pids+=($!)
+PIDS+=($!)
-log "Starting context_builder HTTP server on :10502"
+log "Starting context_builder HTTP server on port 10502..."
 uv run uvicorn rag_agent.context_builder:app --host 0.0.0.0 --port 10502 &
-pids+=($!)
+PIDS+=($!)
-log "Starting response_generator (OpenAI-compatible) on :10505"
+log "Starting response_generator (OpenAI-compatible) on port 10505..."
 uv run uvicorn rag_agent.rag_agent:app --host 0.0.0.0 --port 10505 &
-pids+=($!)
+PIDS+=($!)
-for PID in "${pids[@]}"; do
+for PID in "${PIDS[@]}"; do
    wait "$PID"
 done
--- a/demos/filter_chains/mcp_filter/README.md
+++ b/demos/filter_chains/mcp_filter/README.md
@ -41,23 +41,27 @@ export OPENAI_API_KEY="your-key"
 ./run_demo.sh
 ```
-This starts Plano natively and brings up via Docker Compose:
+This starts Plano natively and runs agents as local processes:
 - Input Guards MCP server on port 10500
 - Query Rewriter MCP server on port 10501
 - Context Builder MCP server on port 10502
 - RAG Agent REST server on port 10505
 - Jaeger UI for viewing traces at http://localhost:16686
 - AnythingLLM at http://localhost:3001 for interactive queries
 Plano runs natively on the host (port 8001 and 12000).
 To also start AnythingLLM (chat UI) and Jaeger (tracing):
 ```bash
 ./run_demo.sh --with-ui
 ```
 This additionally starts:
 - Jaeger UI for viewing traces at http://localhost:16686
 - AnythingLLM at http://localhost:3001 for interactive queries
 ### 2. Test the system
-**Option A: Using AnythingLLM (recommended)**
+**Option A: Using curl (recommended)**
 Navigate to http://localhost:3001 and send queries through the chat interface.
 **Option B: Using curl**
 ```bash
 curl -X POST http://localhost:8001/v1/chat/completions \
  -H "Content-Type: application/json" \
--- a/demos/filter_chains/mcp_filter/docker-compose.yaml
+++ b/demos/filter_chains/mcp_filter/docker-compose.yaml
@ -1,16 +1,4 @@
 services:
  rag-agents:
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      - "10500:10500"
      - "10501:10501"
      - "10502:10502"
      - "10505:10505"
    environment:
      - LLM_GATEWAY_ENDPOINT=${LLM_GATEWAY_ENDPOINT:-http://host.docker.internal:12000/v1}
      - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set}
  jaeger:
    build:
      context: ../../shared/jaeger
@ -32,3 +20,5 @@ services:
      - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
      - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
      - GENERIC_OPEN_AI_API_KEY=sk-placeholder
    extra_hosts:
      - "host.docker.internal:host-gateway"
--- a/demos/filter_chains/mcp_filter/run_demo.sh
+++ b/demos/filter_chains/mcp_filter/run_demo.sh
@ -18,22 +18,32 @@ start_demo() {
    echo ".env file created with OPENAI_API_KEY."
  fi
-  # Step 3: Start Plano
+  # Step 3: Optionally start UI services (AnythingLLM, Jaeger)
  # Jaeger must start before Plano so it can bind the OTEL port (4317)
  if [ "$1" == "--with-ui" ]; then
    echo "Starting UI services (AnythingLLM, Jaeger)..."
    docker compose up -d
  fi
  # Step 4: Start Plano
  echo "Starting Plano with config.yaml..."
  planoai up config.yaml
-  # Step 4: Start services
+  # Step 5: Start agents natively
-  echo "Starting services using Docker Compose..."
+  echo "Starting agents..."
-  docker compose up -d
+  bash start_agents.sh &
 }
 # Function to stop the demo
 stop_demo() {
-  # Step 1: Stop Docker Compose services
+  # Stop agents
-  echo "Stopping Docker Compose services..."
+  echo "Stopping agents..."
-  docker compose down
+  pkill -f start_agents.sh 2>/dev/null || true
-  # Step 2: Stop Plano
+  # Stop Docker Compose services if running
  docker compose down 2>/dev/null || true
  # Stop Plano
  echo "Stopping Plano..."
  planoai down
 }
@ -42,5 +52,5 @@ stop_demo() {
 if [ "$1" == "down" ]; then
  stop_demo
 else
-  start_demo
+  start_demo "$1"
 fi
--- a/demos/getting_started/llm_gateway/README.md
+++ b/demos/getting_started/llm_gateway/README.md
@ -7,7 +7,14 @@ This demo shows how you can use Plano gateway to manage keys and route to upstre
   ```sh
   sh run_demo.sh
   ```
-1. Navigate to http://localhost:3001/
+1. Test with curl (see example below)
 To also start the AnythingLLM chat UI and Jaeger tracing, pass `--with-ui`:
 ```sh
 sh run_demo.sh --with-ui
 ```
 Then navigate to http://localhost:3001/ for AnythingLLM.
 Following screen shows an example of interaction with Plano gateway showing dynamic routing. You can select between different LLMs using "override model" option in the chat UI.
@ -47,7 +54,7 @@ $ curl --header 'Content-Type: application/json' \
 ```
 # Observability
-For tracing you can head over to http://localhost:16686/ to view recent traces.
+For tracing, start with `--with-ui` and head over to http://localhost:16686/ to view recent traces.
 Following is a screenshot of tracing UI showing call received by Plano gateway and making upstream call to LLM,
--- a/demos/getting_started/llm_gateway/run_demo.sh
+++ b/demos/getting_started/llm_gateway/run_demo.sh
@ -18,22 +18,24 @@ start_demo() {
    echo ".env file created with OPENAI_API_KEY."
  fi
-  # Step 3: Start Plano
+  # Step 3: Optionally start UI services (AnythingLLM, Jaeger)
  # Jaeger must start before Plano so it can bind the OTEL port (4317)
  if [ "$1" == "--with-ui" ]; then
    echo "Starting UI services (AnythingLLM, Jaeger)..."
    docker compose up -d
  fi
  # Step 4: Start Plano
  echo "Starting Plano with config.yaml..."
  planoai up config.yaml
  # Step 4: Start LLM Routing
  echo "Starting LLM Routing using Docker Compose..."
  docker compose up -d  # Run in detached mode
 }
 # Function to stop the demo
 stop_demo() {
-  # Step 1: Stop Docker Compose services
+  # Stop Docker Compose services if running
-  echo "Stopping LLM Routing using Docker Compose..."
+  docker compose down 2>/dev/null || true
  docker compose down
-  # Step 2: Stop Plano
+  # Stop Plano
  echo "Stopping Plano..."
  planoai down
 }
@ -42,6 +44,5 @@ stop_demo() {
 if [ "$1" == "down" ]; then
  stop_demo
 else
-  # Default action is to bring the demo up
+  start_demo "$1"
  start_demo
 fi
--- a/demos/getting_started/weather_forecast/README.md
+++ b/demos/getting_started/weather_forecast/README.md
@ -10,15 +10,26 @@ This demo shows how you can use Plano's core function calling capabilities.
 3. ```sh
   sh run_demo.sh
   ```
-4. Navigate to http://localhost:3001/
+4. Test with curl:
-5. You can type in queries like "how is the weather?"
+   ```sh
   curl http://localhost:10000/v1/chat/completions \
     -H "Content-Type: application/json" \
     -d '{"model": "gpt-4o", "messages": [{"role": "user", "content": "how is the weather in San Francisco?"}]}'
   ```
 Here is a sample interaction,
 <img width="575" alt="image" src="https://github.com/user-attachments/assets/e0929490-3eb2-4130-ae87-a732aea4d059">
-## Tracing
+## Using the Chat UI and Tracing (optional)
-To see a tracing dashboard, navigate to http://localhost:16686/ to open Jaeger UI.
+To start AnythingLLM (chat UI) and other optional services, pass `--with-ui`:
 ```sh
 sh run_demo.sh --with-ui
 ```
 - Navigate to http://localhost:3001/ for AnythingLLM
 - Navigate to http://localhost:16686/ for Jaeger tracing UI
 ### Stopping Demo
--- a/demos/getting_started/weather_forecast/docker-compose.yaml
+++ b/demos/getting_started/weather_forecast/docker-compose.yaml
@ -1,14 +1,4 @@
 services:
  weather_forecast_service:
    build:
      context: ./
    environment:
      - OLTP_HOST=http://jaeger:4317
    extra_hosts:
      - "host.docker.internal:host-gateway"
    ports:
      - "18083:80"
  anythingllm:
    image: mintplexlabs/anythingllm
    restart: always
--- a/demos/getting_started/weather_forecast/run_demo.sh
+++ b/demos/getting_started/weather_forecast/run_demo.sh
@ -72,23 +72,32 @@ start_demo() {
    exit 1
  fi
-  # Step 4: Start Plano
+  # Step 4: Optionally start UI services (AnythingLLM, Jaeger, etc.)
  # Jaeger must start before Plano so it can bind the OTEL port (4317)
  if [ "$1" == "--with-ui" ] || [ "$2" == "--with-ui" ]; then
    echo "Starting UI services with $COMPOSE_FILE..."
    docker compose -f "$COMPOSE_FILE" up -d
  fi
  # Step 5: Start Plano
  echo "Starting Plano with config.yaml..."
  planoai up config.yaml
-  # Step 5: Start Network Agent with the chosen Docker Compose file
+  # Step 6: Start agents natively
-  echo "Starting Network Agent with $COMPOSE_FILE..."
+  echo "Starting agents..."
-  docker compose -f "$COMPOSE_FILE" up -d # Run in detached mode
+  bash start_agents.sh &
 }
 # Function to stop the demo
 stop_demo() {
-  echo "Stopping all Docker Compose services..."
+  # Stop agents
  echo "Stopping agents..."
  pkill -f start_agents.sh 2>/dev/null || true
-  # Stop all services by iterating through all configurations
+  # Stop all Docker Compose services if running
  echo "Stopping Docker Compose services..."
  for compose_file in ./docker-compose*.yaml; do
-    echo "Stopping services in $compose_file..."
+    docker compose -f "$compose_file" down 2>/dev/null || true
    docker compose -f "$compose_file" down
  done
  # Stop Plano
@ -101,6 +110,6 @@ if [ "$1" == "down" ]; then
  # Stop the demo; stop_demo takes no arguments and tears down every docker-compose*.yaml
  stop_demo
 else
-  # Use the argument (jaeger, logfire, signoz) to determine the compose file
+  # Use the argument (jaeger, logfire, signoz, --with-ui) to determine the compose file
-  start_demo "$1"
+  start_demo "$1" "$2"
 fi
--- a/demos/getting_started/weather_forecast/start_agents.sh
+++ b/demos/getting_started/weather_forecast/start_agents.sh
@ -0,0 +1,24 @@
 #!/bin/bash
 set -e
 PIDS=()
 log() { echo "$(date '+%F %T') - $*"; }
 # Stop every agent process recorded in the global PIDS array, then exit 0.
 # Used as the trap handler for EXIT, INT and TERM.
 cleanup() {
    # Clear the traps first: the `exit 0` below would otherwise re-fire the
    # EXIT trap and run this handler a second time (duplicate log + kill).
    trap - EXIT INT TERM
    log "Stopping agents..."
    local pid
    for pid in "${PIDS[@]}"; do
        # A process that has already exited is not an error; only log real stops.
        if kill "$pid" 2>/dev/null; then
            log "Stopped process $pid"
        fi
    done
    exit 0
 }
 # Run cleanup on script exit and on INT/TERM so started agents are stopped.
 trap cleanup EXIT INT TERM
 log "Starting weather_forecast_service on port 18083..."
 # Launch the service in the background and record the job's PID for cleanup.
 uv run uvicorn main:app --host 0.0.0.0 --port 18083 &
 PIDS+=($!)
 # Block until every recorded background process has exited.
 for PID in "${PIDS[@]}"; do
    wait "$PID"
 done
--- a/demos/integrations/ollama/run_demo.sh
+++ b/demos/integrations/ollama/run_demo.sh
@ -7,33 +7,58 @@ start_demo() {
  if [ -f ".env" ]; then
    echo ".env file already exists. Skipping creation."
  else
-    # Step 2: Create `.env` file and set OpenAI key
+    # Step 2: Create `.env` file and set API keys
    if [ -z "$OPENAI_API_KEY" ]; then
      echo "Error: OPENAI_API_KEY environment variable is not set for the demo."
      exit 1
    fi
    if [ -z "$ANTHROPIC_API_KEY" ]; then
      echo "Warning: ANTHROPIC_API_KEY environment variable is not set. Anthropic features may not work."
    fi
    echo "Creating .env file..."
    echo "OPENAI_API_KEY=$OPENAI_API_KEY" > .env
-    echo ".env file created with OPENAI_API_KEY."
+    if [ -n "$ANTHROPIC_API_KEY" ]; then
      echo "ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY" >> .env
    fi
    echo ".env file created with API keys."
  fi
-  # Step 3: Start Plano
+  # Step 3: Optionally start UI services (AnythingLLM, Jaeger)
-  echo "Starting Plano with config.yaml..."
+  # Jaeger must start before Plano so it can bind the OTEL port (4317)
-  planoai up config.yaml
+  if [ "$1" == "--with-ui" ]; then
    echo "Starting UI services (AnythingLLM, Jaeger)..."
    docker compose up -d
  fi
-  # Step 4: Start developer services
+  # Step 4: Start Plano
-  echo "Starting Network Agent using Docker Compose..."
+  echo "Starting Plano with arch_config_with_aliases.yaml..."
-  docker compose up -d  # Run in detached mode
+  planoai up arch_config_with_aliases.yaml
  echo ""
  echo "Plano started successfully."
  echo "Please run the following CURL command to test model alias routing. Additional instructions are in the README.md file."
  echo ""
  echo "curl -sS -X POST \"http://localhost:12000/v1/chat/completions\" \
    -H \"Authorization: Bearer test-key\" \
    -H \"Content-Type: application/json\" \
    -d '{
      \"model\": \"arch.summarize.v1\",
      \"max_tokens\": 50,
      \"messages\": [
        { \"role\": \"user\",
          \"content\": \"Hello, please respond with exactly: Hello from alias arch.summarize.v1!\"
        }
      ]
    }' | jq ."
 }
 # Function to stop the demo
 stop_demo() {
-  # Step 1: Stop Docker Compose services
+  # Stop Docker Compose services if running
-  echo "Stopping Network Agent using Docker Compose..."
+  docker compose down 2>/dev/null || true
  docker compose down
-  # Step 2: Stop Plano
+  # Stop Plano
  echo "Stopping Plano..."
  planoai down
 }
@ -42,6 +67,5 @@ stop_demo() {
 if [ "$1" == "down" ]; then
  stop_demo
 else
-  # Default action is to bring the demo up
+  start_demo "$1"
  start_demo
 fi
--- a/demos/integrations/spotify_bearer_auth/run_demo.sh
+++ b/demos/integrations/spotify_bearer_auth/run_demo.sh
@ -18,22 +18,24 @@ start_demo() {
    echo ".env file created with OPENAI_API_KEY."
  fi
-  # Step 3: Start Plano
+  # Step 3: Optionally start UI services (AnythingLLM, Jaeger)
  # Jaeger must start before Plano so it can bind the OTEL port (4317)
  if [ "$1" == "--with-ui" ]; then
    echo "Starting UI services (AnythingLLM, Jaeger)..."
    docker compose up -d
  fi
  # Step 4: Start Plano
  echo "Starting Plano with config.yaml..."
  planoai up config.yaml
  # Step 4: Start developer services
  echo "Starting Network Agent using Docker Compose..."
  docker compose up -d  # Run in detached mode
 }
 # Function to stop the demo
 stop_demo() {
-  # Step 1: Stop Docker Compose services
+  # Stop Docker Compose services if running
-  echo "Stopping Network Agent using Docker Compose..."
+  docker compose down 2>/dev/null || true
  docker compose down
-  # Step 2: Stop Plano
+  # Stop Plano
  echo "Stopping Plano..."
  planoai down
 }
@ -42,6 +44,5 @@ stop_demo() {
 if [ "$1" == "down" ]; then
  stop_demo
 else
-  # Default action is to bring the demo up
+  start_demo "$1"
  start_demo
 fi
--- a/demos/llm_routing/preference_based_routing/README.md
+++ b/demos/llm_routing/preference_based_routing/README.md
@ -10,19 +10,58 @@ cd demos/llm_routing/preference_based_routing
 ./run_demo.sh
 ```
-Or manually:
+To also start AnythingLLM (chat UI) and Jaeger (tracing):
 1. Start Plano
 ```bash
-planoai up config.yaml
+./run_demo.sh --with-ui
 ```
-2. Start AnythingLLM
+Then open AnythingLLM at http://localhost:3001/
 Or start manually:
 1. (Optional) Start AnythingLLM and Jaeger
 ```bash
 docker compose up -d
 ```
-3. open AnythingLLM http://localhost:3001/
+2. Start Plano
 ```bash
 planoai up config.yaml
 ```
 3. Test with curl or open AnythingLLM http://localhost:3001/
 ## Running with local Arch-Router (via Ollama)
 By default, Plano uses a hosted Arch-Router endpoint. To self-host Arch-Router locally using Ollama:
 1. Install [Ollama](https://ollama.ai) and pull the model:
 ```bash
 ollama pull hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M
 ```
 2. Make sure Ollama is running (`ollama serve` or the macOS app).
 3. Start Plano with the local config:
 ```bash
 planoai up plano_config_local.yaml
 ```
 4. Test routing:
 ```bash
 curl -s "http://localhost:12000/routing/v1/messages" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gpt-4o-mini",
    "max_tokens": 1024,
    "messages": [
      {"role": "user", "content": "Create a REST API endpoint in Rust using actix-web"}
    ]
  }'
 ```
 You should see the router select the appropriate model based on the routing preferences defined in `plano_config_local.yaml`.
 # Testing out preference based routing
--- a/demos/llm_routing/preference_based_routing/run_demo.sh
+++ b/demos/llm_routing/preference_based_routing/run_demo.sh
@ -24,22 +24,24 @@ start_demo() {
    echo ".env file created with API keys."
  fi
-  # Step 3: Start Plano
+  # Step 3: Optionally start UI services (AnythingLLM, Jaeger)
  # Jaeger must start before Plano so it can bind the OTEL port (4317)
  if [ "$1" == "--with-ui" ]; then
    echo "Starting UI services (AnythingLLM, Jaeger)..."
    docker compose up -d
  fi
  # Step 4: Start Plano
  echo "Starting Plano with config.yaml..."
  planoai up config.yaml
  # Step 4: Start services
  echo "Starting services using Docker Compose..."
  docker compose up -d
 }
 # Function to stop the demo
 stop_demo() {
-  # Step 1: Stop Docker Compose services
+  # Stop Docker Compose services if running
-  echo "Stopping Docker Compose services..."
+  docker compose down 2>/dev/null || true
  docker compose down
-  # Step 2: Stop Plano
+  # Stop Plano
  echo "Stopping Plano..."
  planoai down
 }
@ -48,5 +50,5 @@ stop_demo() {
 if [ "$1" == "down" ]; then
  stop_demo
 else
-  start_demo
+  start_demo "$1"
 fi
--- a/docs/source/guides/llm_router.rst
+++ b/docs/source/guides/llm_router.rst
@ -228,6 +228,129 @@ In summary, Arch-Router demonstrates:
 - **Production-Ready Performance**: Optimized for low-latency, high-throughput applications in multi-model environments.
 Self-hosting Arch-Router
 ------------------------
 By default, Plano uses a hosted Arch-Router endpoint. To run Arch-Router locally, you can serve the model yourself using either **Ollama** or **vLLM**.
 Using Ollama (recommended for local development)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1. **Install Ollama**
   Download and install from `ollama.ai <https://ollama.ai>`_.
 2. **Pull and serve Arch-Router**
   .. code-block:: bash
       ollama pull hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M
       ollama serve
   This downloads the quantized GGUF model from HuggingFace and starts serving on ``http://localhost:11434``.
 3. **Configure Plano to use local Arch-Router**
   .. code-block:: yaml
       routing:
         model: Arch-Router
         llm_provider: arch-router
       model_providers:
         - name: arch-router
           model: arch/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M
           base_url: http://localhost:11434
         - model: openai/gpt-5.2
           access_key: $OPENAI_API_KEY
           default: true
         - model: anthropic/claude-sonnet-4-5
           access_key: $ANTHROPIC_API_KEY
           routing_preferences:
             - name: creative writing
               description: creative content generation, storytelling, and writing assistance
 4. **Verify the model is running**
   .. code-block:: bash
       curl http://localhost:11434/v1/models
   You should see ``Arch-Router-1.5B`` listed in the response.
 Using vLLM (recommended for production / EC2)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 vLLM provides higher throughput and GPU optimizations suitable for production deployments.
 1. **Install vLLM**
   .. code-block:: bash
       pip install vllm
 2. **Download the model weights**
   The GGUF weights are downloaded automatically from HuggingFace on first use. To pre-download:
   .. code-block:: bash
       pip install huggingface_hub
       huggingface-cli download katanemo/Arch-Router-1.5B.gguf
 3. **Start the vLLM server**
   After downloading, find the GGUF file and Jinja template in the HuggingFace cache:
   .. code-block:: bash
       # Find the downloaded files
       SNAPSHOT_DIR=$(ls -d ~/.cache/huggingface/hub/models--katanemo--Arch-Router-1.5B.gguf/snapshots/*/ | head -1)
       vllm serve ${SNAPSHOT_DIR}Arch-Router-1.5B-Q4_K_M.gguf \
           --host 0.0.0.0 \
           --port 10000 \
           --load-format gguf \
           --chat-template ${SNAPSHOT_DIR}template.jinja \
           --tokenizer katanemo/Arch-Router-1.5B \
           --served-model-name Arch-Router \
           --gpu-memory-utilization 0.3 \
           --tensor-parallel-size 1 \
           --enable-prefix-caching
 4. **Configure Plano to use the vLLM endpoint**
   .. code-block:: yaml
       routing:
         model: Arch-Router
         llm_provider: arch-router
       model_providers:
         - name: arch-router
           model: Arch-Router
           base_url: http://<your-server-ip>:10000
         - model: openai/gpt-5.2
           access_key: $OPENAI_API_KEY
           default: true
         - model: anthropic/claude-sonnet-4-5
           access_key: $ANTHROPIC_API_KEY
           routing_preferences:
             - name: creative writing
               description: creative content generation, storytelling, and writing assistance
 5. **Verify the server is running**
   .. code-block:: bash
       curl http://localhost:10000/health
       curl http://localhost:10000/v1/models
 Combining Routing Methods
 -------------------------
--- a/tests/e2e/run_e2e_tests.sh
+++ b/tests/e2e/run_e2e_tests.sh
@ -21,10 +21,11 @@ trap 'print_debug' INT TERM ERR
 log starting > ../build.log
-log building and running function_calling demo
+log starting weather_forecast agent natively
 log ===========================================
 cd ../../demos/getting_started/weather_forecast/
-docker compose up weather_forecast_service --build -d
+bash start_agents.sh &
 AGENTS_PID=$!
 cd -
 log building and installing plano cli
@ -78,8 +79,6 @@ log running e2e tests for openai responses api client
 log ========================================
 uv run pytest test_openai_responses_api_client_with_state.py
-log shutting down the weather_forecast demo
+log shutting down the weather_forecast agent
 log =======================================
-cd ../../demos/getting_started/weather_forecast
+kill $AGENTS_PID 2>/dev/null || true
 docker compose down
 cd -
--- a/tests/e2e/run_prompt_gateway_tests.sh
+++ b/tests/e2e/run_prompt_gateway_tests.sh
@ -32,10 +32,11 @@ cd -
 # Re-sync e2e deps
 uv sync
-# Start weather_forecast service (needed for prompt_gateway tests)
+# Start weather_forecast service natively (needed for prompt_gateway tests)
-log "building and running weather_forecast service"
+log "starting weather_forecast agent natively"
 cd ../../demos/getting_started/weather_forecast/
-docker compose up weather_forecast_service --build -d
+bash start_agents.sh &
 AGENTS_PID=$!
 cd -
 # Start gateway with prompt_gateway config
@ -52,6 +53,4 @@ uv run pytest test_prompt_gateway.py
 # Cleanup
 log "shutting down"
 planoai down --docker || true
-cd ../../demos/getting_started/weather_forecast
+kill $AGENTS_PID 2>/dev/null || true
 docker compose down
 cd -