diff --git a/demos/advanced/currency_exchange/run_demo.sh b/demos/advanced/currency_exchange/run_demo.sh
index 6623dee5..e430a1cd 100644
--- a/demos/advanced/currency_exchange/run_demo.sh
+++ b/demos/advanced/currency_exchange/run_demo.sh
@@ -18,22 +18,24 @@ start_demo() {
     echo ".env file created with OPENAI_API_KEY."
   fi
 
-  # Step 3: Start Plano
+  # Step 3: Optionally start UI services (AnythingLLM, Jaeger)
+  # Jaeger must start before Plano so it can bind the OTEL port (4317)
+  if [ "$1" == "--with-ui" ]; then
+    echo "Starting UI services (AnythingLLM, Jaeger)..."
+    docker compose up -d
+  fi
+
+  # Step 4: Start Plano
   echo "Starting Plano with config.yaml..."
   planoai up config.yaml
-
-  # Step 4: Start developer services
-  echo "Starting Network Agent using Docker Compose..."
-  docker compose up -d  # Run in detached mode
 }
 
 # Function to stop the demo
 stop_demo() {
-  # Step 1: Stop Docker Compose services
-  echo "Stopping Network Agent using Docker Compose..."
-  docker compose down
+  # Stop Docker Compose services if running
+  docker compose down 2>/dev/null || true
 
-  # Step 2: Stop Plano
+  # Stop Plano
   echo "Stopping Plano..."
   planoai down
 }
@@ -42,6 +44,5 @@ stop_demo() {
 if [ "$1" == "down" ]; then
   stop_demo
 else
-  # Default action is to bring the demo up
-  start_demo
+  start_demo "$1"
 fi
diff --git a/demos/advanced/multi_turn_rag/docker-compose.yaml b/demos/advanced/multi_turn_rag/docker-compose.yaml
index 1c3ed73c..f36987e4 100644
--- a/demos/advanced/multi_turn_rag/docker-compose.yaml
+++ b/demos/advanced/multi_turn_rag/docker-compose.yaml
@@ -1,15 +1,4 @@
 services:
-  rag_energy_source_agent:
-    build:
-      context: .
-      dockerfile: Dockerfile
-    ports:
-      - "18083:80"
-    healthcheck:
-        test: ["CMD", "curl" ,"http://localhost:80/healthz"]
-        interval: 5s
-        retries: 20
-
   anythingllm:
     image: mintplexlabs/anythingllm
     restart: always
diff --git a/demos/advanced/multi_turn_rag/pyproject.toml b/demos/advanced/multi_turn_rag/pyproject.toml
new file mode 100644
index 00000000..05824bd6
--- /dev/null
+++ b/demos/advanced/multi_turn_rag/pyproject.toml
@@ -0,0 +1,12 @@
+[project]
+name = "multi-turn-rag"
+version = "0.1.0"
+requires-python = ">=3.12"
+dependencies = [
+    "fastapi",
+    "uvicorn",
+    "pydantic>=2.8",
+    "httpx>=0.27",
+    "openai>=1.51",
+    "python-dotenv>=1.0",
+]
diff --git a/demos/advanced/multi_turn_rag/run_demo.sh b/demos/advanced/multi_turn_rag/run_demo.sh
index f9434aa2..5bec6368 100644
--- a/demos/advanced/multi_turn_rag/run_demo.sh
+++ b/demos/advanced/multi_turn_rag/run_demo.sh
@@ -18,22 +18,32 @@ start_demo() {
     echo ".env file created with OPENAI_API_KEY."
   fi
 
-  # Step 3: Start Plano
+  # Step 3: Optionally start UI services (AnythingLLM)
+  # UI services must start before Plano to avoid OTEL port conflicts
+  if [ "$1" == "--with-ui" ]; then
+    echo "Starting UI services (AnythingLLM)..."
+    docker compose up -d
+  fi
+
+  # Step 4: Start Plano
   echo "Starting Plano with config.yaml..."
   planoai up config.yaml
 
-  # Step 4: Start Network Agent
-  echo "Starting HR Agent using Docker Compose..."
-  docker compose up -d  # Run in detached mode
+  # Step 5: Start agents natively
+  echo "Starting agents..."
+  bash start_agents.sh &
 }
 
 # Function to stop the demo
 stop_demo() {
-  # Step 1: Stop Docker Compose services
-  echo "Stopping HR Agent using Docker Compose..."
-  docker compose down -v
+  # Stop agents
+  echo "Stopping agents..."
+  pkill -f start_agents.sh 2>/dev/null || true
 
-  # Step 2: Stop Plano
+  # Stop Docker Compose services if running
+  docker compose down 2>/dev/null || true
+
+  # Stop Plano
   echo "Stopping Plano..."
   planoai down
 }
@@ -42,6 +52,5 @@ stop_demo() {
 if [ "$1" == "down" ]; then
   stop_demo
 else
-  # Default action is to bring the demo up
-  start_demo
+  start_demo "$1"
 fi
diff --git a/demos/advanced/multi_turn_rag/start_agents.sh b/demos/advanced/multi_turn_rag/start_agents.sh
new file mode 100755
index 00000000..00b7f1b1
--- /dev/null
+++ b/demos/advanced/multi_turn_rag/start_agents.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+set -e
+
+PIDS=()
+
+log() { echo "$(date '+%F %T') - $*"; }
+
+# Trap handler: send TERM to every recorded agent PID, then exit 0.
+cleanup() {
+    trap - EXIT INT TERM  # disarm so the `exit` below cannot re-run this handler
+    log "Stopping agents..."
+    for PID in "${PIDS[@]}"; do kill "$PID" 2>/dev/null && log "Stopped process $PID"; done
+    exit 0
+}
+
+trap cleanup EXIT INT TERM
+
+log "Starting rag_energy_source_agent on port 18083..."
+uv run uvicorn main:app --host 0.0.0.0 --port 18083 &
+PIDS+=($!)
+
+for PID in "${PIDS[@]}"; do
+    wait "$PID"
+done
diff --git a/demos/advanced/stock_quote/run_demo.sh b/demos/advanced/stock_quote/run_demo.sh
index 6623dee5..e430a1cd 100644
--- a/demos/advanced/stock_quote/run_demo.sh
+++ b/demos/advanced/stock_quote/run_demo.sh
@@ -18,22 +18,24 @@ start_demo() {
     echo ".env file created with OPENAI_API_KEY."
   fi
 
-  # Step 3: Start Plano
+  # Step 3: Optionally start UI services (AnythingLLM, Jaeger)
+  # Jaeger must start before Plano so it can bind the OTEL port (4317)
+  if [ "$1" == "--with-ui" ]; then
+    echo "Starting UI services (AnythingLLM, Jaeger)..."
+    docker compose up -d
+  fi
+
+  # Step 4: Start Plano
   echo "Starting Plano with config.yaml..."
   planoai up config.yaml
-
-  # Step 4: Start developer services
-  echo "Starting Network Agent using Docker Compose..."
-  docker compose up -d  # Run in detached mode
 }
 
 # Function to stop the demo
 stop_demo() {
-  # Step 1: Stop Docker Compose services
-  echo "Stopping Network Agent using Docker Compose..."
-  docker compose down
+  # Stop Docker Compose services if running
+  docker compose down 2>/dev/null || true
 
-  # Step 2: Stop Plano
+  # Stop Plano
   echo "Stopping Plano..."
   planoai down
 }
@@ -42,6 +44,5 @@ stop_demo() {
 if [ "$1" == "down" ]; then
   stop_demo
 else
-  # Default action is to bring the demo up
-  start_demo
+  start_demo "$1"
 fi
diff --git a/demos/agent_orchestration/multi_agent_crewai_langchain/README.md b/demos/agent_orchestration/multi_agent_crewai_langchain/README.md
index e2fe23fb..97d71e7f 100644
--- a/demos/agent_orchestration/multi_agent_crewai_langchain/README.md
+++ b/demos/agent_orchestration/multi_agent_crewai_langchain/README.md
@@ -41,21 +41,36 @@ cd demos/agent_orchestration/multi_agent_crewai_langchain
 ./run_demo.sh
 ```
 
-This starts Plano natively and brings up via Docker Compose:
+This starts Plano natively and runs agents as local processes:
 - **CrewAI Flight Agent** (port 10520) - flight search
 - **LangChain Weather Agent** (port 10510) - weather forecasts
-- **AnythingLLM** (port 3001) - chat interface
-- **Jaeger** (port 16686) - distributed tracing
 
 Plano runs natively on the host (ports 12000, 8001).
 
+To also start AnythingLLM (chat UI), Jaeger (tracing), and other optional services:
+
+```bash
+./run_demo.sh --with-ui
+```
+
+This additionally starts:
+- **AnythingLLM** (port 3001) - chat interface
+- **Jaeger** (port 16686) - distributed tracing
+
 ### Try It Out
 
-1. **Open the Chat Interface**
+1. **Using curl**
+   ```bash
+   curl -X POST http://localhost:8001/v1/chat/completions \
+     -H "Content-Type: application/json" \
+     -d '{"model": "gpt-4o", "messages": [{"role": "user", "content": "What is the weather in San Francisco?"}]}'
+   ```
+
+2. **Using AnythingLLM (requires `--with-ui`)**
    - Navigate to [http://localhost:3001](http://localhost:3001)
    - Create an account (stored locally)
 
-2. **Ask Multi-Agent Questions**
+3. **Ask Multi-Agent Questions**
    ```
    "What's the weather in San Francisco and can you find flights from Seattle to San Francisco?"
    ```
@@ -65,7 +80,7 @@ Plano runs natively on the host (ports 12000, 8001).
    - Routes the flight part to the CrewAI agent
    - Combines responses seamlessly
 
-3. **View Distributed Traces**
+4. **View Distributed Traces (requires `--with-ui`)**
    - Open [http://localhost:16686](http://localhost:16686) (Jaeger UI)
    - See how requests flow through both agents
 
diff --git a/demos/agent_orchestration/multi_agent_crewai_langchain/config.yaml b/demos/agent_orchestration/multi_agent_crewai_langchain/config.yaml
index b3a204f3..ef522337 100644
--- a/demos/agent_orchestration/multi_agent_crewai_langchain/config.yaml
+++ b/demos/agent_orchestration/multi_agent_crewai_langchain/config.yaml
@@ -2,9 +2,9 @@ version: v0.3.0
 
 agents:
   - id: weather_agent
-    url: http://langchain-weather-agent:10510
+    url: http://localhost:10510
   - id: flight_agent
-    url: http://crewai-flight-agent:10520
+    url: http://localhost:10520
 
 model_providers:
   - model: openai/gpt-4o
diff --git a/demos/agent_orchestration/multi_agent_crewai_langchain/docker-compose.yaml b/demos/agent_orchestration/multi_agent_crewai_langchain/docker-compose.yaml
index 2d9c180b..74954562 100644
--- a/demos/agent_orchestration/multi_agent_crewai_langchain/docker-compose.yaml
+++ b/demos/agent_orchestration/multi_agent_crewai_langchain/docker-compose.yaml
@@ -1,27 +1,5 @@
 
 services:
-  crewai-flight-agent:
-    build:
-      dockerfile: Dockerfile
-    restart: always
-    ports:
-      - "10520:10520"
-    environment:
-      - LLM_GATEWAY_ENDPOINT=http://host.docker.internal:12000/v1
-      - AEROAPI_KEY=${AEROAPI_KEY:?AEROAPI_KEY environment variable is required but not set}
-      - PYTHONUNBUFFERED=1
-    command: ["python", "-u", "crewai/flight_agent.py"]
-
-  langchain-weather-agent:
-    build:
-      dockerfile: Dockerfile
-    restart: always
-    ports:
-      - "10510:10510"
-    environment:
-      - LLM_GATEWAY_ENDPOINT=http://host.docker.internal:12000/v1
-    command: ["python", "-u", "langchain/weather_agent.py"]
-
   anythingllm:
     image: mintplexlabs/anythingllm
     restart: always
@@ -36,6 +14,8 @@ services:
       - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
       - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
       - GENERIC_OPEN_AI_API_KEY=sk-placeholder
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
 
   jaeger:
     build:
@@ -44,3 +24,4 @@ services:
     ports:
       - "16686:16686"  # Jaeger UI
       - "4317:4317"    # OTLP gRPC receiver
+      - "4318:4318"    # OTLP HTTP receiver
diff --git a/demos/agent_orchestration/multi_agent_crewai_langchain/run_demo.sh b/demos/agent_orchestration/multi_agent_crewai_langchain/run_demo.sh
index b7dc0fad..35bbbbdd 100755
--- a/demos/agent_orchestration/multi_agent_crewai_langchain/run_demo.sh
+++ b/demos/agent_orchestration/multi_agent_crewai_langchain/run_demo.sh
@@ -12,33 +12,38 @@ start_demo() {
       echo "Error: OPENAI_API_KEY environment variable is not set for the demo."
       exit 1
     fi
-    if [ -z "$AEROAPI_KEY" ]; then
-      echo "Error: AEROAPI_KEY environment variable is not set for the demo."
-      exit 1
-    fi
 
     echo "Creating .env file..."
     echo "OPENAI_API_KEY=$OPENAI_API_KEY" > .env
-    echo "AEROAPI_KEY=$AEROAPI_KEY" >> .env
     echo ".env file created with API keys."
   fi
 
-  # Step 3: Start Plano
+  # Step 3: Optionally start UI services (AnythingLLM, Jaeger)
+  # Jaeger must start before Plano so it can bind the OTEL port (4317)
+  if [ "$1" == "--with-ui" ]; then
+    echo "Starting UI services (AnythingLLM, Jaeger)..."
+    docker compose up -d
+  fi
+
+  # Step 4: Start Plano
   echo "Starting Plano with config.yaml..."
   planoai up config.yaml
 
-  # Step 4: Start agents and services
-  echo "Starting agents using Docker Compose..."
-  docker compose up -d
+  # Step 5: Start agents natively
+  echo "Starting agents..."
+  bash start_agents.sh &
 }
 
 # Function to stop the demo
 stop_demo() {
-  # Step 1: Stop Docker Compose services
-  echo "Stopping Docker Compose services..."
-  docker compose down
+  # Stop agents
+  echo "Stopping agents..."
+  pkill -f start_agents.sh 2>/dev/null || true
 
-  # Step 2: Stop Plano
+  # Stop Docker Compose services if running
+  docker compose down 2>/dev/null || true
+
+  # Stop Plano
   echo "Stopping Plano..."
   planoai down
 }
@@ -47,5 +52,5 @@ stop_demo() {
 if [ "$1" == "down" ]; then
   stop_demo
 else
-  start_demo
+  start_demo "$1"
 fi
diff --git a/demos/agent_orchestration/multi_agent_crewai_langchain/start_agents.sh b/demos/agent_orchestration/multi_agent_crewai_langchain/start_agents.sh
new file mode 100755
index 00000000..78d2fecb
--- /dev/null
+++ b/demos/agent_orchestration/multi_agent_crewai_langchain/start_agents.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+set -e
+
+PIDS=()
+
+log() { echo "$(date '+%F %T') - $*"; }
+
+# Trap handler: send TERM to every recorded agent PID, then exit 0.
+cleanup() {
+    trap - EXIT INT TERM  # disarm so the `exit` below cannot re-run this handler
+    log "Stopping agents..."
+    for PID in "${PIDS[@]}"; do kill "$PID" 2>/dev/null && log "Stopped process $PID"; done
+    exit 0
+}
+
+trap cleanup EXIT INT TERM
+
+export LLM_GATEWAY_ENDPOINT=http://localhost:12000/v1
+
+log "Starting langchain weather_agent on port 10510..."
+uv run python langchain/weather_agent.py &
+PIDS+=($!)
+
+log "Starting crewai flight_agent on port 10520..."
+uv run python crewai/flight_agent.py &
+PIDS+=($!)
+
+for PID in "${PIDS[@]}"; do
+    wait "$PID"
+done
diff --git a/demos/agent_orchestration/travel_agents/README.md b/demos/agent_orchestration/travel_agents/README.md
index d6468612..7886539d 100644
--- a/demos/agent_orchestration/travel_agents/README.md
+++ b/demos/agent_orchestration/travel_agents/README.md
@@ -23,9 +23,10 @@ All agents use Plano's agent orchestration LLM to intelligently route user reque
 ## Prerequisites
 
 - [Plano CLI](https://docs.planoai.dev/get_started/quickstart.html#prerequisites) installed (`pip install planoai`)
-- Docker and Docker Compose (for agent services)
+- [uv](https://docs.astral.sh/uv/) installed (for running agents natively)
 - [OpenAI API key](https://platform.openai.com/api-keys)
 - [FlightAware AeroAPI key](https://www.flightaware.com/aeroapi/portal)
+- Docker and Docker Compose (optional, only needed for `--with-ui`)
 
 > **Note:** You'll need to obtain a FlightAware AeroAPI key for live flight data. Visit [https://www.flightaware.com/aeroapi/portal](https://www.flightaware.com/aeroapi/portal) to get your API key.
 
@@ -46,16 +47,34 @@ export OPENAI_API_KEY="your OpenAI api key"
 ./run_demo.sh
 ```
 
-This starts Plano natively and brings up via Docker Compose:
+This starts Plano natively and runs agents as local processes:
 - Weather Agent on port 10510
 - Flight Agent on port 10520
-- Open WebUI on port 8080
 
 Plano runs natively on the host (port 8001).
 
+To also start Open WebUI, Jaeger tracing, and other optional services, pass `--with-ui`:
+
+```bash
+./run_demo.sh --with-ui
+```
+
+This additionally starts:
+- Open WebUI on port 8080
+- Jaeger tracing UI on port 16686
+
 ### 4. Test the System
 
-Use Open WebUI at http://localhost:8080
+**Option A: Using curl**
+```bash
+curl -X POST http://localhost:8001/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{"model": "gpt-5.2", "messages": [{"role": "user", "content": "What is the weather in Istanbul?"}]}'
+```
+
+**Option B: Using Open WebUI (requires `--with-ui`)**
+
+Navigate to http://localhost:8080
 
 > **Note:** The Open WebUI may take a few minutes to start up and be fully ready. Please wait for the container to finish initializing before accessing the interface. Once ready, make sure to select the **gpt-5.2** model from the model dropdown menu in the UI.
 
@@ -102,7 +121,7 @@ Each agent:
 3. Generates response using GPT-5.2
 4. Streams response back to user
 
-Both agents run as Docker containers and communicate with Plano running natively on the host.
+Both agents run as native local processes and communicate with Plano running natively on the host.
 
 ## Observability
 
diff --git a/demos/agent_orchestration/travel_agents/docker-compose.yaml b/demos/agent_orchestration/travel_agents/docker-compose.yaml
index f0fb78e5..66edbdc3 100644
--- a/demos/agent_orchestration/travel_agents/docker-compose.yaml
+++ b/demos/agent_orchestration/travel_agents/docker-compose.yaml
@@ -1,32 +1,5 @@
 
 services:
-  weather-agent:
-    build:
-      context: .
-      dockerfile: Dockerfile
-    container_name: weather-agent
-    restart: always
-    ports:
-      - "10510:10510"
-    environment:
-      - LLM_GATEWAY_ENDPOINT=http://host.docker.internal:12000/v1
-    command: ["uv", "run", "python", "src/travel_agents/weather_agent.py"]
-    extra_hosts:
-      - "host.docker.internal:host-gateway"
-  flight-agent:
-    build:
-      context: .
-      dockerfile: Dockerfile
-    container_name: flight-agent
-    restart: always
-    ports:
-      - "10520:10520"
-    environment:
-      - LLM_GATEWAY_ENDPOINT=http://host.docker.internal:12000/v1
-      - AEROAPI_KEY=${AEROAPI_KEY:? AEROAPI_KEY environment variable is required but not set}
-    command: ["uv", "run", "python", "src/travel_agents/flight_agent.py"]
-    extra_hosts:
-      - "host.docker.internal:host-gateway"
   open-web-ui:
     image: dyrnq/open-webui:main
     restart: always
@@ -40,9 +13,8 @@ services:
       - ENABLE_TITLE_GENERATION=false
       - ENABLE_TAGS_GENERATION=false
       - ENABLE_AUTOCOMPLETE_GENERATION=false
-    depends_on:
-      - weather-agent
-      - flight-agent
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
   jaeger:
     build:
       context: ../../shared/jaeger
diff --git a/demos/agent_orchestration/travel_agents/run_demo.sh b/demos/agent_orchestration/travel_agents/run_demo.sh
index b7dc0fad..643a0aa2 100755
--- a/demos/agent_orchestration/travel_agents/run_demo.sh
+++ b/demos/agent_orchestration/travel_agents/run_demo.sh
@@ -23,22 +23,32 @@ start_demo() {
     echo ".env file created with API keys."
   fi
 
-  # Step 3: Start Plano
+  # Step 3: Optionally start UI services (Open WebUI, Jaeger)
+  # Jaeger must start before Plano so it can bind the OTEL port (4317)
+  if [ "$1" == "--with-ui" ]; then
+    echo "Starting UI services (Open WebUI, Jaeger)..."
+    docker compose up -d
+  fi
+
+  # Step 4: Start Plano
   echo "Starting Plano with config.yaml..."
   planoai up config.yaml
 
-  # Step 4: Start agents and services
-  echo "Starting agents using Docker Compose..."
-  docker compose up -d
+  # Step 5: Start agents natively
+  echo "Starting agents..."
+  bash start_agents.sh &
 }
 
 # Function to stop the demo
 stop_demo() {
-  # Step 1: Stop Docker Compose services
-  echo "Stopping Docker Compose services..."
-  docker compose down
+  # Stop agents
+  echo "Stopping agents..."
+  pkill -f start_agents.sh 2>/dev/null || true
 
-  # Step 2: Stop Plano
+  # Stop Docker Compose services if running
+  docker compose down 2>/dev/null || true
+
+  # Stop Plano
   echo "Stopping Plano..."
   planoai down
 }
@@ -47,5 +57,5 @@ stop_demo() {
 if [ "$1" == "down" ]; then
   stop_demo
 else
-  start_demo
+  start_demo "$1"
 fi
diff --git a/demos/agent_orchestration/travel_agents/start_agents.sh b/demos/agent_orchestration/travel_agents/start_agents.sh
new file mode 100755
index 00000000..4f2e32a7
--- /dev/null
+++ b/demos/agent_orchestration/travel_agents/start_agents.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+set -e
+
+PIDS=()
+
+log() { echo "$(date '+%F %T') - $*"; }
+
+# Trap handler: send TERM to every recorded agent PID, then exit 0.
+cleanup() {
+    trap - EXIT INT TERM  # disarm so the `exit` below cannot re-run this handler
+    log "Stopping agents..."
+    for PID in "${PIDS[@]}"; do kill "$PID" 2>/dev/null && log "Stopped process $PID"; done
+    exit 0
+}
+
+trap cleanup EXIT INT TERM
+
+export LLM_GATEWAY_ENDPOINT=http://localhost:12000/v1
+
+log "Starting weather_agent on port 10510..."
+uv run python src/travel_agents/weather_agent.py &
+PIDS+=($!)
+
+log "Starting flight_agent on port 10520..."
+uv run python src/travel_agents/flight_agent.py &
+PIDS+=($!)
+
+for PID in "${PIDS[@]}"; do
+    wait "$PID"
+done
diff --git a/demos/filter_chains/http_filter/README.md b/demos/filter_chains/http_filter/README.md
index 5e675113..86748217 100644
--- a/demos/filter_chains/http_filter/README.md
+++ b/demos/filter_chains/http_filter/README.md
@@ -41,23 +41,27 @@ export OPENAI_API_KEY="your-key"
 ./run_demo.sh
 ```
 
-This starts Plano natively and brings up via Docker Compose:
-- Input Guards MCP server on port 10500
-- Query Rewriter MCP server on port 10501
-- Context Builder MCP server on port 10502
+This starts Plano natively and runs agents as local processes:
+- Input Guards HTTP server on port 10500
+- Query Rewriter HTTP server on port 10501
+- Context Builder HTTP server on port 10502
 - RAG Agent REST server on port 10505
-- Jaeger UI for viewing traces at http://localhost:16686
-- AnythingLLM at http://localhost:3001 for interactive queries
 
 Plano runs natively on the host (port 8001 and 12000).
 
+To also start AnythingLLM (chat UI) and Jaeger (tracing):
+
+```bash
+./run_demo.sh --with-ui
+```
+
+This additionally starts:
+- Jaeger UI for viewing traces at http://localhost:16686
+- AnythingLLM at http://localhost:3001 for interactive queries
+
 ### 2. Test the system
 
-**Option A: Using AnythingLLM (recommended)**
-
-Navigate to http://localhost:3001 and send queries through the chat interface.
-
-**Option B: Using curl**
+**Option A: Using curl (recommended)**
 ```bash
 curl -X POST http://localhost:8001/v1/chat/completions \
   -H "Content-Type: application/json" \
diff --git a/demos/filter_chains/http_filter/config.yaml b/demos/filter_chains/http_filter/config.yaml
index 117931e2..014a141a 100644
--- a/demos/filter_chains/http_filter/config.yaml
+++ b/demos/filter_chains/http_filter/config.yaml
@@ -2,23 +2,23 @@ version: v0.3.0
 
 agents:
   - id: rag_agent
-    url: http://rag-agents:10505
+    url: http://localhost:10505
 
 filters:
   - id: input_guards
-    url: http://rag-agents:10500
+    url: http://localhost:10500
     type: http
     # type: mcp (default)
     # transport: streamable-http (default)
     # tool: input_guards (default - same as filter id)
   - id: query_rewriter
-    url: http://rag-agents:10501
+    url: http://localhost:10501
     type: http
     # type: mcp (default)
     # transport: streamable-http (default)
     # tool: query_rewriter (default - same as filter id)
   - id: context_builder
-    url: http://rag-agents:10502
+    url: http://localhost:10502
     type: http
 
 model_providers:
diff --git a/demos/filter_chains/http_filter/docker-compose.yaml b/demos/filter_chains/http_filter/docker-compose.yaml
index 64962bce..0361926c 100644
--- a/demos/filter_chains/http_filter/docker-compose.yaml
+++ b/demos/filter_chains/http_filter/docker-compose.yaml
@@ -1,16 +1,4 @@
 services:
-  rag-agents:
-    build:
-      context: .
-      dockerfile: Dockerfile
-    ports:
-      - "10500:10500"
-      - "10501:10501"
-      - "10502:10502"
-      - "10505:10505"
-    environment:
-      - LLM_GATEWAY_ENDPOINT=${LLM_GATEWAY_ENDPOINT:-http://host.docker.internal:12000/v1}
-      - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set}
   jaeger:
     build:
       context: ../../shared/jaeger
@@ -32,3 +20,5 @@ services:
       - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
       - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
       - GENERIC_OPEN_AI_API_KEY=sk-placeholder
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
diff --git a/demos/filter_chains/http_filter/run_demo.sh b/demos/filter_chains/http_filter/run_demo.sh
index bed84f16..f203f5b1 100755
--- a/demos/filter_chains/http_filter/run_demo.sh
+++ b/demos/filter_chains/http_filter/run_demo.sh
@@ -18,22 +18,32 @@ start_demo() {
     echo ".env file created with OPENAI_API_KEY."
   fi
 
-  # Step 3: Start Plano
+  # Step 3: Optionally start UI services (AnythingLLM, Jaeger)
+  # Jaeger must start before Plano so it can bind the OTEL port (4317)
+  if [ "$1" == "--with-ui" ]; then
+    echo "Starting UI services (AnythingLLM, Jaeger)..."
+    docker compose up -d
+  fi
+
+  # Step 4: Start Plano
   echo "Starting Plano with config.yaml..."
   planoai up config.yaml
 
-  # Step 4: Start services
-  echo "Starting services using Docker Compose..."
-  docker compose up -d
+  # Step 5: Start agents natively
+  echo "Starting agents..."
+  bash start_agents.sh &
 }
 
 # Function to stop the demo
 stop_demo() {
-  # Step 1: Stop Docker Compose services
-  echo "Stopping Docker Compose services..."
-  docker compose down
+  # Stop agents
+  echo "Stopping agents..."
+  pkill -f start_agents.sh 2>/dev/null || true
 
-  # Step 2: Stop Plano
+  # Stop Docker Compose services if running
+  docker compose down 2>/dev/null || true
+
+  # Stop Plano
   echo "Stopping Plano..."
   planoai down
 }
@@ -42,5 +52,5 @@ stop_demo() {
 if [ "$1" == "down" ]; then
   stop_demo
 else
-  start_demo
+  start_demo "$1"
 fi
diff --git a/demos/filter_chains/http_filter/start_agents.sh b/demos/filter_chains/http_filter/start_agents.sh
old mode 100644
new mode 100755
index 06cabeec..8dfdc0f4
--- a/demos/filter_chains/http_filter/start_agents.sh
+++ b/demos/filter_chains/http_filter/start_agents.sh
@@ -1,78 +1,38 @@
-# #!/bin/bash
-# set -e
-
-# WAIT_FOR_PIDS=()
-
-# log() {
-#   timestamp=$(python3 -c 'from datetime import datetime; print(datetime.now().strftime("%Y-%m-%d %H:%M:%S,%f")[:23])')
-#   message="$*"
-#   echo "$timestamp - $message"
-# }
-
-# cleanup() {
-#     log "Caught signal, terminating all user processes ..."
-#     for PID in "${WAIT_FOR_PIDS[@]}"; do
-#         if kill $PID 2> /dev/null; then
-#             log "killed process: $PID"
-#         fi
-#     done
-#     exit 1
-# }
-
-# trap cleanup EXIT
-
-# log "Starting input_guards agent on port 10500/mcp..."
-# uv run python -m rag_agent --rest-server --host 0.0.0.0 --rest-port 10500 --agent input_guards &
-# WAIT_FOR_PIDS+=($!)
-
-# log "Starting query_rewriter agent on port 10501/mcp..."
-# uv run python -m rag_agent --rest-server --host 0.0.0.0 --rest-port 10501 --agent query_rewriter &
-# WAIT_FOR_PIDS+=($!)
-
-# log "Starting context_builder agent on port 10502/mcp..."
-# uv run python -m rag_agent --rest-server --host 0.0.0.0 --rest-port 10502 --agent context_builder &
-# WAIT_FOR_PIDS+=($!)
-
-# # log "Starting response_generator agent on port 10400..."
-# # uv run python -m rag_agent --host 0.0.0.0 --port 10400 --agent response_generator &
-# # WAIT_FOR_PIDS+=($!)
-
-# log "Starting response_generator agent on port 10505..."
-# uv run python -m rag_agent --rest-server --host 0.0.0.0 --rest-port 10505 --agent response_generator &
-# WAIT_FOR_PIDS+=($!)
-
-# for PID in "${WAIT_FOR_PIDS[@]}"; do
-#     wait "$PID"
-# done
-
-
-
-
 #!/bin/bash
 set -e
 
-export PYTHONPATH=/app/src
-
-pids=()
+PIDS=()
 
 log() { echo "$(date '+%F %T') - $*"; }
 
-log "Starting input_guards HTTP server on :10500"
+# Trap handler: send TERM to every recorded agent PID, then exit 0.
+cleanup() {
+    trap - EXIT INT TERM  # disarm so the `exit` below cannot re-run this handler
+    log "Stopping agents..."
+    for PID in "${PIDS[@]}"; do kill "$PID" 2>/dev/null && log "Stopped process $PID"; done
+    exit 0
+}
+
+trap cleanup EXIT INT TERM
+
+export PYTHONPATH=./src
+
+log "Starting input_guards HTTP server on port 10500..."
 uv run uvicorn rag_agent.input_guards:app --host 0.0.0.0 --port 10500 &
-pids+=($!)
+PIDS+=($!)
 
-log "Starting query_rewriter HTTP server on :10501"
+log "Starting query_rewriter HTTP server on port 10501..."
 uv run uvicorn rag_agent.query_rewriter:app --host 0.0.0.0 --port 10501 &
-pids+=($!)
+PIDS+=($!)
 
-log "Starting context_builder HTTP server on :10502"
+log "Starting context_builder HTTP server on port 10502..."
 uv run uvicorn rag_agent.context_builder:app --host 0.0.0.0 --port 10502 &
-pids+=($!)
+PIDS+=($!)
 
-log "Starting response_generator (OpenAI-compatible) on :10505"
+log "Starting response_generator (OpenAI-compatible) on port 10505..."
 uv run uvicorn rag_agent.rag_agent:app --host 0.0.0.0 --port 10505 &
-pids+=($!)
+PIDS+=($!)
 
-for PID in "${pids[@]}"; do
+for PID in "${PIDS[@]}"; do
     wait "$PID"
 done
diff --git a/demos/filter_chains/mcp_filter/README.md b/demos/filter_chains/mcp_filter/README.md
index 5e675113..798015e2 100644
--- a/demos/filter_chains/mcp_filter/README.md
+++ b/demos/filter_chains/mcp_filter/README.md
@@ -41,23 +41,27 @@ export OPENAI_API_KEY="your-key"
 ./run_demo.sh
 ```
 
-This starts Plano natively and brings up via Docker Compose:
+This starts Plano natively and runs agents as local processes:
 - Input Guards MCP server on port 10500
 - Query Rewriter MCP server on port 10501
 - Context Builder MCP server on port 10502
 - RAG Agent REST server on port 10505
-- Jaeger UI for viewing traces at http://localhost:16686
-- AnythingLLM at http://localhost:3001 for interactive queries
 
 Plano runs natively on the host (port 8001 and 12000).
 
+To also start AnythingLLM (chat UI) and Jaeger (tracing):
+
+```bash
+./run_demo.sh --with-ui
+```
+
+This additionally starts:
+- Jaeger UI for viewing traces at http://localhost:16686
+- AnythingLLM at http://localhost:3001 for interactive queries
+
 ### 2. Test the system
 
-**Option A: Using AnythingLLM (recommended)**
-
-Navigate to http://localhost:3001 and send queries through the chat interface.
-
-**Option B: Using curl**
+**Option A: Using curl (recommended)**
 ```bash
 curl -X POST http://localhost:8001/v1/chat/completions \
   -H "Content-Type: application/json" \
diff --git a/demos/filter_chains/mcp_filter/docker-compose.yaml b/demos/filter_chains/mcp_filter/docker-compose.yaml
index 64962bce..0361926c 100644
--- a/demos/filter_chains/mcp_filter/docker-compose.yaml
+++ b/demos/filter_chains/mcp_filter/docker-compose.yaml
@@ -1,16 +1,4 @@
 services:
-  rag-agents:
-    build:
-      context: .
-      dockerfile: Dockerfile
-    ports:
-      - "10500:10500"
-      - "10501:10501"
-      - "10502:10502"
-      - "10505:10505"
-    environment:
-      - LLM_GATEWAY_ENDPOINT=${LLM_GATEWAY_ENDPOINT:-http://host.docker.internal:12000/v1}
-      - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set}
   jaeger:
     build:
       context: ../../shared/jaeger
@@ -32,3 +20,5 @@ services:
       - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
       - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
       - GENERIC_OPEN_AI_API_KEY=sk-placeholder
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
diff --git a/demos/filter_chains/mcp_filter/run_demo.sh b/demos/filter_chains/mcp_filter/run_demo.sh
index bed84f16..f203f5b1 100755
--- a/demos/filter_chains/mcp_filter/run_demo.sh
+++ b/demos/filter_chains/mcp_filter/run_demo.sh
@@ -18,22 +18,32 @@ start_demo() {
     echo ".env file created with OPENAI_API_KEY."
   fi
 
-  # Step 3: Start Plano
+  # Step 3: Optionally start UI services (AnythingLLM, Jaeger)
+  # Jaeger must start before Plano so it can bind the OTEL port (4317)
+  if [ "$1" == "--with-ui" ]; then
+    echo "Starting UI services (AnythingLLM, Jaeger)..."
+    docker compose up -d
+  fi
+
+  # Step 4: Start Plano
   echo "Starting Plano with config.yaml..."
   planoai up config.yaml
 
-  # Step 4: Start services
-  echo "Starting services using Docker Compose..."
-  docker compose up -d
+  # Step 5: Start agents natively
+  echo "Starting agents..."
+  bash start_agents.sh &
 }
 
 # Function to stop the demo
 stop_demo() {
-  # Step 1: Stop Docker Compose services
-  echo "Stopping Docker Compose services..."
-  docker compose down
+  # Stop agents
+  echo "Stopping agents..."
+  pkill -f start_agents.sh 2>/dev/null || true
 
-  # Step 2: Stop Plano
+  # Stop Docker Compose services if running
+  docker compose down 2>/dev/null || true
+
+  # Stop Plano
   echo "Stopping Plano..."
   planoai down
 }
@@ -42,5 +52,5 @@ stop_demo() {
 if [ "$1" == "down" ]; then
   stop_demo
 else
-  start_demo
+  start_demo "$1"
 fi
diff --git a/demos/getting_started/llm_gateway/README.md b/demos/getting_started/llm_gateway/README.md
index e87467fc..b29397b6 100644
--- a/demos/getting_started/llm_gateway/README.md
+++ b/demos/getting_started/llm_gateway/README.md
@@ -7,7 +7,14 @@ This demo shows how you can use Plano gateway to manage keys and route to upstre
    ```sh
    sh run_demo.sh
    ```
-1. Navigate to http://localhost:3001/
+1. Test with curl (see example below)
+
+To also start the AnythingLLM chat UI and Jaeger tracing, pass `--with-ui`:
+```sh
+sh run_demo.sh --with-ui
+```
+
+Then navigate to http://localhost:3001/ for AnythingLLM.
 
 Following screen shows an example of interaction with Plano gateway showing dynamic routing. You can select between different LLMs using "override model" option in the chat UI.
 
@@ -47,7 +54,7 @@ $ curl --header 'Content-Type: application/json' \
 ```
 
 # Observability
-For tracing you can head over to http://localhost:16686/ to view recent traces.
+For tracing, start the demo with `--with-ui`, then head over to http://localhost:16686/ to view recent traces.
 
 Following is a screenshot of tracing UI showing call received by Plano gateway and making upstream call to LLM,
 
diff --git a/demos/getting_started/llm_gateway/run_demo.sh b/demos/getting_started/llm_gateway/run_demo.sh
index b049bf31..e430a1cd 100644
--- a/demos/getting_started/llm_gateway/run_demo.sh
+++ b/demos/getting_started/llm_gateway/run_demo.sh
@@ -18,22 +18,24 @@ start_demo() {
     echo ".env file created with OPENAI_API_KEY."
   fi
 
-  # Step 3: Start Plano
+  # Step 3: Optionally start UI services (AnythingLLM, Jaeger)
+  # Jaeger must start before Plano so it can bind the OTEL port (4317)
+  if [ "$1" == "--with-ui" ]; then
+    echo "Starting UI services (AnythingLLM, Jaeger)..."
+    docker compose up -d
+  fi
+
+  # Step 4: Start Plano
   echo "Starting Plano with config.yaml..."
   planoai up config.yaml
-
-  # Step 4: Start LLM Routing
-  echo "Starting LLM Routing using Docker Compose..."
-  docker compose up -d  # Run in detached mode
 }
 
 # Function to stop the demo
 stop_demo() {
-  # Step 1: Stop Docker Compose services
-  echo "Stopping LLM Routing using Docker Compose..."
-  docker compose down
+  # Stop Docker Compose services if running
+  docker compose down 2>/dev/null || true
 
-  # Step 2: Stop Plano
+  # Stop Plano
   echo "Stopping Plano..."
   planoai down
 }
@@ -42,6 +44,5 @@ stop_demo() {
 if [ "$1" == "down" ]; then
   stop_demo
 else
-  # Default action is to bring the demo up
-  start_demo
+  start_demo "$1"
 fi
diff --git a/demos/getting_started/weather_forecast/README.md b/demos/getting_started/weather_forecast/README.md
index 8a9eb6c0..91fa810f 100644
--- a/demos/getting_started/weather_forecast/README.md
+++ b/demos/getting_started/weather_forecast/README.md
@@ -10,15 +10,26 @@ This demo shows how you can use Plano's core function calling capabilities.
 3. ```sh
    sh run_demo.sh
    ```
-4. Navigate to http://localhost:3001/
-5. You can type in queries like "how is the weather?"
+4. Test with curl:
+   ```sh
+   curl http://localhost:10000/v1/chat/completions \
+     -H "Content-Type: application/json" \
+     -d '{"model": "gpt-4o", "messages": [{"role": "user", "content": "how is the weather in San Francisco?"}]}'
+   ```
 
 Here is a sample interaction,
 <img width="575" alt="image" src="https://github.com/user-attachments/assets/e0929490-3eb2-4130-ae87-a732aea4d059">
 
-## Tracing
+## Using the Chat UI and Tracing (optional)
 
-To see a tracing dashboard, navigate to http://localhost:16686/ to open Jaeger UI.
+To start AnythingLLM (chat UI) and other optional services, pass `--with-ui`:
+
+```sh
+sh run_demo.sh --with-ui
+```
+
+- Navigate to http://localhost:3001/ for AnythingLLM
+- Navigate to http://localhost:16686/ for Jaeger tracing UI
 
 ### Stopping Demo
 
diff --git a/demos/getting_started/weather_forecast/docker-compose.yaml b/demos/getting_started/weather_forecast/docker-compose.yaml
index 84074ab9..f36987e4 100644
--- a/demos/getting_started/weather_forecast/docker-compose.yaml
+++ b/demos/getting_started/weather_forecast/docker-compose.yaml
@@ -1,14 +1,4 @@
 services:
-  weather_forecast_service:
-    build:
-      context: ./
-    environment:
-      - OLTP_HOST=http://jaeger:4317
-    extra_hosts:
-      - "host.docker.internal:host-gateway"
-    ports:
-      - "18083:80"
-
   anythingllm:
     image: mintplexlabs/anythingllm
     restart: always
diff --git a/demos/getting_started/weather_forecast/run_demo.sh b/demos/getting_started/weather_forecast/run_demo.sh
index c8eb96e5..c77f2d83 100644
--- a/demos/getting_started/weather_forecast/run_demo.sh
+++ b/demos/getting_started/weather_forecast/run_demo.sh
@@ -72,23 +72,32 @@ start_demo() {
     exit 1
   fi
 
-  # Step 4: Start Plano
+  # Step 4: Optionally start UI services (AnythingLLM, Jaeger, etc.)
+  # Jaeger must start before Plano so it can bind the OTEL port (4317)
+  if [ "$1" == "--with-ui" ] || [ "$2" == "--with-ui" ]; then
+    echo "Starting UI services with $COMPOSE_FILE..."
+    docker compose -f "$COMPOSE_FILE" up -d
+  fi
+
+  # Step 5: Start Plano
   echo "Starting Plano with config.yaml..."
   planoai up config.yaml
 
-  # Step 5: Start Network Agent with the chosen Docker Compose file
-  echo "Starting Network Agent with $COMPOSE_FILE..."
-  docker compose -f "$COMPOSE_FILE" up -d # Run in detached mode
+  # Step 6: Start agents natively
+  echo "Starting agents..."
+  bash start_agents.sh &
 }
 
 # Function to stop the demo
 stop_demo() {
-  echo "Stopping all Docker Compose services..."
+  # Stop agents
+  echo "Stopping agents..."
+  pkill -f start_agents.sh 2>/dev/null || true
 
-  # Stop all services by iterating through all configurations
+  # Stop all Docker Compose services if running
+  echo "Stopping Docker Compose services..."
   for compose_file in ./docker-compose*.yaml; do
-    echo "Stopping services in $compose_file..."
-    docker compose -f "$compose_file" down
+    docker compose -f "$compose_file" down 2>/dev/null || true
   done
 
   # Stop Plano
@@ -101,6 +110,6 @@ if [ "$1" == "down" ]; then
   # Call stop_demo with the second argument as the demo to stop
   stop_demo
 else
-  # Use the argument (jaeger, logfire, signoz) to determine the compose file
-  start_demo "$1"
+  # Forward both arguments: a backend name (jaeger, logfire, signoz) determines the compose file, and --with-ui (as $1 or $2) also starts the UI services
+  start_demo "$1" "$2"
 fi
diff --git a/demos/getting_started/weather_forecast/start_agents.sh b/demos/getting_started/weather_forecast/start_agents.sh
new file mode 100755
index 00000000..548f2bf7
--- /dev/null
+++ b/demos/getting_started/weather_forecast/start_agents.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# Start the weather_forecast demo agents as background processes and block
+# until they exit. The agents are killed when this script exits or receives
+# INT/TERM.
+set -e
+
+# PIDs of the background agent processes started below.
+PIDS=()
+
+# Print a timestamped message to stdout.
+log() { echo "$(date '+%F %T') - $*"; }
+
+# Kill every recorded agent process, then exit with status 0.
+cleanup() {
+    # Disarm the traps first: the `exit` below fires the EXIT trap, which
+    # would otherwise run this handler a second time after INT/TERM.
+    trap - EXIT INT TERM
+    log "Stopping agents..."
+    for PID in "${PIDS[@]}"; do
+        if kill "$PID" 2>/dev/null; then
+            log "Stopped process $PID"
+        fi
+    done
+    exit 0
+}
+
+trap cleanup EXIT INT TERM
+
+log "Starting weather_forecast_service on port 18083..."
+uv run uvicorn main:app --host 0.0.0.0 --port 18083 &
+PIDS+=("$!")
+
+# Block until every agent has exited.
+for PID in "${PIDS[@]}"; do
+    wait "$PID"
+done
diff --git a/demos/integrations/ollama/run_demo.sh b/demos/integrations/ollama/run_demo.sh
index 6623dee5..5bbf183b 100644
--- a/demos/integrations/ollama/run_demo.sh
+++ b/demos/integrations/ollama/run_demo.sh
@@ -7,33 +7,58 @@ start_demo() {
   if [ -f ".env" ]; then
     echo ".env file already exists. Skipping creation."
   else
-    # Step 2: Create `.env` file and set OpenAI key
+    # Step 2: Create `.env` file and set API keys
     if [ -z "$OPENAI_API_KEY" ]; then
       echo "Error: OPENAI_API_KEY environment variable is not set for the demo."
       exit 1
     fi
+    if [ -z "$ANTHROPIC_API_KEY" ]; then
+      echo "Warning: ANTHROPIC_API_KEY environment variable is not set. Anthropic features may not work."
+    fi
 
     echo "Creating .env file..."
     echo "OPENAI_API_KEY=$OPENAI_API_KEY" > .env
-    echo ".env file created with OPENAI_API_KEY."
+    if [ -n "$ANTHROPIC_API_KEY" ]; then
+      echo "ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY" >> .env
+    fi
+    echo ".env file created with API keys."
   fi
 
-  # Step 3: Start Plano
-  echo "Starting Plano with config.yaml..."
-  planoai up config.yaml
+  # Step 3: Optionally start UI services (AnythingLLM, Jaeger)
+  # Jaeger must start before Plano so it can bind the OTEL port (4317)
+  if [ "$1" == "--with-ui" ]; then
+    echo "Starting UI services (AnythingLLM, Jaeger)..."
+    docker compose up -d
+  fi
 
-  # Step 4: Start developer services
-  echo "Starting Network Agent using Docker Compose..."
-  docker compose up -d  # Run in detached mode
+  # Step 4: Start Plano
+  echo "Starting Plano with arch_config_with_aliases.yaml..."
+  planoai up arch_config_with_aliases.yaml
+
+  echo ""
+  echo "Plano started successfully."
+  echo "Please run the following CURL command to test model alias routing. Additional instructions are in the README.md file."
+  echo ""
+  echo "curl -sS -X POST \"http://localhost:12000/v1/chat/completions\" \
+    -H \"Authorization: Bearer test-key\" \
+    -H \"Content-Type: application/json\" \
+    -d '{
+      \"model\": \"arch.summarize.v1\",
+      \"max_tokens\": 50,
+      \"messages\": [
+        { \"role\": \"user\",
+          \"content\": \"Hello, please respond with exactly: Hello from alias arch.summarize.v1!\"
+        }
+      ]
+    }' | jq ."
 }
 
 # Function to stop the demo
 stop_demo() {
-  # Step 1: Stop Docker Compose services
-  echo "Stopping Network Agent using Docker Compose..."
-  docker compose down
+  # Stop Docker Compose services if running
+  docker compose down 2>/dev/null || true
 
-  # Step 2: Stop Plano
+  # Stop Plano
   echo "Stopping Plano..."
   planoai down
 }
@@ -42,6 +67,5 @@ stop_demo() {
 if [ "$1" == "down" ]; then
   stop_demo
 else
-  # Default action is to bring the demo up
-  start_demo
+  start_demo "$1"
 fi
diff --git a/demos/integrations/spotify_bearer_auth/run_demo.sh b/demos/integrations/spotify_bearer_auth/run_demo.sh
index 6623dee5..e430a1cd 100644
--- a/demos/integrations/spotify_bearer_auth/run_demo.sh
+++ b/demos/integrations/spotify_bearer_auth/run_demo.sh
@@ -18,22 +18,24 @@ start_demo() {
     echo ".env file created with OPENAI_API_KEY."
   fi
 
-  # Step 3: Start Plano
+  # Step 3: Optionally start UI services (AnythingLLM, Jaeger)
+  # Jaeger must start before Plano so it can bind the OTEL port (4317)
+  if [ "$1" == "--with-ui" ]; then
+    echo "Starting UI services (AnythingLLM, Jaeger)..."
+    docker compose up -d
+  fi
+
+  # Step 4: Start Plano
   echo "Starting Plano with config.yaml..."
   planoai up config.yaml
-
-  # Step 4: Start developer services
-  echo "Starting Network Agent using Docker Compose..."
-  docker compose up -d  # Run in detached mode
 }
 
 # Function to stop the demo
 stop_demo() {
-  # Step 1: Stop Docker Compose services
-  echo "Stopping Network Agent using Docker Compose..."
-  docker compose down
+  # Stop Docker Compose services if running
+  docker compose down 2>/dev/null || true
 
-  # Step 2: Stop Plano
+  # Stop Plano
   echo "Stopping Plano..."
   planoai down
 }
@@ -42,6 +44,5 @@ stop_demo() {
 if [ "$1" == "down" ]; then
   stop_demo
 else
-  # Default action is to bring the demo up
-  start_demo
+  start_demo "$1"
 fi
diff --git a/demos/llm_routing/preference_based_routing/README.md b/demos/llm_routing/preference_based_routing/README.md
index 03d28cee..009002fd 100644
--- a/demos/llm_routing/preference_based_routing/README.md
+++ b/demos/llm_routing/preference_based_routing/README.md
@@ -10,19 +10,58 @@ cd demos/llm_routing/preference_based_routing
 ./run_demo.sh
 ```
 
-Or manually:
+To also start AnythingLLM (chat UI) and Jaeger (tracing):
 
-1. Start Plano
 ```bash
-planoai up config.yaml
+./run_demo.sh --with-ui
 ```
 
-2. Start AnythingLLM
+Then open AnythingLLM at http://localhost:3001/
+
+Or start manually:
+
+1. (Optional) Start AnythingLLM and Jaeger
 ```bash
 docker compose up -d
 ```
 
-3. open AnythingLLM http://localhost:3001/
+2. Start Plano
+```bash
+planoai up config.yaml
+```
+
+3. Test with curl, or open AnythingLLM at http://localhost:3001/ (requires the optional step 1)
+
+## Running with local Arch-Router (via Ollama)
+
+By default, Plano uses a hosted Arch-Router endpoint. To self-host Arch-Router locally using Ollama:
+
+1. Install [Ollama](https://ollama.ai) and pull the model:
+```bash
+ollama pull hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M
+```
+
+2. Make sure Ollama is running (`ollama serve` or the macOS app).
+
+3. Start Plano with the local config:
+```bash
+planoai up plano_config_local.yaml
+```
+
+4. Test routing:
+```bash
+curl -s "http://localhost:12000/routing/v1/messages" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gpt-4o-mini",
+    "max_tokens": 1024,
+    "messages": [
+      {"role": "user", "content": "Create a REST API endpoint in Rust using actix-web"}
+    ]
+  }'
+```
+
+You should see the router select the appropriate model based on the routing preferences defined in `plano_config_local.yaml`.
 
 # Testing out preference based routing
 
diff --git a/demos/llm_routing/preference_based_routing/run_demo.sh b/demos/llm_routing/preference_based_routing/run_demo.sh
index c9525c26..30e0c67b 100755
--- a/demos/llm_routing/preference_based_routing/run_demo.sh
+++ b/demos/llm_routing/preference_based_routing/run_demo.sh
@@ -24,22 +24,24 @@ start_demo() {
     echo ".env file created with API keys."
   fi
 
-  # Step 3: Start Plano
+  # Step 3: Optionally start UI services (AnythingLLM, Jaeger)
+  # Jaeger must start before Plano so it can bind the OTEL port (4317)
+  if [ "$1" == "--with-ui" ]; then
+    echo "Starting UI services (AnythingLLM, Jaeger)..."
+    docker compose up -d
+  fi
+
+  # Step 4: Start Plano
   echo "Starting Plano with config.yaml..."
   planoai up config.yaml
-
-  # Step 4: Start services
-  echo "Starting services using Docker Compose..."
-  docker compose up -d
 }
 
 # Function to stop the demo
 stop_demo() {
-  # Step 1: Stop Docker Compose services
-  echo "Stopping Docker Compose services..."
-  docker compose down
+  # Stop Docker Compose services if running
+  docker compose down 2>/dev/null || true
 
-  # Step 2: Stop Plano
+  # Stop Plano
   echo "Stopping Plano..."
   planoai down
 }
@@ -48,5 +50,5 @@ stop_demo() {
 if [ "$1" == "down" ]; then
   stop_demo
 else
-  start_demo
+  start_demo "$1"
 fi
diff --git a/docs/source/guides/llm_router.rst b/docs/source/guides/llm_router.rst
index 188b1e30..41c51b4a 100644
--- a/docs/source/guides/llm_router.rst
+++ b/docs/source/guides/llm_router.rst
@@ -228,6 +228,129 @@ In summary, Arch-Router demonstrates:
 - **Production-Ready Performance**: Optimized for low-latency, high-throughput applications in multi-model environments.
 
 
+Self-hosting Arch-Router
+------------------------
+
+By default, Plano uses a hosted Arch-Router endpoint. To run Arch-Router locally, you can serve the model yourself using either **Ollama** or **vLLM**.
+
+Using Ollama (recommended for local development)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+1. **Install Ollama**
+
+   Download and install from `ollama.ai <https://ollama.ai>`_.
+
+2. **Pull and serve Arch-Router**
+
+   .. code-block:: bash
+
+       ollama pull hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M
+       ollama serve
+
+   This downloads the quantized GGUF model from HuggingFace and starts serving on ``http://localhost:11434``.
+
+3. **Configure Plano to use local Arch-Router**
+
+   .. code-block:: yaml
+
+       routing:
+         model: Arch-Router
+         llm_provider: arch-router
+
+       model_providers:
+         - name: arch-router
+           model: arch/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M
+           base_url: http://localhost:11434
+
+         - model: openai/gpt-5.2
+           access_key: $OPENAI_API_KEY
+           default: true
+
+         - model: anthropic/claude-sonnet-4-5
+           access_key: $ANTHROPIC_API_KEY
+           routing_preferences:
+             - name: creative writing
+               description: creative content generation, storytelling, and writing assistance
+
+4. **Verify the model is running**
+
+   .. code-block:: bash
+
+       curl http://localhost:11434/v1/models
+
+   You should see ``Arch-Router-1.5B`` listed in the response.
+
+Using vLLM (recommended for production / EC2)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+vLLM provides higher throughput and GPU optimizations suitable for production deployments.
+
+1. **Install vLLM**
+
+   .. code-block:: bash
+
+       pip install vllm
+
+2. **Download the model weights**
+
+   The GGUF weights are downloaded automatically from HuggingFace on first use. To pre-download:
+
+   .. code-block:: bash
+
+       pip install huggingface_hub
+       huggingface-cli download katanemo/Arch-Router-1.5B.gguf
+
+3. **Start the vLLM server**
+
+   After downloading, find the GGUF file and Jinja template in the HuggingFace cache:
+
+   .. code-block:: bash
+
+       # Find the downloaded files
+       SNAPSHOT_DIR=$(ls -d ~/.cache/huggingface/hub/models--katanemo--Arch-Router-1.5B.gguf/snapshots/*/ | head -1)
+
+       vllm serve ${SNAPSHOT_DIR}Arch-Router-1.5B-Q4_K_M.gguf \
+           --host 0.0.0.0 \
+           --port 10000 \
+           --load-format gguf \
+           --chat-template ${SNAPSHOT_DIR}template.jinja \
+           --tokenizer katanemo/Arch-Router-1.5B \
+           --served-model-name Arch-Router \
+           --gpu-memory-utilization 0.3 \
+           --tensor-parallel-size 1 \
+           --enable-prefix-caching
+
+4. **Configure Plano to use the vLLM endpoint**
+
+   .. code-block:: yaml
+
+       routing:
+         model: Arch-Router
+         llm_provider: arch-router
+
+       model_providers:
+         - name: arch-router
+           model: Arch-Router
+           base_url: http://<your-server-ip>:10000
+
+         - model: openai/gpt-5.2
+           access_key: $OPENAI_API_KEY
+           default: true
+
+         - model: anthropic/claude-sonnet-4-5
+           access_key: $ANTHROPIC_API_KEY
+           routing_preferences:
+             - name: creative writing
+               description: creative content generation, storytelling, and writing assistance
+
+5. **Verify the server is running**
+
+   .. code-block:: bash
+
+       curl http://localhost:10000/health
+       curl http://localhost:10000/v1/models
+
+
 Combining Routing Methods
 -------------------------
 
diff --git a/tests/e2e/run_e2e_tests.sh b/tests/e2e/run_e2e_tests.sh
index c24931f4..a164b7f9 100644
--- a/tests/e2e/run_e2e_tests.sh
+++ b/tests/e2e/run_e2e_tests.sh
@@ -21,10 +21,11 @@ trap 'print_debug' INT TERM ERR
 
 log starting > ../build.log
 
-log building and running function_calling demo
+log starting weather_forecast agent natively
 log ===========================================
 cd ../../demos/getting_started/weather_forecast/
-docker compose up weather_forecast_service --build -d
+bash start_agents.sh &
+AGENTS_PID=$!
 cd -
 
 log building and installing plano cli
@@ -78,8 +79,6 @@ log running e2e tests for openai responses api client
 log ========================================
 uv run pytest test_openai_responses_api_client_with_state.py
 
-log shutting down the weather_forecast demo
+log shutting down the weather_forecast agent
 log =======================================
-cd ../../demos/getting_started/weather_forecast
-docker compose down
-cd -
+kill $AGENTS_PID 2>/dev/null || true
diff --git a/tests/e2e/run_prompt_gateway_tests.sh b/tests/e2e/run_prompt_gateway_tests.sh
index 58d850d8..1e947813 100755
--- a/tests/e2e/run_prompt_gateway_tests.sh
+++ b/tests/e2e/run_prompt_gateway_tests.sh
@@ -32,10 +32,11 @@ cd -
 # Re-sync e2e deps
 uv sync
 
-# Start weather_forecast service (needed for prompt_gateway tests)
-log "building and running weather_forecast service"
+# Start weather_forecast service natively (needed for prompt_gateway tests)
+log "starting weather_forecast agent natively"
 cd ../../demos/getting_started/weather_forecast/
-docker compose up weather_forecast_service --build -d
+bash start_agents.sh &
+AGENTS_PID=$!
 cd -
 
 # Start gateway with prompt_gateway config
@@ -52,6 +53,4 @@ uv run pytest test_prompt_gateway.py
 # Cleanup
 log "shutting down"
 planoai down --docker || true
-cd ../../demos/getting_started/weather_forecast
-docker compose down
-cd -
+kill $AGENTS_PID 2>/dev/null || true