merge main into model-listener-filter-chain

2026-05-18 13:45:15 +02:00 · 2026-03-10 06:52:19 +00:00 · 2026-03-10 06:52:19 +00:00 · aeb8aa9a54
commit aeb8aa9a54
parent 3d2be4f8b7 028a2cd196
99 changed files with 5792 additions and 655 deletions
--- a/demos/advanced/multi_turn_rag/config.yaml
+++ b/demos/advanced/multi_turn_rag/config.yaml
@ -7,7 +7,7 @@ listeners:

 endpoints:
  rag_energy_source_agent:
-    endpoint: host.docker.internal:18083
+    endpoint: localhost:18083
    connect_timeout: 0.005s

 model_providers:
--- a/demos/agent_orchestration/multi_agent_crewai_langchain/README.md
+++ b/demos/agent_orchestration/multi_agent_crewai_langchain/README.md
@ -38,18 +38,17 @@ Plano acts as a **framework-agnostic proxy and data plane** that:
 ```bash
 # From the demo directory
 cd demos/agent_orchestration/multi_agent_crewai_langchain
-
-# Build and start all services
-docker-compose up -d
+./run_demo.sh
 ```

-This starts:
- **Plano** (ports 12000, 8001) - routing and orchestration
+This starts Plano natively and brings up the following services via Docker Compose:
 - **CrewAI Flight Agent** (port 10520) - flight search
 - **LangChain Weather Agent** (port 10510) - weather forecasts
 - **AnythingLLM** (port 3001) - chat interface
 - **Jaeger** (port 16686) - distributed tracing

+Plano runs natively on the host (ports 12000, 8001).
+
 ### Try It Out

 1. **Open the Chat Interface**
@ -116,7 +115,7 @@ This starts:
 ## Cleanup

 ```bash
-docker-compose down
+./run_demo.sh down
 ```

 ## Next Steps
--- a/demos/agent_orchestration/multi_agent_crewai_langchain/docker-compose.yaml
+++ b/demos/agent_orchestration/multi_agent_crewai_langchain/docker-compose.yaml
@ -1,21 +1,5 @@

 services:
-  plano:
-    build:
-      context: ../../../
-      dockerfile: Dockerfile
-    ports:
-      - "8001:8001"
-      - "12000:12000"
-    environment:
-      - PLANO_CONFIG_PATH=/app/plano_config.yaml
-      - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set}
-      - OTEL_TRACING_GRPC_ENDPOINT=http://jaeger:4317
-      - LOG_LEVEL=${LOG_LEVEL:-info}
-    volumes:
-      - ./config.yaml:/app/plano_config.yaml:ro
-      - /etc/ssl/cert.pem:/etc/ssl/cert.pem
-
  crewai-flight-agent:
    build:
      dockerfile: Dockerfile
@ -23,7 +7,7 @@ services:
    ports:
      - "10520:10520"
    environment:
-      - LLM_GATEWAY_ENDPOINT=http://plano:12000/v1
+      - LLM_GATEWAY_ENDPOINT=http://host.docker.internal:12000/v1
      - AEROAPI_KEY=${AEROAPI_KEY:?AEROAPI_KEY environment variable is required but not set}
      - PYTHONUNBUFFERED=1
    command: ["python", "-u", "crewai/flight_agent.py"]
@ -35,7 +19,7 @@ services:
    ports:
      - "10510:10510"
    environment:
-      - LLM_GATEWAY_ENDPOINT=http://plano:12000/v1
+      - LLM_GATEWAY_ENDPOINT=http://host.docker.internal:12000/v1
    command: ["python", "-u", "langchain/weather_agent.py"]

  anythingllm:
@ -48,7 +32,7 @@ services:
    environment:
      - STORAGE_DIR=/app/server/storage
      - LLM_PROVIDER=generic-openai
-      - GENERIC_OPEN_AI_BASE_PATH=http://plano:8001/v1
+      - GENERIC_OPEN_AI_BASE_PATH=http://host.docker.internal:8001/v1
      - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
      - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
      - GENERIC_OPEN_AI_API_KEY=sk-placeholder
--- a/demos/agent_orchestration/multi_agent_crewai_langchain/run_demo.sh
+++ b/demos/agent_orchestration/multi_agent_crewai_langchain/run_demo.sh
@ -0,0 +1,51 @@
+#!/bin/bash
+set -e
+
+# Function to start the demo
+start_demo() {
+  # Create .env (picked up by Docker Compose) from the caller's environment unless one exists.
+  if [ -f ".env" ]; then
+    echo ".env file already exists. Skipping creation."
+  else
+    # Validate the required keys first; diagnostics go to stderr.
+    if [ -z "$OPENAI_API_KEY" ]; then
+      echo "Error: OPENAI_API_KEY environment variable is not set for the demo." >&2
+      exit 1
+    fi
+    if [ -z "$AEROAPI_KEY" ]; then
+      echo "Error: AEROAPI_KEY environment variable is not set for the demo." >&2
+      exit 1
+    fi
+
+    echo "Creating .env file..."
+    # The file holds secrets: the subshell's umask creates it owner-only (0600).
+    (umask 077 && printf '%s\n' "OPENAI_API_KEY=$OPENAI_API_KEY" "AEROAPI_KEY=$AEROAPI_KEY" > .env)
+    echo ".env file created with API keys."
+  fi
+
+  # Start Plano natively on the host.
+  echo "Starting Plano with config.yaml..."
+  planoai up config.yaml
+
+  # Start the agents and supporting services in the background.
+  echo "Starting agents using Docker Compose..."
+  docker compose up -d
+}
+
+# Function to stop the demo
+stop_demo() {
+  # Best-effort teardown: always attempt both steps, then report the last failure.
+  local rc=0
+  echo "Stopping Docker Compose services..."
+  docker compose down || rc=$?
+  echo "Stopping Plano..."
+  planoai down || rc=$?
+  return "$rc"
+}
+
+# Main script logic
+# "down" tears the demo down; anything else (or no argument) starts it.
+case "$1" in
+  down) stop_demo ;;
+  *) start_demo ;;
+esac
--- a/demos/agent_orchestration/travel_agents/README.md
+++ b/demos/agent_orchestration/travel_agents/README.md
@ -9,7 +9,7 @@ This demo consists of two intelligent agents that work together seamlessly:
 - **Weather Agent** - Real-time weather conditions and multi-day forecasts for any city worldwide
 - **Flight Agent** - Live flight information between airports with real-time tracking

-All agents use Plano's agent orchestration LLM to intelligently route user requests to the appropriate specialized agent based on conversation context and user intent. Both agents run as Docker containers for easy deployment.
+All agents use Plano's agent orchestration LLM to intelligently route user requests to the appropriate specialized agent based on conversation context and user intent.

 ## Features

@ -22,8 +22,8 @@ All agents use Plano's agent orchestration LLM to intelligently route user reque

 ## Prerequisites

- Docker and Docker Compose
- [Plano CLI](https://docs.planoai.dev/get_started/quickstart.html#prerequisites) installed
+- [Plano CLI](https://docs.planoai.dev/get_started/quickstart.html#prerequisites) installed (`pip install planoai`)
+- Docker and Docker Compose (for agent services)
 - [OpenAI API key](https://platform.openai.com/api-keys)
 - [FlightAware AeroAPI key](https://www.flightaware.com/aeroapi/portal)

@ -40,17 +40,18 @@ export AEROAPI_KEY="your-flightaware-api-key"
 export OPENAI_API_KEY="your OpenAI api key"
 ```

-### 2. Start All Agents & Plano with Docker
+### 2. Start the Demo

 ```bash
-docker compose up --build
+./run_demo.sh
 ```

-This starts:
+This starts Plano natively and brings up the following services via Docker Compose:
 - Weather Agent on port 10510
 - Flight Agent on port 10520
 - Open WebUI on port 8080
- Plano Proxy on port 8001
+
+Plano runs natively on the host (port 8001).

 ### 4. Test the System

@ -92,7 +93,7 @@ Assistant: [Both weather_agent and flight_agent respond simultaneously]
 Weather     Flight
  Agent       Agent
 (10510)     (10520)
- [Docker]    [Docker]
+ [Docker]    [Docker]
 ```

 Each agent:
@ -101,7 +102,7 @@ Each agent:
 3. Generates response using GPT-5.2
 4. Streams response back to user

-Both agents run as Docker containers and communicate with Plano via `host.docker.internal`.
+Both agents run as Docker containers and communicate with Plano running natively on the host.

 ## Observability

--- a/demos/agent_orchestration/travel_agents/config.yaml
+++ b/demos/agent_orchestration/travel_agents/config.yaml
@ -2,9 +2,9 @@ version: v0.3.0

 agents:
  - id: weather_agent
-    url: http://host.docker.internal:10510
+    url: http://localhost:10510
  - id: flight_agent
-    url: http://host.docker.internal:10520
+    url: http://localhost:10520

 model_providers:
  - model: openai/gpt-5.2
@ -55,3 +55,6 @@ listeners:

 tracing:
  random_sampling: 100
+  span_attributes:
+    header_prefixes:
+      - x-acme-
--- a/demos/agent_orchestration/travel_agents/docker-compose.yaml
+++ b/demos/agent_orchestration/travel_agents/docker-compose.yaml
@ -1,18 +1,5 @@

 services:
-  plano:
-    build:
-      context: ../../../
-      dockerfile: Dockerfile
-    ports:
-      - "12000:12000"
-      - "8001:8001"
-    environment:
-      - PLANO_CONFIG_PATH=/config/config.yaml
-      - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set}
-    volumes:
-      - ./config.yaml:/app/plano_config.yaml
-      - /etc/ssl/cert.pem:/etc/ssl/cert.pem
  weather-agent:
    build:
      context: .
--- a/demos/agent_orchestration/travel_agents/run_demo.sh
+++ b/demos/agent_orchestration/travel_agents/run_demo.sh
@ -0,0 +1,51 @@
+#!/bin/bash
+set -e
+
+# Function to start the demo
+start_demo() {
+  # Create .env (picked up by Docker Compose) from the caller's environment unless one exists.
+  if [ -f ".env" ]; then
+    echo ".env file already exists. Skipping creation."
+  else
+    # Validate the required keys first; diagnostics go to stderr.
+    if [ -z "$OPENAI_API_KEY" ]; then
+      echo "Error: OPENAI_API_KEY environment variable is not set for the demo." >&2
+      exit 1
+    fi
+    if [ -z "$AEROAPI_KEY" ]; then
+      echo "Error: AEROAPI_KEY environment variable is not set for the demo." >&2
+      exit 1
+    fi
+
+    echo "Creating .env file..."
+    # The file holds secrets: the subshell's umask creates it owner-only (0600).
+    (umask 077 && printf '%s\n' "OPENAI_API_KEY=$OPENAI_API_KEY" "AEROAPI_KEY=$AEROAPI_KEY" > .env)
+    echo ".env file created with API keys."
+  fi
+
+  # Start Plano natively on the host.
+  echo "Starting Plano with config.yaml..."
+  planoai up config.yaml
+
+  # Start the agents and supporting services in the background.
+  echo "Starting agents using Docker Compose..."
+  docker compose up -d
+}
+
+# Function to stop the demo
+stop_demo() {
+  # Best-effort teardown: always attempt both steps, then report the last failure.
+  local rc=0
+  echo "Stopping Docker Compose services..."
+  docker compose down || rc=$?
+  echo "Stopping Plano..."
+  planoai down || rc=$?
+  return "$rc"
+}
+
+# Main script logic
+# "down" tears the demo down; anything else (or no argument) starts it.
+case "$1" in
+  down) stop_demo ;;
+  *) start_demo ;;
+esac
--- a/demos/agent_orchestration/travel_agents/test.rest
+++ b/demos/agent_orchestration/travel_agents/test.rest
@ -3,9 +3,16 @@
 ### Travel Agent Chat Completion Request
 POST {{llm_endpoint}}/v1/chat/completions HTTP/1.1
 Content-Type: application/json
+X-Acme-Workspace-Id: ws_7e2c5d91b4224f59b0e6a4e0125c21b3
+X-Acme-Tenant-Id: ten_4102a8c7fa6542b084b395d2df184a9a
+X-Acme-User-Id: usr_19df7e6751b846f9ba026776e3c12abe
+X-Acme-Admin-Level: 3
+X-Acme-Environment: production
+X-Acme-Is-Internal: false
+X-Acme-Cost-Center: HD100

 {
-  "model": "gpt-4o",
+  "model": "gpt-5.2",
  "messages": [
    {
      "role": "user",
@ -20,7 +27,28 @@ Content-Type: application/json
      "content": "What is one Alaska flight that goes direct to Atlanta from Seattle?"
    }
  ],
-  "max_tokens": 1000,
+  "max_completion_tokens": 1000,
+  "stream": false,
+  "temperature": 1.0
+}
+
+
+### Travel Agent Request (prefix mismatch - ignored)
+POST {{llm_endpoint}}/v1/chat/completions HTTP/1.1
+Content-Type: application/json
+X-Other-Workspace-Id: ws_7e2c5d91b4224f59b0e6a4e0125c21b3
+X-Other-Tenant-Id: ten_4102a8c7fa6542b084b395d2df184a9a
+X-Other-User-Id: usr_19df7e6751b846f9ba026776e3c12abe
+
+{
+  "model": "gpt-5.2",
+  "messages": [
+    {
+      "role": "user",
+      "content": "What's the weather in Seattle?"
+    }
+  ],
+  "max_completion_tokens": 1000,
  "stream": false,
  "temperature": 1.0
 }
--- a/demos/filter_chains/http_filter/README.md
+++ b/demos/filter_chains/http_filter/README.md
@ -35,21 +35,21 @@ This demo consists of four components:

 ## Quick Start

-### 1. Start everything with Docker Compose
+### 1. Start the demo
 ```bash
-docker compose up --build
+export OPENAI_API_KEY="your-key"
+./run_demo.sh
 ```

-This brings up:
+This starts Plano natively and brings up the following services via Docker Compose:
 - Input Guards MCP server on port 10500
 - Query Rewriter MCP server on port 10501
 - Context Builder MCP server on port 10502
 - RAG Agent REST server on port 10505
- Plano listener on port 8001 (and gateway on 12000)
 - Jaeger UI for viewing traces at http://localhost:16686
 - AnythingLLM at http://localhost:3001 for interactive queries

-> Set `OPENAI_API_KEY` in your environment before running; `LLM_GATEWAY_ENDPOINT` defaults to `http://host.docker.internal:12000/v1`.
+Plano runs natively on the host (ports 8001 and 12000).

 ### 2. Test the system

@ -74,16 +74,16 @@ The `config.yaml` defines how agents are connected:
 ```yaml
 filters:
  - id: input_guards
-    url: http://host.docker.internal:10500
+    url: http://localhost:10500
    # type: mcp (default)
    # tool: input_guards (default - same as filter id)

  - id: query_rewriter
-    url: http://host.docker.internal:10501
+    url: http://localhost:10501
    # type: mcp (default)

  - id: context_builder
-    url: http://host.docker.internal:10502
+    url: http://localhost:10502
 ```

 ## How It Works
--- a/demos/filter_chains/http_filter/docker-compose.yaml
+++ b/demos/filter_chains/http_filter/docker-compose.yaml
@ -11,19 +11,6 @@ services:
    environment:
      - LLM_GATEWAY_ENDPOINT=${LLM_GATEWAY_ENDPOINT:-http://host.docker.internal:12000/v1}
      - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set}
-  plano:
-    build:
-      context: ../../../
-      dockerfile: Dockerfile
-    ports:
-      - "12000:12000"
-      - "8001:8001"
-    environment:
-      - PLANO_CONFIG_PATH=/config/config.yaml
-      - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set}
-    volumes:
-      - ./config.yaml:/app/plano_config.yaml
-      - /etc/ssl/cert.pem:/etc/ssl/cert.pem
  jaeger:
    build:
      context: ../../shared/jaeger
@ -41,7 +28,7 @@ services:
    environment:
      - STORAGE_DIR=/app/server/storage
      - LLM_PROVIDER=generic-openai
-      - GENERIC_OPEN_AI_BASE_PATH=http://plano:8001/v1
+      - GENERIC_OPEN_AI_BASE_PATH=http://host.docker.internal:8001/v1
      - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
      - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
      - GENERIC_OPEN_AI_API_KEY=sk-placeholder
--- a/demos/filter_chains/http_filter/run_demo.sh
+++ b/demos/filter_chains/http_filter/run_demo.sh
@ -0,0 +1,46 @@
+#!/bin/bash
+set -e
+
+# Function to start the demo
+start_demo() {
+  # Create .env (picked up by Docker Compose) from the caller's environment unless one exists.
+  if [ -f ".env" ]; then
+    echo ".env file already exists. Skipping creation."
+  else
+    # Refuse to continue without the key; diagnostics go to stderr.
+    if [ -z "$OPENAI_API_KEY" ]; then
+      echo "Error: OPENAI_API_KEY environment variable is not set for the demo." >&2
+      exit 1
+    fi
+    echo "Creating .env file..."
+    # The file holds a secret: the subshell's umask creates it owner-only (0600).
+    (umask 077 && echo "OPENAI_API_KEY=$OPENAI_API_KEY" > .env)
+    echo ".env file created with OPENAI_API_KEY."
+  fi
+
+  # Start Plano natively on the host.
+  echo "Starting Plano with config.yaml..."
+  planoai up config.yaml
+
+  # Start the remaining services in the background.
+  echo "Starting services using Docker Compose..."
+  docker compose up -d
+}
+
+# Function to stop the demo
+stop_demo() {
+  # Best-effort teardown: always attempt both steps, then report the last failure.
+  local rc=0
+  echo "Stopping Docker Compose services..."
+  docker compose down || rc=$?
+  echo "Stopping Plano..."
+  planoai down || rc=$?
+  return "$rc"
+}
+
+# Main script logic
+# "down" tears the demo down; anything else (or no argument) starts it.
+case "$1" in
+  down) stop_demo ;;
+  *) start_demo ;;
+esac
--- a/demos/filter_chains/mcp_filter/README.md
+++ b/demos/filter_chains/mcp_filter/README.md
@ -35,21 +35,21 @@ This demo consists of four components:

 ## Quick Start

-### 1. Start everything with Docker Compose
+### 1. Start the demo
 ```bash
-docker compose up --build
+export OPENAI_API_KEY="your-key"
+./run_demo.sh
 ```

-This brings up:
+This starts Plano natively and brings up the following services via Docker Compose:
 - Input Guards MCP server on port 10500
 - Query Rewriter MCP server on port 10501
 - Context Builder MCP server on port 10502
 - RAG Agent REST server on port 10505
- Plano listener on port 8001 (and gateway on 12000)
 - Jaeger UI for viewing traces at http://localhost:16686
 - AnythingLLM at http://localhost:3001 for interactive queries

-> Set `OPENAI_API_KEY` in your environment before running; `LLM_GATEWAY_ENDPOINT` defaults to `http://host.docker.internal:12000/v1`.
+Plano runs natively on the host (ports 8001 and 12000).

 ### 2. Test the system

@ -74,16 +74,16 @@ The `config.yaml` defines how agents are connected:
 ```yaml
 filters:
  - id: input_guards
-    url: http://host.docker.internal:10500
+    url: http://localhost:10500
    # type: mcp (default)
    # tool: input_guards (default - same as filter id)

  - id: query_rewriter
-    url: http://host.docker.internal:10501
+    url: http://localhost:10501
    # type: mcp (default)

  - id: context_builder
-    url: http://host.docker.internal:10502
+    url: http://localhost:10502
 ```

 ## How It Works
--- a/demos/filter_chains/mcp_filter/config.yaml
+++ b/demos/filter_chains/mcp_filter/config.yaml
@ -2,21 +2,21 @@ version: v0.3.0

 agents:
  - id: rag_agent
-    url: http://host.docker.internal:10505
+    url: http://localhost:10505

 filters:
  - id: input_guards
-    url: http://host.docker.internal:10500
+    url: http://localhost:10500
    # type: mcp (default)
    # transport: streamable-http (default)
    # tool: input_guards (default - same as filter id)
  - id: query_rewriter
-    url: http://host.docker.internal:10501
+    url: http://localhost:10501
    # type: mcp (default)
    # transport: streamable-http (default)
    # tool: query_rewriter (default - same as filter id)
  - id: context_builder
-    url: http://host.docker.internal:10502
+    url: http://localhost:10502

 model_providers:
  - model: openai/gpt-4o-mini
--- a/demos/filter_chains/mcp_filter/docker-compose.yaml
+++ b/demos/filter_chains/mcp_filter/docker-compose.yaml
@ -11,21 +11,6 @@ services:
    environment:
      - LLM_GATEWAY_ENDPOINT=${LLM_GATEWAY_ENDPOINT:-http://host.docker.internal:12000/v1}
      - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set}
-  plano:
-    build:
-      context: ../../../
-      dockerfile: Dockerfile
-    ports:
-      - "11000:11000"
-      - "12001:12001"
-      - "12000:12000"
-      - "8001:8001"
-    environment:
-      - PLANO_CONFIG_PATH=/config/config.yaml
-      - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set}
-    volumes:
-      - ./config.yaml:/app/plano_config.yaml
-      - /etc/ssl/cert.pem:/etc/ssl/cert.pem
  jaeger:
    build:
      context: ../../shared/jaeger
@ -43,7 +28,7 @@ services:
    environment:
      - STORAGE_DIR=/app/server/storage
      - LLM_PROVIDER=generic-openai
-      - GENERIC_OPEN_AI_BASE_PATH=http://plano:8001/v1
+      - GENERIC_OPEN_AI_BASE_PATH=http://host.docker.internal:8001/v1
      - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
      - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
      - GENERIC_OPEN_AI_API_KEY=sk-placeholder
--- a/demos/filter_chains/mcp_filter/run_demo.sh
+++ b/demos/filter_chains/mcp_filter/run_demo.sh
@ -0,0 +1,46 @@
+#!/bin/bash
+set -e
+
+# Function to start the demo
+start_demo() {
+  # Create .env (picked up by Docker Compose) from the caller's environment unless one exists.
+  if [ -f ".env" ]; then
+    echo ".env file already exists. Skipping creation."
+  else
+    # Refuse to continue without the key; diagnostics go to stderr.
+    if [ -z "$OPENAI_API_KEY" ]; then
+      echo "Error: OPENAI_API_KEY environment variable is not set for the demo." >&2
+      exit 1
+    fi
+    echo "Creating .env file..."
+    # The file holds a secret: the subshell's umask creates it owner-only (0600).
+    (umask 077 && echo "OPENAI_API_KEY=$OPENAI_API_KEY" > .env)
+    echo ".env file created with OPENAI_API_KEY."
+  fi
+
+  # Start Plano natively on the host.
+  echo "Starting Plano with config.yaml..."
+  planoai up config.yaml
+
+  # Start the remaining services in the background.
+  echo "Starting services using Docker Compose..."
+  docker compose up -d
+}
+
+# Function to stop the demo
+stop_demo() {
+  # Best-effort teardown: always attempt both steps, then report the last failure.
+  local rc=0
+  echo "Stopping Docker Compose services..."
+  docker compose down || rc=$?
+  echo "Stopping Plano..."
+  planoai down || rc=$?
+  return "$rc"
+}
+
+# Main script logic
+# "down" tears the demo down; anything else (or no argument) starts it.
+case "$1" in
+  down) stop_demo ;;
+  *) start_demo ;;
+esac
--- a/demos/getting_started/llm_gateway/config.yaml
+++ b/demos/getting_started/llm_gateway/config.yaml
@ -44,7 +44,7 @@ model_providers:
    access_key: $TOGETHER_API_KEY

  - model: custom/test-model
-    base_url: http://host.docker.internal:11223
+    base_url: http://localhost:11223
    provider_interface: openai

 tracing:
--- a/demos/getting_started/llm_gateway/docker-compose.yaml
+++ b/demos/getting_started/llm_gateway/docker-compose.yaml
@ -1,20 +1,5 @@
 services:

-  plano:
-    build:
-      context: ../../../
-      dockerfile: Dockerfile
-    ports:
-      - "12000:12000"
-      - "12001:12001"
-    environment:
-      - PLANO_CONFIG_PATH=/app/plano_config.yaml
-      - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set}
-      - OTEL_TRACING_GRPC_ENDPOINT=http://host.docker.internal:4317
-    volumes:
-      - ./config.yaml:/app/plano_config.yaml:ro
-      - /etc/ssl/cert.pem:/etc/ssl/cert.pem
-
  anythingllm:
    image: mintplexlabs/anythingllm
    restart: always
@ -25,7 +10,7 @@ services:
    environment:
      - STORAGE_DIR=/app/server/storage
      - LLM_PROVIDER=generic-openai
-      - GENERIC_OPEN_AI_BASE_PATH=http://plano:12000/v1
+      - GENERIC_OPEN_AI_BASE_PATH=http://host.docker.internal:12000/v1
      - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
      - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
      - GENERIC_OPEN_AI_API_KEY=sk-placeholder
--- a/demos/getting_started/weather_forecast/config.yaml
+++ b/demos/getting_started/weather_forecast/config.yaml
@ -11,7 +11,7 @@ listeners:

 endpoints:
  weather_forecast_service:
-    endpoint: host.docker.internal:18083
+    endpoint: localhost:18083
    connect_timeout: 0.005s

 overrides:
--- a/demos/integrations/ollama/config.yaml
+++ b/demos/integrations/ollama/config.yaml
@ -9,7 +9,7 @@ model_providers:

  - model: my_llm_provider/llama3.2
    provider_interface: openai
-    base_url: http://host.docker.internal:11434
+    base_url: http://localhost:11434
    default: true

 system_prompt: |
--- a/demos/llm_routing/claude_code_router/README.md
+++ b/demos/llm_routing/claude_code_router/README.md
@ -39,8 +39,8 @@ Your Request → Plano → Suitable Model → Response
 # Install Claude Code if you haven't already
 npm install -g @anthropic-ai/claude-code

-# Ensure Docker is running
-docker --version
+# Install Plano CLI
+pip install planoai
 ```

 ### Step 1: Get Configuration
--- a/demos/llm_routing/claude_code_router/config.yaml
+++ b/demos/llm_routing/claude_code_router/config.yaml
@ -28,7 +28,7 @@ model_providers:

  # Ollama Models
  - model: ollama/llama3.1
-    base_url: http://host.docker.internal:11434
+    base_url: http://localhost:11434


 # Model aliases - friendly names that map to actual provider names
--- a/demos/llm_routing/model_alias_routing/config_with_aliases.yaml
+++ b/demos/llm_routing/model_alias_routing/config_with_aliases.yaml
@ -49,7 +49,7 @@ model_providers:

  # Ollama Models
  - model: ollama/llama3.1
-    base_url: http://host.docker.internal:11434
+    base_url: http://localhost:11434

  # Grok (xAI) Models
  - model: xai/grok-4-0709
--- a/demos/llm_routing/model_routing_service/README.md
+++ b/demos/llm_routing/model_routing_service/README.md
@ -0,0 +1,92 @@
+# Model Routing Service Demo
+
+This demo shows how to use the `/routing/v1/*` endpoints to get routing decisions without proxying requests to an LLM. The endpoint accepts standard LLM request formats and returns which model Plano's router would select.
+
+## Setup
+
+Make sure you have Plano CLI installed (`pip install planoai` or `uv tool install planoai`).
+
+```bash
+export OPENAI_API_KEY=<your-key>
+export ANTHROPIC_API_KEY=<your-key>
+```
+
+Start Plano:
+```bash
+cd demos/llm_routing/model_routing_service
+planoai up config.yaml
+```
+
+## Run the demo
+
+```bash
+./demo.sh
+```
+
+## Endpoints
+
+All three LLM API formats are supported:
+
+| Endpoint | Format |
+|---|---|
+| `POST /routing/v1/chat/completions` | OpenAI Chat Completions |
+| `POST /routing/v1/messages` | Anthropic Messages |
+| `POST /routing/v1/responses` | OpenAI Responses API |
+
+## Example
+
+```bash
+curl http://localhost:12000/routing/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gpt-4o-mini",
+    "messages": [{"role": "user", "content": "Write a Python function for binary search"}]
+  }'
+```
+
+Response:
+```json
+{
+    "model": "anthropic/claude-sonnet-4-20250514",
+    "route": "code_generation",
+    "trace_id": "c16d1096c1af4a17abb48fb182918a88"
+}
+```
+
+The response tells you which model would handle this request and which route was matched, without actually making the LLM call.
+
+## Demo Output
+
+```
+=== Model Routing Service Demo ===
+
+--- 1. Code generation query (OpenAI format) ---
+{
+    "model": "anthropic/claude-sonnet-4-20250514",
+    "route": "code_generation",
+    "trace_id": "c16d1096c1af4a17abb48fb182918a88"
+}
+
+--- 2. Complex reasoning query (OpenAI format) ---
+{
+    "model": "openai/gpt-4o",
+    "route": "complex_reasoning",
+    "trace_id": "30795e228aff4d7696f082ed01b75ad4"
+}
+
+--- 3. Simple query - no routing match (OpenAI format) ---
+{
+    "model": "none",
+    "route": null,
+    "trace_id": "ae0b6c3b220d499fb5298ac63f4eac0e"
+}
+
+--- 4. Code generation query (Anthropic format) ---
+{
+    "model": "anthropic/claude-sonnet-4-20250514",
+    "route": "code_generation",
+    "trace_id": "26be822bbdf14a3ba19fe198e55ea4a9"
+}
+
+=== Demo Complete ===
+```
--- a/demos/llm_routing/model_routing_service/config.yaml
+++ b/demos/llm_routing/model_routing_service/config.yaml
@ -0,0 +1,27 @@
+version: v0.3.0
+
+listeners:
+  - type: model
+    name: model_listener
+    port: 12000
+
+model_providers:
+
+  - model: openai/gpt-4o-mini
+    access_key: $OPENAI_API_KEY
+    default: true
+
+  - model: openai/gpt-4o
+    access_key: $OPENAI_API_KEY
+    routing_preferences:
+      - name: complex_reasoning
+        description: complex reasoning tasks, multi-step analysis, or detailed explanations
+
+  - model: anthropic/claude-sonnet-4-20250514
+    access_key: $ANTHROPIC_API_KEY
+    routing_preferences:
+      - name: code_generation
+        description: generating new code, writing functions, or creating boilerplate
+
+tracing:
+  random_sampling: 100
--- a/demos/llm_routing/model_routing_service/demo.sh
+++ b/demos/llm_routing/model_routing_service/demo.sh
@ -0,0 +1,65 @@
+#!/bin/bash
+set -eo pipefail # a failed curl must fail the pipeline, not only json.tool
+
+PLANO_URL="${PLANO_URL:-http://localhost:12000}"
+
+echo "=== Model Routing Service Demo ==="
+echo ""
+echo "This demo shows how to use the /routing/v1/* endpoints to get"
+echo "routing decisions without actually proxying the request to an LLM."
+echo ""
+
+# --- Example 1: OpenAI Chat Completions format ---
+echo "--- 1. Code generation query (OpenAI format) ---"
+echo ""
+curl -sS "$PLANO_URL/routing/v1/chat/completions" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gpt-4o-mini",
+    "messages": [
+      {"role": "user", "content": "Write a Python function that implements binary search on a sorted array"}
+    ]
+  }' | python3 -m json.tool
+echo ""
+
+# --- Example 2: Complex reasoning query ---
+echo "--- 2. Complex reasoning query (OpenAI format) ---"
+echo ""
+curl -sS "$PLANO_URL/routing/v1/chat/completions" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gpt-4o-mini",
+    "messages": [
+      {"role": "user", "content": "Explain the trade-offs between microservices and monolithic architectures, considering scalability, team structure, and operational complexity"}
+    ]
+  }' | python3 -m json.tool
+echo ""
+
+# --- Example 3: Simple query (no routing match) ---
+echo "--- 3. Simple query - no routing match (OpenAI format) ---"
+echo ""
+curl -sS "$PLANO_URL/routing/v1/chat/completions" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gpt-4o-mini",
+    "messages": [
+      {"role": "user", "content": "What is the capital of France?"}
+    ]
+  }' | python3 -m json.tool
+echo ""
+
+# --- Example 4: Anthropic Messages format ---
+echo "--- 4. Code generation query (Anthropic format) ---"
+echo ""
+curl -sS "$PLANO_URL/routing/v1/messages" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gpt-4o-mini",
+    "max_tokens": 1024,
+    "messages": [
+      {"role": "user", "content": "Create a REST API endpoint in Rust using actix-web that handles user registration"}
+    ]
+  }' | python3 -m json.tool
+echo ""
+
+echo "=== Demo Complete ==="
--- a/demos/llm_routing/openclaw_routing/README.md
+++ b/demos/llm_routing/openclaw_routing/README.md
@ -23,7 +23,6 @@ Plano uses a [preference-aligned router](https://arxiv.org/abs/2506.16655) to an

 ## Prerequisites

- **Docker** running
 - **Plano CLI**: `uv tool install planoai` or `pip install planoai`
 - **OpenClaw**: `npm install -g openclaw@latest`
 - **API keys**:
@ -43,7 +42,7 @@ export ANTHROPIC_API_KEY="your-anthropic-key"

 ```bash
 cd demos/llm_routing/openclaw_routing
-planoai up --service plano --foreground
+planoai up config.yaml
 ```

 ### 3. Set Up OpenClaw
--- a/demos/llm_routing/preference_based_routing/README.md
+++ b/demos/llm_routing/preference_based_routing/README.md
@ -3,25 +3,23 @@ This demo shows how you can use user preferences to route user prompts to approp

 ## How to start the demo

-Make sure your machine is up to date with [latest version of plano]([url](https://github.com/katanemo/plano/tree/main?tab=readme-ov-file#prerequisites)). And you have activated the virtual environment.
+Make sure you have Plano CLI installed (`pip install planoai` or `uv tool install planoai`).

-
-1. start anythingllm
 ```bash
-(venv) $ cd demos/llm_routing/preference_based_routing
-(venv) $ docker compose up -d
+cd demos/llm_routing/preference_based_routing
+./run_demo.sh
 ```
-2. start plano in the foreground
+
+Or manually:
+
+1. Start Plano
 ```bash
-(venv) $ planoai up --service plano --foreground
-# Or if installed with uv: uvx planoai up --service plano --foreground
-2025-05-30 18:00:09,953 - planoai.main - INFO - Starting plano cli version: 0.4.8
-2025-05-30 18:00:09,953 - planoai.main - INFO - Validating /Users/adilhafeez/src/intelligent-prompt-gateway/demos/llm_routing/preference_based_routing/config.yaml
-2025-05-30 18:00:10,422 - cli.core - INFO - Starting plano gateway, image name: plano, tag: katanemo/plano:0.4.8
-2025-05-30 18:00:10,662 - cli.core - INFO - plano status: running, health status: starting
-2025-05-30 18:00:11,712 - cli.core - INFO - plano status: running, health status: starting
-2025-05-30 18:00:12,761 - cli.core - INFO - plano is running and is healthy!
-...
+planoai up config.yaml
+```
+
+2. Start AnythingLLM
+```bash
+docker compose up -d
 ```

 3. open AnythingLLM http://localhost:3001/
--- a/demos/llm_routing/preference_based_routing/docker-compose.yaml
+++ b/demos/llm_routing/preference_based_routing/docker-compose.yaml
@ -1,23 +1,5 @@
 services:

-  plano:
-    build:
-      context: ../../../
-      dockerfile: Dockerfile
-    ports:
-      - "12000:12000"
-      - "12001:12001"
-    environment:
-      - PLANO_CONFIG_PATH=/app/plano_config.yaml
-      - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set}
-      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:?ANTHROPIC_API_KEY environment variable is required but not set}
-      - OTEL_TRACING_GRPC_ENDPOINT=http://host.docker.internal:4317
-      - OTEL_TRACING_ENABLED=true
-      - RUST_LOG=debug
-    volumes:
-      - ./config.yaml:/app/plano_config.yaml:ro
-      - /etc/ssl/cert.pem:/etc/ssl/cert.pem
-
  anythingllm:
    image: mintplexlabs/anythingllm
    restart: always
@ -28,7 +10,7 @@ services:
    environment:
      - STORAGE_DIR=/app/server/storage
      - LLM_PROVIDER=generic-openai
-      - GENERIC_OPEN_AI_BASE_PATH=http://plano:12000/v1
+      - GENERIC_OPEN_AI_BASE_PATH=http://host.docker.internal:12000/v1
      - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
      - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
      - GENERIC_OPEN_AI_API_KEY=sk-placeholder
--- a/demos/llm_routing/preference_based_routing/plano_config_local.yaml
+++ b/demos/llm_routing/preference_based_routing/plano_config_local.yaml
@ -13,7 +13,7 @@ model_providers:

  - name: arch-router
    model: arch/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M
-    base_url: http://host.docker.internal:11434
+    base_url: http://localhost:11434

  - model: openai/gpt-4o-mini
    access_key: $OPENAI_API_KEY
--- a/demos/llm_routing/preference_based_routing/run_demo.sh
+++ b/demos/llm_routing/preference_based_routing/run_demo.sh
@ -0,0 +1,52 @@
+#!/bin/bash
+set -e
+
+# Function to start the demo
+start_demo() {
+  # Create .env (picked up by Docker Compose) from the caller's environment unless one exists.
+  if [ -f ".env" ]; then
+    echo ".env file already exists. Skipping creation."
+  else
+    # OPENAI_API_KEY is mandatory; ANTHROPIC_API_KEY is optional and only warned about.
+    if [ -z "$OPENAI_API_KEY" ]; then
+      echo "Error: OPENAI_API_KEY environment variable is not set for the demo." >&2
+      exit 1
+    fi
+    if [ -z "$ANTHROPIC_API_KEY" ]; then
+      echo "Warning: ANTHROPIC_API_KEY environment variable is not set. Anthropic features may not work." >&2
+    fi
+    echo "Creating .env file..."
+    (
+      umask 077 # the file holds secrets: create it owner-only (0600)
+      echo "OPENAI_API_KEY=$OPENAI_API_KEY" > .env
+      [ -z "$ANTHROPIC_API_KEY" ] || echo "ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY" >> .env
+    )
+    echo ".env file created with API keys."
+  fi
+
+  # Start Plano natively on the host.
+  echo "Starting Plano with config.yaml..."
+  planoai up config.yaml
+
+  # Start the remaining services in the background.
+  echo "Starting services using Docker Compose..."
+  docker compose up -d
+}
+
+# Function to stop the demo
+stop_demo() {
+  # Best-effort teardown: always attempt both steps, then report the last failure.
+  local rc=0
+  echo "Stopping Docker Compose services..."
+  docker compose down || rc=$?
+  echo "Stopping Plano..."
+  planoai down || rc=$?
+  return "$rc"
+}
+
+# Main script logic
+# "down" tears the demo down; anything else (or no argument) starts it.
+case "$1" in
+  down) stop_demo ;;
+  *) start_demo ;;
+esac
--- a/demos/shared/test_runner/run_demo_tests.sh
+++ b/demos/shared/test_runner/run_demo_tests.sh
@ -21,7 +21,7 @@ do
  echo "****************************************"
  cd ../../$demo
  echo "starting plano"
-  planoai up config.yaml
+  planoai up --docker config.yaml
  echo "starting docker containers"
  # only execute docker compose if demo is llm_routing/preference_based_routing
  if [ "$demo" == "llm_routing/preference_based_routing" ]; then
@ -38,7 +38,7 @@ do
    exit 1
  fi
  echo "stopping docker containers and plano"
-  planoai down
+  planoai down --docker
  docker compose down -v
  cd ../../shared/test_runner
 done