From f63d5de02c9717cca5f5b039ffa34984c6b02bb9 Mon Sep 17 00:00:00 2001 From: Adil Hafeez Date: Thu, 5 Mar 2026 07:35:25 -0800 Subject: [PATCH] Run plano natively by default (#744) --- .github/workflows/ci.yml | 65 ++- .github/workflows/publish-binaries.yml | 109 +++++ Dockerfile | 7 +- cli/planoai/consts.py | 14 +- cli/planoai/core.py | 5 +- cli/planoai/docker_cli.py | 26 +- cli/planoai/main.py | 173 ++++++- cli/planoai/native_binaries.py | 308 +++++++++++++ cli/planoai/native_runner.py | 434 ++++++++++++++++++ .../templates/coding_agent_routing.yaml | 2 +- cli/pyproject.toml | 4 + config/test_passthrough.yaml | 4 +- config/validate_plano_config.sh | 22 +- demos/advanced/multi_turn_rag/config.yaml | 2 +- .../multi_agent_crewai_langchain/README.md | 11 +- .../docker-compose.yaml | 22 +- .../multi_agent_crewai_langchain/run_demo.sh | 51 ++ .../travel_agents/README.md | 19 +- .../travel_agents/config.yaml | 4 +- .../travel_agents/docker-compose.yaml | 13 - .../travel_agents/run_demo.sh | 51 ++ demos/filter_chains/http_filter/README.md | 16 +- .../http_filter/docker-compose.yaml | 15 +- demos/filter_chains/http_filter/run_demo.sh | 46 ++ demos/filter_chains/mcp_filter/README.md | 16 +- demos/filter_chains/mcp_filter/config.yaml | 8 +- .../mcp_filter/docker-compose.yaml | 17 +- demos/filter_chains/mcp_filter/run_demo.sh | 46 ++ demos/getting_started/llm_gateway/config.yaml | 2 +- .../llm_gateway/docker-compose.yaml | 17 +- .../weather_forecast/config.yaml | 2 +- demos/integrations/ollama/config.yaml | 2 +- .../llm_routing/claude_code_router/README.md | 4 +- .../claude_code_router/config.yaml | 2 +- .../config_with_aliases.yaml | 2 +- demos/llm_routing/openclaw_routing/README.md | 3 +- .../preference_based_routing/README.md | 28 +- .../docker-compose.yaml | 20 +- .../plano_config_local.yaml | 2 +- .../preference_based_routing/run_demo.sh | 52 +++ demos/shared/test_runner/run_demo_tests.sh | 4 +- .../agent/function-calling-agent.yaml | 2 +- 
.../concepts/llm_providers/model_aliases.rst | 2 +- .../llm_providers/supported_providers.rst | 4 +- docs/source/get_started/quickstart.rst | 33 +- .../guides/observability/monitoring.rst | 2 +- docs/source/resources/deployment.rst | 49 +- .../includes/agents/agents_config.yaml | 4 +- .../includes/plano_config_agents_filters.yaml | 6 +- .../includes/plano_config_full_reference.yaml | 6 +- .../plano_config_full_reference_rendered.yaml | 14 +- tests/e2e/config_native_smoke.yaml | 11 + tests/e2e/run_e2e_tests.sh | 12 +- tests/e2e/run_model_alias_tests.sh | 6 +- tests/e2e/run_prompt_gateway_tests.sh | 6 +- tests/e2e/run_responses_state_tests.sh | 6 +- 56 files changed, 1557 insertions(+), 256 deletions(-) create mode 100644 .github/workflows/publish-binaries.yml create mode 100644 cli/planoai/native_binaries.py create mode 100644 cli/planoai/native_runner.py create mode 100755 demos/agent_orchestration/multi_agent_crewai_langchain/run_demo.sh create mode 100755 demos/agent_orchestration/travel_agents/run_demo.sh create mode 100755 demos/filter_chains/http_filter/run_demo.sh create mode 100755 demos/filter_chains/mcp_filter/run_demo.sh create mode 100755 demos/llm_routing/preference_based_routing/run_demo.sh create mode 100644 tests/e2e/config_native_smoke.yaml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9bbca4e7..cf1ebd49 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -53,6 +53,60 @@ jobs: - name: Run tests run: uv run pytest + # ────────────────────────────────────────────── + # Native mode smoke test — build from source & start natively + # ────────────────────────────────────────────── + native-smoke-test: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v6 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: "3.12" + + - name: Install uv + run: curl -LsSf https://astral.sh/uv/install.sh | sh + + - name: Install Rust + uses: 
dtolnay/rust-toolchain@stable + with: + targets: wasm32-wasip1 + + - name: Install planoai CLI + working-directory: ./cli + run: | + uv sync + uv tool install . + + - name: Build native binaries + run: planoai build + + - name: Start plano natively + env: + OPENAI_API_KEY: test-key-not-used + run: planoai up tests/e2e/config_native_smoke.yaml + + - name: Health check + run: | + for i in $(seq 1 30); do + if curl -sf http://localhost:12000/healthz > /dev/null 2>&1; then + echo "Health check passed" + exit 0 + fi + sleep 1 + done + echo "Health check failed after 30s" + cat ~/.plano/run/logs/envoy.log || true + cat ~/.plano/run/logs/brightstaff.log || true + exit 1 + + - name: Stop plano + if: always() + run: planoai down || true + # ────────────────────────────────────────────── # Single Docker build — shared by all downstream jobs # ────────────────────────────────────────────── @@ -98,7 +152,6 @@ jobs: # Validate plano config # ────────────────────────────────────────────── validate-config: - needs: docker-build runs-on: ubuntu-latest steps: - name: Checkout code @@ -109,14 +162,8 @@ jobs: with: python-version: "3.14" - - name: Download plano image - uses: actions/download-artifact@v7 - with: - name: plano-image - path: /tmp - - - name: Load plano image - run: docker load -i /tmp/plano-image.tar + - name: Install planoai + run: pip install ./cli - name: Validate plano config run: bash config/validate_plano_config.sh diff --git a/.github/workflows/publish-binaries.yml b/.github/workflows/publish-binaries.yml new file mode 100644 index 00000000..f2dab920 --- /dev/null +++ b/.github/workflows/publish-binaries.yml @@ -0,0 +1,109 @@ +name: Publish pre-compiled binaries (release) + +on: + release: + types: [published] + workflow_dispatch: + inputs: + tag: + description: "Release tag to upload binaries to (e.g. 
0.4.9)" + required: true + +permissions: + contents: write + +jobs: + build-wasm-plugins: + runs-on: ubuntu-latest + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + with: + targets: wasm32-wasip1 + + - name: Build WASM plugins + working-directory: crates + run: cargo build --release --target wasm32-wasip1 -p llm_gateway -p prompt_gateway + + - name: Compress and upload WASM plugins to release + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + gzip -k crates/target/wasm32-wasip1/release/prompt_gateway.wasm + gzip -k crates/target/wasm32-wasip1/release/llm_gateway.wasm + gh release upload "${{ github.event.release.tag_name || inputs.tag }}" \ + crates/target/wasm32-wasip1/release/prompt_gateway.wasm.gz \ + crates/target/wasm32-wasip1/release/llm_gateway.wasm.gz \ + --clobber + + build-brightstaff-linux-amd64: + runs-on: ubuntu-latest + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + + - name: Build brightstaff + working-directory: crates + run: cargo build --release -p brightstaff + + - name: Compress and upload brightstaff to release + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + cp crates/target/release/brightstaff brightstaff-linux-amd64 + gzip brightstaff-linux-amd64 + gh release upload "${{ github.event.release.tag_name || inputs.tag }}" \ + brightstaff-linux-amd64.gz \ + --clobber + + build-brightstaff-linux-arm64: + runs-on: [linux-arm64] + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + + - name: Build brightstaff + working-directory: crates + run: cargo build --release -p brightstaff + + - name: Compress and upload brightstaff to release + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + cp crates/target/release/brightstaff brightstaff-linux-arm64 + gzip brightstaff-linux-arm64 + gh release 
upload "${{ github.event.release.tag_name || inputs.tag }}" \ + brightstaff-linux-arm64.gz \ + --clobber + + build-brightstaff-darwin-arm64: + runs-on: macos-latest + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + + - name: Build brightstaff + working-directory: crates + run: cargo build --release -p brightstaff + + - name: Compress and upload brightstaff to release + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + cp crates/target/release/brightstaff brightstaff-darwin-arm64 + gzip brightstaff-darwin-arm64 + gh release upload "${{ github.event.release.tag_name || inputs.tag }}" \ + brightstaff-darwin-arm64.gz \ + --clobber diff --git a/Dockerfile b/Dockerfile index faeeec4d..43bb5d0c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,3 +1,6 @@ +# Envoy version — keep in sync with cli/planoai/consts.py ENVOY_VERSION +ARG ENVOY_VERSION=v1.37.0 + # --- Dependency cache --- FROM rust:1.93.0 AS deps RUN rustup -v target add wasm32-wasip1 @@ -40,7 +43,7 @@ COPY crates/brightstaff/src brightstaff/src RUN find common hermesllm brightstaff -name "*.rs" -exec touch {} + RUN cargo build --release -p brightstaff -FROM docker.io/envoyproxy/envoy:v1.37.0 AS envoy +FROM docker.io/envoyproxy/envoy:${ENVOY_VERSION} AS envoy FROM python:3.14-slim AS arch @@ -66,6 +69,8 @@ RUN pip install --no-cache-dir uv COPY cli/pyproject.toml ./ COPY cli/uv.lock ./ COPY cli/README.md ./ +COPY config/plano_config_schema.yaml /config/plano_config_schema.yaml +COPY config/envoy.template.yaml /config/envoy.template.yaml RUN uv run pip install --no-cache-dir . 
diff --git a/cli/planoai/consts.py b/cli/planoai/consts.py index 0cc5c3dd..13bf2d16 100644 --- a/cli/planoai/consts.py +++ b/cli/planoai/consts.py @@ -6,4 +6,16 @@ PLANO_COLOR = "#969FF4" SERVICE_NAME_ARCHGW = "plano" PLANO_DOCKER_NAME = "plano" PLANO_DOCKER_IMAGE = os.getenv("PLANO_DOCKER_IMAGE", "katanemo/plano:0.4.9") -DEFAULT_OTEL_TRACING_GRPC_ENDPOINT = "http://host.docker.internal:4317" +DEFAULT_OTEL_TRACING_GRPC_ENDPOINT = "http://localhost:4317" + +# Native mode constants +PLANO_HOME = os.path.join(os.path.expanduser("~"), ".plano") +PLANO_RUN_DIR = os.path.join(PLANO_HOME, "run") +PLANO_BIN_DIR = os.path.join(PLANO_HOME, "bin") +PLANO_PLUGINS_DIR = os.path.join(PLANO_HOME, "plugins") +ENVOY_VERSION = "v1.37.0" # keep in sync with Dockerfile ARG ENVOY_VERSION +NATIVE_PID_FILE = os.path.join(PLANO_RUN_DIR, "plano.pid") +DEFAULT_NATIVE_OTEL_TRACING_GRPC_ENDPOINT = "http://localhost:4317" + +PLANO_GITHUB_REPO = "katanemo/archgw" +PLANO_RELEASE_BASE_URL = f"https://github.com/{PLANO_GITHUB_REPO}/releases/download" diff --git a/cli/planoai/core.py b/cli/planoai/core.py index b7195f46..e9ddc7bd 100644 --- a/cli/planoai/core.py +++ b/cli/planoai/core.py @@ -33,8 +33,11 @@ def _get_gateway_ports(plano_config_file: str) -> list[int]: with open(plano_config_file) as f: plano_config_dict = yaml.safe_load(f) + model_providers = plano_config_dict.get("llm_providers") or plano_config_dict.get( + "model_providers" + ) listeners, _, _ = convert_legacy_listeners( - plano_config_dict.get("listeners"), plano_config_dict.get("llm_providers") + plano_config_dict.get("listeners"), model_providers ) all_ports = [listener.get("port") for listener in listeners] diff --git a/cli/planoai/docker_cli.py b/cli/planoai/docker_cli.py index f691cfb4..0e66c781 100644 --- a/cli/planoai/docker_cli.py +++ b/cli/planoai/docker_cli.py @@ -40,11 +40,35 @@ def docker_remove_container(container: str) -> str: return result.returncode +def _prepare_docker_config(plano_config_file: str) -> str: + 
"""Copy config to a temp file, replacing localhost with host.docker.internal. + + Configs use localhost for native-first mode, but Docker containers need + host.docker.internal to reach services on the host. + """ + import tempfile + + with open(plano_config_file, "r") as f: + content = f.read() + + if "localhost" not in content: + return plano_config_file + + content = content.replace("localhost", "host.docker.internal") + tmp = tempfile.NamedTemporaryFile( + mode="w", suffix=".yaml", prefix="plano_config_", delete=False + ) + tmp.write(content) + tmp.close() + return tmp.name + + def docker_start_plano_detached( plano_config_file: str, env: dict, gateway_ports: list[int], ) -> str: + docker_config = _prepare_docker_config(plano_config_file) env_args = [item for key, value in env.items() for item in ["-e", f"{key}={value}"]] port_mappings = [ @@ -58,7 +82,7 @@ def docker_start_plano_detached( port_mappings_args = [item for port in port_mappings for item in ("-p", port)] volume_mappings = [ - f"{plano_config_file}:/app/plano_config.yaml:ro", + f"{docker_config}:/app/plano_config.yaml:ro", ] volume_mappings_args = [ item for volume in volume_mappings for item in ("-v", volume) diff --git a/cli/planoai/main.py b/cli/planoai/main.py index a93e4c4d..82d2039f 100644 --- a/cli/planoai/main.py +++ b/cli/planoai/main.py @@ -30,6 +30,7 @@ from planoai.init_cmd import init as init_cmd from planoai.trace_cmd import trace as trace_cmd, start_trace_listener_background from planoai.consts import ( DEFAULT_OTEL_TRACING_GRPC_ENDPOINT, + DEFAULT_NATIVE_OTEL_TRACING_GRPC_ENDPOINT, PLANO_DOCKER_IMAGE, PLANO_DOCKER_NAME, ) @@ -130,7 +131,13 @@ def main(ctx, version): @click.command() -def build(): +@click.option( + "--docker", + default=False, + help="Build the Docker image instead of native binaries.", + is_flag=True, +) +def build(docker): """Build Plano from source. 
Works from any directory within the repo.""" # Find the repo root @@ -141,6 +148,68 @@ def build(): ) sys.exit(1) + if not docker: + import shutil + + crates_dir = os.path.join(repo_root, "crates") + console = _console() + _print_cli_header(console) + + if not shutil.which("cargo"): + console.print( + "[red]✗[/red] [bold]cargo[/bold] not found. " + "Install Rust: [cyan]https://rustup.rs[/cyan]" + ) + sys.exit(1) + + console.print("[dim]Building WASM plugins (wasm32-wasip1)...[/dim]") + try: + subprocess.run( + [ + "cargo", + "build", + "--release", + "--target", + "wasm32-wasip1", + "-p", + "llm_gateway", + "-p", + "prompt_gateway", + ], + cwd=crates_dir, + check=True, + ) + console.print("[green]✓[/green] WASM plugins built") + except subprocess.CalledProcessError as e: + console.print(f"[red]✗[/red] WASM build failed: {e}") + sys.exit(1) + + console.print("[dim]Building brightstaff (native)...[/dim]") + try: + subprocess.run( + [ + "cargo", + "build", + "--release", + "-p", + "brightstaff", + ], + cwd=crates_dir, + check=True, + ) + console.print("[green]✓[/green] brightstaff built") + except subprocess.CalledProcessError as e: + console.print(f"[red]✗[/red] brightstaff build failed: {e}") + sys.exit(1) + + wasm_dir = os.path.join(crates_dir, "target", "wasm32-wasip1", "release") + native_dir = os.path.join(crates_dir, "target", "release") + console.print(f"\n[bold]Build artifacts:[/bold]") + console.print(f" {os.path.join(wasm_dir, 'prompt_gateway.wasm')}") + console.print(f" {os.path.join(wasm_dir, 'llm_gateway.wasm')}") + console.print(f" {os.path.join(native_dir, 'brightstaff')}") + return + dockerfile_path = os.path.join(repo_root, "Dockerfile") if not os.path.exists(dockerfile_path): @@ -192,7 +261,13 @@ def build(): help="Port for the OTLP trace collector (default: 4317).", show_default=True, ) -def up(file, path, foreground, with_tracing, tracing_port): +@click.option( + "--docker", + default=False, + help="Run Plano inside Docker instead of natively.", + 
is_flag=True, +) +def up(file, path, foreground, with_tracing, tracing_port, docker): """Starts Plano.""" from rich.status import Status @@ -209,26 +284,51 @@ def up(file, path, foreground, with_tracing, tracing_port): ) sys.exit(1) - with Status( - "[dim]Validating configuration[/dim]", spinner="dots", spinner_style="dim" - ): - ( - validation_return_code, - _, - validation_stderr, - ) = docker_validate_plano_schema(plano_config_file) + if not docker: + from planoai.native_runner import native_validate_config - if validation_return_code != 0: - console.print(f"[red]✗[/red] Validation failed") - if validation_stderr: - console.print(f" [dim]{validation_stderr.strip()}[/dim]") - sys.exit(1) + with Status( + "[dim]Validating configuration[/dim]", + spinner="dots", + spinner_style="dim", + ): + try: + native_validate_config(plano_config_file) + except SystemExit: + console.print(f"[red]✗[/red] Validation failed") + sys.exit(1) + except Exception as e: + console.print(f"[red]✗[/red] Validation failed") + console.print(f" [dim]{str(e).strip()}[/dim]") + sys.exit(1) + else: + with Status( + "[dim]Validating configuration (Docker)[/dim]", + spinner="dots", + spinner_style="dim", + ): + ( + validation_return_code, + _, + validation_stderr, + ) = docker_validate_plano_schema(plano_config_file) + + if validation_return_code != 0: + console.print(f"[red]✗[/red] Validation failed") + if validation_stderr: + console.print(f" [dim]{validation_stderr.strip()}[/dim]") + sys.exit(1) console.print(f"[green]✓[/green] Configuration valid") # Set up environment + default_otel = ( + DEFAULT_OTEL_TRACING_GRPC_ENDPOINT + if docker + else DEFAULT_NATIVE_OTEL_TRACING_GRPC_ENDPOINT + ) env_stage = { - "OTEL_TRACING_GRPC_ENDPOINT": DEFAULT_OTEL_TRACING_GRPC_ENDPOINT, + "OTEL_TRACING_GRPC_ENDPOINT": default_otel, } env = os.environ.copy() env.pop("PATH", None) @@ -296,13 +396,20 @@ def up(file, path, foreground, with_tracing, tracing_port): sys.exit(1) # Update the OTEL endpoint so the gateway 
sends traces to the right port - env_stage[ - "OTEL_TRACING_GRPC_ENDPOINT" - ] = f"http://host.docker.internal:{tracing_port}" + tracing_host = "host.docker.internal" if docker else "localhost" + otel_endpoint = f"http://{tracing_host}:{tracing_port}" + env_stage["OTEL_TRACING_GRPC_ENDPOINT"] = otel_endpoint env.update(env_stage) try: - start_plano(plano_config_file, env, foreground=foreground) + if not docker: + from planoai.native_runner import start_native + + start_native( + plano_config_file, env, foreground=foreground, with_tracing=with_tracing + ) + else: + start_plano(plano_config_file, env, foreground=foreground) # When tracing is enabled but --foreground is not, keep the process # alive so the OTLP collector continues to receive spans. @@ -320,15 +427,31 @@ def up(file, path, foreground, with_tracing, tracing_port): @click.command() -def down(): +@click.option( + "--docker", + default=False, + help="Stop a Docker-based Plano instance.", + is_flag=True, +) +def down(docker): """Stops Plano.""" console = _console() _print_cli_header(console) - with console.status( - f"[{PLANO_COLOR}]Shutting down Plano...[/{PLANO_COLOR}]", spinner="dots" - ): - stop_docker_container() + if not docker: + from planoai.native_runner import stop_native + + with console.status( + f"[{PLANO_COLOR}]Shutting down Plano...[/{PLANO_COLOR}]", + spinner="dots", + ): + stop_native() + else: + with console.status( + f"[{PLANO_COLOR}]Shutting down Plano (Docker)...[/{PLANO_COLOR}]", + spinner="dots", + ): + stop_docker_container() @click.command() diff --git a/cli/planoai/native_binaries.py b/cli/planoai/native_binaries.py new file mode 100644 index 00000000..de199e84 --- /dev/null +++ b/cli/planoai/native_binaries.py @@ -0,0 +1,308 @@ +import gzip +import os +import platform +import shutil +import subprocess +import sys +import tarfile +import tempfile + +import planoai +from planoai.consts import ( + ENVOY_VERSION, + PLANO_BIN_DIR, + PLANO_PLUGINS_DIR, + PLANO_RELEASE_BASE_URL, +) +from 
planoai.utils import find_repo_root, getLogger + +log = getLogger(__name__) + + +def _get_platform_slug(): + """Return the platform slug for binary downloads.""" + system = platform.system().lower() + machine = platform.machine().lower() + + mapping = { + ("linux", "x86_64"): "linux-amd64", + ("linux", "aarch64"): "linux-arm64", + ("darwin", "arm64"): "darwin-arm64", + } + + slug = mapping.get((system, machine)) + if slug is None: + if system == "darwin" and machine == "x86_64": + print( + "Error: macOS x86_64 (Intel) is not supported. " + "Pre-built binaries are only available for Apple Silicon (arm64)." + ) + sys.exit(1) + print( + f"Error: Unsupported platform {system}/{machine}. " + "Supported platforms: linux-amd64, linux-arm64, darwin-arm64" + ) + sys.exit(1) + + return slug + + +def _download_file(url, dest): + """Download a file from *url* to *dest* using curl.""" + try: + subprocess.run( + ["curl", "-fSL", "-o", dest, url], + check=True, + ) + except subprocess.CalledProcessError as e: + print(f"Error downloading: {e}") + print(f"URL: {url}") + print("Please check your internet connection and try again.") + sys.exit(1) + + +def ensure_envoy_binary(): + """Download Envoy binary if not already present or version changed. Returns path to binary.""" + envoy_path = os.path.join(PLANO_BIN_DIR, "envoy") + version_path = os.path.join(PLANO_BIN_DIR, "envoy.version") + + if os.path.exists(envoy_path) and os.access(envoy_path, os.X_OK): + # Check if cached binary matches the pinned version + if os.path.exists(version_path): + with open(version_path, "r") as f: + cached_version = f.read().strip() + if cached_version == ENVOY_VERSION: + log.info(f"Envoy {ENVOY_VERSION} found at {envoy_path}") + return envoy_path + print( + f"Envoy version changed ({cached_version} → {ENVOY_VERSION}), re-downloading..." 
+ ) + else: + log.info( + f"Envoy binary found at {envoy_path} (unknown version, re-downloading...)" + ) + + slug = _get_platform_slug() + url = ( + f"https://github.com/tetratelabs/archive-envoy/releases/download/" + f"{ENVOY_VERSION}/envoy-{ENVOY_VERSION}-{slug}.tar.xz" + ) + + os.makedirs(PLANO_BIN_DIR, exist_ok=True) + + print(f"Downloading Envoy {ENVOY_VERSION} for {slug}...") + print(f" URL: {url}") + + with tempfile.NamedTemporaryFile(suffix=".tar.xz", delete=False) as tmp: + tmp_path = tmp.name + + try: + _download_file(url, tmp_path) + + print("Extracting Envoy binary...") + with tarfile.open(tmp_path, "r:xz") as tar: + # Find the envoy binary inside the archive + envoy_member = None + for member in tar.getmembers(): + if member.name.endswith("/bin/envoy") or member.name == "bin/envoy": + envoy_member = member + break + + if envoy_member is None: + print("Error: Could not find envoy binary in the downloaded archive.") + print("Archive contents:") + for member in tar.getmembers(): + print(f" {member.name}") + sys.exit(1) + + # Extract just the binary + f = tar.extractfile(envoy_member) + if f is None: + print("Error: Could not extract envoy binary from archive.") + sys.exit(1) + + with open(envoy_path, "wb") as out: + out.write(f.read()) + + os.chmod(envoy_path, 0o755) + with open(version_path, "w") as f: + f.write(ENVOY_VERSION) + print(f"Envoy {ENVOY_VERSION} installed at {envoy_path}") + return envoy_path + + finally: + if os.path.exists(tmp_path): + os.unlink(tmp_path) + + +def _find_local_wasm_plugins(): + """Check for WASM plugins built from source. 
Returns (prompt_gw, llm_gw) or None.""" + repo_root = find_repo_root() + if not repo_root: + return None + wasm_dir = os.path.join(repo_root, "crates", "target", "wasm32-wasip1", "release") + prompt_gw = os.path.join(wasm_dir, "prompt_gateway.wasm") + llm_gw = os.path.join(wasm_dir, "llm_gateway.wasm") + if os.path.exists(prompt_gw) and os.path.exists(llm_gw): + return prompt_gw, llm_gw + return None + + +def _find_local_brightstaff(): + """Check for brightstaff binary built from source. Returns path or None.""" + repo_root = find_repo_root() + if not repo_root: + return None + path = os.path.join(repo_root, "crates", "target", "release", "brightstaff") + if os.path.exists(path) and os.access(path, os.X_OK): + return path + return None + + +def ensure_wasm_plugins(): + """Find or download WASM plugins. Checks: local build → cached download → fresh download.""" + # 1. Local source build (inside repo) + local = _find_local_wasm_plugins() + if local: + log.info(f"Using locally-built WASM plugins: {local[0]}") + return local + + # 2. Cached download + version = planoai.__version__ + version_path = os.path.join(PLANO_PLUGINS_DIR, "wasm.version") + prompt_gw_path = os.path.join(PLANO_PLUGINS_DIR, "prompt_gateway.wasm") + llm_gw_path = os.path.join(PLANO_PLUGINS_DIR, "llm_gateway.wasm") + + if os.path.exists(prompt_gw_path) and os.path.exists(llm_gw_path): + if os.path.exists(version_path): + with open(version_path, "r") as f: + cached_version = f.read().strip() + if cached_version == version: + log.info(f"WASM plugins {version} found at {PLANO_PLUGINS_DIR}") + return prompt_gw_path, llm_gw_path + print( + f"WASM plugins version changed ({cached_version} → {version}), re-downloading..." + ) + else: + log.info("WASM plugins found (unknown version, re-downloading...)") + + # 3. 
Download from GitHub releases (gzipped) + os.makedirs(PLANO_PLUGINS_DIR, exist_ok=True) + + for name, dest in [ + ("prompt_gateway.wasm", prompt_gw_path), + ("llm_gateway.wasm", llm_gw_path), + ]: + gz_name = f"{name}.gz" + url = f"{PLANO_RELEASE_BASE_URL}/{version}/{gz_name}" + print(f"Downloading {gz_name} ({version})...") + print(f" URL: {url}") + gz_dest = dest + ".gz" + _download_file(url, gz_dest) + with gzip.open(gz_dest, "rb") as f_in, open(dest, "wb") as f_out: + shutil.copyfileobj(f_in, f_out) + os.unlink(gz_dest) + print(f" Saved to {dest}") + + with open(version_path, "w") as f: + f.write(version) + + return prompt_gw_path, llm_gw_path + + +def ensure_brightstaff_binary(): + """Find or download brightstaff binary. Checks: local build → cached download → fresh download.""" + # 1. Local source build (inside repo) + local = _find_local_brightstaff() + if local: + log.info(f"Using locally-built brightstaff: {local}") + return local + + # 2. Cached download + version = planoai.__version__ + brightstaff_path = os.path.join(PLANO_BIN_DIR, "brightstaff") + version_path = os.path.join(PLANO_BIN_DIR, "brightstaff.version") + + if os.path.exists(brightstaff_path) and os.access(brightstaff_path, os.X_OK): + if os.path.exists(version_path): + with open(version_path, "r") as f: + cached_version = f.read().strip() + if cached_version == version: + log.info(f"brightstaff {version} found at {brightstaff_path}") + return brightstaff_path + print( + f"brightstaff version changed ({cached_version} → {version}), re-downloading..." + ) + else: + log.info("brightstaff found (unknown version, re-downloading...)") + + # 3. 
Download from GitHub releases (gzipped) + slug = _get_platform_slug() + filename = f"brightstaff-{slug}.gz" + url = f"{PLANO_RELEASE_BASE_URL}/{version}/{filename}" + + os.makedirs(PLANO_BIN_DIR, exist_ok=True) + + print(f"Downloading brightstaff ({version}) for {slug}...") + print(f" URL: {url}") + gz_path = brightstaff_path + ".gz" + _download_file(url, gz_path) + with gzip.open(gz_path, "rb") as f_in, open(brightstaff_path, "wb") as f_out: + shutil.copyfileobj(f_in, f_out) + os.unlink(gz_path) + + os.chmod(brightstaff_path, 0o755) + with open(version_path, "w") as f: + f.write(version) + print(f"brightstaff {version} installed at {brightstaff_path}") + return brightstaff_path + + +def find_wasm_plugins(): + """Find WASM plugin files built from source. Returns (prompt_gateway_path, llm_gateway_path).""" + repo_root = find_repo_root() + if not repo_root: + print( + "Error: Could not find repository root. " + "Make sure you're inside the plano repository." + ) + sys.exit(1) + + wasm_dir = os.path.join(repo_root, "crates", "target", "wasm32-wasip1", "release") + prompt_gw = os.path.join(wasm_dir, "prompt_gateway.wasm") + llm_gw = os.path.join(wasm_dir, "llm_gateway.wasm") + + missing = [] + if not os.path.exists(prompt_gw): + missing.append("prompt_gateway.wasm") + if not os.path.exists(llm_gw): + missing.append("llm_gateway.wasm") + + if missing: + print(f"Error: WASM plugins not found: {', '.join(missing)}") + print(f" Expected at: {wasm_dir}/") + print(" Run 'planoai build' first to build them.") + sys.exit(1) + + return prompt_gw, llm_gw + + +def find_brightstaff_binary(): + """Find the brightstaff binary built from source. Returns path.""" + repo_root = find_repo_root() + if not repo_root: + print( + "Error: Could not find repository root. " + "Make sure you're inside the plano repository." 
+ ) + sys.exit(1) + + brightstaff_path = os.path.join( + repo_root, "crates", "target", "release", "brightstaff" + ) + if not os.path.exists(brightstaff_path): + print(f"Error: brightstaff binary not found at {brightstaff_path}") + print(" Run 'planoai build' first to build it.") + sys.exit(1) + + return brightstaff_path diff --git a/cli/planoai/native_runner.py b/cli/planoai/native_runner.py new file mode 100644 index 00000000..8331d698 --- /dev/null +++ b/cli/planoai/native_runner.py @@ -0,0 +1,434 @@ +import contextlib +import io +import json +import os +import signal +import subprocess +import sys +import time + +from planoai.consts import ( + NATIVE_PID_FILE, + PLANO_RUN_DIR, +) +from planoai.docker_cli import health_check_endpoint +from planoai.native_binaries import ( + ensure_brightstaff_binary, + ensure_envoy_binary, + ensure_wasm_plugins, +) +from planoai.utils import find_repo_root, getLogger + +log = getLogger(__name__) + + +def _find_config_dir(): + """Locate the directory containing plano_config_schema.yaml and envoy.template.yaml. + + Checks package data first (pip-installed), then falls back to the repo checkout. + """ + import planoai + + pkg_data = os.path.join(os.path.dirname(planoai.__file__), "data") + if os.path.isdir(pkg_data) and os.path.exists( + os.path.join(pkg_data, "plano_config_schema.yaml") + ): + return pkg_data + + repo_root = find_repo_root() + if repo_root: + config_dir = os.path.join(repo_root, "config") + if os.path.isdir(config_dir): + return config_dir + + print( + "Error: Could not find config templates. " + "Make sure you're inside the plano repository or have the planoai package installed." 
+ ) + sys.exit(1) + + +@contextlib.contextmanager +def _temporary_env(overrides): + """Context manager that sets env vars from *overrides* and restores originals on exit.""" + saved = {} + for key, value in overrides.items(): + saved[key] = os.environ.get(key) + os.environ[key] = value + try: + yield + finally: + for key, original in saved.items(): + if original is None: + os.environ.pop(key, None) + else: + os.environ[key] = original + + +def render_native_config(plano_config_file, env, with_tracing=False): + """Render envoy and plano configs for native mode. Returns (envoy_config_path, plano_config_rendered_path).""" + import yaml + + os.makedirs(PLANO_RUN_DIR, exist_ok=True) + + prompt_gw_path, llm_gw_path = ensure_wasm_plugins() + + # If --with-tracing, inject tracing config if not already present + effective_config_file = os.path.abspath(plano_config_file) + if with_tracing: + with open(plano_config_file, "r") as f: + config_data = yaml.safe_load(f) + tracing = config_data.get("tracing", {}) + if not tracing.get("random_sampling"): + tracing["random_sampling"] = 100 + config_data["tracing"] = tracing + effective_config_file = os.path.join( + PLANO_RUN_DIR, "config_with_tracing.yaml" + ) + with open(effective_config_file, "w") as f: + yaml.dump(config_data, f, default_flow_style=False) + + envoy_config_path = os.path.join(PLANO_RUN_DIR, "envoy.yaml") + plano_config_rendered_path = os.path.join( + PLANO_RUN_DIR, "plano_config_rendered.yaml" + ) + + # Set environment variables that config_generator.validate_and_render_schema() reads + config_dir = _find_config_dir() + overrides = { + "PLANO_CONFIG_FILE": effective_config_file, + "PLANO_CONFIG_SCHEMA_FILE": os.path.join( + config_dir, "plano_config_schema.yaml" + ), + "TEMPLATE_ROOT": config_dir, + "ENVOY_CONFIG_TEMPLATE_FILE": "envoy.template.yaml", + "PLANO_CONFIG_FILE_RENDERED": plano_config_rendered_path, + "ENVOY_CONFIG_FILE_RENDERED": envoy_config_path, + } + + # Also propagate caller env vars (API keys, 
OTEL endpoint, etc.) + for key, value in env.items(): + if key not in overrides: + overrides[key] = value + + with _temporary_env(overrides): + from planoai.config_generator import validate_and_render_schema + + # Suppress verbose print output from config_generator + with contextlib.redirect_stdout(io.StringIO()): + validate_and_render_schema() + + # Post-process envoy.yaml: replace Docker WASM plugin paths with local paths + with open(envoy_config_path, "r") as f: + envoy_content = f.read() + + envoy_content = envoy_content.replace( + "/etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm", prompt_gw_path + ) + envoy_content = envoy_content.replace( + "/etc/envoy/proxy-wasm-plugins/llm_gateway.wasm", llm_gw_path + ) + + # Replace /var/log/ paths with local log directory (non-root friendly) + log_dir = os.path.join(PLANO_RUN_DIR, "logs") + os.makedirs(log_dir, exist_ok=True) + envoy_content = envoy_content.replace("/var/log/", log_dir + "/") + + # Replace Linux CA cert path with platform-appropriate path + import platform + + if platform.system() == "Darwin": + envoy_content = envoy_content.replace( + "/etc/ssl/certs/ca-certificates.crt", "/etc/ssl/cert.pem" + ) + + with open(envoy_config_path, "w") as f: + f.write(envoy_content) + + # Run envsubst-equivalent on both rendered files using the caller's env + with _temporary_env(env): + for filepath in [envoy_config_path, plano_config_rendered_path]: + with open(filepath, "r") as f: + content = f.read() + content = os.path.expandvars(content) + with open(filepath, "w") as f: + f.write(content) + + return envoy_config_path, plano_config_rendered_path + + +def start_native(plano_config_file, env, foreground=False, with_tracing=False): + """Start Envoy and brightstaff natively.""" + from planoai.core import _get_gateway_ports + + console = None + try: + from rich.console import Console + + console = Console() + except ImportError: + pass + + def status_print(msg): + if console: + console.print(msg) + else: + print(msg) + + 
envoy_path = ensure_envoy_binary() + ensure_wasm_plugins() + brightstaff_path = ensure_brightstaff_binary() + envoy_config_path, plano_config_rendered_path = render_native_config( + plano_config_file, env, with_tracing=with_tracing + ) + + status_print(f"[green]✓[/green] Configuration rendered") + + log_dir = os.path.join(PLANO_RUN_DIR, "logs") + os.makedirs(log_dir, exist_ok=True) + + log_level = env.get("LOG_LEVEL", "info") + + # Start brightstaff + brightstaff_env = os.environ.copy() + brightstaff_env["RUST_LOG"] = log_level + brightstaff_env["PLANO_CONFIG_PATH_RENDERED"] = plano_config_rendered_path + # Propagate API keys and other env vars + for key, value in env.items(): + brightstaff_env[key] = value + + brightstaff_pid = _daemon_exec( + [brightstaff_path], + brightstaff_env, + os.path.join(log_dir, "brightstaff.log"), + ) + log.info(f"Started brightstaff (PID {brightstaff_pid})") + + # Start envoy + envoy_pid = _daemon_exec( + [ + envoy_path, + "-c", + envoy_config_path, + "--component-log-level", + f"wasm:{log_level}", + "--log-format", + "[%Y-%m-%d %T.%e][%l] %v", + ], + brightstaff_env, + os.path.join(log_dir, "envoy.log"), + ) + log.info(f"Started envoy (PID {envoy_pid})") + + # Save PIDs + os.makedirs(PLANO_RUN_DIR, exist_ok=True) + with open(NATIVE_PID_FILE, "w") as f: + json.dump( + { + "envoy_pid": envoy_pid, + "brightstaff_pid": brightstaff_pid, + }, + f, + ) + + # Health check + gateway_ports = _get_gateway_ports(plano_config_file) + status_print(f"[dim]Waiting for listeners to become healthy...[/dim]") + + start_time = time.time() + timeout = 60 + while True: + all_healthy = True + for port in gateway_ports: + if not health_check_endpoint(f"http://localhost:{port}/healthz"): + all_healthy = False + + if all_healthy: + status_print(f"[green]✓[/green] Plano is running (native mode)") + for port in gateway_ports: + status_print(f" [cyan]http://localhost:{port}[/cyan]") + break + + # Check if processes are still alive + if not 
_is_pid_alive(brightstaff_pid): + status_print("[red]✗[/red] brightstaff exited unexpectedly") + status_print(f" Check logs: {os.path.join(log_dir, 'brightstaff.log')}") + _kill_pid(envoy_pid) + sys.exit(1) + + if not _is_pid_alive(envoy_pid): + status_print("[red]✗[/red] envoy exited unexpectedly") + status_print(f" Check logs: {os.path.join(log_dir, 'envoy.log')}") + _kill_pid(brightstaff_pid) + sys.exit(1) + + if time.time() - start_time > timeout: + status_print(f"[red]✗[/red] Health check timed out after {timeout}s") + status_print(f" Check logs in: {log_dir}") + stop_native() + sys.exit(1) + + time.sleep(1) + + if foreground: + status_print(f"[dim]Running in foreground. Press Ctrl+C to stop.[/dim]") + status_print(f"[dim]Logs: {log_dir}[/dim]") + try: + import glob + + access_logs = sorted(glob.glob(os.path.join(log_dir, "access_*.log"))) + tail_proc = subprocess.Popen( + [ + "tail", + "-f", + os.path.join(log_dir, "envoy.log"), + os.path.join(log_dir, "brightstaff.log"), + ] + + access_logs, + stdout=sys.stdout, + stderr=sys.stderr, + ) + tail_proc.wait() + except KeyboardInterrupt: + status_print(f"\n[dim]Stopping Plano...[/dim]") + if tail_proc.poll() is None: + tail_proc.terminate() + stop_native() + else: + status_print(f"[dim]Logs: {log_dir}[/dim]") + status_print(f"[dim]Run 'planoai down' to stop.[/dim]") + + +def _daemon_exec(args, env, log_path): + """Start a fully daemonized process via double-fork. 
Returns the child PID.""" + log_fd = os.open(log_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o644) + + pid = os.fork() + if pid > 0: + # Parent: close our copy of the log fd and wait for intermediate child + os.close(log_fd) + os.waitpid(pid, 0) + # Read the grandchild PID from the pipe + grandchild_pid_path = os.path.join(PLANO_RUN_DIR, f".daemon_pid_{pid}") + deadline = time.time() + 5 + while time.time() < deadline: + if os.path.exists(grandchild_pid_path): + with open(grandchild_pid_path, "r") as f: + grandchild_pid = int(f.read().strip()) + os.unlink(grandchild_pid_path) + return grandchild_pid + time.sleep(0.05) + raise RuntimeError(f"Timed out waiting for daemon PID from {args[0]}") + + # First child: create new session and fork again + os.setsid() + grandchild_pid = os.fork() + if grandchild_pid > 0: + # Intermediate child: write grandchild PID and exit + pid_path = os.path.join(PLANO_RUN_DIR, f".daemon_pid_{os.getpid()}") + with open(pid_path, "w") as f: + f.write(str(grandchild_pid)) + os._exit(0) + + # Grandchild: this is the actual daemon + os.dup2(log_fd, 1) # stdout -> log + os.dup2(log_fd, 2) # stderr -> log + os.close(log_fd) + # Close stdin + devnull = os.open(os.devnull, os.O_RDONLY) + os.dup2(devnull, 0) + os.close(devnull) + + os.execve(args[0], args, env) + + +def _is_pid_alive(pid): + """Check if a process with the given PID is still running.""" + try: + os.kill(pid, 0) + return True + except ProcessLookupError: + return False + except PermissionError: + return True # Process exists but we can't signal it + + +def _kill_pid(pid): + """Send SIGTERM to a PID, ignoring errors.""" + try: + os.kill(pid, signal.SIGTERM) + except (ProcessLookupError, PermissionError): + pass + + +def stop_native(): + """Stop natively-running Envoy and brightstaff processes.""" + if not os.path.exists(NATIVE_PID_FILE): + print("No native Plano instance found (PID file missing).") + return + + with open(NATIVE_PID_FILE, "r") as f: + pids = json.load(f) + + envoy_pid 
= pids.get("envoy_pid") + brightstaff_pid = pids.get("brightstaff_pid") + + for name, pid in [("envoy", envoy_pid), ("brightstaff", brightstaff_pid)]: + if pid is None: + continue + try: + os.kill(pid, signal.SIGTERM) + log.info(f"Sent SIGTERM to {name} (PID {pid})") + except ProcessLookupError: + log.info(f"{name} (PID {pid}) already stopped") + continue + except PermissionError: + log.info(f"Permission denied stopping {name} (PID {pid})") + continue + + # Wait for graceful shutdown + deadline = time.time() + 10 + while time.time() < deadline: + try: + os.kill(pid, 0) # Check if still alive + time.sleep(0.5) + except ProcessLookupError: + break + else: + # Still alive after timeout, force kill + try: + os.kill(pid, signal.SIGKILL) + log.info(f"Sent SIGKILL to {name} (PID {pid})") + except ProcessLookupError: + pass + + os.unlink(NATIVE_PID_FILE) + print("Plano stopped (native mode).") + + +def native_validate_config(plano_config_file): + """Validate config in-process without Docker.""" + config_dir = _find_config_dir() + + # Create temp dir for rendered output (we just want validation) + os.makedirs(PLANO_RUN_DIR, exist_ok=True) + + overrides = { + "PLANO_CONFIG_FILE": os.path.abspath(plano_config_file), + "PLANO_CONFIG_SCHEMA_FILE": os.path.join( + config_dir, "plano_config_schema.yaml" + ), + "TEMPLATE_ROOT": config_dir, + "ENVOY_CONFIG_TEMPLATE_FILE": "envoy.template.yaml", + "PLANO_CONFIG_FILE_RENDERED": os.path.join( + PLANO_RUN_DIR, "plano_config_rendered.yaml" + ), + "ENVOY_CONFIG_FILE_RENDERED": os.path.join(PLANO_RUN_DIR, "envoy.yaml"), + } + + with _temporary_env(overrides): + from planoai.config_generator import validate_and_render_schema + + # Suppress verbose print output from config_generator + with contextlib.redirect_stdout(io.StringIO()): + validate_and_render_schema() diff --git a/cli/planoai/templates/coding_agent_routing.yaml b/cli/planoai/templates/coding_agent_routing.yaml index b0e40000..b99994c9 100644 --- 
a/cli/planoai/templates/coding_agent_routing.yaml +++ b/cli/planoai/templates/coding_agent_routing.yaml @@ -23,7 +23,7 @@ model_providers: # Ollama Models - model: ollama/llama3.1 - base_url: http://host.docker.internal:11434 + base_url: http://localhost:11434 # Model aliases - friendly names that map to actual provider names diff --git a/cli/pyproject.toml b/cli/pyproject.toml index 2446fddb..e53f0bda 100644 --- a/cli/pyproject.toml +++ b/cli/pyproject.toml @@ -37,6 +37,10 @@ path = "planoai/__init__.py" [tool.hatch.build.targets.wheel] packages = ["planoai"] +[tool.hatch.build.targets.wheel.force-include] +"../config/plano_config_schema.yaml" = "planoai/data/plano_config_schema.yaml" +"../config/envoy.template.yaml" = "planoai/data/envoy.template.yaml" + [tool.hatch.build.targets.sdist] include = ["planoai/**"] diff --git a/config/test_passthrough.yaml b/config/test_passthrough.yaml index b9c74ae8..afecc7d4 100644 --- a/config/test_passthrough.yaml +++ b/config/test_passthrough.yaml @@ -6,8 +6,8 @@ # that manage their own API key validation. # # To test: -# docker build -t plano-passthrough-test . -# docker run -d -p 10000:10000 -v $(pwd)/config/test_passthrough.yaml:/app/plano_config.yaml plano-passthrough-test +# pip install planoai +# planoai up config/test_passthrough.yaml # # curl http://localhost:10000/v1/chat/completions \ # -H "Authorization: Bearer sk-your-virtual-key" \ diff --git a/config/validate_plano_config.sh b/config/validate_plano_config.sh index cccdc8be..572ac2ec 100644 --- a/config/validate_plano_config.sh +++ b/config/validate_plano_config.sh @@ -1,20 +1,32 @@ #!/bin/bash +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" failed_files=() for file in $(find . -name config.yaml -o -name plano_config_full_reference.yaml); do echo "Validating ${file}..." - touch $(pwd)/${file}_rendered - if ! 
docker run --rm -v "$(pwd)/${file}:/app/plano_config.yaml:ro" -v "$(pwd)/${file}_rendered:/app/plano_config_rendered.yaml:rw" --entrypoint /bin/sh ${PLANO_DOCKER_IMAGE:-katanemo/plano:0.4.9} -c "python -m planoai.config_generator" 2>&1 > /dev/null ; then + rendered_file="$(pwd)/${file}_rendered" + touch "$rendered_file" + + PLANO_CONFIG_FILE="$(pwd)/${file}" \ + PLANO_CONFIG_SCHEMA_FILE="${SCRIPT_DIR}/plano_config_schema.yaml" \ + TEMPLATE_ROOT="${SCRIPT_DIR}" \ + ENVOY_CONFIG_TEMPLATE_FILE="envoy.template.yaml" \ + PLANO_CONFIG_FILE_RENDERED="$rendered_file" \ + ENVOY_CONFIG_FILE_RENDERED="/dev/null" \ + python -m planoai.config_generator 2>&1 > /dev/null + + if [ $? -ne 0 ]; then echo "Validation failed for $file" failed_files+=("$file") fi + RENDERED_CHECKED_IN_FILE=$(echo $file | sed 's/\.yaml$/_rendered.yaml/') if [ -f "$RENDERED_CHECKED_IN_FILE" ]; then echo "Checking rendered file against checked-in version..." - if ! diff -q "${file}_rendered" "$RENDERED_CHECKED_IN_FILE" > /dev/null; then - echo "Rendered file ${file}_rendered does not match checked-in version ${RENDERED_CHECKED_IN_FILE}" - failed_files+=("${file}_rendered") + if ! diff -q "$rendered_file" "$RENDERED_CHECKED_IN_FILE" > /dev/null; then + echo "Rendered file $rendered_file does not match checked-in version ${RENDERED_CHECKED_IN_FILE}" + failed_files+=("$rendered_file") else echo "Rendered file matches checked-in version." 
fi diff --git a/demos/advanced/multi_turn_rag/config.yaml b/demos/advanced/multi_turn_rag/config.yaml index 2c677eec..22e84015 100644 --- a/demos/advanced/multi_turn_rag/config.yaml +++ b/demos/advanced/multi_turn_rag/config.yaml @@ -7,7 +7,7 @@ listeners: endpoints: rag_energy_source_agent: - endpoint: host.docker.internal:18083 + endpoint: localhost:18083 connect_timeout: 0.005s model_providers: diff --git a/demos/agent_orchestration/multi_agent_crewai_langchain/README.md b/demos/agent_orchestration/multi_agent_crewai_langchain/README.md index deedda23..e2fe23fb 100644 --- a/demos/agent_orchestration/multi_agent_crewai_langchain/README.md +++ b/demos/agent_orchestration/multi_agent_crewai_langchain/README.md @@ -38,18 +38,17 @@ Plano acts as a **framework-agnostic proxy and data plane** that: ```bash # From the demo directory cd demos/agent_orchestration/multi_agent_crewai_langchain - -# Build and start all services -docker-compose up -d +./run_demo.sh ``` -This starts: -- **Plano** (ports 12000, 8001) - routing and orchestration +This starts Plano natively and brings up via Docker Compose: - **CrewAI Flight Agent** (port 10520) - flight search - **LangChain Weather Agent** (port 10510) - weather forecasts - **AnythingLLM** (port 3001) - chat interface - **Jaeger** (port 16686) - distributed tracing +Plano runs natively on the host (ports 12000, 8001). + ### Try It Out 1. 
**Open the Chat Interface** @@ -116,7 +115,7 @@ This starts: ## Cleanup ```bash -docker-compose down +./run_demo.sh down ``` ## Next Steps diff --git a/demos/agent_orchestration/multi_agent_crewai_langchain/docker-compose.yaml b/demos/agent_orchestration/multi_agent_crewai_langchain/docker-compose.yaml index a54888a6..2d9c180b 100644 --- a/demos/agent_orchestration/multi_agent_crewai_langchain/docker-compose.yaml +++ b/demos/agent_orchestration/multi_agent_crewai_langchain/docker-compose.yaml @@ -1,21 +1,5 @@ services: - plano: - build: - context: ../../../ - dockerfile: Dockerfile - ports: - - "8001:8001" - - "12000:12000" - environment: - - PLANO_CONFIG_PATH=/app/plano_config.yaml - - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set} - - OTEL_TRACING_GRPC_ENDPOINT=http://jaeger:4317 - - LOG_LEVEL=${LOG_LEVEL:-info} - volumes: - - ./config.yaml:/app/plano_config.yaml:ro - - /etc/ssl/cert.pem:/etc/ssl/cert.pem - crewai-flight-agent: build: dockerfile: Dockerfile @@ -23,7 +7,7 @@ services: ports: - "10520:10520" environment: - - LLM_GATEWAY_ENDPOINT=http://plano:12000/v1 + - LLM_GATEWAY_ENDPOINT=http://host.docker.internal:12000/v1 - AEROAPI_KEY=${AEROAPI_KEY:?AEROAPI_KEY environment variable is required but not set} - PYTHONUNBUFFERED=1 command: ["python", "-u", "crewai/flight_agent.py"] @@ -35,7 +19,7 @@ services: ports: - "10510:10510" environment: - - LLM_GATEWAY_ENDPOINT=http://plano:12000/v1 + - LLM_GATEWAY_ENDPOINT=http://host.docker.internal:12000/v1 command: ["python", "-u", "langchain/weather_agent.py"] anythingllm: @@ -48,7 +32,7 @@ services: environment: - STORAGE_DIR=/app/server/storage - LLM_PROVIDER=generic-openai - - GENERIC_OPEN_AI_BASE_PATH=http://plano:8001/v1 + - GENERIC_OPEN_AI_BASE_PATH=http://host.docker.internal:8001/v1 - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000 - GENERIC_OPEN_AI_API_KEY=sk-placeholder diff --git 
a/demos/agent_orchestration/multi_agent_crewai_langchain/run_demo.sh b/demos/agent_orchestration/multi_agent_crewai_langchain/run_demo.sh new file mode 100755 index 00000000..b7dc0fad --- /dev/null +++ b/demos/agent_orchestration/multi_agent_crewai_langchain/run_demo.sh @@ -0,0 +1,51 @@ +#!/bin/bash +set -e + +# Function to start the demo +start_demo() { + # Step 1: Check if .env file exists + if [ -f ".env" ]; then + echo ".env file already exists. Skipping creation." + else + # Step 2: Create `.env` file and set API keys + if [ -z "$OPENAI_API_KEY" ]; then + echo "Error: OPENAI_API_KEY environment variable is not set for the demo." + exit 1 + fi + if [ -z "$AEROAPI_KEY" ]; then + echo "Error: AEROAPI_KEY environment variable is not set for the demo." + exit 1 + fi + + echo "Creating .env file..." + echo "OPENAI_API_KEY=$OPENAI_API_KEY" > .env + echo "AEROAPI_KEY=$AEROAPI_KEY" >> .env + echo ".env file created with API keys." + fi + + # Step 3: Start Plano + echo "Starting Plano with config.yaml..." + planoai up config.yaml + + # Step 4: Start agents and services + echo "Starting agents using Docker Compose..." + docker compose up -d +} + +# Function to stop the demo +stop_demo() { + # Step 1: Stop Docker Compose services + echo "Stopping Docker Compose services..." + docker compose down + + # Step 2: Stop Plano + echo "Stopping Plano..." 
+ planoai down +} + +# Main script logic +if [ "$1" == "down" ]; then + stop_demo +else + start_demo +fi diff --git a/demos/agent_orchestration/travel_agents/README.md b/demos/agent_orchestration/travel_agents/README.md index 731084ba..d6468612 100644 --- a/demos/agent_orchestration/travel_agents/README.md +++ b/demos/agent_orchestration/travel_agents/README.md @@ -9,7 +9,7 @@ This demo consists of two intelligent agents that work together seamlessly: - **Weather Agent** - Real-time weather conditions and multi-day forecasts for any city worldwide - **Flight Agent** - Live flight information between airports with real-time tracking -All agents use Plano's agent orchestration LLM to intelligently route user requests to the appropriate specialized agent based on conversation context and user intent. Both agents run as Docker containers for easy deployment. +All agents use Plano's agent orchestration LLM to intelligently route user requests to the appropriate specialized agent based on conversation context and user intent. ## Features @@ -22,8 +22,8 @@ All agents use Plano's agent orchestration LLM to intelligently route user reque ## Prerequisites -- Docker and Docker Compose -- [Plano CLI](https://docs.planoai.dev/get_started/quickstart.html#prerequisites) installed +- [Plano CLI](https://docs.planoai.dev/get_started/quickstart.html#prerequisites) installed (`pip install planoai`) +- Docker and Docker Compose (for agent services) - [OpenAI API key](https://platform.openai.com/api-keys) - [FlightAware AeroAPI key](https://www.flightaware.com/aeroapi/portal) @@ -40,17 +40,18 @@ export AEROAPI_KEY="your-flightaware-api-key" export OPENAI_API_KEY="your OpenAI api key" ``` -### 2. Start All Agents & Plano with Docker +### 2. 
Start the Demo ```bash -docker compose up --build +./run_demo.sh ``` -This starts: +This starts Plano natively and brings up via Docker Compose: - Weather Agent on port 10510 - Flight Agent on port 10520 - Open WebUI on port 8080 -- Plano Proxy on port 8001 + +Plano runs natively on the host (port 8001). ### 4. Test the System @@ -92,7 +93,7 @@ Assistant: [Both weather_agent and flight_agent respond simultaneously] Weather Flight Agent Agent (10510) (10520) - [Docker] [Docker] + (10510) (10520) ``` Each agent: @@ -101,7 +102,7 @@ Each agent: 3. Generates response using GPT-5.2 4. Streams response back to user -Both agents run as Docker containers and communicate with Plano via `host.docker.internal`. +Both agents run as Docker containers and communicate with Plano running natively on the host. ## Observability diff --git a/demos/agent_orchestration/travel_agents/config.yaml b/demos/agent_orchestration/travel_agents/config.yaml index 911baf89..45415795 100644 --- a/demos/agent_orchestration/travel_agents/config.yaml +++ b/demos/agent_orchestration/travel_agents/config.yaml @@ -2,9 +2,9 @@ version: v0.3.0 agents: - id: weather_agent - url: http://host.docker.internal:10510 + url: http://localhost:10510 - id: flight_agent - url: http://host.docker.internal:10520 + url: http://localhost:10520 model_providers: - model: openai/gpt-5.2 diff --git a/demos/agent_orchestration/travel_agents/docker-compose.yaml b/demos/agent_orchestration/travel_agents/docker-compose.yaml index b4e65b28..f0fb78e5 100644 --- a/demos/agent_orchestration/travel_agents/docker-compose.yaml +++ b/demos/agent_orchestration/travel_agents/docker-compose.yaml @@ -1,18 +1,5 @@ services: - plano: - build: - context: ../../../ - dockerfile: Dockerfile - ports: - - "12000:12000" - - "8001:8001" - environment: - - PLANO_CONFIG_PATH=/config/config.yaml - - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set} - volumes: - - ./config.yaml:/app/plano_config.yaml - - 
/etc/ssl/cert.pem:/etc/ssl/cert.pem weather-agent: build: context: . diff --git a/demos/agent_orchestration/travel_agents/run_demo.sh b/demos/agent_orchestration/travel_agents/run_demo.sh new file mode 100755 index 00000000..b7dc0fad --- /dev/null +++ b/demos/agent_orchestration/travel_agents/run_demo.sh @@ -0,0 +1,51 @@ +#!/bin/bash +set -e + +# Function to start the demo +start_demo() { + # Step 1: Check if .env file exists + if [ -f ".env" ]; then + echo ".env file already exists. Skipping creation." + else + # Step 2: Create `.env` file and set API keys + if [ -z "$OPENAI_API_KEY" ]; then + echo "Error: OPENAI_API_KEY environment variable is not set for the demo." + exit 1 + fi + if [ -z "$AEROAPI_KEY" ]; then + echo "Error: AEROAPI_KEY environment variable is not set for the demo." + exit 1 + fi + + echo "Creating .env file..." + echo "OPENAI_API_KEY=$OPENAI_API_KEY" > .env + echo "AEROAPI_KEY=$AEROAPI_KEY" >> .env + echo ".env file created with API keys." + fi + + # Step 3: Start Plano + echo "Starting Plano with config.yaml..." + planoai up config.yaml + + # Step 4: Start agents and services + echo "Starting agents using Docker Compose..." + docker compose up -d +} + +# Function to stop the demo +stop_demo() { + # Step 1: Stop Docker Compose services + echo "Stopping Docker Compose services..." + docker compose down + + # Step 2: Stop Plano + echo "Stopping Plano..." + planoai down +} + +# Main script logic +if [ "$1" == "down" ]; then + stop_demo +else + start_demo +fi diff --git a/demos/filter_chains/http_filter/README.md b/demos/filter_chains/http_filter/README.md index fa683d9f..5e675113 100644 --- a/demos/filter_chains/http_filter/README.md +++ b/demos/filter_chains/http_filter/README.md @@ -35,21 +35,21 @@ This demo consists of four components: ## Quick Start -### 1. Start everything with Docker Compose +### 1. 
Start the demo ```bash -docker compose up --build +export OPENAI_API_KEY="your-key" +./run_demo.sh ``` -This brings up: +This starts Plano natively and brings up via Docker Compose: - Input Guards MCP server on port 10500 - Query Rewriter MCP server on port 10501 - Context Builder MCP server on port 10502 - RAG Agent REST server on port 10505 -- Plano listener on port 8001 (and gateway on 12000) - Jaeger UI for viewing traces at http://localhost:16686 - AnythingLLM at http://localhost:3001 for interactive queries -> Set `OPENAI_API_KEY` in your environment before running; `LLM_GATEWAY_ENDPOINT` defaults to `http://host.docker.internal:12000/v1`. +Plano runs natively on the host (port 8001 and 12000). ### 2. Test the system @@ -74,16 +74,16 @@ The `config.yaml` defines how agents are connected: ```yaml filters: - id: input_guards - url: http://host.docker.internal:10500 + url: http://localhost:10500 # type: mcp (default) # tool: input_guards (default - same as filter id) - id: query_rewriter - url: http://host.docker.internal:10501 + url: http://localhost:10501 # type: mcp (default) - id: context_builder - url: http://host.docker.internal:10502 + url: http://localhost:10502 ``` ## How It Works diff --git a/demos/filter_chains/http_filter/docker-compose.yaml b/demos/filter_chains/http_filter/docker-compose.yaml index 4946de8c..64962bce 100644 --- a/demos/filter_chains/http_filter/docker-compose.yaml +++ b/demos/filter_chains/http_filter/docker-compose.yaml @@ -11,19 +11,6 @@ services: environment: - LLM_GATEWAY_ENDPOINT=${LLM_GATEWAY_ENDPOINT:-http://host.docker.internal:12000/v1} - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set} - plano: - build: - context: ../../../ - dockerfile: Dockerfile - ports: - - "12000:12000" - - "8001:8001" - environment: - - PLANO_CONFIG_PATH=/config/config.yaml - - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set} - volumes: - - 
./config.yaml:/app/plano_config.yaml - - /etc/ssl/cert.pem:/etc/ssl/cert.pem jaeger: build: context: ../../shared/jaeger @@ -41,7 +28,7 @@ services: environment: - STORAGE_DIR=/app/server/storage - LLM_PROVIDER=generic-openai - - GENERIC_OPEN_AI_BASE_PATH=http://plano:8001/v1 + - GENERIC_OPEN_AI_BASE_PATH=http://host.docker.internal:8001/v1 - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000 - GENERIC_OPEN_AI_API_KEY=sk-placeholder diff --git a/demos/filter_chains/http_filter/run_demo.sh b/demos/filter_chains/http_filter/run_demo.sh new file mode 100755 index 00000000..bed84f16 --- /dev/null +++ b/demos/filter_chains/http_filter/run_demo.sh @@ -0,0 +1,46 @@ +#!/bin/bash +set -e + +# Function to start the demo +start_demo() { + # Step 1: Check if .env file exists + if [ -f ".env" ]; then + echo ".env file already exists. Skipping creation." + else + # Step 2: Create `.env` file and set OpenAI key + if [ -z "$OPENAI_API_KEY" ]; then + echo "Error: OPENAI_API_KEY environment variable is not set for the demo." + exit 1 + fi + + echo "Creating .env file..." + echo "OPENAI_API_KEY=$OPENAI_API_KEY" > .env + echo ".env file created with OPENAI_API_KEY." + fi + + # Step 3: Start Plano + echo "Starting Plano with config.yaml..." + planoai up config.yaml + + # Step 4: Start services + echo "Starting services using Docker Compose..." + docker compose up -d +} + +# Function to stop the demo +stop_demo() { + # Step 1: Stop Docker Compose services + echo "Stopping Docker Compose services..." + docker compose down + + # Step 2: Stop Plano + echo "Stopping Plano..." 
+ planoai down +} + +# Main script logic +if [ "$1" == "down" ]; then + stop_demo +else + start_demo +fi diff --git a/demos/filter_chains/mcp_filter/README.md b/demos/filter_chains/mcp_filter/README.md index fa683d9f..5e675113 100644 --- a/demos/filter_chains/mcp_filter/README.md +++ b/demos/filter_chains/mcp_filter/README.md @@ -35,21 +35,21 @@ This demo consists of four components: ## Quick Start -### 1. Start everything with Docker Compose +### 1. Start the demo ```bash -docker compose up --build +export OPENAI_API_KEY="your-key" +./run_demo.sh ``` -This brings up: +This starts Plano natively and brings up via Docker Compose: - Input Guards MCP server on port 10500 - Query Rewriter MCP server on port 10501 - Context Builder MCP server on port 10502 - RAG Agent REST server on port 10505 -- Plano listener on port 8001 (and gateway on 12000) - Jaeger UI for viewing traces at http://localhost:16686 - AnythingLLM at http://localhost:3001 for interactive queries -> Set `OPENAI_API_KEY` in your environment before running; `LLM_GATEWAY_ENDPOINT` defaults to `http://host.docker.internal:12000/v1`. +Plano runs natively on the host (port 8001 and 12000). ### 2. 
Test the system @@ -74,16 +74,16 @@ The `config.yaml` defines how agents are connected: ```yaml filters: - id: input_guards - url: http://host.docker.internal:10500 + url: http://localhost:10500 # type: mcp (default) # tool: input_guards (default - same as filter id) - id: query_rewriter - url: http://host.docker.internal:10501 + url: http://localhost:10501 # type: mcp (default) - id: context_builder - url: http://host.docker.internal:10502 + url: http://localhost:10502 ``` ## How It Works diff --git a/demos/filter_chains/mcp_filter/config.yaml b/demos/filter_chains/mcp_filter/config.yaml index 0b2b58a6..e07a49dc 100644 --- a/demos/filter_chains/mcp_filter/config.yaml +++ b/demos/filter_chains/mcp_filter/config.yaml @@ -2,21 +2,21 @@ version: v0.3.0 agents: - id: rag_agent - url: http://host.docker.internal:10505 + url: http://localhost:10505 filters: - id: input_guards - url: http://host.docker.internal:10500 + url: http://localhost:10500 # type: mcp (default) # transport: streamable-http (default) # tool: input_guards (default - same as filter id) - id: query_rewriter - url: http://host.docker.internal:10501 + url: http://localhost:10501 # type: mcp (default) # transport: streamable-http (default) # tool: query_rewriter (default - same as filter id) - id: context_builder - url: http://host.docker.internal:10502 + url: http://localhost:10502 model_providers: - model: openai/gpt-4o-mini diff --git a/demos/filter_chains/mcp_filter/docker-compose.yaml b/demos/filter_chains/mcp_filter/docker-compose.yaml index 9ecc36e1..64962bce 100644 --- a/demos/filter_chains/mcp_filter/docker-compose.yaml +++ b/demos/filter_chains/mcp_filter/docker-compose.yaml @@ -11,21 +11,6 @@ services: environment: - LLM_GATEWAY_ENDPOINT=${LLM_GATEWAY_ENDPOINT:-http://host.docker.internal:12000/v1} - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set} - plano: - build: - context: ../../../ - dockerfile: Dockerfile - ports: - - "11000:11000" - - 
"12001:12001" - - "12000:12000" - - "8001:8001" - environment: - - PLANO_CONFIG_PATH=/config/config.yaml - - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set} - volumes: - - ./config.yaml:/app/plano_config.yaml - - /etc/ssl/cert.pem:/etc/ssl/cert.pem jaeger: build: context: ../../shared/jaeger @@ -43,7 +28,7 @@ services: environment: - STORAGE_DIR=/app/server/storage - LLM_PROVIDER=generic-openai - - GENERIC_OPEN_AI_BASE_PATH=http://plano:8001/v1 + - GENERIC_OPEN_AI_BASE_PATH=http://host.docker.internal:8001/v1 - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000 - GENERIC_OPEN_AI_API_KEY=sk-placeholder diff --git a/demos/filter_chains/mcp_filter/run_demo.sh b/demos/filter_chains/mcp_filter/run_demo.sh new file mode 100755 index 00000000..bed84f16 --- /dev/null +++ b/demos/filter_chains/mcp_filter/run_demo.sh @@ -0,0 +1,46 @@ +#!/bin/bash +set -e + +# Function to start the demo +start_demo() { + # Step 1: Check if .env file exists + if [ -f ".env" ]; then + echo ".env file already exists. Skipping creation." + else + # Step 2: Create `.env` file and set OpenAI key + if [ -z "$OPENAI_API_KEY" ]; then + echo "Error: OPENAI_API_KEY environment variable is not set for the demo." + exit 1 + fi + + echo "Creating .env file..." + echo "OPENAI_API_KEY=$OPENAI_API_KEY" > .env + echo ".env file created with OPENAI_API_KEY." + fi + + # Step 3: Start Plano + echo "Starting Plano with config.yaml..." + planoai up config.yaml + + # Step 4: Start services + echo "Starting services using Docker Compose..." + docker compose up -d +} + +# Function to stop the demo +stop_demo() { + # Step 1: Stop Docker Compose services + echo "Stopping Docker Compose services..." + docker compose down + + # Step 2: Stop Plano + echo "Stopping Plano..." 
+ planoai down +} + +# Main script logic +if [ "$1" == "down" ]; then + stop_demo +else + start_demo +fi diff --git a/demos/getting_started/llm_gateway/config.yaml b/demos/getting_started/llm_gateway/config.yaml index 92769648..d18f8c0f 100644 --- a/demos/getting_started/llm_gateway/config.yaml +++ b/demos/getting_started/llm_gateway/config.yaml @@ -44,7 +44,7 @@ model_providers: access_key: $TOGETHER_API_KEY - model: custom/test-model - base_url: http://host.docker.internal:11223 + base_url: http://localhost:11223 provider_interface: openai tracing: diff --git a/demos/getting_started/llm_gateway/docker-compose.yaml b/demos/getting_started/llm_gateway/docker-compose.yaml index 52723fbf..3273d55a 100644 --- a/demos/getting_started/llm_gateway/docker-compose.yaml +++ b/demos/getting_started/llm_gateway/docker-compose.yaml @@ -1,20 +1,5 @@ services: - plano: - build: - context: ../../../ - dockerfile: Dockerfile - ports: - - "12000:12000" - - "12001:12001" - environment: - - PLANO_CONFIG_PATH=/app/plano_config.yaml - - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set} - - OTEL_TRACING_GRPC_ENDPOINT=http://host.docker.internal:4317 - volumes: - - ./config.yaml:/app/plano_config.yaml:ro - - /etc/ssl/cert.pem:/etc/ssl/cert.pem - anythingllm: image: mintplexlabs/anythingllm restart: always @@ -25,7 +10,7 @@ services: environment: - STORAGE_DIR=/app/server/storage - LLM_PROVIDER=generic-openai - - GENERIC_OPEN_AI_BASE_PATH=http://plano:12000/v1 + - GENERIC_OPEN_AI_BASE_PATH=http://host.docker.internal:12000/v1 - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000 - GENERIC_OPEN_AI_API_KEY=sk-placeholder diff --git a/demos/getting_started/weather_forecast/config.yaml b/demos/getting_started/weather_forecast/config.yaml index 69451552..65048912 100644 --- a/demos/getting_started/weather_forecast/config.yaml +++ b/demos/getting_started/weather_forecast/config.yaml @@ -11,7 +11,7 @@ listeners: endpoints: 
weather_forecast_service: - endpoint: host.docker.internal:18083 + endpoint: localhost:18083 connect_timeout: 0.005s overrides: diff --git a/demos/integrations/ollama/config.yaml b/demos/integrations/ollama/config.yaml index c86fe002..2786ed97 100644 --- a/demos/integrations/ollama/config.yaml +++ b/demos/integrations/ollama/config.yaml @@ -9,7 +9,7 @@ model_providers: - model: my_llm_provider/llama3.2 provider_interface: openai - base_url: http://host.docker.internal:11434 + base_url: http://localhost:11434 default: true system_prompt: | diff --git a/demos/llm_routing/claude_code_router/README.md b/demos/llm_routing/claude_code_router/README.md index d7aeab9e..d57f5344 100644 --- a/demos/llm_routing/claude_code_router/README.md +++ b/demos/llm_routing/claude_code_router/README.md @@ -39,8 +39,8 @@ Your Request → Plano → Suitable Model → Response # Install Claude Code if you haven't already npm install -g @anthropic-ai/claude-code -# Ensure Docker is running -docker --version +# Install Plano CLI +pip install planoai ``` ### Step 1: Get Configuration diff --git a/demos/llm_routing/claude_code_router/config.yaml b/demos/llm_routing/claude_code_router/config.yaml index be763325..e72aa73a 100644 --- a/demos/llm_routing/claude_code_router/config.yaml +++ b/demos/llm_routing/claude_code_router/config.yaml @@ -28,7 +28,7 @@ model_providers: # Ollama Models - model: ollama/llama3.1 - base_url: http://host.docker.internal:11434 + base_url: http://localhost:11434 # Model aliases - friendly names that map to actual provider names diff --git a/demos/llm_routing/model_alias_routing/config_with_aliases.yaml b/demos/llm_routing/model_alias_routing/config_with_aliases.yaml index 53b679ae..f46359cc 100644 --- a/demos/llm_routing/model_alias_routing/config_with_aliases.yaml +++ b/demos/llm_routing/model_alias_routing/config_with_aliases.yaml @@ -49,7 +49,7 @@ model_providers: # Ollama Models - model: ollama/llama3.1 - base_url: http://host.docker.internal:11434 + base_url: 
http://localhost:11434 # Grok (xAI) Models - model: xai/grok-4-0709 diff --git a/demos/llm_routing/openclaw_routing/README.md b/demos/llm_routing/openclaw_routing/README.md index 7c201687..34ddde47 100644 --- a/demos/llm_routing/openclaw_routing/README.md +++ b/demos/llm_routing/openclaw_routing/README.md @@ -23,7 +23,6 @@ Plano uses a [preference-aligned router](https://arxiv.org/abs/2506.16655) to an ## Prerequisites -- **Docker** running - **Plano CLI**: `uv tool install planoai` or `pip install planoai` - **OpenClaw**: `npm install -g openclaw@latest` - **API keys**: @@ -43,7 +42,7 @@ export ANTHROPIC_API_KEY="your-anthropic-key" ```bash cd demos/llm_routing/openclaw_routing -planoai up --service plano --foreground +planoai up config.yaml ``` ### 3. Set Up OpenClaw diff --git a/demos/llm_routing/preference_based_routing/README.md b/demos/llm_routing/preference_based_routing/README.md index bfee4e34..03d28cee 100644 --- a/demos/llm_routing/preference_based_routing/README.md +++ b/demos/llm_routing/preference_based_routing/README.md @@ -3,25 +3,23 @@ This demo shows how you can use user preferences to route user prompts to approp ## How to start the demo -Make sure your machine is up to date with [latest version of plano]([url](https://github.com/katanemo/plano/tree/main?tab=readme-ov-file#prerequisites)). And you have activated the virtual environment. +Make sure you have Plano CLI installed (`pip install planoai` or `uv tool install planoai`). - -1. start anythingllm ```bash -(venv) $ cd demos/llm_routing/preference_based_routing -(venv) $ docker compose up -d +cd demos/llm_routing/preference_based_routing +./run_demo.sh ``` -2. start plano in the foreground + +Or manually: + +1. 
Start Plano ```bash -(venv) $ planoai up --service plano --foreground -# Or if installed with uv: uvx planoai up --service plano --foreground -2025-05-30 18:00:09,953 - planoai.main - INFO - Starting plano cli version: 0.4.9 -2025-05-30 18:00:09,953 - planoai.main - INFO - Validating /Users/adilhafeez/src/intelligent-prompt-gateway/demos/llm_routing/preference_based_routing/config.yaml -2025-05-30 18:00:10,422 - cli.core - INFO - Starting plano gateway, image name: plano, tag: katanemo/plano:0.4.9 -2025-05-30 18:00:10,662 - cli.core - INFO - plano status: running, health status: starting -2025-05-30 18:00:11,712 - cli.core - INFO - plano status: running, health status: starting -2025-05-30 18:00:12,761 - cli.core - INFO - plano is running and is healthy! -... +planoai up config.yaml +``` + +2. Start AnythingLLM +```bash +docker compose up -d ``` 3. open AnythingLLM http://localhost:3001/ diff --git a/demos/llm_routing/preference_based_routing/docker-compose.yaml b/demos/llm_routing/preference_based_routing/docker-compose.yaml index 7c88594a..3273d55a 100644 --- a/demos/llm_routing/preference_based_routing/docker-compose.yaml +++ b/demos/llm_routing/preference_based_routing/docker-compose.yaml @@ -1,23 +1,5 @@ services: - plano: - build: - context: ../../../ - dockerfile: Dockerfile - ports: - - "12000:12000" - - "12001:12001" - environment: - - PLANO_CONFIG_PATH=/app/plano_config.yaml - - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set} - - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:?ANTHROPIC_API_KEY environment variable is required but not set} - - OTEL_TRACING_GRPC_ENDPOINT=http://host.docker.internal:4317 - - OTEL_TRACING_ENABLED=true - - RUST_LOG=debug - volumes: - - ./config.yaml:/app/plano_config.yaml:ro - - /etc/ssl/cert.pem:/etc/ssl/cert.pem - anythingllm: image: mintplexlabs/anythingllm restart: always @@ -28,7 +10,7 @@ services: environment: - STORAGE_DIR=/app/server/storage - LLM_PROVIDER=generic-openai - - 
GENERIC_OPEN_AI_BASE_PATH=http://plano:12000/v1 + - GENERIC_OPEN_AI_BASE_PATH=http://host.docker.internal:12000/v1 - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000 - GENERIC_OPEN_AI_API_KEY=sk-placeholder diff --git a/demos/llm_routing/preference_based_routing/plano_config_local.yaml b/demos/llm_routing/preference_based_routing/plano_config_local.yaml index 0a3db8bf..dbd287dd 100644 --- a/demos/llm_routing/preference_based_routing/plano_config_local.yaml +++ b/demos/llm_routing/preference_based_routing/plano_config_local.yaml @@ -13,7 +13,7 @@ model_providers: - name: arch-router model: arch/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M - base_url: http://host.docker.internal:11434 + base_url: http://localhost:11434 - model: openai/gpt-4o-mini access_key: $OPENAI_API_KEY diff --git a/demos/llm_routing/preference_based_routing/run_demo.sh b/demos/llm_routing/preference_based_routing/run_demo.sh new file mode 100755 index 00000000..c9525c26 --- /dev/null +++ b/demos/llm_routing/preference_based_routing/run_demo.sh @@ -0,0 +1,52 @@ +#!/bin/bash +set -e + +# Function to start the demo +start_demo() { + # Step 1: Check if .env file exists + if [ -f ".env" ]; then + echo ".env file already exists. Skipping creation." + else + # Step 2: Create `.env` file and set API keys + if [ -z "$OPENAI_API_KEY" ]; then + echo "Error: OPENAI_API_KEY environment variable is not set for the demo." + exit 1 + fi + if [ -z "$ANTHROPIC_API_KEY" ]; then + echo "Warning: ANTHROPIC_API_KEY environment variable is not set. Anthropic features may not work." + fi + + echo "Creating .env file..." + echo "OPENAI_API_KEY=$OPENAI_API_KEY" > .env + if [ -n "$ANTHROPIC_API_KEY" ]; then + echo "ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY" >> .env + fi + echo ".env file created with API keys." + fi + + # Step 3: Start Plano + echo "Starting Plano with config.yaml..." + planoai up config.yaml + + # Step 4: Start services + echo "Starting services using Docker Compose..." 
+ docker compose up -d +} + +# Function to stop the demo +stop_demo() { + # Step 1: Stop Docker Compose services + echo "Stopping Docker Compose services..." + docker compose down + + # Step 2: Stop Plano + echo "Stopping Plano..." + planoai down +} + +# Main script logic +if [ "$1" == "down" ]; then + stop_demo +else + start_demo +fi diff --git a/demos/shared/test_runner/run_demo_tests.sh b/demos/shared/test_runner/run_demo_tests.sh index 7feeb9ac..0c098106 100644 --- a/demos/shared/test_runner/run_demo_tests.sh +++ b/demos/shared/test_runner/run_demo_tests.sh @@ -21,7 +21,7 @@ do echo "****************************************" cd ../../$demo echo "starting plano" - planoai up config.yaml + planoai up --docker config.yaml echo "starting docker containers" # only execute docker compose if demo is llm_routing/preference_based_routing if [ "$demo" == "llm_routing/preference_based_routing" ]; then @@ -38,7 +38,7 @@ do exit 1 fi echo "stopping docker containers and plano" - planoai down + planoai down --docker docker compose down -v cd ../../shared/test_runner done diff --git a/docs/source/build_with_plano/includes/agent/function-calling-agent.yaml b/docs/source/build_with_plano/includes/agent/function-calling-agent.yaml index 904b12ce..1399cb9b 100644 --- a/docs/source/build_with_plano/includes/agent/function-calling-agent.yaml +++ b/docs/source/build_with_plano/includes/agent/function-calling-agent.yaml @@ -54,6 +54,6 @@ endpoints: # value could be ip address or a hostname with port # this could also be a list of endpoints for load balancing # for example endpoint: [ ip1:port, ip2:port ] - endpoint: host.docker.internal:18083 + endpoint: localhost:18083 # max time to wait for a connection to be established connect_timeout: 0.005s diff --git a/docs/source/concepts/llm_providers/model_aliases.rst b/docs/source/concepts/llm_providers/model_aliases.rst index 2d29be93..5d0a43a4 100644 --- a/docs/source/concepts/llm_providers/model_aliases.rst +++ 
b/docs/source/concepts/llm_providers/model_aliases.rst @@ -32,7 +32,7 @@ Basic Configuration access_key: $ANTHROPIC_API_KEY - model: ollama/llama3.1 - base_url: http://host.docker.internal:11434 + base_url: http://localhost:11434 # Define aliases that map to the models above model_aliases: diff --git a/docs/source/concepts/llm_providers/supported_providers.rst b/docs/source/concepts/llm_providers/supported_providers.rst index 4ad89931..e09061e7 100644 --- a/docs/source/concepts/llm_providers/supported_providers.rst +++ b/docs/source/concepts/llm_providers/supported_providers.rst @@ -598,9 +598,9 @@ Ollama - model: ollama/llama3.1 base_url: http://localhost:11434 - # Ollama in Docker (from host) + # Ollama running locally - model: ollama/codellama - base_url: http://host.docker.internal:11434 + base_url: http://localhost:11434 OpenAI-Compatible Providers diff --git a/docs/source/get_started/quickstart.rst b/docs/source/get_started/quickstart.rst index de68cbe3..16624393 100644 --- a/docs/source/get_started/quickstart.rst +++ b/docs/source/get_started/quickstart.rst @@ -17,11 +17,17 @@ Follow this guide to learn how to quickly set up Plano and integrate it into you Prerequisites ------------- -Before you begin, ensure you have the following: +Plano runs **natively** by default — no Docker or Rust toolchain required. Pre-compiled binaries are downloaded automatically on first run. + +1. `Python `_ (v3.10+) +2. Supported platforms: Linux (x86_64, aarch64), macOS (Apple Silicon) + +**Docker mode** (optional): + +If you prefer to run inside Docker, add ``--docker`` to ``planoai up`` / ``planoai down``. This requires: 1. `Docker System `_ (v24) 2. `Docker Compose `_ (v2.29) -3. `Python `_ (v3.10+) Plano's CLI allows you to manage and interact with the Plano efficiently. To install the CLI, simply run the following command: @@ -84,17 +90,20 @@ Step 2. 
Start plano Once the config file is created, ensure that you have environment variables set up for ``ANTHROPIC_API_KEY`` and ``OPENAI_API_KEY`` (or these are defined in a ``.env`` file). -Start Plano: - .. code-block:: console $ planoai up plano_config.yaml - # Or if installed with uv tool: uvx planoai up plano_config.yaml - 2024-12-05 11:24:51,288 - planoai.main - INFO - Starting plano cli version: 0.4.9 - 2024-12-05 11:24:51,825 - planoai.utils - INFO - Schema validation successful! - 2024-12-05 11:24:51,825 - planoai.main - INFO - Starting plano - ... - 2024-12-05 11:25:16,131 - planoai.core - INFO - Container is healthy! + +On the first run, Plano automatically downloads Envoy, WASM plugins, and brightstaff and caches them at ``~/.plano/``. + +To stop Plano, run ``planoai down``. + +**Docker mode** (optional): + +.. code-block:: console + + $ planoai up plano_config.yaml --docker + $ planoai down --docker Step 3: Interact with LLM ~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -185,9 +194,9 @@ Here is a minimal configuration that wires Plano-Orchestrator to two HTTP servic agents: - id: flight_agent - url: http://host.docker.internal:10520 # your flights service + url: http://localhost:10520 # your flights service - id: hotel_agent - url: http://host.docker.internal:10530 # your hotels service + url: http://localhost:10530 # your hotels service model_providers: - model: openai/gpt-4o diff --git a/docs/source/guides/observability/monitoring.rst b/docs/source/guides/observability/monitoring.rst index 9d497568..736e0a64 100644 --- a/docs/source/guides/observability/monitoring.rst +++ b/docs/source/guides/observability/monitoring.rst @@ -59,7 +59,7 @@ are some sample configuration files for both, respectively. 
scheme: http static_configs: - targets: - - host.docker.internal:19901 + - localhost:19901 params: format: ["prometheus"] diff --git a/docs/source/resources/deployment.rst b/docs/source/resources/deployment.rst index 7e72e578..8a44327f 100644 --- a/docs/source/resources/deployment.rst +++ b/docs/source/resources/deployment.rst @@ -3,7 +3,47 @@ Deployment ========== -This guide shows how to deploy Plano directly using Docker without the ``plano`` CLI, including basic runtime checks for routing and health monitoring. +Plano can be deployed in two ways: **natively** on the host (default) or inside a **Docker container**. + +Native Deployment (Default) +--------------------------- + +Plano runs natively by default. Pre-compiled binaries (Envoy, WASM plugins, brightstaff) are automatically downloaded on the first run and cached at ``~/.plano/``. + +Supported platforms: Linux (x86_64, aarch64), macOS (Apple Silicon). + +Start Plano +~~~~~~~~~~~~ + +.. code-block:: bash + + planoai up plano_config.yaml + +Options: + +- ``--foreground`` — stay attached and stream logs (Ctrl+C to stop) +- ``--with-tracing`` — start a local OTLP trace collector + +Runtime files (rendered configs, logs, PID file) are stored in ``~/.plano/run/``. + +Stop Plano +~~~~~~~~~~ + +.. code-block:: bash + + planoai down + +Build from Source (Developer) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If you want to build from source instead of using pre-compiled binaries, you need: + +- `Rust `_ with the ``wasm32-wasip1`` target +- OpenSSL dev headers (``libssl-dev`` on Debian/Ubuntu, ``openssl`` on macOS) + +.. code-block:: bash + + planoai build --native Docker Deployment ----------------- @@ -53,6 +93,13 @@ Check container health and logs: docker compose ps docker compose logs -f plano +You can also use the CLI with Docker mode: + +.. 
code-block:: bash + + planoai up plano_config.yaml --docker + planoai down --docker + Runtime Tests ------------- diff --git a/docs/source/resources/includes/agents/agents_config.yaml b/docs/source/resources/includes/agents/agents_config.yaml index 0b6aaba2..ef522337 100644 --- a/docs/source/resources/includes/agents/agents_config.yaml +++ b/docs/source/resources/includes/agents/agents_config.yaml @@ -2,9 +2,9 @@ version: v0.3.0 agents: - id: weather_agent - url: http://host.docker.internal:10510 + url: http://localhost:10510 - id: flight_agent - url: http://host.docker.internal:10520 + url: http://localhost:10520 model_providers: - model: openai/gpt-4o diff --git a/docs/source/resources/includes/plano_config_agents_filters.yaml b/docs/source/resources/includes/plano_config_agents_filters.yaml index dfc8fe7b..f726b121 100644 --- a/docs/source/resources/includes/plano_config_agents_filters.yaml +++ b/docs/source/resources/includes/plano_config_agents_filters.yaml @@ -2,16 +2,16 @@ version: v0.3.0 agents: - id: rag_agent - url: http://host.docker.internal:10505 + url: http://localhost:10505 filters: - id: query_rewriter - url: http://host.docker.internal:10501 + url: http://localhost:10501 # type: mcp # default is mcp # transport: streamable-http # default is streamable-http # tool: query_rewriter # default name is the filter id - id: context_builder - url: http://host.docker.internal:10502 + url: http://localhost:10502 model_providers: - model: openai/gpt-4o-mini diff --git a/docs/source/resources/includes/plano_config_full_reference.yaml b/docs/source/resources/includes/plano_config_full_reference.yaml index cc3973e0..a650baea 100644 --- a/docs/source/resources/includes/plano_config_full_reference.yaml +++ b/docs/source/resources/includes/plano_config_full_reference.yaml @@ -4,15 +4,15 @@ version: v0.3.0 # External HTTP agents - API type is controlled by request path (/v1/responses, /v1/messages, /v1/chat/completions) agents: - id: weather_agent # Example agent for 
weather - url: http://host.docker.internal:10510 + url: http://localhost:10510 - id: flight_agent # Example agent for flights - url: http://host.docker.internal:10520 + url: http://localhost:10520 # MCP filters applied to requests/responses (e.g., input validation, query rewriting) filters: - id: input_guards # Example filter for input validation - url: http://host.docker.internal:10500 + url: http://localhost:10500 # type: mcp (default) # transport: streamable-http (default) # tool: input_guards (default - same as filter id) diff --git a/docs/source/resources/includes/plano_config_full_reference_rendered.yaml b/docs/source/resources/includes/plano_config_full_reference_rendered.yaml index abd909a0..9717b53a 100644 --- a/docs/source/resources/includes/plano_config_full_reference_rendered.yaml +++ b/docs/source/resources/includes/plano_config_full_reference_rendered.yaml @@ -1,31 +1,31 @@ agents: - id: weather_agent - url: http://host.docker.internal:10510 + url: http://localhost:10510 - id: flight_agent - url: http://host.docker.internal:10520 + url: http://localhost:10520 endpoints: app_server: connect_timeout: 0.005s endpoint: 127.0.0.1 port: 80 flight_agent: - endpoint: host.docker.internal + endpoint: localhost port: 10520 protocol: http input_guards: - endpoint: host.docker.internal + endpoint: localhost port: 10500 protocol: http mistral_local: endpoint: 127.0.0.1 port: 8001 weather_agent: - endpoint: host.docker.internal + endpoint: localhost port: 10510 protocol: http filters: - id: input_guards - url: http://host.docker.internal:10500 + url: http://localhost:10500 listeners: - address: 0.0.0.0 agents: @@ -130,6 +130,6 @@ prompt_targets: required: true type: int tracing: - opentracing_grpc_endpoint: http://host.docker.internal:4317 + opentracing_grpc_endpoint: http://localhost:4317 random_sampling: 100 version: v0.3.0 diff --git a/tests/e2e/config_native_smoke.yaml b/tests/e2e/config_native_smoke.yaml new file mode 100644 index 00000000..ddb0134f --- 
/dev/null +++ b/tests/e2e/config_native_smoke.yaml @@ -0,0 +1,11 @@ +version: v0.3.0 + +listeners: + - type: model + name: model_listener + port: 12000 + +model_providers: + - model: openai/gpt-4o + access_key: $OPENAI_API_KEY + default: true diff --git a/tests/e2e/run_e2e_tests.sh b/tests/e2e/run_e2e_tests.sh index 80535368..c24931f4 100644 --- a/tests/e2e/run_e2e_tests.sh +++ b/tests/e2e/run_e2e_tests.sh @@ -45,8 +45,8 @@ uv sync log startup plano gateway with function calling demo cd ../../ -planoai down -planoai up demos/getting_started/weather_forecast/config.yaml +planoai down --docker +planoai up --docker demos/getting_started/weather_forecast/config.yaml cd - log running e2e tests for prompt gateway @@ -55,11 +55,11 @@ uv run pytest test_prompt_gateway.py log shutting down the plano gateway service for prompt_gateway demo log =============================================================== -planoai down +planoai down --docker log startup plano gateway with model alias routing demo cd ../../ -planoai up demos/llm_routing/model_alias_routing/config_with_aliases.yaml +planoai up --docker demos/llm_routing/model_alias_routing/config_with_aliases.yaml cd - log running e2e tests for model alias routing @@ -71,8 +71,8 @@ log ======================================== uv run pytest test_openai_responses_api_client.py log startup plano gateway with state storage for openai responses api client demo -planoai down -planoai up config_memory_state_v1_responses.yaml +planoai down --docker +planoai up --docker config_memory_state_v1_responses.yaml log running e2e tests for openai responses api client log ======================================== diff --git a/tests/e2e/run_model_alias_tests.sh b/tests/e2e/run_model_alias_tests.sh index 4c2aa7ee..49a2de13 100755 --- a/tests/e2e/run_model_alias_tests.sh +++ b/tests/e2e/run_model_alias_tests.sh @@ -36,8 +36,8 @@ uv sync # Start gateway with model alias routing config log "startup plano gateway with model alias routing demo" cd 
../../ -planoai down || true -planoai up demos/llm_routing/model_alias_routing/config_with_aliases.yaml +planoai down --docker || true +planoai up --docker demos/llm_routing/model_alias_routing/config_with_aliases.yaml cd - # Run both test suites that share this config in a single pytest invocation @@ -46,4 +46,4 @@ uv run pytest -n auto test_model_alias_routing.py test_openai_responses_api_clie # Cleanup log "shutting down" -planoai down || true +planoai down --docker || true diff --git a/tests/e2e/run_prompt_gateway_tests.sh b/tests/e2e/run_prompt_gateway_tests.sh index bb9cdd43..58d850d8 100755 --- a/tests/e2e/run_prompt_gateway_tests.sh +++ b/tests/e2e/run_prompt_gateway_tests.sh @@ -41,8 +41,8 @@ cd - # Start gateway with prompt_gateway config log "startup plano gateway with function calling demo" cd ../../ -planoai down || true -planoai up demos/getting_started/weather_forecast/config.yaml +planoai down --docker || true +planoai up --docker demos/getting_started/weather_forecast/config.yaml cd - # Run tests @@ -51,7 +51,7 @@ uv run pytest test_prompt_gateway.py # Cleanup log "shutting down" -planoai down || true +planoai down --docker || true cd ../../demos/getting_started/weather_forecast docker compose down cd - diff --git a/tests/e2e/run_responses_state_tests.sh b/tests/e2e/run_responses_state_tests.sh index 3c974402..d503680c 100755 --- a/tests/e2e/run_responses_state_tests.sh +++ b/tests/e2e/run_responses_state_tests.sh @@ -35,8 +35,8 @@ uv sync # Start gateway with state storage config log "startup plano gateway with state storage config" cd ../../ -planoai down || true -planoai up tests/e2e/config_memory_state_v1_responses.yaml +planoai down --docker || true +planoai up --docker tests/e2e/config_memory_state_v1_responses.yaml cd - # Run tests @@ -45,4 +45,4 @@ uv run pytest test_openai_responses_api_client_with_state.py # Cleanup log "shutting down" -planoai down || true +planoai down --docker || true