Run plano natively by default (#744)

2026-04-25 00:36:34 +02:00 · 2026-03-05 07:35:25 -08:00 · 2026-03-05 07:35:25 -08:00 · f63d5de02c
commit f63d5de02c
parent 198c912202
56 changed files with 1557 additions and 256 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -53,6 +53,60 @@ jobs:
      - name: Run tests
        run: uv run pytest

+  # ──────────────────────────────────────────────
+  # Native mode smoke test — build from source & start natively
+  # ──────────────────────────────────────────────
+  native-smoke-test:
+    # Builds Plano from source with the planoai CLI, starts it without Docker,
+    # and polls the local health endpoint to confirm the gateway came up.
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v6
+
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: "3.12"
+
+      - name: Install uv
+        run: curl -LsSf https://astral.sh/uv/install.sh | sh
+
+      - name: Install Rust
+        uses: dtolnay/rust-toolchain@stable
+        with:
+          # WASM target requested alongside the stable toolchain
+          targets: wasm32-wasip1
+
+      - name: Install planoai CLI
+        working-directory: ./cli
+        run: |
+          uv sync
+          uv tool install .
+
+      - name: Build native binaries
+        run: planoai build
+
+      - name: Start plano natively
+        env:
+          # Placeholder value (per its name, not a real key); presumably the
+          # smoke config references this variable — confirm against the config.
+          OPENAI_API_KEY: test-key-not-used
+        run: planoai up tests/e2e/config_native_smoke.yaml
+
+      - name: Health check
+        # Poll /healthz once per second, up to 30 attempts; on failure dump the
+        # envoy and brightstaff logs (if present) before failing the job.
+        run: |
+          for i in $(seq 1 30); do
+            if curl -sf http://localhost:12000/healthz > /dev/null 2>&1; then
+              echo "Health check passed"
+              exit 0
+            fi
+            sleep 1
+          done
+          echo "Health check failed after 30s"
+          cat ~/.plano/run/logs/envoy.log || true
+          cat ~/.plano/run/logs/brightstaff.log || true
+          exit 1
+
+      - name: Stop plano
+        # always() runs this step even when an earlier step failed
+        if: always()
+        run: planoai down || true
+
  # ──────────────────────────────────────────────
  # Single Docker build — shared by all downstream jobs
  # ──────────────────────────────────────────────
@ -98,7 +152,6 @@ jobs:
  # Validate plano config
  # ──────────────────────────────────────────────
  validate-config:
-    needs: docker-build
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
@ -109,14 +162,8 @@ jobs:
        with:
          python-version: "3.14"

-      - name: Download plano image
-        uses: actions/download-artifact@v7
-        with:
-          name: plano-image
-          path: /tmp
-
-      - name: Load plano image
-        run: docker load -i /tmp/plano-image.tar
+      - name: Install planoai
+        run: pip install ./cli

      - name: Validate plano config
        run: bash config/validate_plano_config.sh
--- a/.github/workflows/publish-binaries.yml
+++ b/.github/workflows/publish-binaries.yml
@ -0,0 +1,109 @@
+name: Publish pre-compiled binaries (release)
+
+on:
+  release:
+    types: [published]
+  workflow_dispatch:
+    inputs:
+      tag:
+        description: "Release tag to upload binaries to (e.g. 0.4.9)"
+        required: true
+
+permissions:
+  contents: write
+
+jobs:
+  build-wasm-plugins:
+    # Builds the llm_gateway and prompt_gateway WASM plugins and attaches the
+    # gzipped artifacts to the target release.
+    runs-on: ubuntu-latest
+    steps:
+      # NOTE(review): on workflow_dispatch this checks out the ref the workflow
+      # was dispatched on, not necessarily inputs.tag — confirm that is intended.
+      - name: Checkout Repository
+        uses: actions/checkout@v4
+
+      - name: Install Rust
+        uses: dtolnay/rust-toolchain@stable
+        with:
+          targets: wasm32-wasip1
+
+      - name: Build WASM plugins
+        working-directory: crates
+        run: cargo build --release --target wasm32-wasip1 -p llm_gateway -p prompt_gateway
+
+      - name: Compress and upload WASM plugins to release
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          # Pass the tag through the environment instead of interpolating it
+          # into the script, so a crafted tag value cannot inject shell code.
+          RELEASE_TAG: ${{ github.event.release.tag_name || inputs.tag }}
+        run: |
+          gzip -k crates/target/wasm32-wasip1/release/prompt_gateway.wasm
+          gzip -k crates/target/wasm32-wasip1/release/llm_gateway.wasm
+          gh release upload "$RELEASE_TAG" \
+            crates/target/wasm32-wasip1/release/prompt_gateway.wasm.gz \
+            crates/target/wasm32-wasip1/release/llm_gateway.wasm.gz \
+            --clobber
+
+  build-brightstaff-linux-amd64:
+    # Builds brightstaff on an ubuntu-latest runner and attaches the gzipped
+    # binary to the target release as brightstaff-linux-amd64.gz.
+    runs-on: ubuntu-latest
+    steps:
+      # NOTE(review): on workflow_dispatch this checks out the ref the workflow
+      # was dispatched on, not necessarily inputs.tag — confirm that is intended.
+      - name: Checkout Repository
+        uses: actions/checkout@v4
+
+      - name: Install Rust
+        uses: dtolnay/rust-toolchain@stable
+
+      - name: Build brightstaff
+        working-directory: crates
+        run: cargo build --release -p brightstaff
+
+      - name: Compress and upload brightstaff to release
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          # Pass the tag through the environment instead of interpolating it
+          # into the script, so a crafted tag value cannot inject shell code.
+          RELEASE_TAG: ${{ github.event.release.tag_name || inputs.tag }}
+        run: |
+          cp crates/target/release/brightstaff brightstaff-linux-amd64
+          gzip brightstaff-linux-amd64
+          gh release upload "$RELEASE_TAG" \
+            brightstaff-linux-amd64.gz \
+            --clobber
+
+  build-brightstaff-linux-arm64:
+    # Builds brightstaff on a runner labelled linux-arm64 and attaches the
+    # gzipped binary to the target release as brightstaff-linux-arm64.gz.
+    runs-on: [linux-arm64]
+    steps:
+      # NOTE(review): on workflow_dispatch this checks out the ref the workflow
+      # was dispatched on, not necessarily inputs.tag — confirm that is intended.
+      - name: Checkout Repository
+        uses: actions/checkout@v4
+
+      - name: Install Rust
+        uses: dtolnay/rust-toolchain@stable
+
+      - name: Build brightstaff
+        working-directory: crates
+        run: cargo build --release -p brightstaff
+
+      - name: Compress and upload brightstaff to release
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          # Pass the tag through the environment instead of interpolating it
+          # into the script, so a crafted tag value cannot inject shell code.
+          RELEASE_TAG: ${{ github.event.release.tag_name || inputs.tag }}
+        run: |
+          cp crates/target/release/brightstaff brightstaff-linux-arm64
+          gzip brightstaff-linux-arm64
+          gh release upload "$RELEASE_TAG" \
+            brightstaff-linux-arm64.gz \
+            --clobber
+
+  build-brightstaff-darwin-arm64:
+    # Builds brightstaff on a macos-latest runner and attaches the gzipped
+    # binary to the target release as brightstaff-darwin-arm64.gz.
+    runs-on: macos-latest
+    steps:
+      # NOTE(review): on workflow_dispatch this checks out the ref the workflow
+      # was dispatched on, not necessarily inputs.tag — confirm that is intended.
+      - name: Checkout Repository
+        uses: actions/checkout@v4
+
+      - name: Install Rust
+        uses: dtolnay/rust-toolchain@stable
+
+      - name: Build brightstaff
+        working-directory: crates
+        run: cargo build --release -p brightstaff
+
+      - name: Compress and upload brightstaff to release
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          # Pass the tag through the environment instead of interpolating it
+          # into the script, so a crafted tag value cannot inject shell code.
+          RELEASE_TAG: ${{ github.event.release.tag_name || inputs.tag }}
+        run: |
+          cp crates/target/release/brightstaff brightstaff-darwin-arm64
+          gzip brightstaff-darwin-arm64
+          gh release upload "$RELEASE_TAG" \
+            brightstaff-darwin-arm64.gz \
+            --clobber
--- a/7
+++ b/7
@ -1,3 +1,6 @@
+# Envoy version — keep in sync with cli/planoai/consts.py ENVOY_VERSION
+ARG ENVOY_VERSION=v1.37.0
+
 # --- Dependency cache ---
 FROM rust:1.93.0 AS deps
 RUN rustup -v target add wasm32-wasip1
@ -40,7 +43,7 @@ COPY crates/brightstaff/src    brightstaff/src
 RUN find common hermesllm brightstaff -name "*.rs" -exec touch {} +
 RUN cargo build --release -p brightstaff

-FROM docker.io/envoyproxy/envoy:v1.37.0 AS envoy
+FROM docker.io/envoyproxy/envoy:${ENVOY_VERSION} AS envoy

 FROM python:3.14-slim AS arch

@ -66,6 +69,8 @@ RUN pip install --no-cache-dir uv
 COPY cli/pyproject.toml ./
 COPY cli/uv.lock ./
 COPY cli/README.md ./
+COPY config/plano_config_schema.yaml /config/plano_config_schema.yaml
+COPY config/envoy.template.yaml /config/envoy.template.yaml

 RUN uv run pip install --no-cache-dir .

--- a/cli/planoai/consts.py
+++ b/cli/planoai/consts.py
@ -6,4 +6,16 @@ PLANO_COLOR = "#969FF4"
 SERVICE_NAME_ARCHGW = "plano"
 PLANO_DOCKER_NAME = "plano"
 PLANO_DOCKER_IMAGE = os.getenv("PLANO_DOCKER_IMAGE", "katanemo/plano:0.4.9")
-DEFAULT_OTEL_TRACING_GRPC_ENDPOINT = "http://host.docker.internal:4317"
+# NOTE(review): this value is now identical to
+# DEFAULT_NATIVE_OTEL_TRACING_GRPC_ENDPOINT below. Inside a container
+# "localhost" is the container itself — confirm Docker mode still reaches a
+# collector running on the host when this default is used.
+DEFAULT_OTEL_TRACING_GRPC_ENDPOINT = "http://localhost:4317"
+
+# Native mode constants
+# Root directory for native-mode state: ~/.plano
+PLANO_HOME = os.path.join(os.path.expanduser("~"), ".plano")
+# Runtime directory (holds the PID file defined below)
+PLANO_RUN_DIR = os.path.join(PLANO_HOME, "run")
+# Directory for downloaded executables
+PLANO_BIN_DIR = os.path.join(PLANO_HOME, "bin")
+# Directory for downloaded WASM plugins
+PLANO_PLUGINS_DIR = os.path.join(PLANO_HOME, "plugins")
+ENVOY_VERSION = "v1.37.0"  # keep in sync with Dockerfile ARG ENVOY_VERSION
+NATIVE_PID_FILE = os.path.join(PLANO_RUN_DIR, "plano.pid")
+DEFAULT_NATIVE_OTEL_TRACING_GRPC_ENDPOINT = "http://localhost:4317"
+
+# GitHub "owner/repo" slug and the release-asset download URL prefix built from it
+PLANO_GITHUB_REPO = "katanemo/archgw"
+PLANO_RELEASE_BASE_URL = f"https://github.com/{PLANO_GITHUB_REPO}/releases/download"
--- a/cli/planoai/core.py
+++ b/cli/planoai/core.py
@ -33,8 +33,11 @@ def _get_gateway_ports(plano_config_file: str) -> list[int]:
    with open(plano_config_file) as f:
        plano_config_dict = yaml.safe_load(f)

+    model_providers = plano_config_dict.get("llm_providers") or plano_config_dict.get(
+        "model_providers"
+    )
    listeners, _, _ = convert_legacy_listeners(
-        plano_config_dict.get("listeners"), plano_config_dict.get("llm_providers")
+        plano_config_dict.get("listeners"), model_providers
    )

    all_ports = [listener.get("port") for listener in listeners]
--- a/cli/planoai/docker_cli.py
+++ b/cli/planoai/docker_cli.py
@ -40,11 +40,35 @@ def docker_remove_container(container: str) -> str:
    return result.returncode


+def _prepare_docker_config(plano_config_file: str) -> str:
+    """Return a config path suitable for mounting into the Docker container.
+
+    Configs are written with ``localhost`` for native-first mode, but a
+    container needs ``host.docker.internal`` to reach services on the host.
+    Every occurrence of ``localhost`` in the file text is substituted.
+
+    Args:
+        plano_config_file: Path to the user's config file.
+
+    Returns:
+        The original path when the file contains no ``localhost``; otherwise
+        the path of a new temporary ``.yaml`` file holding the rewritten text.
+        The temporary file is created with ``delete=False`` and is not removed
+        by this function.
+    """
+    import tempfile
+
+    with open(plano_config_file, "r") as source:
+        original_text = source.read()
+
+    rewritten_text = original_text.replace("localhost", "host.docker.internal")
+    if rewritten_text == original_text:
+        # Nothing to substitute: mount the user's file directly.
+        return plano_config_file
+
+    with tempfile.NamedTemporaryFile(
+        mode="w", suffix=".yaml", prefix="plano_config_", delete=False
+    ) as handle:
+        handle.write(rewritten_text)
+    return handle.name
+
+
 def docker_start_plano_detached(
    plano_config_file: str,
    env: dict,
    gateway_ports: list[int],
 ) -> str:
+    docker_config = _prepare_docker_config(plano_config_file)
    env_args = [item for key, value in env.items() for item in ["-e", f"{key}={value}"]]

    port_mappings = [
@ -58,7 +82,7 @@ def docker_start_plano_detached(
    port_mappings_args = [item for port in port_mappings for item in ("-p", port)]

    volume_mappings = [
-        f"{plano_config_file}:/app/plano_config.yaml:ro",
+        f"{docker_config}:/app/plano_config.yaml:ro",
    ]
    volume_mappings_args = [
        item for volume in volume_mappings for item in ("-v", volume)
--- a/cli/planoai/main.py
+++ b/cli/planoai/main.py
@ -30,6 +30,7 @@ from planoai.init_cmd import init as init_cmd
 from planoai.trace_cmd import trace as trace_cmd, start_trace_listener_background
 from planoai.consts import (
    DEFAULT_OTEL_TRACING_GRPC_ENDPOINT,
+    DEFAULT_NATIVE_OTEL_TRACING_GRPC_ENDPOINT,
    PLANO_DOCKER_IMAGE,
    PLANO_DOCKER_NAME,
 )
@ -130,7 +131,13 @@ def main(ctx, version):


@click.command()
-def build():
+@click.option(
+    "--docker",
+    default=False,
+    help="Build the Docker image instead of native binaries.",
+    is_flag=True,
+)
+def build(docker):
    """Build Plano from source. Works from any directory within the repo."""

    # Find the repo root
@ -141,6 +148,68 @@ def build():
        )
        sys.exit(1)

+    if not docker:
+        import shutil
+
+        crates_dir = os.path.join(repo_root, "crates")
+        console = _console()
+        _print_cli_header(console)
+
+        if not shutil.which("cargo"):
+            console.print(
+                "[red]✗[/red] [bold]cargo[/bold] not found. "
+                "Install Rust: [cyan]https://rustup.rs[/cyan]"
+            )
+            sys.exit(1)
+
+        console.print("[dim]Building WASM plugins (wasm32-wasip1)...[/dim]")
+        try:
+            subprocess.run(
+                [
+                    "cargo",
+                    "build",
+                    "--release",
+                    "--target",
+                    "wasm32-wasip1",
+                    "-p",
+                    "llm_gateway",
+                    "-p",
+                    "prompt_gateway",
+                ],
+                cwd=crates_dir,
+                check=True,
+            )
+            console.print("[green]✓[/green] WASM plugins built")
+        except subprocess.CalledProcessError as e:
+            console.print(f"[red]✗[/red] WASM build failed: {e}")
+            sys.exit(1)
+
+        console.print("[dim]Building brightstaff (native)...[/dim]")
+        try:
+            subprocess.run(
+                [
+                    "cargo",
+                    "build",
+                    "--release",
+                    "-p",
+                    "brightstaff",
+                ],
+                cwd=crates_dir,
+                check=True,
+            )
+            console.print("[green]✓[/green] brightstaff built")
+        except subprocess.CalledProcessError as e:
+            console.print(f"[red]✗[/red] brightstaff build failed: {e}")
+            sys.exit(1)
+
+        wasm_dir = os.path.join(crates_dir, "target", "wasm32-wasip1", "release")
+        native_dir = os.path.join(crates_dir, "target", "release")
+        console.print(f"\n[bold]Build artifacts:[/bold]")
+        console.print(f"  {os.path.join(wasm_dir, 'prompt_gateway.wasm')}")
+        console.print(f"  {os.path.join(wasm_dir, 'llm_gateway.wasm')}")
+        console.print(f"  {os.path.join(native_dir, 'brightstaff')}")
+        return
+
    dockerfile_path = os.path.join(repo_root, "Dockerfile")

    if not os.path.exists(dockerfile_path):
@ -192,7 +261,13 @@ def build():
    help="Port for the OTLP trace collector (default: 4317).",
    show_default=True,
 )
-def up(file, path, foreground, with_tracing, tracing_port):
+@click.option(
+    "--docker",
+    default=False,
+    help="Run Plano inside Docker instead of natively.",
+    is_flag=True,
+)
+def up(file, path, foreground, with_tracing, tracing_port, docker):
    """Starts Plano."""
    from rich.status import Status

@ -209,8 +284,28 @@ def up(file, path, foreground, with_tracing, tracing_port):
        )
        sys.exit(1)

+    if not docker:
+        from planoai.native_runner import native_validate_config
+
        with Status(
-        "[dim]Validating configuration[/dim]", spinner="dots", spinner_style="dim"
+            "[dim]Validating configuration[/dim]",
+            spinner="dots",
+            spinner_style="dim",
+        ):
+            try:
+                native_validate_config(plano_config_file)
+            except SystemExit:
+                console.print(f"[red]✗[/red] Validation failed")
+                sys.exit(1)
+            except Exception as e:
+                console.print(f"[red]✗[/red] Validation failed")
+                console.print(f"  [dim]{str(e).strip()}[/dim]")
+                sys.exit(1)
+    else:
+        with Status(
+            "[dim]Validating configuration (Docker)[/dim]",
+            spinner="dots",
+            spinner_style="dim",
        ):
            (
                validation_return_code,
@ -227,8 +322,13 @@ def up(file, path, foreground, with_tracing, tracing_port):
    console.print(f"[green]✓[/green] Configuration valid")

    # Set up environment
+    default_otel = (
+        DEFAULT_OTEL_TRACING_GRPC_ENDPOINT
+        if docker
+        else DEFAULT_NATIVE_OTEL_TRACING_GRPC_ENDPOINT
+    )
    env_stage = {
-        "OTEL_TRACING_GRPC_ENDPOINT": DEFAULT_OTEL_TRACING_GRPC_ENDPOINT,
+        "OTEL_TRACING_GRPC_ENDPOINT": default_otel,
    }
    env = os.environ.copy()
    env.pop("PATH", None)
@ -296,12 +396,19 @@ def up(file, path, foreground, with_tracing, tracing_port):
                sys.exit(1)

        # Update the OTEL endpoint so the gateway sends traces to the right port
-        env_stage[
-            "OTEL_TRACING_GRPC_ENDPOINT"
-        ] = f"http://host.docker.internal:{tracing_port}"
+        tracing_host = "host.docker.internal" if docker else "localhost"
+        otel_endpoint = f"http://{tracing_host}:{tracing_port}"
+        env_stage["OTEL_TRACING_GRPC_ENDPOINT"] = otel_endpoint

    env.update(env_stage)
    try:
+        if not docker:
+            from planoai.native_runner import start_native
+
+            start_native(
+                plano_config_file, env, foreground=foreground, with_tracing=with_tracing
+            )
+        else:
            start_plano(plano_config_file, env, foreground=foreground)

        # When tracing is enabled but --foreground is not, keep the process
@ -320,13 +427,29 @@ def up(file, path, foreground, with_tracing, tracing_port):


@click.command()
-def down():
+@click.option(
+    "--docker",
+    default=False,
+    help="Stop a Docker-based Plano instance.",
+    is_flag=True,
+)
+def down(docker):
    """Stops Plano."""
    console = _console()
    _print_cli_header(console)

+    if not docker:
+        from planoai.native_runner import stop_native
+
        with console.status(
-        f"[{PLANO_COLOR}]Shutting down Plano...[/{PLANO_COLOR}]", spinner="dots"
+            f"[{PLANO_COLOR}]Shutting down Plano...[/{PLANO_COLOR}]",
+            spinner="dots",
+        ):
+            stop_native()
+    else:
+        with console.status(
+            f"[{PLANO_COLOR}]Shutting down Plano (Docker)...[/{PLANO_COLOR}]",
+            spinner="dots",
        ):
            stop_docker_container()

--- a/cli/planoai/native_binaries.py
+++ b/cli/planoai/native_binaries.py
@ -0,0 +1,308 @@
+import gzip
+import os
+import platform
+import shutil
+import subprocess
+import sys
+import tarfile
+import tempfile
+
+import planoai
+from planoai.consts import (
+    ENVOY_VERSION,
+    PLANO_BIN_DIR,
+    PLANO_PLUGINS_DIR,
+    PLANO_RELEASE_BASE_URL,
+)
+from planoai.utils import find_repo_root, getLogger
+
+log = getLogger(__name__)
+
+
+def _get_platform_slug():
+    """Map the current OS / CPU architecture to a binary-download slug.
+
+    Returns:
+        One of ``"linux-amd64"``, ``"linux-arm64"`` or ``"darwin-arm64"``.
+
+    Exits the process with status 1 (after printing an error) when the
+    platform is not one of the supported combinations.
+    """
+    os_name = platform.system().lower()
+    arch = platform.machine().lower()
+
+    supported = {
+        ("linux", "x86_64"): "linux-amd64",
+        ("linux", "aarch64"): "linux-arm64",
+        ("darwin", "arm64"): "darwin-arm64",
+    }
+
+    key = (os_name, arch)
+    if key in supported:
+        return supported[key]
+
+    # Intel Macs get a dedicated message before the generic one.
+    if key == ("darwin", "x86_64"):
+        print(
+            "Error: macOS x86_64 (Intel) is not supported. "
+            "Pre-built binaries are only available for Apple Silicon (arm64)."
+        )
+        sys.exit(1)
+    print(
+        f"Error: Unsupported platform {os_name}/{arch}. "
+        "Supported platforms: linux-amd64, linux-arm64, darwin-arm64"
+    )
+    sys.exit(1)
+
+
+def _download_file(url, dest):
+    """Download a file from *url* to *dest* using curl.
+
+    Args:
+        url: Source URL.
+        dest: Local path curl writes the response body to.
+
+    Exits the process with status 1 (after printing an error) when curl
+    returns a non-zero status or when the ``curl`` executable is missing.
+    """
+    try:
+        subprocess.run(
+            ["curl", "-fSL", "-o", dest, url],
+            check=True,
+        )
+    except FileNotFoundError:
+        # curl itself is not installed; without this the user gets a traceback.
+        print("Error downloading: 'curl' was not found on PATH.")
+        print(f"URL: {url}")
+        print("Please install curl and try again.")
+        sys.exit(1)
+    except subprocess.CalledProcessError as e:
+        print(f"Error downloading: {e}")
+        print(f"URL: {url}")
+        print("Please check your internet connection and try again.")
+        sys.exit(1)
+
+
+def ensure_envoy_binary():
+    """Ensure the pinned Envoy binary is installed and return its path.
+
+    A cached binary in ``PLANO_BIN_DIR`` is reused only when it is executable
+    and the adjacent ``envoy.version`` file matches ``ENVOY_VERSION``.
+    Otherwise the release archive for the current platform is downloaded and
+    the ``bin/envoy`` member is extracted.
+
+    The binary is streamed to a staging file and moved into place with
+    ``os.replace`` so a failed or interrupted extraction never overwrites a
+    working install with a partial file.
+
+    Returns:
+        Path to the Envoy executable.
+
+    Exits the process with status 1 when the platform is unsupported, the
+    download fails, or the archive does not contain the binary.
+    """
+    envoy_path = os.path.join(PLANO_BIN_DIR, "envoy")
+    version_path = os.path.join(PLANO_BIN_DIR, "envoy.version")
+
+    if os.path.exists(envoy_path) and os.access(envoy_path, os.X_OK):
+        # Check if cached binary matches the pinned version
+        if os.path.exists(version_path):
+            with open(version_path, "r") as f:
+                cached_version = f.read().strip()
+            if cached_version == ENVOY_VERSION:
+                log.info(f"Envoy {ENVOY_VERSION} found at {envoy_path}")
+                return envoy_path
+            print(
+                f"Envoy version changed ({cached_version} → {ENVOY_VERSION}), re-downloading..."
+            )
+        else:
+            log.info(
+                f"Envoy binary found at {envoy_path} (unknown version, re-downloading...)"
+            )
+
+    slug = _get_platform_slug()
+    url = (
+        f"https://github.com/tetratelabs/archive-envoy/releases/download/"
+        f"{ENVOY_VERSION}/envoy-{ENVOY_VERSION}-{slug}.tar.xz"
+    )
+
+    os.makedirs(PLANO_BIN_DIR, exist_ok=True)
+
+    print(f"Downloading Envoy {ENVOY_VERSION} for {slug}...")
+    print(f"  URL: {url}")
+
+    # Reserve a temp file name for curl to write the archive into.
+    with tempfile.NamedTemporaryFile(suffix=".tar.xz", delete=False) as tmp:
+        tmp_path = tmp.name
+    # Staging file lives next to the final path so os.replace stays on one filesystem.
+    staging_path = envoy_path + ".download"
+
+    try:
+        _download_file(url, tmp_path)
+
+        print("Extracting Envoy binary...")
+        with tarfile.open(tmp_path, "r:xz") as tar:
+            # Find the envoy binary inside the archive
+            members = tar.getmembers()
+            envoy_member = next(
+                (
+                    member
+                    for member in members
+                    if member.name.endswith("/bin/envoy") or member.name == "bin/envoy"
+                ),
+                None,
+            )
+
+            if envoy_member is None:
+                print("Error: Could not find envoy binary in the downloaded archive.")
+                print("Archive contents:")
+                for member in members:
+                    print(f"  {member.name}")
+                sys.exit(1)
+
+            # Extract just the binary
+            extracted = tar.extractfile(envoy_member)
+            if extracted is None:
+                print("Error: Could not extract envoy binary from archive.")
+                sys.exit(1)
+
+            # Stream instead of reading the whole binary into memory.
+            with extracted, open(staging_path, "wb") as out:
+                shutil.copyfileobj(extracted, out)
+
+        os.chmod(staging_path, 0o755)
+        os.replace(staging_path, envoy_path)
+        with open(version_path, "w") as f:
+            f.write(ENVOY_VERSION)
+        print(f"Envoy {ENVOY_VERSION} installed at {envoy_path}")
+        return envoy_path
+
+    finally:
+        # Remove the archive and any staging file left by a failed extraction.
+        for leftover in (tmp_path, staging_path):
+            if os.path.exists(leftover):
+                os.unlink(leftover)
+
+
+def _find_local_wasm_plugins():
+    """Look for WASM plugins built from source in the repo checkout.
+
+    Returns:
+        ``(prompt_gateway_path, llm_gateway_path)`` when both files exist
+        under ``crates/target/wasm32-wasip1/release``; ``None`` when no repo
+        root is found or either file is missing.
+    """
+    root = find_repo_root()
+    if not root:
+        return None
+    release_dir = os.path.join(root, "crates", "target", "wasm32-wasip1", "release")
+    candidates = tuple(
+        os.path.join(release_dir, plugin)
+        for plugin in ("prompt_gateway.wasm", "llm_gateway.wasm")
+    )
+    return candidates if all(os.path.exists(p) for p in candidates) else None
+
+
+def _find_local_brightstaff():
+    """Look for a brightstaff binary built from source in the repo checkout.
+
+    Returns:
+        Path to ``crates/target/release/brightstaff`` when it exists and is
+        executable; ``None`` when no repo root is found or the binary is absent.
+    """
+    root = find_repo_root()
+    if not root:
+        return None
+    candidate = os.path.join(root, "crates", "target", "release", "brightstaff")
+    usable = os.path.exists(candidate) and os.access(candidate, os.X_OK)
+    return candidate if usable else None
+
+
+def ensure_wasm_plugins():
+    """Find or download the WASM plugins.
+
+    Resolution order: local source build → cached download → fresh download.
+    The cache in ``PLANO_PLUGINS_DIR`` is reused only when both plugin files
+    exist and ``wasm.version`` matches ``planoai.__version__``.
+
+    Each plugin is decompressed to a ``.part`` file and moved into place with
+    ``os.replace``. A failed download or decompression therefore cannot leave
+    a truncated plugin that a later run would accept as a valid cache entry,
+    and the ``.gz`` download is always cleaned up.
+
+    Returns:
+        Tuple ``(prompt_gateway_path, llm_gateway_path)``.
+    """
+    # 1. Local source build (inside repo)
+    local = _find_local_wasm_plugins()
+    if local:
+        log.info(f"Using locally-built WASM plugins: {local[0]}")
+        return local
+
+    # 2. Cached download
+    version = planoai.__version__
+    version_path = os.path.join(PLANO_PLUGINS_DIR, "wasm.version")
+    prompt_gw_path = os.path.join(PLANO_PLUGINS_DIR, "prompt_gateway.wasm")
+    llm_gw_path = os.path.join(PLANO_PLUGINS_DIR, "llm_gateway.wasm")
+
+    if os.path.exists(prompt_gw_path) and os.path.exists(llm_gw_path):
+        if os.path.exists(version_path):
+            with open(version_path, "r") as f:
+                cached_version = f.read().strip()
+            if cached_version == version:
+                log.info(f"WASM plugins {version} found at {PLANO_PLUGINS_DIR}")
+                return prompt_gw_path, llm_gw_path
+            print(
+                f"WASM plugins version changed ({cached_version} → {version}), re-downloading..."
+            )
+        else:
+            log.info("WASM plugins found (unknown version, re-downloading...)")
+
+    # 3. Download from GitHub releases (gzipped)
+    os.makedirs(PLANO_PLUGINS_DIR, exist_ok=True)
+
+    for name, dest in [
+        ("prompt_gateway.wasm", prompt_gw_path),
+        ("llm_gateway.wasm", llm_gw_path),
+    ]:
+        gz_name = f"{name}.gz"
+        url = f"{PLANO_RELEASE_BASE_URL}/{version}/{gz_name}"
+        print(f"Downloading {gz_name} ({version})...")
+        print(f"  URL: {url}")
+        gz_dest = dest + ".gz"
+        part_dest = dest + ".part"
+        try:
+            _download_file(url, gz_dest)
+            with gzip.open(gz_dest, "rb") as f_in, open(part_dest, "wb") as f_out:
+                shutil.copyfileobj(f_in, f_out)
+            # Only a fully decompressed plugin replaces the destination.
+            os.replace(part_dest, dest)
+        finally:
+            # Clean up the download and any partial output, success or not.
+            for leftover in (gz_dest, part_dest):
+                if os.path.exists(leftover):
+                    os.unlink(leftover)
+        print(f"  Saved to {dest}")
+
+    with open(version_path, "w") as f:
+        f.write(version)
+
+    return prompt_gw_path, llm_gw_path
+
+
+def ensure_brightstaff_binary():
+    """Find or download the brightstaff binary.
+
+    Resolution order: local source build → cached download → fresh download.
+    The cache in ``PLANO_BIN_DIR`` is reused only when the binary is
+    executable and ``brightstaff.version`` matches ``planoai.__version__``.
+
+    The download is decompressed to a ``.part`` file, marked executable and
+    moved into place with ``os.replace``. A failed download or decompression
+    therefore never leaves a truncated executable at the final path, and the
+    ``.gz`` download is always cleaned up.
+
+    Returns:
+        Path to the brightstaff executable.
+    """
+    # 1. Local source build (inside repo)
+    local = _find_local_brightstaff()
+    if local:
+        log.info(f"Using locally-built brightstaff: {local}")
+        return local
+
+    # 2. Cached download
+    version = planoai.__version__
+    brightstaff_path = os.path.join(PLANO_BIN_DIR, "brightstaff")
+    version_path = os.path.join(PLANO_BIN_DIR, "brightstaff.version")
+
+    if os.path.exists(brightstaff_path) and os.access(brightstaff_path, os.X_OK):
+        if os.path.exists(version_path):
+            with open(version_path, "r") as f:
+                cached_version = f.read().strip()
+            if cached_version == version:
+                log.info(f"brightstaff {version} found at {brightstaff_path}")
+                return brightstaff_path
+            print(
+                f"brightstaff version changed ({cached_version} → {version}), re-downloading..."
+            )
+        else:
+            log.info("brightstaff found (unknown version, re-downloading...)")
+
+    # 3. Download from GitHub releases (gzipped)
+    slug = _get_platform_slug()
+    filename = f"brightstaff-{slug}.gz"
+    url = f"{PLANO_RELEASE_BASE_URL}/{version}/{filename}"
+
+    os.makedirs(PLANO_BIN_DIR, exist_ok=True)
+
+    print(f"Downloading brightstaff ({version}) for {slug}...")
+    print(f"  URL: {url}")
+    gz_path = brightstaff_path + ".gz"
+    part_path = brightstaff_path + ".part"
+    try:
+        _download_file(url, gz_path)
+        with gzip.open(gz_path, "rb") as f_in, open(part_path, "wb") as f_out:
+            shutil.copyfileobj(f_in, f_out)
+        os.chmod(part_path, 0o755)
+        # Only a fully decompressed binary replaces the destination.
+        os.replace(part_path, brightstaff_path)
+    finally:
+        # Clean up the download and any partial output, success or not.
+        for leftover in (gz_path, part_path):
+            if os.path.exists(leftover):
+                os.unlink(leftover)
+
+    with open(version_path, "w") as f:
+        f.write(version)
+    print(f"brightstaff {version} installed at {brightstaff_path}")
+    return brightstaff_path
+
+
+def find_wasm_plugins():
+    """Locate the WASM plugins produced by a source build.
+
+    Returns:
+        Tuple ``(prompt_gateway_path, llm_gateway_path)``.
+
+    Exits the process with status 1 (after printing an error) when the repo
+    root cannot be found or either plugin file is missing.
+    """
+    root = find_repo_root()
+    if not root:
+        print(
+            "Error: Could not find repository root. "
+            "Make sure you're inside the plano repository."
+        )
+        sys.exit(1)
+
+    release_dir = os.path.join(root, "crates", "target", "wasm32-wasip1", "release")
+    expected = {
+        plugin: os.path.join(release_dir, plugin)
+        for plugin in ("prompt_gateway.wasm", "llm_gateway.wasm")
+    }
+
+    # Report every missing plugin by file name, in a fixed order.
+    absent = [plugin for plugin, path in expected.items() if not os.path.exists(path)]
+
+    if absent:
+        print(f"Error: WASM plugins not found: {', '.join(absent)}")
+        print(f"  Expected at: {release_dir}/")
+        print("  Run 'planoai build' first to build them.")
+        sys.exit(1)
+
+    return expected["prompt_gateway.wasm"], expected["llm_gateway.wasm"]
+
+
+def find_brightstaff_binary():
+    """Locate the brightstaff binary produced by a source build.
+
+    Returns:
+        Path to ``crates/target/release/brightstaff`` inside the repo.
+
+    Exits the process with status 1 (after printing an error) when the repo
+    root cannot be found or the binary does not exist.
+    """
+    root = find_repo_root()
+    if not root:
+        print(
+            "Error: Could not find repository root. "
+            "Make sure you're inside the plano repository."
+        )
+        sys.exit(1)
+
+    binary = os.path.join(root, "crates", "target", "release", "brightstaff")
+    if os.path.exists(binary):
+        return binary
+
+    print(f"Error: brightstaff binary not found at {binary}")
+    print("  Run 'planoai build' first to build it.")
+    sys.exit(1)
+
+    return binary
--- a/cli/planoai/native_runner.py
+++ b/cli/planoai/native_runner.py
@ -0,0 +1,434 @@
+import contextlib
+import io
+import json
+import os
+import signal
+import subprocess
+import sys
+import time
+
+from planoai.consts import (
+    NATIVE_PID_FILE,
+    PLANO_RUN_DIR,
+)
+from planoai.docker_cli import health_check_endpoint
+from planoai.native_binaries import (
+    ensure_brightstaff_binary,
+    ensure_envoy_binary,
+    ensure_wasm_plugins,
+)
+from planoai.utils import find_repo_root, getLogger
+
+log = getLogger(__name__)
+
+
+def _find_config_dir():
+    """Return the directory holding plano_config_schema.yaml and envoy.template.yaml.
+
+    The ``data`` directory shipped inside the installed ``planoai`` package is
+    preferred; the ``config`` directory of a repository checkout is the
+    fallback. Prints an error and exits with status 1 if neither is found.
+    """
+    import planoai
+
+    packaged_dir = os.path.join(os.path.dirname(planoai.__file__), "data")
+    schema_in_package = os.path.join(packaged_dir, "plano_config_schema.yaml")
+    if os.path.isdir(packaged_dir) and os.path.exists(schema_in_package):
+        return packaged_dir
+
+    checkout_root = find_repo_root()
+    checkout_dir = os.path.join(checkout_root, "config") if checkout_root else None
+    if checkout_dir and os.path.isdir(checkout_dir):
+        return checkout_dir
+
+    print(
+        "Error: Could not find config templates. "
+        "Make sure you're inside the plano repository or have the planoai package installed."
+    )
+    sys.exit(1)
+
+
+@contextlib.contextmanager
+def _temporary_env(overrides):
+    """Apply *overrides* to ``os.environ`` for the duration of the ``with`` block.
+
+    On exit every touched variable is put back to its previous value, or
+    removed again if it was not set beforehand.
+    """
+    previous = {}
+    for name in overrides:
+        previous[name] = os.environ.get(name)
+        os.environ[name] = overrides[name]
+    try:
+        yield
+    finally:
+        for name, old_value in previous.items():
+            if old_value is not None:
+                os.environ[name] = old_value
+            else:
+                # The variable did not exist before; drop it again.
+                os.environ.pop(name, None)
+
+
+def render_native_config(plano_config_file, env, with_tracing=False):
+    """Render envoy and plano configs for native mode.
+
+    Args:
+        plano_config_file: Path to the user's Plano config file.
+        env: Mapping of environment variables (API keys, OTEL endpoint, ...)
+            made visible to the config generator and used for ``$VAR``
+            expansion in the rendered files.
+        with_tracing: When true and the config has no non-zero
+            ``tracing.random_sampling``, render from a copy of the config
+            (written under ``PLANO_RUN_DIR``) with ``random_sampling`` set
+            to 100.
+
+    Returns:
+        tuple: ``(envoy_config_path, plano_config_rendered_path)``, both
+        located under ``PLANO_RUN_DIR``.
+    """
+    import yaml
+
+    os.makedirs(PLANO_RUN_DIR, exist_ok=True)
+
+    prompt_gw_path, llm_gw_path = ensure_wasm_plugins()
+
+    # If --with-tracing, inject tracing config if not already present
+    effective_config_file = os.path.abspath(plano_config_file)
+    if with_tracing:
+        with open(plano_config_file, "r") as f:
+            # An empty file parses to None; treat it as an empty mapping so
+            # the lookup below does not raise AttributeError.
+            config_data = yaml.safe_load(f) or {}
+        # A bare `tracing:` key also parses to None.
+        tracing = config_data.get("tracing") or {}
+        if not tracing.get("random_sampling"):
+            tracing["random_sampling"] = 100
+            config_data["tracing"] = tracing
+            effective_config_file = os.path.join(
+                PLANO_RUN_DIR, "config_with_tracing.yaml"
+            )
+            with open(effective_config_file, "w") as f:
+                yaml.dump(config_data, f, default_flow_style=False)
+
+    envoy_config_path = os.path.join(PLANO_RUN_DIR, "envoy.yaml")
+    plano_config_rendered_path = os.path.join(
+        PLANO_RUN_DIR, "plano_config_rendered.yaml"
+    )
+
+    # Set environment variables that config_generator.validate_and_render_schema() reads
+    config_dir = _find_config_dir()
+    overrides = {
+        "PLANO_CONFIG_FILE": effective_config_file,
+        "PLANO_CONFIG_SCHEMA_FILE": os.path.join(
+            config_dir, "plano_config_schema.yaml"
+        ),
+        "TEMPLATE_ROOT": config_dir,
+        "ENVOY_CONFIG_TEMPLATE_FILE": "envoy.template.yaml",
+        "PLANO_CONFIG_FILE_RENDERED": plano_config_rendered_path,
+        "ENVOY_CONFIG_FILE_RENDERED": envoy_config_path,
+    }
+
+    # Also propagate caller env vars (API keys, OTEL endpoint, etc.); the
+    # path settings above win over same-named caller variables.
+    for key, value in env.items():
+        if key not in overrides:
+            overrides[key] = value
+
+    with _temporary_env(overrides):
+        from planoai.config_generator import validate_and_render_schema
+
+        # Suppress verbose print output from config_generator
+        with contextlib.redirect_stdout(io.StringIO()):
+            validate_and_render_schema()
+
+    # Post-process envoy.yaml: replace Docker WASM plugin paths with local paths
+    with open(envoy_config_path, "r") as f:
+        envoy_content = f.read()
+
+    envoy_content = envoy_content.replace(
+        "/etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm", prompt_gw_path
+    )
+    envoy_content = envoy_content.replace(
+        "/etc/envoy/proxy-wasm-plugins/llm_gateway.wasm", llm_gw_path
+    )
+
+    # Replace /var/log/ paths with local log directory (non-root friendly)
+    log_dir = os.path.join(PLANO_RUN_DIR, "logs")
+    os.makedirs(log_dir, exist_ok=True)
+    envoy_content = envoy_content.replace("/var/log/", log_dir + "/")
+
+    # Replace Linux CA cert path with platform-appropriate path
+    import platform
+
+    if platform.system() == "Darwin":
+        envoy_content = envoy_content.replace(
+            "/etc/ssl/certs/ca-certificates.crt", "/etc/ssl/cert.pem"
+        )
+
+    with open(envoy_config_path, "w") as f:
+        f.write(envoy_content)
+
+    # Run envsubst-equivalent on both rendered files using the caller's env.
+    # Note: os.path.expandvars leaves references to unset variables unchanged.
+    with _temporary_env(env):
+        for filepath in [envoy_config_path, plano_config_rendered_path]:
+            with open(filepath, "r") as f:
+                content = f.read()
+            content = os.path.expandvars(content)
+            with open(filepath, "w") as f:
+                f.write(content)
+
+    return envoy_config_path, plano_config_rendered_path
+
+
+def start_native(plano_config_file, env, foreground=False, with_tracing=False):
+    """Start brightstaff and Envoy natively and wait for the gateway to be healthy.
+
+    Args:
+        plano_config_file: Path to the user's Plano config file.
+        env: Mapping of extra environment variables (API keys, ``LOG_LEVEL``,
+            ...) passed to both daemons and used when rendering the configs.
+        foreground: When true, tail the log files until Ctrl+C, then stop
+            both daemons; otherwise return once the listeners are healthy.
+        with_tracing: Forwarded to ``render_native_config``.
+
+    Exits the process with status 1 if either daemon dies during start-up or
+    the listeners are not healthy within 60 seconds.
+    """
+    from planoai.core import _get_gateway_ports
+
+    console = None
+    try:
+        from rich.console import Console
+
+        console = Console()
+    except ImportError:
+        pass
+
+    def status_print(msg):
+        if console:
+            console.print(msg)
+        else:
+            print(msg)
+
+    def abort_startup(surviving_pid):
+        # Terminate the daemon that is still running and drop the PID file so
+        # a later 'planoai down' cannot signal unrelated, recycled PIDs.
+        _kill_pid(surviving_pid)
+        with contextlib.suppress(FileNotFoundError):
+            os.unlink(NATIVE_PID_FILE)
+
+    envoy_path = ensure_envoy_binary()
+    ensure_wasm_plugins()
+    brightstaff_path = ensure_brightstaff_binary()
+    envoy_config_path, plano_config_rendered_path = render_native_config(
+        plano_config_file, env, with_tracing=with_tracing
+    )
+
+    status_print("[green]✓[/green] Configuration rendered")
+
+    log_dir = os.path.join(PLANO_RUN_DIR, "logs")
+    os.makedirs(log_dir, exist_ok=True)
+
+    log_level = env.get("LOG_LEVEL", "info")
+
+    # Start brightstaff
+    brightstaff_env = os.environ.copy()
+    brightstaff_env["RUST_LOG"] = log_level
+    brightstaff_env["PLANO_CONFIG_PATH_RENDERED"] = plano_config_rendered_path
+    # Propagate API keys and other env vars
+    for key, value in env.items():
+        brightstaff_env[key] = value
+
+    brightstaff_pid = _daemon_exec(
+        [brightstaff_path],
+        brightstaff_env,
+        os.path.join(log_dir, "brightstaff.log"),
+    )
+    log.info(f"Started brightstaff (PID {brightstaff_pid})")
+
+    # Start envoy (it is given the same environment as brightstaff)
+    try:
+        envoy_pid = _daemon_exec(
+            [
+                envoy_path,
+                "-c",
+                envoy_config_path,
+                "--component-log-level",
+                f"wasm:{log_level}",
+                "--log-format",
+                "[%Y-%m-%d %T.%e][%l] %v",
+            ],
+            brightstaff_env,
+            os.path.join(log_dir, "envoy.log"),
+        )
+    except Exception:
+        # No PID file exists yet, so brightstaff would otherwise be orphaned.
+        _kill_pid(brightstaff_pid)
+        raise
+    log.info(f"Started envoy (PID {envoy_pid})")
+
+    # Save PIDs
+    os.makedirs(PLANO_RUN_DIR, exist_ok=True)
+    with open(NATIVE_PID_FILE, "w") as f:
+        json.dump(
+            {
+                "envoy_pid": envoy_pid,
+                "brightstaff_pid": brightstaff_pid,
+            },
+            f,
+        )
+
+    # Health check
+    gateway_ports = _get_gateway_ports(plano_config_file)
+    status_print("[dim]Waiting for listeners to become healthy...[/dim]")
+
+    start_time = time.time()
+    timeout = 60
+    while True:
+        all_healthy = True
+        for port in gateway_ports:
+            if not health_check_endpoint(f"http://localhost:{port}/healthz"):
+                all_healthy = False
+
+        if all_healthy:
+            status_print("[green]✓[/green] Plano is running (native mode)")
+            for port in gateway_ports:
+                status_print(f"  [cyan]http://localhost:{port}[/cyan]")
+            break
+
+        # Check if processes are still alive
+        if not _is_pid_alive(brightstaff_pid):
+            status_print("[red]✗[/red] brightstaff exited unexpectedly")
+            status_print(f"  Check logs: {os.path.join(log_dir, 'brightstaff.log')}")
+            abort_startup(envoy_pid)
+            sys.exit(1)
+
+        if not _is_pid_alive(envoy_pid):
+            status_print("[red]✗[/red] envoy exited unexpectedly")
+            status_print(f"  Check logs: {os.path.join(log_dir, 'envoy.log')}")
+            abort_startup(brightstaff_pid)
+            sys.exit(1)
+
+        if time.time() - start_time > timeout:
+            status_print(f"[red]✗[/red] Health check timed out after {timeout}s")
+            status_print(f"  Check logs in: {log_dir}")
+            stop_native()
+            sys.exit(1)
+
+        time.sleep(1)
+
+    if foreground:
+        status_print("[dim]Running in foreground. Press Ctrl+C to stop.[/dim]")
+        status_print(f"[dim]Logs: {log_dir}[/dim]")
+        # Bound before the try block so the Ctrl+C handler can always read it,
+        # even if the interrupt arrives before `tail` has been spawned.
+        tail_proc = None
+        try:
+            import glob
+
+            access_logs = sorted(glob.glob(os.path.join(log_dir, "access_*.log")))
+            tail_proc = subprocess.Popen(
+                [
+                    "tail",
+                    "-f",
+                    os.path.join(log_dir, "envoy.log"),
+                    os.path.join(log_dir, "brightstaff.log"),
+                ]
+                + access_logs,
+                stdout=sys.stdout,
+                stderr=sys.stderr,
+            )
+            tail_proc.wait()
+        except KeyboardInterrupt:
+            status_print("\n[dim]Stopping Plano...[/dim]")
+            if tail_proc is not None and tail_proc.poll() is None:
+                tail_proc.terminate()
+            stop_native()
+    else:
+        status_print(f"[dim]Logs: {log_dir}[/dim]")
+        status_print("[dim]Run 'planoai down' to stop.[/dim]")
+
+
+def _daemon_exec(args, env, log_path):
+    """Start a fully daemonized process via double-fork. Returns the daemon PID.
+
+    Args:
+        args: Argument vector; ``args[0]`` is the executable path handed to
+            ``os.execve``.
+        env: Environment mapping for the new process.
+        log_path: File that receives the daemon's stdout and stderr; it is
+            truncated on every start.
+
+    Raises:
+        RuntimeError: if the intermediate process fails, or the daemon PID is
+            not reported within 5 seconds.
+
+    The intermediate child hands the daemon PID to the parent through a
+    ``.daemon_pid_<pid>`` file in ``PLANO_RUN_DIR``. Neither forked child
+    ever returns into the caller's Python code: both leave via ``os._exit``
+    (or a successful ``execve``).
+    """
+    log_fd = os.open(log_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o644)
+
+    try:
+        pid = os.fork()
+    except OSError:
+        os.close(log_fd)
+        raise
+
+    if pid > 0:
+        # Parent: close our copy of the log fd and wait for intermediate child
+        os.close(log_fd)
+        _, status = os.waitpid(pid, 0)
+        grandchild_pid_path = os.path.join(PLANO_RUN_DIR, f".daemon_pid_{pid}")
+        if not (os.WIFEXITED(status) and os.WEXITSTATUS(status) == 0):
+            # The intermediate child failed; do not wait for a PID file.
+            with contextlib.suppress(OSError):
+                os.unlink(grandchild_pid_path)
+            raise RuntimeError(f"Failed to daemonize {args[0]}")
+        # Read the grandchild PID from the hand-off file
+        deadline = time.time() + 5
+        while time.time() < deadline:
+            if os.path.exists(grandchild_pid_path):
+                with open(grandchild_pid_path, "r") as f:
+                    grandchild_pid = int(f.read().strip())
+                os.unlink(grandchild_pid_path)
+                return grandchild_pid
+            time.sleep(0.05)
+        raise RuntimeError(f"Timed out waiting for daemon PID from {args[0]}")
+
+    # First child: create new session and fork again
+    try:
+        os.setsid()
+        grandchild_pid = os.fork()
+    except BaseException:
+        os._exit(1)
+
+    if grandchild_pid > 0:
+        # Intermediate child: write grandchild PID and exit
+        exit_code = 0
+        try:
+            pid_path = os.path.join(PLANO_RUN_DIR, f".daemon_pid_{os.getpid()}")
+            with open(pid_path, "w") as f:
+                f.write(str(grandchild_pid))
+        except BaseException:
+            # The parent can never learn this PID, so do not leave the
+            # daemon running untracked.
+            with contextlib.suppress(OSError):
+                os.kill(grandchild_pid, signal.SIGKILL)
+            exit_code = 1
+        os._exit(exit_code)
+
+    # Grandchild: this is the actual daemon
+    try:
+        os.dup2(log_fd, 1)  # stdout -> log
+        os.dup2(log_fd, 2)  # stderr -> log
+        os.close(log_fd)
+        # Close stdin
+        devnull = os.open(os.devnull, os.O_RDONLY)
+        os.dup2(devnull, 0)
+        os.close(devnull)
+
+        os.execve(args[0], args, env)
+    except BaseException as exc:
+        # Record why the launch failed (stderr is the log file by now).
+        with contextlib.suppress(OSError):
+            os.write(
+                2, f"Failed to exec {args[0]}: {exc}\n".encode("utf-8", "replace")
+            )
+    finally:
+        # Only reached when execve did not replace the process image; exit
+        # here so the fork does not continue as a second copy of the CLI.
+        os._exit(127)
+
+
+def _is_pid_alive(pid):
+    """Return True if a process with the given PID currently exists."""
+    try:
+        # Signal 0 performs the existence/permission check without delivering
+        # an actual signal.
+        os.kill(pid, 0)
+    except ProcessLookupError:
+        return False
+    except PermissionError:
+        # Process exists but we can't signal it
+        return True
+    else:
+        return True
+
+
+def _kill_pid(pid):
+    """Best-effort SIGTERM: a missing process or lack of permission is ignored."""
+    with contextlib.suppress(ProcessLookupError, PermissionError):
+        os.kill(pid, signal.SIGTERM)
+
+
+def stop_native():
+    """Stop natively-running Envoy and brightstaff processes.
+
+    Reads the PIDs from ``NATIVE_PID_FILE``, sends SIGTERM to each process,
+    waits up to 10 seconds per process for it to exit, and falls back to
+    SIGKILL. The PID file is removed afterwards. An unparseable PID file is
+    reported and removed without signalling anything.
+    """
+    if not os.path.exists(NATIVE_PID_FILE):
+        print("No native Plano instance found (PID file missing).")
+        return
+
+    try:
+        with open(NATIVE_PID_FILE, "r") as f:
+            pids = json.load(f)
+    except ValueError:
+        # json.JSONDecodeError is a ValueError (truncated or hand-edited file).
+        pids = None
+
+    if not isinstance(pids, dict):
+        # Without this, a corrupt file would make every later stop attempt crash.
+        print(f"Warning: PID file {NATIVE_PID_FILE} is unreadable; removing it.")
+        with contextlib.suppress(FileNotFoundError):
+            os.unlink(NATIVE_PID_FILE)
+        return
+
+    envoy_pid = pids.get("envoy_pid")
+    brightstaff_pid = pids.get("brightstaff_pid")
+
+    for name, pid in [("envoy", envoy_pid), ("brightstaff", brightstaff_pid)]:
+        if pid is None:
+            continue
+        if not isinstance(pid, int) or pid <= 0:
+            # os.kill() treats 0 and negative values as process groups; never
+            # forward such a value from a file on disk.
+            log.info(f"Ignoring invalid PID for {name}: {pid!r}")
+            continue
+        try:
+            os.kill(pid, signal.SIGTERM)
+            log.info(f"Sent SIGTERM to {name} (PID {pid})")
+        except ProcessLookupError:
+            log.info(f"{name} (PID {pid}) already stopped")
+            continue
+        except PermissionError:
+            log.info(f"Permission denied stopping {name} (PID {pid})")
+            continue
+
+        # Wait for graceful shutdown
+        deadline = time.time() + 10
+        while _is_pid_alive(pid) and time.time() < deadline:
+            time.sleep(0.5)
+
+        if _is_pid_alive(pid):
+            # Still alive after timeout, force kill
+            try:
+                os.kill(pid, signal.SIGKILL)
+                log.info(f"Sent SIGKILL to {name} (PID {pid})")
+            except (ProcessLookupError, PermissionError):
+                pass
+
+    with contextlib.suppress(FileNotFoundError):
+        os.unlink(NATIVE_PID_FILE)
+    print("Plano stopped (native mode).")
+
+
+def native_validate_config(plano_config_file):
+    """Validate a Plano config in-process, without Docker.
+
+    Runs the config generator against *plano_config_file* with its stdout
+    suppressed. The rendered files are written under ``PLANO_RUN_DIR`` as a
+    side effect; only the validation outcome matters to callers.
+    """
+    config_dir = _find_config_dir()
+
+    # The generator writes its rendered output here.
+    os.makedirs(PLANO_RUN_DIR, exist_ok=True)
+
+    source_config = os.path.abspath(plano_config_file)
+    schema_file = os.path.join(config_dir, "plano_config_schema.yaml")
+    rendered_plano = os.path.join(PLANO_RUN_DIR, "plano_config_rendered.yaml")
+    rendered_envoy = os.path.join(PLANO_RUN_DIR, "envoy.yaml")
+
+    generator_env = {
+        "PLANO_CONFIG_FILE": source_config,
+        "PLANO_CONFIG_SCHEMA_FILE": schema_file,
+        "TEMPLATE_ROOT": config_dir,
+        "ENVOY_CONFIG_TEMPLATE_FILE": "envoy.template.yaml",
+        "PLANO_CONFIG_FILE_RENDERED": rendered_plano,
+        "ENVOY_CONFIG_FILE_RENDERED": rendered_envoy,
+    }
+
+    with _temporary_env(generator_env):
+        from planoai.config_generator import validate_and_render_schema
+
+        # Discard the generator's verbose print output.
+        discarded_output = io.StringIO()
+        with contextlib.redirect_stdout(discarded_output):
+            validate_and_render_schema()
--- a/cli/planoai/templates/coding_agent_routing.yaml
+++ b/cli/planoai/templates/coding_agent_routing.yaml
@ -23,7 +23,7 @@ model_providers:

  # Ollama Models
  - model: ollama/llama3.1
-    base_url: http://host.docker.internal:11434
+    base_url: http://localhost:11434


 # Model aliases - friendly names that map to actual provider names
--- a/cli/pyproject.toml
+++ b/cli/pyproject.toml
@ -37,6 +37,10 @@ path = "planoai/__init__.py"
 [tool.hatch.build.targets.wheel]
 packages = ["planoai"]

+[tool.hatch.build.targets.wheel.force-include]
+"../config/plano_config_schema.yaml" = "planoai/data/plano_config_schema.yaml"
+"../config/envoy.template.yaml" = "planoai/data/envoy.template.yaml"
+
 [tool.hatch.build.targets.sdist]
 include = ["planoai/**"]

--- a/config/test_passthrough.yaml
+++ b/config/test_passthrough.yaml
@ -6,8 +6,8 @@
 # that manage their own API key validation.
 #
 # To test:
-#   docker build -t plano-passthrough-test .
-#   docker run -d -p 10000:10000 -v $(pwd)/config/test_passthrough.yaml:/app/plano_config.yaml plano-passthrough-test
+#   pip install planoai
+#   planoai up config/test_passthrough.yaml
 #
 #   curl http://localhost:10000/v1/chat/completions \
 #     -H "Authorization: Bearer sk-your-virtual-key" \
--- a/config/validate_plano_config.sh
+++ b/config/validate_plano_config.sh
@ -1,20 +1,32 @@
 #!/bin/bash

+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 failed_files=()

 for file in $(find . -name config.yaml -o -name plano_config_full_reference.yaml); do
  echo "Validating ${file}..."
-  touch $(pwd)/${file}_rendered
-  if ! docker run --rm -v "$(pwd)/${file}:/app/plano_config.yaml:ro" -v "$(pwd)/${file}_rendered:/app/plano_config_rendered.yaml:rw" --entrypoint /bin/sh ${PLANO_DOCKER_IMAGE:-katanemo/plano:0.4.9} -c "python -m planoai.config_generator" 2>&1 > /dev/null ; then
+  rendered_file="$(pwd)/${file}_rendered"
+  touch "$rendered_file"
+
+  PLANO_CONFIG_FILE="$(pwd)/${file}" \
+  PLANO_CONFIG_SCHEMA_FILE="${SCRIPT_DIR}/plano_config_schema.yaml" \
+  TEMPLATE_ROOT="${SCRIPT_DIR}" \
+  ENVOY_CONFIG_TEMPLATE_FILE="envoy.template.yaml" \
+  PLANO_CONFIG_FILE_RENDERED="$rendered_file" \
+  ENVOY_CONFIG_FILE_RENDERED="/dev/null" \
+  python -m planoai.config_generator 2>&1 > /dev/null
+
+  if [ $? -ne 0 ]; then
    echo "Validation failed for $file"
    failed_files+=("$file")
  fi
+
  RENDERED_CHECKED_IN_FILE=$(echo $file | sed 's/\.yaml$/_rendered.yaml/')
  if [ -f "$RENDERED_CHECKED_IN_FILE" ]; then
    echo "Checking rendered file against checked-in version..."
-    if ! diff -q "${file}_rendered" "$RENDERED_CHECKED_IN_FILE" > /dev/null; then
-      echo "Rendered file ${file}_rendered does not match checked-in version ${RENDERED_CHECKED_IN_FILE}"
-      failed_files+=("${file}_rendered")
+    if ! diff -q "$rendered_file" "$RENDERED_CHECKED_IN_FILE" > /dev/null; then
+      echo "Rendered file $rendered_file does not match checked-in version ${RENDERED_CHECKED_IN_FILE}"
+      failed_files+=("$rendered_file")
    else
      echo "Rendered file matches checked-in version."
    fi
--- a/demos/advanced/multi_turn_rag/config.yaml
+++ b/demos/advanced/multi_turn_rag/config.yaml
@ -7,7 +7,7 @@ listeners:

 endpoints:
  rag_energy_source_agent:
-    endpoint: host.docker.internal:18083
+    endpoint: localhost:18083
    connect_timeout: 0.005s

 model_providers:
--- a/demos/agent_orchestration/multi_agent_crewai_langchain/README.md
+++ b/demos/agent_orchestration/multi_agent_crewai_langchain/README.md
@ -38,18 +38,17 @@ Plano acts as a **framework-agnostic proxy and data plane** that:
 ```bash
 # From the demo directory
 cd demos/agent_orchestration/multi_agent_crewai_langchain
-
-# Build and start all services
-docker-compose up -d
+./run_demo.sh
 ```

-This starts:
- **Plano** (ports 12000, 8001) - routing and orchestration
+This starts Plano natively and brings up the following services via Docker Compose:
 - **CrewAI Flight Agent** (port 10520) - flight search
 - **LangChain Weather Agent** (port 10510) - weather forecasts
 - **AnythingLLM** (port 3001) - chat interface
 - **Jaeger** (port 16686) - distributed tracing

+Plano runs natively on the host (ports 12000, 8001).
+
 ### Try It Out

 1. **Open the Chat Interface**
@ -116,7 +115,7 @@ This starts:
 ## Cleanup

 ```bash
-docker-compose down
+./run_demo.sh down
 ```

 ## Next Steps
--- a/demos/agent_orchestration/multi_agent_crewai_langchain/docker-compose.yaml
+++ b/demos/agent_orchestration/multi_agent_crewai_langchain/docker-compose.yaml
@ -1,21 +1,5 @@

 services:
-  plano:
-    build:
-      context: ../../../
-      dockerfile: Dockerfile
-    ports:
-      - "8001:8001"
-      - "12000:12000"
-    environment:
-      - PLANO_CONFIG_PATH=/app/plano_config.yaml
-      - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set}
-      - OTEL_TRACING_GRPC_ENDPOINT=http://jaeger:4317
-      - LOG_LEVEL=${LOG_LEVEL:-info}
-    volumes:
-      - ./config.yaml:/app/plano_config.yaml:ro
-      - /etc/ssl/cert.pem:/etc/ssl/cert.pem
-
  crewai-flight-agent:
    build:
      dockerfile: Dockerfile
@ -23,7 +7,7 @@ services:
    ports:
      - "10520:10520"
    environment:
-      - LLM_GATEWAY_ENDPOINT=http://plano:12000/v1
+      - LLM_GATEWAY_ENDPOINT=http://host.docker.internal:12000/v1
      - AEROAPI_KEY=${AEROAPI_KEY:?AEROAPI_KEY environment variable is required but not set}
      - PYTHONUNBUFFERED=1
    command: ["python", "-u", "crewai/flight_agent.py"]
@ -35,7 +19,7 @@ services:
    ports:
      - "10510:10510"
    environment:
-      - LLM_GATEWAY_ENDPOINT=http://plano:12000/v1
+      - LLM_GATEWAY_ENDPOINT=http://host.docker.internal:12000/v1
    command: ["python", "-u", "langchain/weather_agent.py"]

  anythingllm:
@ -48,7 +32,7 @@ services:
    environment:
      - STORAGE_DIR=/app/server/storage
      - LLM_PROVIDER=generic-openai
-      - GENERIC_OPEN_AI_BASE_PATH=http://plano:8001/v1
+      - GENERIC_OPEN_AI_BASE_PATH=http://host.docker.internal:8001/v1
      - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
      - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
      - GENERIC_OPEN_AI_API_KEY=sk-placeholder
--- a/demos/agent_orchestration/multi_agent_crewai_langchain/run_demo.sh
+++ b/demos/agent_orchestration/multi_agent_crewai_langchain/run_demo.sh
@ -0,0 +1,51 @@
+#!/bin/bash
+set -e
+
+# Bring the demo up: make sure .env exists, start Plano natively, then start
+# the Docker Compose services.
+start_demo() {
+  if [ ! -f ".env" ]; then
+    # Both keys must be present in the caller's environment before .env is written.
+    if [ -z "$OPENAI_API_KEY" ]; then
+      echo "Error: OPENAI_API_KEY environment variable is not set for the demo."
+      exit 1
+    elif [ -z "$AEROAPI_KEY" ]; then
+      echo "Error: AEROAPI_KEY environment variable is not set for the demo."
+      exit 1
+    fi
+
+    echo "Creating .env file..."
+    {
+      echo "OPENAI_API_KEY=$OPENAI_API_KEY"
+      echo "AEROAPI_KEY=$AEROAPI_KEY"
+    } > .env
+    echo ".env file created with API keys."
+  else
+    echo ".env file already exists. Skipping creation."
+  fi
+
+  # Plano runs on the host.
+  echo "Starting Plano with config.yaml..."
+  planoai up config.yaml
+
+  # Agents and supporting services run in containers.
+  echo "Starting agents using Docker Compose..."
+  docker compose up -d
+}
+
+# Tear the demo down: stop the Docker Compose services, then stop Plano.
+# Plano is stopped even when `docker compose down` fails (under `set -e` a
+# failure there would otherwise abort the script and leave Plano running);
+# the Compose exit status is still returned to the caller.
+stop_demo() {
+  local compose_status=0
+
+  # Step 1: Stop Docker Compose services
+  echo "Stopping Docker Compose services..."
+  docker compose down || compose_status=$?
+
+  # Step 2: Stop Plano
+  echo "Stopping Plano..."
+  planoai down
+
+  return "$compose_status"
+}
+
+# Entry point: "down" tears the demo down; any other (or no) argument starts it.
+case "$1" in
+  down) stop_demo ;;
+  *) start_demo ;;
+esac
--- a/demos/agent_orchestration/travel_agents/README.md
+++ b/demos/agent_orchestration/travel_agents/README.md
@ -9,7 +9,7 @@ This demo consists of two intelligent agents that work together seamlessly:
 - **Weather Agent** - Real-time weather conditions and multi-day forecasts for any city worldwide
 - **Flight Agent** - Live flight information between airports with real-time tracking

-All agents use Plano's agent orchestration LLM to intelligently route user requests to the appropriate specialized agent based on conversation context and user intent. Both agents run as Docker containers for easy deployment.
+All agents use Plano's agent orchestration LLM to intelligently route user requests to the appropriate specialized agent based on conversation context and user intent.

 ## Features

@ -22,8 +22,8 @@ All agents use Plano's agent orchestration LLM to intelligently route user reque

 ## Prerequisites

- Docker and Docker Compose
- [Plano CLI](https://docs.planoai.dev/get_started/quickstart.html#prerequisites) installed
+- [Plano CLI](https://docs.planoai.dev/get_started/quickstart.html#prerequisites) installed (`pip install planoai`)
+- Docker and Docker Compose (for agent services)
 - [OpenAI API key](https://platform.openai.com/api-keys)
 - [FlightAware AeroAPI key](https://www.flightaware.com/aeroapi/portal)

@ -40,17 +40,18 @@ export AEROAPI_KEY="your-flightaware-api-key"
 export OPENAI_API_KEY="your OpenAI api key"
 ```

-### 2. Start All Agents & Plano with Docker
+### 2. Start the Demo

 ```bash
-docker compose up --build
+./run_demo.sh
 ```

-This starts:
+This starts Plano natively and brings up the following services via Docker Compose:
 - Weather Agent on port 10510
 - Flight Agent on port 10520
 - Open WebUI on port 8080
- Plano Proxy on port 8001
+
+Plano runs natively on the host (port 8001).

 ### 4. Test the System

@ -92,7 +93,7 @@ Assistant: [Both weather_agent and flight_agent respond simultaneously]
 Weather     Flight
  Agent       Agent
 (10510)     (10520)
- [Docker]    [Docker]
+ (10510)     (10520)
 ```

 Each agent:
@ -101,7 +102,7 @@ Each agent:
 3. Generates response using GPT-5.2
 4. Streams response back to user

-Both agents run as Docker containers and communicate with Plano via `host.docker.internal`.
+Both agents run as Docker containers and communicate with Plano running natively on the host.

 ## Observability

--- a/demos/agent_orchestration/travel_agents/config.yaml
+++ b/demos/agent_orchestration/travel_agents/config.yaml
@ -2,9 +2,9 @@ version: v0.3.0

 agents:
  - id: weather_agent
-    url: http://host.docker.internal:10510
+    url: http://localhost:10510
  - id: flight_agent
-    url: http://host.docker.internal:10520
+    url: http://localhost:10520

 model_providers:
  - model: openai/gpt-5.2
--- a/demos/agent_orchestration/travel_agents/docker-compose.yaml
+++ b/demos/agent_orchestration/travel_agents/docker-compose.yaml
@ -1,18 +1,5 @@

 services:
-  plano:
-    build:
-      context: ../../../
-      dockerfile: Dockerfile
-    ports:
-      - "12000:12000"
-      - "8001:8001"
-    environment:
-      - PLANO_CONFIG_PATH=/config/config.yaml
-      - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set}
-    volumes:
-      - ./config.yaml:/app/plano_config.yaml
-      - /etc/ssl/cert.pem:/etc/ssl/cert.pem
  weather-agent:
    build:
      context: .
--- a/demos/agent_orchestration/travel_agents/run_demo.sh
+++ b/demos/agent_orchestration/travel_agents/run_demo.sh
@ -0,0 +1,51 @@
+#!/bin/bash
+set -e
+
+# Bring the demo up: make sure .env exists, start Plano natively, then start
+# the Docker Compose services.
+start_demo() {
+  if [ ! -f ".env" ]; then
+    # Both keys must be present in the caller's environment before .env is written.
+    if [ -z "$OPENAI_API_KEY" ]; then
+      echo "Error: OPENAI_API_KEY environment variable is not set for the demo."
+      exit 1
+    elif [ -z "$AEROAPI_KEY" ]; then
+      echo "Error: AEROAPI_KEY environment variable is not set for the demo."
+      exit 1
+    fi
+
+    echo "Creating .env file..."
+    {
+      echo "OPENAI_API_KEY=$OPENAI_API_KEY"
+      echo "AEROAPI_KEY=$AEROAPI_KEY"
+    } > .env
+    echo ".env file created with API keys."
+  else
+    echo ".env file already exists. Skipping creation."
+  fi
+
+  # Plano runs on the host.
+  echo "Starting Plano with config.yaml..."
+  planoai up config.yaml
+
+  # Agents and supporting services run in containers.
+  echo "Starting agents using Docker Compose..."
+  docker compose up -d
+}
+
+# Tear the demo down: stop the Docker Compose services, then stop Plano.
+# Plano is stopped even when `docker compose down` fails (under `set -e` a
+# failure there would otherwise abort the script and leave Plano running);
+# the Compose exit status is still returned to the caller.
+stop_demo() {
+  local compose_status=0
+
+  # Step 1: Stop Docker Compose services
+  echo "Stopping Docker Compose services..."
+  docker compose down || compose_status=$?
+
+  # Step 2: Stop Plano
+  echo "Stopping Plano..."
+  planoai down
+
+  return "$compose_status"
+}
+
+# Entry point: "down" tears the demo down; any other (or no) argument starts it.
+case "$1" in
+  down) stop_demo ;;
+  *) start_demo ;;
+esac
--- a/demos/filter_chains/http_filter/README.md
+++ b/demos/filter_chains/http_filter/README.md
@ -35,21 +35,21 @@ This demo consists of four components:

 ## Quick Start

-### 1. Start everything with Docker Compose
+### 1. Start the demo
 ```bash
-docker compose up --build
+export OPENAI_API_KEY="your-key"
+./run_demo.sh
 ```

-This brings up:
+This starts Plano natively and brings up the following services via Docker Compose:
 - Input Guards MCP server on port 10500
 - Query Rewriter MCP server on port 10501
 - Context Builder MCP server on port 10502
 - RAG Agent REST server on port 10505
- Plano listener on port 8001 (and gateway on 12000)
 - Jaeger UI for viewing traces at http://localhost:16686
 - AnythingLLM at http://localhost:3001 for interactive queries

-> Set `OPENAI_API_KEY` in your environment before running; `LLM_GATEWAY_ENDPOINT` defaults to `http://host.docker.internal:12000/v1`.
+Plano runs natively on the host (ports 8001 and 12000).

 ### 2. Test the system

@ -74,16 +74,16 @@ The `config.yaml` defines how agents are connected:
 ```yaml
 filters:
  - id: input_guards
-    url: http://host.docker.internal:10500
+    url: http://localhost:10500
    # type: mcp (default)
    # tool: input_guards (default - same as filter id)

  - id: query_rewriter
-    url: http://host.docker.internal:10501
+    url: http://localhost:10501
    # type: mcp (default)

  - id: context_builder
-    url: http://host.docker.internal:10502
+    url: http://localhost:10502
 ```

 ## How It Works
--- a/demos/filter_chains/http_filter/docker-compose.yaml
+++ b/demos/filter_chains/http_filter/docker-compose.yaml
@ -11,19 +11,6 @@ services:
    environment:
      - LLM_GATEWAY_ENDPOINT=${LLM_GATEWAY_ENDPOINT:-http://host.docker.internal:12000/v1}
      - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set}
-  plano:
-    build:
-      context: ../../../
-      dockerfile: Dockerfile
-    ports:
-      - "12000:12000"
-      - "8001:8001"
-    environment:
-      - PLANO_CONFIG_PATH=/config/config.yaml
-      - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set}
-    volumes:
-      - ./config.yaml:/app/plano_config.yaml
-      - /etc/ssl/cert.pem:/etc/ssl/cert.pem
  jaeger:
    build:
      context: ../../shared/jaeger
@ -41,7 +28,7 @@ services:
    environment:
      - STORAGE_DIR=/app/server/storage
      - LLM_PROVIDER=generic-openai
-      - GENERIC_OPEN_AI_BASE_PATH=http://plano:8001/v1
+      - GENERIC_OPEN_AI_BASE_PATH=http://host.docker.internal:8001/v1
      - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
      - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
      - GENERIC_OPEN_AI_API_KEY=sk-placeholder
--- a/demos/filter_chains/http_filter/run_demo.sh
+++ b/demos/filter_chains/http_filter/run_demo.sh
@ -0,0 +1,46 @@
+#!/bin/bash
+set -e
+
+# Bring the demo up: make sure .env exists, start Plano natively, then start
+# the Docker Compose services.
+start_demo() {
+  if [ ! -f ".env" ]; then
+    # The key must be present in the caller's environment before .env is written.
+    if [ -z "$OPENAI_API_KEY" ]; then
+      echo "Error: OPENAI_API_KEY environment variable is not set for the demo."
+      exit 1
+    fi
+
+    echo "Creating .env file..."
+    echo "OPENAI_API_KEY=$OPENAI_API_KEY" > .env
+    echo ".env file created with OPENAI_API_KEY."
+  else
+    echo ".env file already exists. Skipping creation."
+  fi
+
+  # Plano runs on the host.
+  echo "Starting Plano with config.yaml..."
+  planoai up config.yaml
+
+  # Supporting services run in containers.
+  echo "Starting services using Docker Compose..."
+  docker compose up -d
+}
+
+# Tear the demo down: stop the Docker Compose services, then stop Plano.
+# Plano is stopped even when `docker compose down` fails (under `set -e` a
+# failure there would otherwise abort the script and leave Plano running);
+# the Compose exit status is still returned to the caller.
+stop_demo() {
+  local compose_status=0
+
+  # Step 1: Stop Docker Compose services
+  echo "Stopping Docker Compose services..."
+  docker compose down || compose_status=$?
+
+  # Step 2: Stop Plano
+  echo "Stopping Plano..."
+  planoai down
+
+  return "$compose_status"
+}
+
+# Entry point: "down" tears the demo down; any other (or no) argument starts it.
+case "$1" in
+  down) stop_demo ;;
+  *) start_demo ;;
+esac
--- a/demos/filter_chains/mcp_filter/README.md
+++ b/demos/filter_chains/mcp_filter/README.md
@ -35,21 +35,21 @@ This demo consists of four components:

 ## Quick Start

-### 1. Start everything with Docker Compose
+### 1. Start the demo
 ```bash
-docker compose up --build
+export OPENAI_API_KEY="your-key"
+./run_demo.sh
 ```

-This brings up:
+This starts Plano natively and brings up the following services via Docker Compose:
 - Input Guards MCP server on port 10500
 - Query Rewriter MCP server on port 10501
 - Context Builder MCP server on port 10502
 - RAG Agent REST server on port 10505
- Plano listener on port 8001 (and gateway on 12000)
 - Jaeger UI for viewing traces at http://localhost:16686
 - AnythingLLM at http://localhost:3001 for interactive queries

-> Set `OPENAI_API_KEY` in your environment before running; `LLM_GATEWAY_ENDPOINT` defaults to `http://host.docker.internal:12000/v1`.
+Plano runs natively on the host (ports 8001 and 12000).

 ### 2. Test the system

@ -74,16 +74,16 @@ The `config.yaml` defines how agents are connected:
 ```yaml
 filters:
  - id: input_guards
-    url: http://host.docker.internal:10500
+    url: http://localhost:10500
    # type: mcp (default)
    # tool: input_guards (default - same as filter id)

  - id: query_rewriter
-    url: http://host.docker.internal:10501
+    url: http://localhost:10501
    # type: mcp (default)

  - id: context_builder
-    url: http://host.docker.internal:10502
+    url: http://localhost:10502
 ```

 ## How It Works
--- a/demos/filter_chains/mcp_filter/config.yaml
+++ b/demos/filter_chains/mcp_filter/config.yaml
@ -2,21 +2,21 @@ version: v0.3.0

 agents:
  - id: rag_agent
-    url: http://host.docker.internal:10505
+    url: http://localhost:10505

 filters:
  - id: input_guards
-    url: http://host.docker.internal:10500
+    url: http://localhost:10500
    # type: mcp (default)
    # transport: streamable-http (default)
    # tool: input_guards (default - same as filter id)
  - id: query_rewriter
-    url: http://host.docker.internal:10501
+    url: http://localhost:10501
    # type: mcp (default)
    # transport: streamable-http (default)
    # tool: query_rewriter (default - same as filter id)
  - id: context_builder
-    url: http://host.docker.internal:10502
+    url: http://localhost:10502

 model_providers:
  - model: openai/gpt-4o-mini
--- a/demos/filter_chains/mcp_filter/docker-compose.yaml
+++ b/demos/filter_chains/mcp_filter/docker-compose.yaml
@ -11,21 +11,6 @@ services:
    environment:
      - LLM_GATEWAY_ENDPOINT=${LLM_GATEWAY_ENDPOINT:-http://host.docker.internal:12000/v1}
      - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set}
-  plano:
-    build:
-      context: ../../../
-      dockerfile: Dockerfile
-    ports:
-      - "11000:11000"
-      - "12001:12001"
-      - "12000:12000"
-      - "8001:8001"
-    environment:
-      - PLANO_CONFIG_PATH=/config/config.yaml
-      - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set}
-    volumes:
-      - ./config.yaml:/app/plano_config.yaml
-      - /etc/ssl/cert.pem:/etc/ssl/cert.pem
  jaeger:
    build:
      context: ../../shared/jaeger
@ -43,7 +28,7 @@ services:
    environment:
      - STORAGE_DIR=/app/server/storage
      - LLM_PROVIDER=generic-openai
-      - GENERIC_OPEN_AI_BASE_PATH=http://plano:8001/v1
+      - GENERIC_OPEN_AI_BASE_PATH=http://host.docker.internal:8001/v1
      - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
      - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
      - GENERIC_OPEN_AI_API_KEY=sk-placeholder
--- a/demos/filter_chains/mcp_filter/run_demo.sh
+++ b/demos/filter_chains/mcp_filter/run_demo.sh
@ -0,0 +1,46 @@
+#!/bin/bash
+set -e
+
+# Start the demo: make sure .env exists, start Plano, then the Compose services.
+start_demo() {
+  # Step 1: Reuse an existing .env in the current directory untouched
+  if [ -f ".env" ]; then
+    echo ".env file already exists. Skipping creation."
+  else
+    # Step 2: Create `.env` from the environment; the OpenAI key is mandatory
+    if [ -z "$OPENAI_API_KEY" ]; then
+      echo "Error: OPENAI_API_KEY environment variable is not set for the demo." >&2
+      exit 1
+    fi
+
+    echo "Creating .env file..."
+    (umask 077 && echo "OPENAI_API_KEY=$OPENAI_API_KEY" > .env)  # owner-only: file holds a secret
+    echo ".env file created with OPENAI_API_KEY."
+  fi
+
+  # Step 3: Start Plano using the config.yaml in the current directory
+  echo "Starting Plano with config.yaml..."
+  planoai up config.yaml
+
+  # Step 4: Start the Compose services in the background (-d)
+  echo "Starting services using Docker Compose..."
+  docker compose up -d
+}
+
+# Stop the demo: Compose services first, then Plano. Returns the last failure status.
+stop_demo() {
+  # `set -e` would skip `planoai down` if the compose teardown failed; try both.
+  local status=0
+  echo "Stopping Docker Compose services..."
+  docker compose down || status=$?
+  echo "Stopping Plano..."
+  planoai down || status=$?
+  return "$status"
+}
+
+# Main script logic
+if [ "$1" == "down" ]; then
+  stop_demo
+else
+  start_demo
+fi
--- a/demos/getting_started/llm_gateway/config.yaml
+++ b/demos/getting_started/llm_gateway/config.yaml
@ -44,7 +44,7 @@ model_providers:
    access_key: $TOGETHER_API_KEY

  - model: custom/test-model
-    base_url: http://host.docker.internal:11223
+    base_url: http://localhost:11223
    provider_interface: openai

 tracing:
--- a/demos/getting_started/llm_gateway/docker-compose.yaml
+++ b/demos/getting_started/llm_gateway/docker-compose.yaml
@ -1,20 +1,5 @@
 services:

-  plano:
-    build:
-      context: ../../../
-      dockerfile: Dockerfile
-    ports:
-      - "12000:12000"
-      - "12001:12001"
-    environment:
-      - PLANO_CONFIG_PATH=/app/plano_config.yaml
-      - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set}
-      - OTEL_TRACING_GRPC_ENDPOINT=http://host.docker.internal:4317
-    volumes:
-      - ./config.yaml:/app/plano_config.yaml:ro
-      - /etc/ssl/cert.pem:/etc/ssl/cert.pem
-
  anythingllm:
    image: mintplexlabs/anythingllm
    restart: always
@ -25,7 +10,7 @@ services:
    environment:
      - STORAGE_DIR=/app/server/storage
      - LLM_PROVIDER=generic-openai
-      - GENERIC_OPEN_AI_BASE_PATH=http://plano:12000/v1
+      - GENERIC_OPEN_AI_BASE_PATH=http://host.docker.internal:12000/v1
      - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
      - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
      - GENERIC_OPEN_AI_API_KEY=sk-placeholder
--- a/demos/getting_started/weather_forecast/config.yaml
+++ b/demos/getting_started/weather_forecast/config.yaml
@ -11,7 +11,7 @@ listeners:

 endpoints:
  weather_forecast_service:
-    endpoint: host.docker.internal:18083
+    endpoint: localhost:18083
    connect_timeout: 0.005s

 overrides:
--- a/demos/integrations/ollama/config.yaml
+++ b/demos/integrations/ollama/config.yaml
@ -9,7 +9,7 @@ model_providers:

  - model: my_llm_provider/llama3.2
    provider_interface: openai
-    base_url: http://host.docker.internal:11434
+    base_url: http://localhost:11434
    default: true

 system_prompt: |
--- a/demos/llm_routing/claude_code_router/README.md
+++ b/demos/llm_routing/claude_code_router/README.md
@ -39,8 +39,8 @@ Your Request → Plano → Suitable Model → Response
 # Install Claude Code if you haven't already
 npm install -g @anthropic-ai/claude-code

-# Ensure Docker is running
-docker --version
+# Install Plano CLI
+pip install planoai
 ```

 ### Step 1: Get Configuration
--- a/demos/llm_routing/claude_code_router/config.yaml
+++ b/demos/llm_routing/claude_code_router/config.yaml
@ -28,7 +28,7 @@ model_providers:

  # Ollama Models
  - model: ollama/llama3.1
-    base_url: http://host.docker.internal:11434
+    base_url: http://localhost:11434


 # Model aliases - friendly names that map to actual provider names
--- a/demos/llm_routing/model_alias_routing/config_with_aliases.yaml
+++ b/demos/llm_routing/model_alias_routing/config_with_aliases.yaml
@ -49,7 +49,7 @@ model_providers:

  # Ollama Models
  - model: ollama/llama3.1
-    base_url: http://host.docker.internal:11434
+    base_url: http://localhost:11434

  # Grok (xAI) Models
  - model: xai/grok-4-0709
--- a/demos/llm_routing/openclaw_routing/README.md
+++ b/demos/llm_routing/openclaw_routing/README.md
@ -23,7 +23,6 @@ Plano uses a [preference-aligned router](https://arxiv.org/abs/2506.16655) to an

 ## Prerequisites

- **Docker** running
 - **Plano CLI**: `uv tool install planoai` or `pip install planoai`
 - **OpenClaw**: `npm install -g openclaw@latest`
 - **API keys**:
@ -43,7 +42,7 @@ export ANTHROPIC_API_KEY="your-anthropic-key"

 ```bash
 cd demos/llm_routing/openclaw_routing
-planoai up --service plano --foreground
+planoai up config.yaml
 ```

 ### 3. Set Up OpenClaw
--- a/demos/llm_routing/preference_based_routing/README.md
+++ b/demos/llm_routing/preference_based_routing/README.md
@ -3,25 +3,23 @@ This demo shows how you can use user preferences to route user prompts to approp

 ## How to start the demo

-Make sure your machine is up to date with [latest version of plano]([url](https://github.com/katanemo/plano/tree/main?tab=readme-ov-file#prerequisites)). And you have activated the virtual environment.
+Make sure you have Plano CLI installed (`pip install planoai` or `uv tool install planoai`).

-
-1. start anythingllm
 ```bash
-(venv) $ cd demos/llm_routing/preference_based_routing
-(venv) $ docker compose up -d
+cd demos/llm_routing/preference_based_routing
+./run_demo.sh
 ```
-2. start plano in the foreground
+
+Or manually:
+
+1. Start Plano
 ```bash
-(venv) $ planoai up --service plano --foreground
-# Or if installed with uv: uvx planoai up --service plano --foreground
-2025-05-30 18:00:09,953 - planoai.main - INFO - Starting plano cli version: 0.4.9
-2025-05-30 18:00:09,953 - planoai.main - INFO - Validating /Users/adilhafeez/src/intelligent-prompt-gateway/demos/llm_routing/preference_based_routing/config.yaml
-2025-05-30 18:00:10,422 - cli.core - INFO - Starting plano gateway, image name: plano, tag: katanemo/plano:0.4.9
-2025-05-30 18:00:10,662 - cli.core - INFO - plano status: running, health status: starting
-2025-05-30 18:00:11,712 - cli.core - INFO - plano status: running, health status: starting
-2025-05-30 18:00:12,761 - cli.core - INFO - plano is running and is healthy!
-...
+planoai up config.yaml
+```
+
+2. Start AnythingLLM
+```bash
+docker compose up -d
 ```

 3. open AnythingLLM http://localhost:3001/
--- a/demos/llm_routing/preference_based_routing/docker-compose.yaml
+++ b/demos/llm_routing/preference_based_routing/docker-compose.yaml
@ -1,23 +1,5 @@
 services:

-  plano:
-    build:
-      context: ../../../
-      dockerfile: Dockerfile
-    ports:
-      - "12000:12000"
-      - "12001:12001"
-    environment:
-      - PLANO_CONFIG_PATH=/app/plano_config.yaml
-      - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set}
-      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:?ANTHROPIC_API_KEY environment variable is required but not set}
-      - OTEL_TRACING_GRPC_ENDPOINT=http://host.docker.internal:4317
-      - OTEL_TRACING_ENABLED=true
-      - RUST_LOG=debug
-    volumes:
-      - ./config.yaml:/app/plano_config.yaml:ro
-      - /etc/ssl/cert.pem:/etc/ssl/cert.pem
-
  anythingllm:
    image: mintplexlabs/anythingllm
    restart: always
@ -28,7 +10,7 @@ services:
    environment:
      - STORAGE_DIR=/app/server/storage
      - LLM_PROVIDER=generic-openai
-      - GENERIC_OPEN_AI_BASE_PATH=http://plano:12000/v1
+      - GENERIC_OPEN_AI_BASE_PATH=http://host.docker.internal:12000/v1
      - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
      - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
      - GENERIC_OPEN_AI_API_KEY=sk-placeholder
--- a/demos/llm_routing/preference_based_routing/plano_config_local.yaml
+++ b/demos/llm_routing/preference_based_routing/plano_config_local.yaml
@ -13,7 +13,7 @@ model_providers:

  - name: arch-router
    model: arch/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M
-    base_url: http://host.docker.internal:11434
+    base_url: http://localhost:11434

  - model: openai/gpt-4o-mini
    access_key: $OPENAI_API_KEY
--- a/demos/llm_routing/preference_based_routing/run_demo.sh
+++ b/demos/llm_routing/preference_based_routing/run_demo.sh
@ -0,0 +1,52 @@
+#!/bin/bash
+set -e
+
+# Start the demo: make sure .env exists, start Plano, then the Compose services.
+start_demo() {
+  # Step 1: Reuse an existing .env in the current directory untouched
+  if [ -f ".env" ]; then
+    echo ".env file already exists. Skipping creation."
+  else
+    # Step 2: Create `.env` from the environment (OpenAI required, Anthropic optional)
+    if [ -z "$OPENAI_API_KEY" ]; then
+      echo "Error: OPENAI_API_KEY environment variable is not set for the demo." >&2
+      exit 1
+    fi
+    if [ -z "$ANTHROPIC_API_KEY" ]; then
+      echo "Warning: ANTHROPIC_API_KEY environment variable is not set. Anthropic features may not work." >&2
+    fi
+
+    echo "Creating .env file..."
+    (umask 077 && echo "OPENAI_API_KEY=$OPENAI_API_KEY" > .env)  # owner-only: file holds secrets
+    if [ -n "$ANTHROPIC_API_KEY" ]; then
+      echo "ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY" >> .env
+    fi
+    echo ".env file created with API keys."
+  fi
+
+  # Step 3: Start Plano using the config.yaml in the current directory
+  echo "Starting Plano with config.yaml..."
+  planoai up config.yaml
+
+  # Step 4: Start the Compose services in the background (-d)
+  echo "Starting services using Docker Compose..."
+  docker compose up -d
+}
+
+# Stop the demo: Compose services first, then Plano. Returns the last failure status.
+stop_demo() {
+  # `set -e` would skip `planoai down` if the compose teardown failed; try both.
+  local status=0
+  echo "Stopping Docker Compose services..."
+  docker compose down || status=$?
+  echo "Stopping Plano..."
+  planoai down || status=$?
+  return "$status"
+}
+
+# Main script logic
+if [ "$1" == "down" ]; then
+  stop_demo
+else
+  start_demo
+fi
--- a/demos/shared/test_runner/run_demo_tests.sh
+++ b/demos/shared/test_runner/run_demo_tests.sh
@ -21,7 +21,7 @@ do
  echo "****************************************"
  cd ../../$demo
  echo "starting plano"
-  planoai up config.yaml
+  planoai up --docker config.yaml
  echo "starting docker containers"
  # only execute docker compose if demo is llm_routing/preference_based_routing
  if [ "$demo" == "llm_routing/preference_based_routing" ]; then
@ -38,7 +38,7 @@ do
    exit 1
  fi
  echo "stopping docker containers and plano"
-  planoai down
+  planoai down --docker
  docker compose down -v
  cd ../../shared/test_runner
 done
--- a/docs/source/build_with_plano/includes/agent/function-calling-agent.yaml
+++ b/docs/source/build_with_plano/includes/agent/function-calling-agent.yaml
@ -54,6 +54,6 @@ endpoints:
    # value could be ip address or a hostname with port
    # this could also be a list of endpoints for load balancing
    # for example endpoint: [ ip1:port, ip2:port ]
-    endpoint: host.docker.internal:18083
+    endpoint: localhost:18083
    # max time to wait for a connection to be established
    connect_timeout: 0.005s
--- a/docs/source/concepts/llm_providers/model_aliases.rst
+++ b/docs/source/concepts/llm_providers/model_aliases.rst
@ -32,7 +32,7 @@ Basic Configuration
        access_key: $ANTHROPIC_API_KEY

      - model: ollama/llama3.1
-        base_url: http://host.docker.internal:11434
+        base_url: http://localhost:11434

    # Define aliases that map to the models above
    model_aliases:
--- a/docs/source/concepts/llm_providers/supported_providers.rst
+++ b/docs/source/concepts/llm_providers/supported_providers.rst
@ -598,9 +598,9 @@ Ollama
      - model: ollama/llama3.1
        base_url: http://localhost:11434

-      # Ollama in Docker (from host)
+      # Ollama running locally
      - model: ollama/codellama
-        base_url: http://host.docker.internal:11434
+        base_url: http://localhost:11434


 OpenAI-Compatible Providers
--- a/docs/source/get_started/quickstart.rst
+++ b/docs/source/get_started/quickstart.rst
@ -17,11 +17,17 @@ Follow this guide to learn how to quickly set up Plano and integrate it into you
 Prerequisites
 -------------

-Before you begin, ensure you have the following:
+Plano runs **natively** by default — no Docker or Rust toolchain required. Pre-compiled binaries are downloaded automatically on first run.
+
+1. `Python <https://www.python.org/downloads/>`_ (v3.10+)
+2. Supported platforms: Linux (x86_64, aarch64), macOS (Apple Silicon)
+
+**Docker mode** (optional):
+
+If you prefer to run inside Docker, add ``--docker`` to ``planoai up`` / ``planoai down``. This requires:

 1. `Docker System <https://docs.docker.com/get-started/get-docker/>`_ (v24)
 2. `Docker Compose <https://docs.docker.com/compose/install/>`_ (v2.29)
-3. `Python <https://www.python.org/downloads/>`_ (v3.10+)

 Plano's CLI allows you to manage and interact with the Plano efficiently. To install the CLI, simply run the following command:

@ -84,17 +90,20 @@ Step 2. Start plano

 Once the config file is created, ensure that you have environment variables set up for ``ANTHROPIC_API_KEY`` and ``OPENAI_API_KEY`` (or these are defined in a ``.env`` file).

-Start Plano:
-
 .. code-block:: console

   $ planoai up plano_config.yaml
-   # Or if installed with uv tool: uvx planoai up plano_config.yaml
-   2024-12-05 11:24:51,288 - planoai.main - INFO - Starting plano cli version: 0.4.9
-   2024-12-05 11:24:51,825 - planoai.utils - INFO - Schema validation successful!
-   2024-12-05 11:24:51,825 - planoai.main - INFO - Starting plano
-   ...
-   2024-12-05 11:25:16,131 - planoai.core - INFO - Container is healthy!
+
+On the first run, Plano automatically downloads Envoy, WASM plugins, and brightstaff and caches them at ``~/.plano/``.
+
+To stop Plano, run ``planoai down``.
+
+**Docker mode** (optional):
+
+.. code-block:: console
+
+   $ planoai up plano_config.yaml --docker
+   $ planoai down --docker

 Step 3: Interact with LLM
 ~~~~~~~~~~~~~~~~~~~~~~~~~
@ -185,9 +194,9 @@ Here is a minimal configuration that wires Plano-Orchestrator to two HTTP servic

  agents:
    - id: flight_agent
-      url: http://host.docker.internal:10520  # your flights service
+      url: http://localhost:10520  # your flights service
    - id: hotel_agent
-      url: http://host.docker.internal:10530  # your hotels service
+      url: http://localhost:10530  # your hotels service

  model_providers:
    - model: openai/gpt-4o
--- a/docs/source/guides/observability/monitoring.rst
+++ b/docs/source/guides/observability/monitoring.rst
@ -59,7 +59,7 @@ are some sample configuration files for both, respectively.
        scheme: http
        static_configs:
        - targets:
-            - host.docker.internal:19901
+            - localhost:19901
        params:
        format: ["prometheus"]

--- a/docs/source/resources/deployment.rst
+++ b/docs/source/resources/deployment.rst
@ -3,7 +3,47 @@
 Deployment
 ==========

-This guide shows how to deploy Plano directly using Docker without the ``plano`` CLI, including basic runtime checks for routing and health monitoring.
+Plano can be deployed in two ways: **natively** on the host (default) or inside a **Docker container**.
+
+Native Deployment (Default)
+---------------------------
+
+Plano runs natively by default. Pre-compiled binaries (Envoy, WASM plugins, brightstaff) are automatically downloaded on the first run and cached at ``~/.plano/``.
+
+Supported platforms: Linux (x86_64, aarch64), macOS (Apple Silicon).
+
+Start Plano
+~~~~~~~~~~~~
+
+.. code-block:: bash
+
+   planoai up plano_config.yaml
+
+Options:
+
+- ``--foreground`` — stay attached and stream logs (Ctrl+C to stop)
+- ``--with-tracing`` — start a local OTLP trace collector
+
+Runtime files (rendered configs, logs, PID file) are stored in ``~/.plano/run/``.
+
+Stop Plano
+~~~~~~~~~~
+
+.. code-block:: bash
+
+   planoai down
+
+Build from Source (Developer)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If you want to build from source instead of using pre-compiled binaries, you need:
+
+- `Rust <https://rustup.rs>`_ with the ``wasm32-wasip1`` target
+- OpenSSL dev headers (``libssl-dev`` on Debian/Ubuntu, ``openssl`` on macOS)
+
+.. code-block:: bash
+
+   planoai build --native

 Docker Deployment
 -----------------
@ -53,6 +93,13 @@ Check container health and logs:
   docker compose ps
   docker compose logs -f plano

+You can also use the CLI with Docker mode:
+
+.. code-block:: bash
+
+   planoai up plano_config.yaml --docker
+   planoai down --docker
+
 Runtime Tests
 -------------

--- a/docs/source/resources/includes/agents/agents_config.yaml
+++ b/docs/source/resources/includes/agents/agents_config.yaml
@ -2,9 +2,9 @@ version: v0.3.0

 agents:
  - id: weather_agent
-    url: http://host.docker.internal:10510
+    url: http://localhost:10510
  - id: flight_agent
-    url: http://host.docker.internal:10520
+    url: http://localhost:10520

 model_providers:
  - model: openai/gpt-4o
--- a/docs/source/resources/includes/plano_config_agents_filters.yaml
+++ b/docs/source/resources/includes/plano_config_agents_filters.yaml
@ -2,16 +2,16 @@ version: v0.3.0

 agents:
  - id: rag_agent
-    url: http://host.docker.internal:10505
+    url: http://localhost:10505

 filters:
  - id: query_rewriter
-    url: http://host.docker.internal:10501
+    url: http://localhost:10501
    # type: mcp # default is mcp
    # transport: streamable-http # default is streamable-http
    # tool: query_rewriter # default name is the filter id
  - id: context_builder
-    url: http://host.docker.internal:10502
+    url: http://localhost:10502

 model_providers:
  - model: openai/gpt-4o-mini
--- a/docs/source/resources/includes/plano_config_full_reference.yaml
+++ b/docs/source/resources/includes/plano_config_full_reference.yaml
@ -4,15 +4,15 @@ version: v0.3.0
 # External HTTP agents - API type is controlled by request path (/v1/responses, /v1/messages, /v1/chat/completions)
 agents:
  - id: weather_agent # Example agent for weather
-    url: http://host.docker.internal:10510
+    url: http://localhost:10510

  - id: flight_agent # Example agent for flights
-    url: http://host.docker.internal:10520
+    url: http://localhost:10520

 # MCP filters applied to requests/responses (e.g., input validation, query rewriting)
 filters:
  - id: input_guards # Example filter for input validation
-    url: http://host.docker.internal:10500
+    url: http://localhost:10500
    # type: mcp (default)
    # transport: streamable-http (default)
    # tool: input_guards (default - same as filter id)
--- a/docs/source/resources/includes/plano_config_full_reference_rendered.yaml
+++ b/docs/source/resources/includes/plano_config_full_reference_rendered.yaml
@ -1,31 +1,31 @@
 agents:
 - id: weather_agent
-  url: http://host.docker.internal:10510
+  url: http://localhost:10510
 - id: flight_agent
-  url: http://host.docker.internal:10520
+  url: http://localhost:10520
 endpoints:
  app_server:
    connect_timeout: 0.005s
    endpoint: 127.0.0.1
    port: 80
  flight_agent:
-    endpoint: host.docker.internal
+    endpoint: localhost
    port: 10520
    protocol: http
  input_guards:
-    endpoint: host.docker.internal
+    endpoint: localhost
    port: 10500
    protocol: http
  mistral_local:
    endpoint: 127.0.0.1
    port: 8001
  weather_agent:
-    endpoint: host.docker.internal
+    endpoint: localhost
    port: 10510
    protocol: http
 filters:
 - id: input_guards
-  url: http://host.docker.internal:10500
+  url: http://localhost:10500
 listeners:
 - address: 0.0.0.0
  agents:
@ -130,6 +130,6 @@ prompt_targets:
    required: true
    type: int
 tracing:
-  opentracing_grpc_endpoint: http://host.docker.internal:4317
+  opentracing_grpc_endpoint: http://localhost:4317
  random_sampling: 100
 version: v0.3.0
--- a/tests/e2e/config_native_smoke.yaml
+++ b/tests/e2e/config_native_smoke.yaml
@ -0,0 +1,11 @@
+version: v0.3.0  # minimal config used by the native-mode CI smoke test
+
+listeners:
+  - type: model
+    name: model_listener
+    port: 12000  # the CI health check polls http://localhost:12000/healthz
+
+model_providers:
+  - model: openai/gpt-4o
+    access_key: $OPENAI_API_KEY  # CI exports a placeholder value (test-key-not-used)
+    default: true
--- a/tests/e2e/run_e2e_tests.sh
+++ b/tests/e2e/run_e2e_tests.sh
@ -45,8 +45,8 @@ uv sync

 log startup plano gateway with function calling demo
 cd ../../
-planoai down
-planoai up demos/getting_started/weather_forecast/config.yaml
+planoai down --docker
+planoai up --docker demos/getting_started/weather_forecast/config.yaml
 cd -

 log running e2e tests for prompt gateway
@ -55,11 +55,11 @@ uv run pytest test_prompt_gateway.py

 log shutting down the plano gateway service for prompt_gateway demo
 log ===============================================================
-planoai down
+planoai down --docker

 log startup plano gateway with model alias routing demo
 cd ../../
-planoai up demos/llm_routing/model_alias_routing/config_with_aliases.yaml
+planoai up --docker demos/llm_routing/model_alias_routing/config_with_aliases.yaml
 cd -

 log running e2e tests for model alias routing
@ -71,8 +71,8 @@ log ========================================
 uv run pytest test_openai_responses_api_client.py

 log startup plano gateway with state storage for openai responses api client demo
-planoai down
-planoai up config_memory_state_v1_responses.yaml
+planoai down --docker
+planoai up --docker config_memory_state_v1_responses.yaml

 log running e2e tests for openai responses api client
 log ========================================
--- a/tests/e2e/run_model_alias_tests.sh
+++ b/tests/e2e/run_model_alias_tests.sh
@ -36,8 +36,8 @@ uv sync
 # Start gateway with model alias routing config
 log "startup plano gateway with model alias routing demo"
 cd ../../
-planoai down || true
-planoai up demos/llm_routing/model_alias_routing/config_with_aliases.yaml
+planoai down --docker || true
+planoai up --docker demos/llm_routing/model_alias_routing/config_with_aliases.yaml
 cd -

 # Run both test suites that share this config in a single pytest invocation
@ -46,4 +46,4 @@ uv run pytest -n auto test_model_alias_routing.py test_openai_responses_api_clie

 # Cleanup
 log "shutting down"
-planoai down || true
+planoai down --docker || true
--- a/tests/e2e/run_prompt_gateway_tests.sh
+++ b/tests/e2e/run_prompt_gateway_tests.sh
@ -41,8 +41,8 @@ cd -
 # Start gateway with prompt_gateway config
 log "startup plano gateway with function calling demo"
 cd ../../
-planoai down || true
-planoai up demos/getting_started/weather_forecast/config.yaml
+planoai down --docker || true
+planoai up --docker demos/getting_started/weather_forecast/config.yaml
 cd -

 # Run tests
@ -51,7 +51,7 @@ uv run pytest test_prompt_gateway.py

 # Cleanup
 log "shutting down"
-planoai down || true
+planoai down --docker || true
 cd ../../demos/getting_started/weather_forecast
 docker compose down
 cd -
--- a/tests/e2e/run_responses_state_tests.sh
+++ b/tests/e2e/run_responses_state_tests.sh
@ -35,8 +35,8 @@ uv sync
 # Start gateway with state storage config
 log "startup plano gateway with state storage config"
 cd ../../
-planoai down || true
-planoai up tests/e2e/config_memory_state_v1_responses.yaml
+planoai down --docker || true
+planoai up --docker tests/e2e/config_memory_state_v1_responses.yaml
 cd -

 # Run tests
@ -45,4 +45,4 @@ uv run pytest test_openai_responses_api_client_with_state.py

 # Cleanup
 log "shutting down"
-planoai down || true
+planoai down --docker || true