This commit is contained in:
Musa 2026-05-31 00:23:01 +08:00 committed by GitHub
commit 4043c5e5b6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 2500 additions and 76 deletions

View file

@ -0,0 +1,625 @@
"""Configure Claude Desktop to use the local Plano gateway.
Python port of Ollama's `cmd/launch/claude_desktop.go` tailored for Plano. The
flow is intentionally simpler than Ollama's:
1. Detect Claude Desktop on macOS / Windows.
2. Pick a string to put in Claude's ``inferenceGatewayApiKey`` slot (Claude
Desktop requires the field; Plano's local gateway does not enforce bearer
auth, so a placeholder is fine; see ``_resolve_api_key`` for precedence).
3. Rewrite Claude Desktop config JSON files with ``.bak`` backups to switch
Claude into 3rd-party gateway mode pointed at Plano.
4. Optionally restart Claude Desktop so the changes take effect immediately.
Restoring flips ``deploymentMode`` back to ``1p``, strips the Plano gateway
settings from the profile file, and drops the Plano entry from ``_meta.json``.
The Claude Desktop ``deploymentMode = "3p"`` profile structure used here is
defined by Anthropic / observed via the Ollama integration; we do not control
it. We re-use the same JSON shape so Claude Desktop happily accepts the Plano
profile alongside any other third-party profile the user may have.
"""
from __future__ import annotations
import glob as _glob
import json
import os
import shutil
import subprocess
import sys
import tempfile
import time
from dataclasses import dataclass, field
from typing import Callable, Optional
from planoai.utils import getLogger
log = getLogger(__name__)
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
INTEGRATION_NAME = "claude-desktop"
# Display name stored next to ``PROFILE_ID`` in the ``entries`` list of
# ``_meta.json``.
PROFILE_NAME = "Plano"
# Fixed, UUID-v4-shaped profile id, distinct from Ollama's `…0114`. The
# trailing four bytes (50 6C 61 6E) spell "Plan" in ASCII to make it easy to
# identify the profile in `_meta.json`.
PROFILE_ID = "00000000-0000-4000-8000-0000506C616E"
DEFAULT_BASE_URL = "http://localhost:12000"
SUCCESS_MESSAGE = "Claude Desktop profile changed to Plano."
RESTORE_HINT = (
    "To restore the usual Claude profile, run: "
    "planoai launch claude-desktop --restore"
)
RESTORED_MESSAGE = "Claude Desktop restored to the usual Claude profile."
# Placeholder this module writes into Claude Desktop's gateway profile when
# neither ``$PLANO_API_KEY`` nor an existing profile supplies a key. Plano's
# local gateway does not enforce a bearer token; this string only exists so
# Claude Desktop has a non-empty value to attach to outbound requests.
DEFAULT_API_KEY = "plano"
# How long (in seconds) we wait for Claude Desktop to fully exit on restart.
_QUIT_TIMEOUT_SECONDS = 30
# ---------------------------------------------------------------------------
# Test seams: replace these in tests instead of monkey-patching os/subprocess.
# ---------------------------------------------------------------------------
# Platform identifier. ``"darwin"``, ``"windows"``, or anything else (which
# is treated as unsupported). Module-level so tests can override it.
def _detect_goos() -> str:
    """Return the Go-style platform identifier for this interpreter.

    ``os.name == "nt"`` maps to ``"windows"``; every other platform is
    reported as ``sys.platform`` (which is already ``"darwin"`` on macOS).
    """
    return "windows" if os.name == "nt" else sys.platform
# Platform identifier, resolved once at import time; tests override it.
_GOOS: str = _detect_goos()
# Home-directory resolver seam; ``_home`` invokes it with the literal "~".
_user_home: Callable[[str], str] = os.path.expanduser  # called as _user_home("~")
def _is_running() -> bool:
    """Return True if Claude Desktop is currently running.

    macOS: ``pgrep -f`` against the app bundle's main executable path.
    Windows: PowerShell query for a ``claude`` process that owns a main
    window. Any other platform, or a missing probe tool, yields ``False``.
    """
    if _GOOS == "darwin":
        probe = ["pgrep", "-f", "Claude.app/Contents/MacOS/Claude"]
    elif _GOOS == "windows":
        query = (
            "(Get-Process claude -ErrorAction SilentlyContinue "
            "| Where-Object { $_.MainWindowHandle -ne 0 } "
            "| Select-Object -First 1).Id"
        )
        probe = ["powershell.exe", "-NoProfile", "-Command", query]
    else:
        return False

    try:
        result = subprocess.run(
            probe,
            capture_output=True,
            text=True,
            check=False,
        )
    except FileNotFoundError:
        # The probe executable itself is absent; treat as "not running".
        return False
    # A hit needs both a zero exit status and some output (a pid).
    return result.returncode == 0 and result.stdout.strip() != ""
def _quit() -> None:
    """Ask Claude Desktop to quit gracefully.

    macOS sends an AppleScript ``quit``; Windows closes the main window of
    every windowed ``claude`` process. Other platforms are a no-op. The exit
    status of the helper command is ignored (``check=False``).
    """
    if _GOOS == "darwin":
        argv = ["osascript", "-e", 'tell application "Claude" to quit']
    elif _GOOS == "windows":
        close_windows = (
            "Get-Process claude -ErrorAction SilentlyContinue "
            "| Where-Object { $_.MainWindowHandle -ne 0 } "
            "| ForEach-Object { [void]$_.CloseMainWindow() }"
        )
        argv = ["powershell.exe", "-NoProfile", "-Command", close_windows]
    else:
        return
    subprocess.run(argv, check=False)
def _open() -> None:
    """Launch Claude Desktop.

    macOS delegates to ``open -a Claude``. Windows resolves the executable
    via ``_claude_app_path`` and starts it through PowerShell.

    Raises:
        RuntimeError: on Windows when no Claude executable can be located.
    """
    if _GOOS == "darwin":
        subprocess.run(["open", "-a", "Claude"], check=False)
    elif _GOOS == "windows":
        exe = _claude_app_path()
        if not exe:
            raise RuntimeError(
                "Claude Desktop executable was not found; open Claude Desktop "
                "manually once and re-run 'planoai launch claude-desktop'"
            )
        # PowerShell single-quoted literal: embedded quotes are doubled.
        quoted = "'{}'".format(exe.replace("'", "''"))
        command = f"Start-Process -FilePath {quoted}"
        subprocess.run(
            ["powershell.exe", "-NoProfile", "-Command", command],
            check=False,
        )
def _sleep(seconds: float) -> None:
    """Block for ``seconds``; a module-level wrapper around ``time.sleep``."""
    time.sleep(seconds)
# ---------------------------------------------------------------------------
# Path discovery
# ---------------------------------------------------------------------------
@dataclass
class _ThirdPartyPaths:
    """File paths belonging to one third-party ("-3p") Claude profile root."""

    # ``<root>/claude_desktop_config.json``
    desktop_config: str
    # ``<root>/configLibrary/_meta.json``
    meta: str
    # ``<root>/configLibrary/<PROFILE_ID>.json``
    profile: str
@dataclass
class _Targets:
    """Every config file that ``configure`` / ``restore`` will touch."""

    # ``claude_desktop_config.json`` paths under the normal profile roots.
    normal_configs: list[str] = field(default_factory=list)
    # One path bundle per third-party profile root.
    third_party_profiles: list[_ThirdPartyPaths] = field(default_factory=list)
def supported() -> Optional[str]:
    """Report whether this platform can drive Claude Desktop.

    Returns:
        ``None`` on macOS and Windows; otherwise a human-readable error.
    """
    if _GOOS not in ("darwin", "windows"):
        return "Claude Desktop launch is only supported on macOS and Windows"
    return None
def _home() -> str:
    """Return the user's home directory via the ``_user_home`` seam.

    Raises:
        RuntimeError: when the resolver returns an empty value or leaves
            ``"~"`` unexpanded.
    """
    resolved = _user_home("~")
    if resolved and resolved != "~":
        return resolved
    raise RuntimeError("could not resolve user home directory")
def _local_app_data() -> str:
    """Return the Windows per-user local application data directory.

    Precedence: ``%LOCALAPPDATA%``, then ``%USERPROFILE%\\AppData\\Local``,
    then ``<home>\\AppData\\Local``. Blank or whitespace-only environment
    values are treated as unset.
    """

    def _env(name: str) -> str:
        return (os.environ.get(name) or "").strip()

    explicit = _env("LOCALAPPDATA")
    if explicit:
        return explicit
    # ``_home()`` is only consulted when USERPROFILE is missing or blank.
    profile_dir = _env("USERPROFILE") or _home()
    return os.path.join(profile_dir, "AppData", "Local")
def _darwin_profile_roots() -> tuple[list[str], list[str]]:
    """Return ``(normal_roots, third_party_roots)`` for macOS.

    Both live under ``~/Library/Application Support``.
    """
    support_dir = os.path.join(_home(), "Library", "Application Support")
    normal_roots = [os.path.join(support_dir, "Claude")]
    third_party_roots = [os.path.join(support_dir, "Claude-3p")]
    return normal_roots, third_party_roots
def _windows_profile_roots() -> tuple[list[str], list[str]]:
    """Return ``(normal_roots, third_party_roots)`` for Windows.

    Each known app directory name under the local app-data folder has a
    sibling with a ``-3p`` suffix for third-party mode.
    """
    local = _local_app_data()
    app_names = ("Claude", "Claude Nest")
    normal = [os.path.join(local, name) for name in app_names]
    third_party = [os.path.join(local, f"{name}-3p") for name in app_names]
    return normal, third_party
def _dedupe_paths(paths: list[str]) -> list[str]:
    """Drop blank entries and case-insensitive duplicates, keeping order.

    The first spelling of each path wins; comparison uses ``str.lower``.
    """
    first_seen: dict[str, str] = {}
    for candidate in paths:
        if candidate and candidate.strip():
            # setdefault keeps the earliest spelling for a given key.
            first_seen.setdefault(candidate.lower(), candidate)
    return list(first_seen.values())
def _target_paths() -> _Targets:
    """Compute every Claude Desktop config path for the current platform.

    Raises:
        RuntimeError: when the platform is unsupported (message from
            ``supported``).
    """
    unsupported = supported()
    if unsupported is not None:
        raise RuntimeError(unsupported)

    roots_for = (
        _darwin_profile_roots if _GOOS == "darwin" else _windows_profile_roots
    )
    normal_roots, third_party_roots = roots_for()

    config_name = "claude_desktop_config.json"
    profiles = []
    for root in _dedupe_paths(third_party_roots):
        library = os.path.join(root, "configLibrary")
        profiles.append(
            _ThirdPartyPaths(
                desktop_config=os.path.join(root, config_name),
                meta=os.path.join(library, "_meta.json"),
                profile=os.path.join(library, f"{PROFILE_ID}.json"),
            )
        )
    return _Targets(
        normal_configs=[
            os.path.join(root, config_name) for root in _dedupe_paths(normal_roots)
        ],
        third_party_profiles=profiles,
    )
def _claude_app_path() -> str:
    """Return the first existing Claude Desktop app path, or ``""``.

    macOS checks the system and per-user ``Applications`` folders. Windows
    checks a list of fixed install locations first, then ``app-*``
    versioned sub-directories discovered by globbing.
    """
    if _GOOS == "darwin":
        bundles = [
            "/Applications/Claude.app",
            os.path.join(_home(), "Applications", "Claude.app"),
        ]
        return next((b for b in bundles if os.path.exists(b)), "")

    if _GOOS != "windows":
        return ""

    local = _local_app_data()
    fixed_dirs = [
        ("Programs", "Claude"),
        ("Programs", "Claude Desktop"),
        ("Claude",),
        ("Claude Nest",),
        ("Claude Desktop",),
        ("AnthropicClaude",),
    ]
    candidates = [os.path.join(local, *parts, "Claude.exe") for parts in fixed_dirs]

    versioned_dirs = [
        ("AnthropicClaude",),
        ("Programs", "Claude"),
        ("Programs", "Claude Desktop"),
    ]
    for parts in versioned_dirs:
        pattern = os.path.join(local, *parts, "app-*", "Claude.exe")
        candidates.extend(_glob.glob(pattern))

    return next((p for p in _dedupe_paths(candidates) if os.path.exists(p)), "")
def is_installed() -> bool:
    """Best-effort check for a Claude Desktop installation.

    True when an app path is found, when (Windows only) the app is running,
    or when any known profile directory exists.
    """
    if _claude_app_path():
        return True
    if _GOOS == "windows":
        if _is_running():
            return True
        normal, third = _windows_profile_roots()
    elif _GOOS == "darwin":
        normal, third = _darwin_profile_roots()
    else:
        return False
    return any(os.path.isdir(root) for root in normal + third)
# ---------------------------------------------------------------------------
# JSON IO with atomic write + .bak backup
# ---------------------------------------------------------------------------
def _read_json(path: str) -> dict:
    """Load a JSON object from ``path``.

    Args:
        path: File to read. It is decoded as UTF-8; a leading byte-order
            mark, if present, is ignored.

    Returns:
        The parsed object, or ``{}`` when the file is empty/whitespace-only
        or its top-level JSON value is not an object.

    Raises:
        FileNotFoundError: if ``path`` does not exist.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    # "utf-8-sig" reads plain UTF-8 unchanged but strips a BOM, which
    # ``json.loads`` would otherwise reject as an unexpected character.
    with open(path, "r", encoding="utf-8-sig") as f:
        text = f.read()
    if not text.strip():
        return {}
    parsed = json.loads(text)
    return parsed if isinstance(parsed, dict) else {}
def _read_json_allow_missing(path: str) -> dict:
    """Like ``_read_json`` but a missing file yields ``{}``.

    Other errors raised by ``_read_json`` (e.g. malformed JSON) propagate.
    """
    try:
        loaded = _read_json(path)
    except FileNotFoundError:
        loaded = {}
    return loaded
def _atomic_write_with_backup(path: str, payload: bytes) -> None:
    """Write ``payload`` to ``path`` atomically, keeping a ``.bak`` copy.

    The parent directory is created if needed. When ``path`` already exists
    its current content is first copied to ``path + ".bak"`` (best effort:
    a failed backup is logged at debug level and does not abort the write).
    The payload is written to a temporary file in the same directory and
    then moved over ``path`` with ``os.replace``.

    Args:
        path: Destination file.
        payload: Exact bytes to store.

    Raises:
        OSError: if the directory, temp file, or final replace fails. The
            temporary file is removed before the error propagates.
    """
    # A bare filename has an empty dirname. Anchor on "." so the temp file
    # is always a sibling of the target; otherwise it would be created in
    # the system temp dir and ``os.replace`` could fail across filesystems.
    parent = os.path.dirname(path) or "."
    os.makedirs(parent, exist_ok=True)
    if os.path.exists(path):
        try:
            shutil.copy2(path, path + ".bak")
        except OSError as e:
            log.debug("could not write backup for %s: %s", path, e)
    fd, tmp_path = tempfile.mkstemp(prefix=".plano_", suffix=".tmp", dir=parent)
    try:
        with os.fdopen(fd, "wb") as f:
            f.write(payload)
        os.replace(tmp_path, path)
    except BaseException:
        # Also covers KeyboardInterrupt so no stray temp file is left in
        # Claude's config directory; the original error is re-raised.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise
def _write_json(path: str, value: dict) -> None:
    """Serialize ``value`` as 2-space-indented JSON plus a trailing newline
    and store it at ``path`` via ``_atomic_write_with_backup``."""
    text = json.dumps(value, indent=2)
    _atomic_write_with_backup(path, f"{text}\n".encode("utf-8"))
# ---------------------------------------------------------------------------
# JSON shape mutators (1:1 with Ollama)
# ---------------------------------------------------------------------------
def _write_deployment_mode(path: str, mode: str) -> None:
    """Set ``deploymentMode`` to ``mode`` in the JSON file at ``path``.

    A missing file is treated as an empty object; all other keys are kept.
    """
    settings = _read_json_allow_missing(path)
    settings.update(deploymentMode=mode)
    _write_json(path, settings)
def _write_meta(path: str, profile_id: str, name: str) -> None:
    """Mark ``profile_id`` as the applied profile in the meta file.

    Sets ``appliedId`` and rewrites ``entries`` so it contains exactly one
    record for ``profile_id`` (appended last, carrying ``name``). Entries
    for other ids are preserved; a non-list ``entries`` value is replaced.
    """
    meta = _read_json_allow_missing(path)
    previous = meta.get("entries")
    if isinstance(previous, list):
        kept = [
            entry
            for entry in previous
            if not (isinstance(entry, dict) and entry.get("id") == profile_id)
        ]
    else:
        kept = []
    meta["appliedId"] = profile_id
    meta["entries"] = kept + [{"id": profile_id, "name": name}]
    _write_json(path, meta)
def _write_gateway_profile(
    path: str, api_key: str, base_url: str, force_chooser: bool
) -> None:
    """Write the gateway settings into the profile JSON at ``path``.

    Existing unrelated keys are kept, ``inferenceModels`` is removed, and
    ``force_chooser`` is stored verbatim as ``disableDeploymentModeChooser``.
    """
    profile = _read_json_allow_missing(path)
    profile.pop("inferenceModels", None)
    profile.update(
        {
            "inferenceProvider": "gateway",
            "inferenceGatewayBaseUrl": base_url,
            "inferenceGatewayApiKey": api_key,
            "inferenceGatewayAuthScheme": "bearer",
            "disableDeploymentModeChooser": force_chooser,
        }
    )
    _write_json(path, profile)
def _restore_meta(path: str) -> None:
    """Remove every trace of the Plano profile from the meta file.

    Clears ``appliedId`` when it points at ``PROFILE_ID`` and filters the
    Plano record out of ``entries``. The file is rewritten only when
    something actually changed; a missing or empty file is left alone.
    """
    meta = _read_json_allow_missing(path)
    if not meta:
        return

    dirty = meta.get("appliedId") == PROFILE_ID
    if dirty:
        del meta["appliedId"]

    entries = meta.get("entries")
    if isinstance(entries, list):
        survivors = [
            entry
            for entry in entries
            if not (isinstance(entry, dict) and entry.get("id") == PROFILE_ID)
        ]
        dirty = dirty or len(survivors) != len(entries)
        meta["entries"] = survivors

    if dirty:
        _write_json(path, meta)
def _restore_profile(path: str) -> None:
    """Strip the gateway settings from the profile JSON at ``path``.

    Removes the provider, base URL, auth scheme and model keys and sets
    ``disableDeploymentModeChooser`` to ``False``. ``inferenceGatewayApiKey``
    is not in the removal list, so it stays in the file. A missing or empty
    profile is left untouched.
    """
    profile = _read_json_allow_missing(path)
    if not profile:
        return
    gateway_keys = (
        "inferenceProvider",
        "inferenceGatewayBaseUrl",
        "inferenceGatewayAuthScheme",
        "inferenceModels",
    )
    cleaned = {key: val for key, val in profile.items() if key not in gateway_keys}
    cleaned["disableDeploymentModeChooser"] = False
    _write_json(path, cleaned)
def _read_applied_id(path: str) -> str:
    """Return the string ``appliedId`` from the meta file at ``path``.

    Yields ``""`` when the file is missing, is not valid JSON, or the value
    is not a string.
    """
    try:
        applied = _read_json(path).get("appliedId")
    except (FileNotFoundError, json.JSONDecodeError):
        return ""
    if isinstance(applied, str):
        return applied
    return ""
def _read_deployment_mode(path: str) -> str:
    """Return the string ``deploymentMode`` from the config at ``path``.

    Yields ``""`` when the file is missing, is not valid JSON, or the value
    is not a string.
    """
    try:
        mode = _read_json(path).get("deploymentMode")
    except (FileNotFoundError, json.JSONDecodeError):
        return ""
    if isinstance(mode, str):
        return mode
    return ""
def _read_gateway_api_key(path: str) -> str:
    """Return the stripped ``inferenceGatewayApiKey`` from ``path``.

    Yields ``""`` when the file is missing, is not valid JSON, or the value
    is not a string.
    """
    try:
        key = _read_json(path).get("inferenceGatewayApiKey")
    except (FileNotFoundError, json.JSONDecodeError):
        return ""
    if isinstance(key, str):
        return key.strip()
    return ""
def _third_party_profile_ok(t: _ThirdPartyPaths) -> bool:
    """Check that one third-party root is fully pointed at Plano.

    Requires the meta file's ``appliedId`` to equal ``PROFILE_ID`` and the
    profile file to name the ``gateway`` provider with a non-blank base URL
    and API key.
    """
    if _read_applied_id(t.meta) != PROFILE_ID:
        return False
    try:
        profile = _read_json(t.profile)
    except (FileNotFoundError, json.JSONDecodeError):
        return False

    def _non_blank(key: str) -> bool:
        value = profile.get(key)
        return isinstance(value, str) and bool(value.strip())

    return (
        profile.get("inferenceProvider") == "gateway"
        and _non_blank("inferenceGatewayBaseUrl")
        and _non_blank("inferenceGatewayApiKey")
    )
def is_configured() -> bool:
    """Return True when every target file already reflects the Plano setup.

    All normal configs and all third-party desktop configs must be in
    ``"3p"`` mode, and each third-party profile must pass
    ``_third_party_profile_ok``. Unsupported platforms report ``False``.
    """
    try:
        targets = _target_paths()
    except RuntimeError:
        return False
    if not (targets.normal_configs and targets.third_party_profiles):
        return False
    if any(_read_deployment_mode(cfg) != "3p" for cfg in targets.normal_configs):
        return False
    return all(
        _read_deployment_mode(tp.desktop_config) == "3p"
        and _third_party_profile_ok(tp)
        for tp in targets.third_party_profiles
    )
# ---------------------------------------------------------------------------
# API key resolution
# ---------------------------------------------------------------------------
#
# Plano's local gateway does not enforce bearer auth — there's no such thing
# as a "Plano API key". Claude Desktop's third-party profile schema, however,
# requires ``inferenceGatewayApiKey`` to be a non-empty string before it will
# treat the profile as configured. We therefore pick *some* string to write
# into that slot, with the following precedence so users running Plano behind
# their own auth proxy can opt-in:
#
# 1. ``$PLANO_API_KEY`` — explicit override (e.g. an internal auth token).
# 2. The existing ``inferenceGatewayApiKey`` already in Claude's 3p profile,
# so re-running ``planoai launch claude-desktop`` does not clobber a
# value the user manually set.
# 3. The fixed placeholder ``DEFAULT_API_KEY`` ("plano").
#
# We do not validate this string against the gateway. The gateway's
# reachability is already surfaced by ``launch_cmd._is_plano_running()``
# before this module is invoked.
def _resolve_api_key(profile_paths: list[str]) -> str:
    """Choose the string to store as ``inferenceGatewayApiKey``.

    Precedence: a non-blank ``$PLANO_API_KEY``, then the first non-blank key
    already present in any of ``profile_paths``, then ``DEFAULT_API_KEY``.
    """
    override = (os.environ.get("PLANO_API_KEY") or "").strip()
    if override:
        return override
    stored_keys = (_read_gateway_api_key(path) for path in profile_paths)
    return next((key for key in stored_keys if key), DEFAULT_API_KEY)
# ---------------------------------------------------------------------------
# Public configure / restore / launch
# ---------------------------------------------------------------------------
def configure(base_url: str = DEFAULT_BASE_URL, *, force_chooser: bool = True) -> None:
    """Switch Claude Desktop into 3p mode pointed at the local Plano gateway.

    Args:
        base_url: Gateway URL written as ``inferenceGatewayBaseUrl``.
        force_chooser: Value stored as ``disableDeploymentModeChooser``.

    Raises:
        RuntimeError: on unsupported platforms.
    """
    unsupported = supported()
    if unsupported is not None:
        raise RuntimeError(unsupported)

    targets = _target_paths()
    # Resolve the key before any file is rewritten so an existing
    # user-provided value can still be read back.
    api_key = _resolve_api_key([tp.profile for tp in targets.third_party_profiles])

    for normal_config in targets.normal_configs:
        _write_deployment_mode(normal_config, "3p")
    for tp in targets.third_party_profiles:
        _write_deployment_mode(tp.desktop_config, "3p")
        _write_meta(tp.meta, PROFILE_ID, PROFILE_NAME)
        _write_gateway_profile(tp.profile, api_key, base_url, force_chooser)
def restore() -> None:
    """Flip Claude Desktop back to the default Anthropic profile.

    Every desktop config is set to ``"1p"``; each third-party root also has
    its Plano meta entry and gateway profile settings removed.

    Raises:
        RuntimeError: on unsupported platforms.
    """
    unsupported = supported()
    if unsupported is not None:
        raise RuntimeError(unsupported)

    targets = _target_paths()
    for normal_config in targets.normal_configs:
        _write_deployment_mode(normal_config, "1p")
    for tp in targets.third_party_profiles:
        _write_deployment_mode(tp.desktop_config, "1p")
        _restore_meta(tp.meta)
        _restore_profile(tp.profile)
def _can_prompt() -> bool:
    """Return True only when both stdin and stderr are attached to a TTY."""
    return all(stream.isatty() for stream in (sys.stdin, sys.stderr))
def _confirm(prompt: str, yes: bool) -> bool:
    """Ask the user a yes/no question, defaulting to yes.

    Args:
        prompt: Question text; ``" [Y/n] "`` is appended.
        yes: When true the question is skipped and ``True`` is returned.

    Returns:
        ``True`` for an empty reply, ``y`` or ``yes`` (case-insensitive).
        ``False`` for anything else, when no TTY is available, or when input
        is interrupted (EOF / Ctrl-C).
    """
    if yes:
        return True
    if not _can_prompt():
        return False
    try:
        reply = input(f"{prompt} [Y/n] ")
    except (EOFError, KeyboardInterrupt):
        # Finish the interrupted prompt line before returning.
        sys.stderr.write("\n")
        return False
    return reply.strip().lower() in {"", "y", "yes"}
def launch_or_restart(prompt: str, yes: bool) -> None:
    """Open Claude Desktop, restarting it first if it is already running.

    Args:
        prompt: Confirmation question shown before quitting a running app.
        yes: Skip the confirmation and restart unconditionally.

    Raises:
        RuntimeError: on unsupported platforms, or when the running app is
            still alive ``_QUIT_TIMEOUT_SECONDS`` after being asked to quit.
    """
    err = supported()
    if err is not None:
        raise RuntimeError(err)
    if not _is_running():
        _open()
        return
    if not _confirm(prompt, yes):
        sys.stderr.write(
            "Quit and reopen Claude Desktop when you're ready for the "
            "profile change to take effect.\n"
        )
        return
    _quit()
    # Monotonic clock: the timeout must not stretch or collapse when the
    # wall clock is adjusted (NTP step, DST, manual change).
    deadline = time.monotonic() + _QUIT_TIMEOUT_SECONDS
    # Liveness is checked before the deadline on every pass, so an app that
    # exits during the final sleep is still recognised as gone.
    while _is_running():
        if time.monotonic() >= deadline:
            raise RuntimeError(
                "Claude Desktop did not quit; quit it manually and re-run "
                "the command"
            )
        _sleep(0.2)
    _open()

331
cli/planoai/launch_cmd.py Normal file
View file

@ -0,0 +1,331 @@
"""``planoai launch`` command group.
Launches CLI agents (Claude Code, Codex) or the Claude Desktop app against the
local Plano gateway. This replaces the old ``planoai cli-agent`` command.
"""
from __future__ import annotations
import json
import os
import sys
from typing import Optional
import rich_click as click
import yaml
from planoai import claude_desktop as _cd
from planoai.consts import NATIVE_PID_FILE, PLANO_DOCKER_NAME
from planoai.core import _resolve_cli_agent_endpoint, start_cli_agent
from planoai.docker_cli import docker_container_status
from planoai.defaults import DEFAULT_LLM_LISTENER_PORT
from planoai.utils import find_config_file, getLogger
log = getLogger(__name__)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _is_native_plano_running() -> bool:
    """Return True if the natively started Plano processes look alive.

    Reads ``NATIVE_PID_FILE`` (a JSON object holding ``envoy_pid`` and
    ``brightstaff_pid``) and probes both pids with signal 0.

    Returns:
        ``False`` when the file is missing, unreadable, not a JSON object,
        either pid is not a positive integer, or either process no longer
        exists. A ``PermissionError`` from the probe counts as alive (the
        process exists but belongs to another user).
    """
    try:
        with open(NATIVE_PID_FILE, "r") as f:
            pids = json.load(f)
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file; ValueError covers both
        # JSONDecodeError and undecodable bytes.
        return False
    if not isinstance(pids, dict):
        # e.g. a JSON list or number: there is nothing to look up.
        return False

    candidates = (pids.get("envoy_pid"), pids.get("brightstaff_pid"))
    for pid in candidates:
        # bool is an int subclass, and pid <= 0 addresses process groups
        # rather than a single process, so neither is a usable pid.
        if isinstance(pid, bool) or not isinstance(pid, int) or pid <= 0:
            return False

    for pid in candidates:
        try:
            os.kill(pid, 0)
        except ProcessLookupError:
            return False
        except PermissionError:
            continue
    return True
def _is_plano_running() -> bool:
    """Return True if Plano runs natively or its Docker container is up.

    The Docker status is only queried when the native check fails.
    """
    return (
        _is_native_plano_running()
        or docker_container_status(PLANO_DOCKER_NAME) == "running"
    )
def _require_plano_running(console) -> None:
    """Exit with status 1 (after printing a hint) unless Plano is running.

    Args:
        console: Object with a rich-style ``print`` method used for output.
    """
    if not _is_plano_running():
        console.print("[red]✗[/red] Plano is not running.")
        console.print(
            "[dim]Start Plano first using 'planoai up <config.yaml>' "
            "(native or --docker mode).[/dim]"
        )
        sys.exit(1)
def _start_plano_with_config(config_path: str, console) -> None:
    """Start Plano by invoking the ``planoai up`` click callback directly.

    The callback is called with a fixed option set: native (non-Docker),
    background, no tracing, default listener port. Exits with status 1 when
    ``config_path`` does not exist.

    NOTE(review): the caller relies on ``up`` returning only once Plano is
    healthy; that behaviour lives in ``planoai.main`` and is not visible
    here — confirm before depending on it.
    """
    # Deferred import: ``planoai.main`` imports this module, so importing it
    # at module load time would be circular.
    from planoai.main import up

    resolved = os.path.abspath(config_path)
    if not os.path.exists(resolved):
        console.print(f"[red]✗[/red] Config file not found: {resolved}")
        sys.exit(1)
    console.print(f"[dim]Starting Plano with config [cyan]{resolved}[/cyan]...[/dim]")
    up_options = {
        "file": resolved,
        "path": ".",
        "foreground": False,
        "with_tracing": False,
        "tracing_port": 4317,
        "docker": False,
        "verbose": False,
        "listener_port": DEFAULT_LLM_LISTENER_PORT,
    }
    up.callback(**up_options)
def _base_url_from_config_file(config_path: str) -> Optional[str]:
    """Derive ``http://localhost:<port>`` from a Plano YAML config.

    Returns:
        The URL built from the port reported by
        ``_resolve_cli_agent_endpoint``, or ``None`` when the file cannot be
        opened or parsed. An empty document is treated as ``{}``.
    """
    try:
        with open(config_path, "r") as handle:
            loaded = yaml.safe_load(handle)
    except (OSError, yaml.YAMLError):
        return None
    # Only the port is used; the host part of the endpoint is discarded.
    _, listener_port = _resolve_cli_agent_endpoint(loaded or {})
    return f"http://localhost:{listener_port}"
def _resolve_plano_config(file: Optional[str], path: str, console) -> str:
    """Locate the Plano config via ``find_config_file`` and return its path.

    Prints an error through ``console`` and exits with status 1 when the
    resolved path does not exist.
    """
    located = find_config_file(path, file)
    missing = not os.path.exists(located)
    if missing:
        console.print(f"[red]✗[/red] Config file not found: {located}")
        sys.exit(1)
    return located
def _run_cli_agent(agent_type: str, file, path, settings) -> None:
    """Shared body of the CLI-agent subcommands.

    Verifies Plano is running, resolves the config file, then hands off to
    ``start_cli_agent``. Any ordinary exception is reported as
    ``Error: <message>`` and turned into exit status 1.
    """
    from rich.console import Console

    console = Console()
    _require_plano_running(console)
    config_file = _resolve_plano_config(file, path, console)
    try:
        start_cli_agent(config_file, agent_type, settings)
    except Exception as exc:
        # SystemExit derives from BaseException, so exit codes raised by the
        # agent pass through this handler untouched.
        click.echo(f"Error: {exc}")
        sys.exit(1)
# ---------------------------------------------------------------------------
# Group + subcommands
# ---------------------------------------------------------------------------
# Click group that the ``claude-cli``, ``codex`` and ``claude-desktop``
# subcommands below attach to. The docstring is user-facing help text.
@click.group()
def launch():
    """Launch a CLI agent or desktop app against the local Plano gateway."""
# ``planoai launch claude-cli [FILE]``: forwards its arguments to
# ``_run_cli_agent`` with the agent type fixed to "claude".
@launch.command("claude-cli")
@click.argument("file", required=False)
@click.option(
    "--path", default=".", help="Path to the directory containing plano_config.yaml"
)
@click.option(
    "--settings",
    default="{}",
    help="Additional settings as JSON string for the CLI agent.",
)
def claude_cli(file, path, settings):
    """Launch the Claude Code CLI connected to Plano."""
    _run_cli_agent("claude", file, path, settings)
# ``planoai launch codex [FILE]``: forwards its arguments to
# ``_run_cli_agent`` with the agent type fixed to "codex".
@launch.command("codex")
@click.argument("file", required=False)
@click.option(
    "--path", default=".", help="Path to the directory containing plano_config.yaml"
)
@click.option(
    "--settings",
    default="{}",
    help="Additional settings as JSON string for the CLI agent.",
)
def codex(file, path, settings):
    """Launch the Codex CLI connected to Plano."""
    _run_cli_agent("codex", file, path, settings)
# ``planoai launch claude-desktop``: two mutually exclusive flows.
#   * ``--restore``: undo the Plano profile, optionally restart the app.
#   * default: optionally start Plano, resolve the gateway URL, rewrite
#     Claude Desktop's configs, optionally restart the app.
@launch.command("claude-desktop")
@click.option(
    "--config",
    "config_path",
    type=click.Path(dir_okay=False),
    default=None,
    help="Path to a Plano config; if Plano isn't already running, "
    "`planoai up <config>` is invoked first so the gateway is ready before "
    "Claude Desktop is configured.",
)
@click.option(
    "--no-launch",
    "no_launch",
    is_flag=True,
    default=False,
    help="Configure Claude Desktop but do not (re)open the app afterwards.",
)
@click.option(
    "--restore",
    "restore_flag",
    is_flag=True,
    default=False,
    help="Switch Claude Desktop back to its usual Anthropic Claude profile.",
)
@click.option(
    "--yes",
    "-y",
    "yes_flag",
    is_flag=True,
    default=False,
    help="Auto-approve restart prompts.",
)
@click.option(
    "--base-url",
    default=None,
    help="Plano LLM listener URL (default: derived from --config or running Plano, falling back to http://localhost:12000).",
)
def claude_desktop_cmd(config_path, no_launch, restore_flag, yes_flag, base_url):
    """Configure Claude Desktop to use the local Plano gateway.

    Mirrors `ollama launch claude-desktop`: rewrites Claude Desktop's profile
    JSONs (with `.bak` backups) to switch into third-party gateway mode pointed
    at Plano, then optionally restarts Claude Desktop so the change takes
    effect. When `--config <path>` is supplied and Plano is not already
    running, this command also starts Plano with that config first, so the
    end-to-end flow is a single command.
    """
    from rich.console import Console

    console = Console()
    # Refuse early on platforms the claude_desktop module cannot handle.
    err = _cd.supported()
    if err is not None:
        console.print(f"[red]✗[/red] {err}")
        sys.exit(1)
    # --- Restore flow: never falls through to the configure flow below. ---
    if restore_flag:
        if config_path is not None:
            console.print(
                "[yellow]⚠[/yellow] --config is ignored when --restore is set."
            )
        try:
            _cd.restore()
        except Exception as e:
            console.print(f"[red]✗[/red] Failed to restore Claude Desktop: {e}")
            sys.exit(1)
        console.print(f"[green]✓[/green] {_cd.RESTORED_MESSAGE}")
        if no_launch:
            return
        # A failed restart is only a warning: the files are already restored.
        try:
            _cd.launch_or_restart(
                "Restart Claude Desktop to use the usual Claude profile?",
                yes_flag,
            )
        except Exception as e:
            console.print(f"[yellow]⚠[/yellow] Could not restart Claude Desktop: {e}")
        return
    # Auto-start Plano if --config was provided and nothing is running yet.
    if config_path is not None:
        abs_config = os.path.abspath(config_path)
        if not os.path.exists(abs_config):
            console.print(f"[red]✗[/red] Config file not found: {abs_config}")
            sys.exit(1)
        if _is_plano_running():
            console.print(
                "[dim]Plano already running; skipping startup. Using listener "
                "from [cyan]"
                f"{abs_config}[/cyan] for the gateway URL.[/dim]"
            )
        else:
            _start_plano_with_config(abs_config, console)
    # Resolve base URL precedence: --base-url > --config file > config found
    # from the current directory > default. Each step yields a falsy value
    # when it has no answer, so the ``or`` chain falls through to the next.
    resolved_url = (
        base_url
        or (
            _base_url_from_config_file(os.path.abspath(config_path))
            if config_path is not None
            else None
        )
        or _resolve_base_url_from_running_plano()
        or _cd.DEFAULT_BASE_URL
    )
    # Warn but continue: the profile can be written while Plano is down.
    if not _is_plano_running():
        console.print(
            "[yellow]⚠[/yellow] Plano does not appear to be running. "
            "Start it with [cyan]planoai up[/cyan] (or pass [cyan]--config "
            "<path>[/cyan]) before using Claude Desktop."
        )
    console.print(
        f"[dim]Configuring Claude Desktop to use Plano at "
        f"[cyan]{resolved_url}[/cyan][/dim]"
    )
    try:
        _cd.configure(resolved_url)
    except Exception as e:
        console.print(f"[red]✗[/red] Failed to configure Claude Desktop: {e}")
        sys.exit(1)
    console.print(f"[green]✓[/green] {_cd.SUCCESS_MESSAGE}")
    console.print(f"[dim]{_cd.RESTORE_HINT}[/dim]")
    if no_launch:
        return
    # As in the restore flow, a restart failure does not fail the command.
    try:
        _cd.launch_or_restart("Restart Claude Desktop to use Plano?", yes_flag)
    except Exception as e:
        console.print(f"[yellow]⚠[/yellow] Could not restart Claude Desktop: {e}")
def _resolve_base_url_from_running_plano() -> Optional[str]:
    """Return ``http://localhost:<port>`` from the config in the working dir.

    Despite the name, no running process is inspected: the config file is
    located with ``find_config_file(".", None)`` and its listener port is
    read through ``_base_url_from_config_file``.

    Returns:
        The URL, or ``None`` when no config can be located, the located path
        does not exist, or the file cannot be read or parsed — so the
        caller falls back to ``DEFAULT_BASE_URL``.
    """
    try:
        plano_config_file = find_config_file(".", None)
    except Exception:
        # Best effort: any lookup failure just means "no answer".
        return None
    if not plano_config_file or not os.path.exists(plano_config_file):
        return None
    # Single source of truth for the YAML -> URL step.
    return _base_url_from_config_file(plano_config_file)

View file

@ -1,4 +1,3 @@
import json
import os
import multiprocessing
import subprocess
@ -19,7 +18,6 @@ PLANO_COLOR = "#969FF4"
from planoai.docker_cli import (
docker_validate_plano_schema,
stream_gateway_logs,
docker_container_status,
)
from planoai.utils import (
getLogger,
@ -33,19 +31,17 @@ from planoai.utils import (
from planoai.core import (
start_plano,
stop_docker_container,
start_cli_agent,
)
from planoai.init_cmd import init as init_cmd
from planoai.launch_cmd import launch as launch_cmd
from planoai.trace_cmd import trace as trace_cmd, start_trace_listener_background
from planoai.chatgpt_cmd import chatgpt as chatgpt_cmd
from planoai.obs_cmd import obs as obs_cmd
from planoai.consts import (
DEFAULT_OTEL_TRACING_GRPC_ENDPOINT,
DEFAULT_NATIVE_OTEL_TRACING_GRPC_ENDPOINT,
NATIVE_PID_FILE,
PLANO_RUN_DIR,
PLANO_DOCKER_IMAGE,
PLANO_DOCKER_NAME,
)
from planoai.rich_click_config import configure_rich_click
from planoai.versioning import check_version_status, get_latest_version, get_version
@ -53,30 +49,6 @@ from planoai.versioning import check_version_status, get_latest_version, get_ver
log = getLogger(__name__)
def _is_native_plano_running() -> bool:
if not os.path.exists(NATIVE_PID_FILE):
return False
try:
with open(NATIVE_PID_FILE, "r") as f:
pids = json.load(f)
except (OSError, json.JSONDecodeError):
return False
envoy_pid = pids.get("envoy_pid")
brightstaff_pid = pids.get("brightstaff_pid")
if not isinstance(envoy_pid, int) or not isinstance(brightstaff_pid, int):
return False
for pid in (envoy_pid, brightstaff_pid):
try:
os.kill(pid, 0)
except ProcessLookupError:
return False
except PermissionError:
continue
return True
def _is_port_in_use(port: int) -> bool:
"""Check if a TCP port is already bound on localhost."""
import socket
@ -690,57 +662,12 @@ def logs(debug, follow, docker):
plano_process.terminate()
@click.command()
@click.argument("type", type=click.Choice(["claude", "codex"]), required=True)
@click.argument("file", required=False) # Optional file argument
@click.option(
"--path", default=".", help="Path to the directory containing plano_config.yaml"
)
@click.option(
"--settings",
default="{}",
help="Additional settings as JSON string for the CLI agent.",
)
def cli_agent(type, file, path, settings):
"""Start a CLI agent connected to Plano.
CLI_AGENT: The type of CLI agent to start ('claude' or 'codex')
"""
native_running = _is_native_plano_running()
docker_running = False
if not native_running:
docker_running = docker_container_status(PLANO_DOCKER_NAME) == "running"
if not (native_running or docker_running):
log.error("Plano is not running.")
log.error(
"Start Plano first using 'planoai up <config.yaml>' (native or --docker mode)."
)
sys.exit(1)
# Determine plano_config.yaml path
plano_config_file = find_config_file(path, file)
if not os.path.exists(plano_config_file):
log.error(f"Config file not found: {plano_config_file}")
sys.exit(1)
try:
start_cli_agent(plano_config_file, type, settings)
except SystemExit:
# Re-raise SystemExit to preserve exit codes
raise
except Exception as e:
click.echo(f"Error: {e}")
sys.exit(1)
# add commands to the main group
main.add_command(up)
main.add_command(down)
main.add_command(build)
main.add_command(logs)
main.add_command(cli_agent)
main.add_command(launch_cmd, name="launch")
main.add_command(generate_prompt_targets)
main.add_command(init_cmd, name="init")
main.add_command(trace_cmd, name="trace")

View file

@ -46,6 +46,20 @@ def configure_rich_click(plano_color: str) -> None:
"options": ["--debug", "--follow"],
},
],
"planoai launch claude-desktop": [
{
"name": "Plano gateway",
"options": ["--config", "--base-url"],
},
{
"name": "Mode",
"options": ["--no-launch", "--restore"],
},
{
"name": "Confirmation",
"options": ["--yes"],
},
],
}
# Command groups for main help.
@ -57,7 +71,7 @@ def configure_rich_click(plano_color: str) -> None:
},
{
"name": "Agent Commands",
"commands": ["cli-agent"],
"commands": ["launch"],
},
{
"name": "Observability",
@ -68,4 +82,14 @@ def configure_rich_click(plano_color: str) -> None:
"commands": ["generate-prompt-targets"],
},
],
"planoai launch": [
{
"name": "CLI Agents",
"commands": ["claude-cli", "codex"],
},
{
"name": "Desktop Apps",
"commands": ["claude-desktop"],
},
],
}

View file

@ -0,0 +1,366 @@
"""Tests for `planoai launch claude-desktop` configuration logic."""
from __future__ import annotations
import json
import os
from pathlib import Path
import pytest
from planoai import claude_desktop as cd
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
def fake_home(tmp_path, monkeypatch):
    """Pretend we're on macOS with a fresh home directory.

    Plano's local gateway has no API key concept, so by default we ensure
    ``$PLANO_API_KEY`` is unset; tests that exercise the env-override path
    re-set it explicitly.

    Returns the temporary directory that stands in for the home directory.
    """
    # Force the macOS code paths regardless of the host platform.
    monkeypatch.setattr(cd, "_GOOS", "darwin")
    # The module calls ``_user_home("~")``; ignore the argument.
    monkeypatch.setattr(cd, "_user_home", lambda _: str(tmp_path))
    monkeypatch.delenv("PLANO_API_KEY", raising=False)
    return tmp_path
def _normal_config_path(home: Path) -> Path:
    """Path of the normal (first-party) desktop config under ``home``."""
    return home.joinpath(
        "Library",
        "Application Support",
        "Claude",
        "claude_desktop_config.json",
    )
def _third_party_root(home: Path) -> Path:
    """Root directory of the third-party (``Claude-3p``) profile under ``home``."""
    return home.joinpath("Library", "Application Support", "Claude-3p")
def _third_party_config_path(home: Path) -> Path:
    """Path of the desktop config inside the third-party profile root."""
    return _third_party_root(home).joinpath("claude_desktop_config.json")
def _meta_path(home: Path) -> Path:
    """Path of ``configLibrary/_meta.json`` inside the third-party root."""
    return _third_party_root(home).joinpath("configLibrary", "_meta.json")
def _profile_path(home: Path) -> Path:
    """Path of the Plano profile JSON (named after ``cd.PROFILE_ID``)."""
    return _third_party_root(home).joinpath("configLibrary", f"{cd.PROFILE_ID}.json")
# ---------------------------------------------------------------------------
# configure() / restore()
# ---------------------------------------------------------------------------
def test_configure_writes_all_four_files_with_default_api_key(fake_home):
    """configure() writes both configs, the meta index and the profile."""
    cd.configure("http://localhost:12000")

    def load(path):
        return json.loads(path.read_text())

    # Both the regular and the third-party config must be in "3p" mode.
    config_paths = (
        _normal_config_path(fake_home),
        _third_party_config_path(fake_home),
    )
    for cfg_path in config_paths:
        assert load(cfg_path)["deploymentMode"] == "3p"

    meta = load(_meta_path(fake_home))
    assert meta["appliedId"] == cd.PROFILE_ID
    matching_entries = [
        entry
        for entry in meta["entries"]
        if isinstance(entry, dict) and entry.get("id") == cd.PROFILE_ID
    ]
    assert matching_entries

    profile = load(_profile_path(fake_home))
    expected_values = {
        "inferenceProvider": "gateway",
        "inferenceGatewayBaseUrl": "http://localhost:12000",
        # No env override and no pre-existing profile -> placeholder is written.
        "inferenceGatewayApiKey": cd.DEFAULT_API_KEY,
        "inferenceGatewayAuthScheme": "bearer",
    }
    for key, value in expected_values.items():
        assert profile[key] == value
    assert profile["disableDeploymentModeChooser"] is True
    assert "inferenceModels" not in profile
def test_configure_uses_env_override_when_set(fake_home, monkeypatch):
    """A non-blank ``$PLANO_API_KEY`` ends up in the written profile."""
    monkeypatch.setenv("PLANO_API_KEY", "from-env")
    cd.configure("http://localhost:12000")

    written = json.loads(_profile_path(fake_home).read_text())
    assert written["inferenceGatewayApiKey"] == "from-env"
def test_configure_preserves_existing_profile_api_key(fake_home):
    """An API key already stored in the profile survives configure()."""
    profile_file = _profile_path(fake_home)
    profile_file.parent.mkdir(parents=True, exist_ok=True)
    seed = {"inferenceGatewayApiKey": "from-profile"}
    profile_file.write_text(json.dumps(seed))

    cd.configure("http://localhost:12000")

    api_key = json.loads(profile_file.read_text())["inferenceGatewayApiKey"]
    assert api_key == "from-profile"
def test_configure_does_not_call_network(fake_home, monkeypatch):
    """configure() must never issue an HTTP request.

    Plano's local gateway is not validated at configure time, so a 503 from
    the gateway must not block setup.
    """

    def fail_on_request(*_args, **_kwargs):
        raise AssertionError("configure() must not perform network calls")

    monkeypatch.setattr("urllib.request.urlopen", fail_on_request)
    cd.configure("http://localhost:12000")

    written = json.loads(_profile_path(fake_home).read_text())
    assert written["inferenceProvider"] == "gateway"
def test_configure_preserves_existing_unrelated_keys(fake_home):
    """Keys other than ``deploymentMode`` in the regular config are kept."""
    config_file = _normal_config_path(fake_home)
    config_file.parent.mkdir(parents=True, exist_ok=True)
    seed = {"someOtherSetting": 123, "deploymentMode": "1p"}
    config_file.write_text(json.dumps(seed))

    cd.configure("http://localhost:12000")

    updated = json.loads(config_file.read_text())
    assert updated["someOtherSetting"] == 123
    assert updated["deploymentMode"] == "3p"
def test_configure_writes_backup_of_existing_files(fake_home):
    """A pre-existing config is copied to ``<name>.bak`` before rewriting."""
    config_file = _normal_config_path(fake_home)
    config_file.parent.mkdir(parents=True, exist_ok=True)
    config_file.write_text('{"deploymentMode":"1p"}')

    cd.configure("http://localhost:12000")

    # The backup sits next to the original with ".bak" appended to its name.
    backup_file = config_file.with_name(config_file.name + ".bak")
    assert backup_file.exists()
    backed_up = json.loads(backup_file.read_text())
    assert backed_up["deploymentMode"] == "1p"
def test_restore_reverts_deployment_mode_and_strips_gateway_keys(fake_home):
    """restore() undoes configure(): 1p mode, no Plano meta entry, no gateway keys."""
    cd.configure("http://localhost:12000")
    cd.restore()

    def load(path):
        return json.loads(path.read_text())

    assert load(_normal_config_path(fake_home))["deploymentMode"] == "1p"
    assert load(_third_party_config_path(fake_home))["deploymentMode"] == "1p"

    meta = load(_meta_path(fake_home))
    assert meta.get("appliedId") != cd.PROFILE_ID
    plano_entries = [
        entry
        for entry in meta.get("entries", [])
        if isinstance(entry, dict) and entry.get("id") == cd.PROFILE_ID
    ]
    assert not plano_entries

    profile = load(_profile_path(fake_home))
    assert profile["disableDeploymentModeChooser"] is False
    gateway_keys = (
        "inferenceProvider",
        "inferenceGatewayBaseUrl",
        "inferenceGatewayAuthScheme",
        "inferenceModels",
    )
    leftover = [key for key in gateway_keys if key in profile]
    assert leftover == []
def test_restore_meta_keeps_unrelated_entries(fake_home):
    """_restore_meta() drops only the Plano entry and clears ``appliedId``."""
    other_id = "00000000-0000-0000-0000-000000000001"
    seed = {
        "appliedId": cd.PROFILE_ID,
        "entries": [
            {"id": cd.PROFILE_ID, "name": "Plano"},
            {"id": other_id, "name": "Other"},
        ],
    }
    meta_file = _meta_path(fake_home)
    meta_file.parent.mkdir(parents=True, exist_ok=True)
    meta_file.write_text(json.dumps(seed))

    cd._restore_meta(str(meta_file))

    restored = json.loads(meta_file.read_text())
    assert restored.get("appliedId") in (None, "")
    surviving_ids = [
        entry["id"] for entry in restored["entries"] if isinstance(entry, dict)
    ]
    assert surviving_ids == [other_id]
# ---------------------------------------------------------------------------
# is_configured()
# ---------------------------------------------------------------------------
def test_is_configured_false_on_fresh_home(fake_home):
    """With no config files present, is_configured() reports False."""
    configured = cd.is_configured()
    assert configured is False
def test_is_configured_true_after_configure(fake_home):
    """Immediately after configure(), is_configured() reports True."""
    cd.configure("http://localhost:12000")
    configured = cd.is_configured()
    assert configured is True
def test_is_configured_false_when_only_normal_config_set(fake_home):
    """Flipping only the third-party config back to 1p makes the check fail."""
    cd.configure("http://localhost:12000")

    third_party_file = _third_party_config_path(fake_home)
    patched = {**json.loads(third_party_file.read_text()), "deploymentMode": "1p"}
    third_party_file.write_text(json.dumps(patched))

    assert cd.is_configured() is False
# ---------------------------------------------------------------------------
# API key resolution (placeholder by default; env override; profile preserve)
# ---------------------------------------------------------------------------
def test_resolve_api_key_returns_placeholder_when_no_inputs(fake_home):
    """No env var and no candidate profiles -> the default placeholder."""
    resolved = cd._resolve_api_key([])
    assert resolved == cd.DEFAULT_API_KEY
def test_resolve_api_key_uses_env_when_set(fake_home, monkeypatch):
    """``$PLANO_API_KEY`` takes precedence over a key stored in a profile."""
    profile_file = _profile_path(fake_home)
    profile_file.parent.mkdir(parents=True, exist_ok=True)
    profile_file.write_text(json.dumps({"inferenceGatewayApiKey": "from-profile"}))
    monkeypatch.setenv("PLANO_API_KEY", "from-env")

    # Env wins over profile.
    resolved = cd._resolve_api_key([str(profile_file)])
    assert resolved == "from-env"
def test_resolve_api_key_falls_back_to_existing_profile(fake_home):
    """Without an env override, the key stored in the profile is returned."""
    profile_file = _profile_path(fake_home)
    profile_file.parent.mkdir(parents=True, exist_ok=True)
    seed = {"inferenceGatewayApiKey": "from-profile"}
    profile_file.write_text(json.dumps(seed))

    resolved = cd._resolve_api_key([str(profile_file)])
    assert resolved == "from-profile"
def test_resolve_api_key_skips_blank_env(fake_home, monkeypatch):
    """A whitespace-only ``$PLANO_API_KEY`` is treated as unset."""
    monkeypatch.setenv("PLANO_API_KEY", "   ")
    resolved = cd._resolve_api_key([])
    assert resolved == cd.DEFAULT_API_KEY
# ---------------------------------------------------------------------------
# Atomic write
# ---------------------------------------------------------------------------
def test_atomic_write_creates_backup_of_existing_file(tmp_path):
    """Overwriting an existing file leaves its old content in ``<name>.bak``."""
    parent_dir = tmp_path / "deep" / "nested"
    parent_dir.mkdir(parents=True, exist_ok=True)
    target = parent_dir / "file.json"
    target.write_text("ORIGINAL")

    cd._atomic_write_with_backup(str(target), b"NEW")

    assert target.read_text() == "NEW"
    backup = parent_dir / "file.json.bak"
    assert backup.read_text() == "ORIGINAL"
def test_atomic_write_skips_backup_when_no_existing_file(tmp_path):
    """Writing a brand-new file produces no ``.bak`` sibling."""
    target = tmp_path / "fresh.json"
    backup = tmp_path / "fresh.json.bak"

    cd._atomic_write_with_backup(str(target), b"DATA")

    assert target.read_text() == "DATA"
    assert not backup.exists()
def test_atomic_write_does_not_truncate_on_failure(tmp_path, monkeypatch):
    """If ``os.replace`` fails, the target is untouched and no temp file remains."""
    target = tmp_path / "file.json"
    target.write_text("ORIGINAL")
    original_replace = os.replace

    def failing_replace(_src, _dst):
        raise OSError("disk full")

    monkeypatch.setattr(os, "replace", failing_replace)
    with pytest.raises(OSError):
        cd._atomic_write_with_backup(str(target), b"NEW")
    # Put the real implementation back before inspecting the directory.
    monkeypatch.setattr(os, "replace", original_replace)

    assert target.read_text() == "ORIGINAL"
    stray_temp_files = list(tmp_path.glob(".plano_*.tmp"))
    assert stray_temp_files == []
# ---------------------------------------------------------------------------
# Platform support
# ---------------------------------------------------------------------------
def test_supported_returns_error_on_linux(monkeypatch):
    """On Linux, supported() returns a message naming macOS and Windows."""
    monkeypatch.setattr(cd, "_GOOS", "linux")
    message = cd.supported()
    assert message is not None
    assert "macOS" in message
    assert "Windows" in message
def test_supported_returns_none_on_darwin(monkeypatch):
    """On macOS, supported() returns None (no error message)."""
    monkeypatch.setattr(cd, "_GOOS", "darwin")
    message = cd.supported()
    assert message is None
def test_configure_raises_on_unsupported_platform(monkeypatch):
    """configure() raises RuntimeError mentioning macOS when run on Linux."""
    monkeypatch.setattr(cd, "_GOOS", "linux")
    expected_error = pytest.raises(RuntimeError, match="macOS")
    with expected_error:
        cd.configure()
def test_restore_raises_on_unsupported_platform(monkeypatch):
    """restore() raises RuntimeError mentioning macOS when run on Linux."""
    monkeypatch.setattr(cd, "_GOOS", "linux")
    expected_error = pytest.raises(RuntimeError, match="macOS")
    with expected_error:
        cd.restore()
# ---------------------------------------------------------------------------
# launch_or_restart()
# ---------------------------------------------------------------------------
def test_launch_or_restart_opens_when_not_running(monkeypatch):
    """When the app is not running it is opened once and never quit."""
    opened = []

    def must_not_quit():
        return pytest.fail("should not quit when not running")

    monkeypatch.setattr(cd, "_GOOS", "darwin")
    monkeypatch.setattr(cd, "_is_running", lambda: False)
    monkeypatch.setattr(cd, "_open", lambda: opened.append(True))
    monkeypatch.setattr(cd, "_quit", must_not_quit)

    cd.launch_or_restart("prompt", yes=True)

    assert opened == [True]
def test_launch_or_restart_with_yes_quits_then_opens(monkeypatch):
    """With ``yes=True`` and the app running, it is quit once and opened once."""
    running = [True]
    quit_calls = []
    open_calls = []

    def quit_app():
        # Simulate the app exiting so the running check flips to False.
        running[0] = False

    monkeypatch.setattr(cd, "_GOOS", "darwin")
    monkeypatch.setattr(cd, "_is_running", lambda: running[0])
    monkeypatch.setattr(
        cd, "_quit", lambda: (quit_calls.append(True), quit_app())
    )
    monkeypatch.setattr(cd, "_open", lambda: open_calls.append(True))
    # Stub the module's sleep hook so the test does not wait.
    monkeypatch.setattr(cd, "_sleep", lambda _: None)

    cd.launch_or_restart("Restart?", yes=True)

    assert quit_calls == [True]
    assert open_calls == [True]

231
cli/test/test_launch_cmd.py Normal file
View file

@ -0,0 +1,231 @@
"""Tests for the `planoai launch claude-desktop` click command.
Focused on the wiring between the CLI flags and the underlying
`claude_desktop` module / `up` invocation. The actual JSON-rewriting and key
validation are covered in `test_claude_desktop.py`.
"""
from __future__ import annotations
from click.testing import CliRunner
from planoai import claude_desktop as cd
from planoai import launch_cmd as lc
def _stub_cd(monkeypatch):
    """Swap ``claude_desktop`` side effects for recorders and return the log.

    ``supported`` is stubbed to return ``None``. The returned dict maps
    ``"configure"``, ``"restore"`` and ``"launch_or_restart"`` to the list of
    calls recorded for that function.
    """
    calls: dict[str, list] = {
        name: [] for name in ("configure", "restore", "launch_or_restart")
    }

    def fake_configure(base_url, **_kw):
        calls["configure"].append(base_url)

    def fake_restore():
        calls["restore"].append(True)

    def fake_launch_or_restart(prompt, yes):
        calls["launch_or_restart"].append((prompt, yes))

    monkeypatch.setattr(cd, "supported", lambda: None)
    monkeypatch.setattr(cd, "configure", fake_configure)
    monkeypatch.setattr(cd, "restore", fake_restore)
    monkeypatch.setattr(cd, "launch_or_restart", fake_launch_or_restart)
    return calls
def test_config_path_starts_plano_when_not_running(tmp_path, monkeypatch):
    """``--config`` with Plano stopped calls ``up`` once, then configures.

    The base URL passed to ``configure`` must use the listener port from the
    config file, and ``--yes`` must reach ``launch_or_restart``.
    """
    config = tmp_path / "plano_config.yaml"
    config_text = (
        "version: v0.4.0\n"
        "listeners:\n"
        "  - name: llm\n"
        "    type: model\n"
        "    port: 12345\n"
        "    address: 0.0.0.0\n"
        "model_providers: []\n"
    )
    config.write_text(config_text)

    cd_calls = _stub_cd(monkeypatch)
    monkeypatch.setattr(lc, "_is_plano_running", lambda: False)
    up_calls = []

    def fake_up(
        file,
        path,
        foreground,
        with_tracing,
        tracing_port,
        docker,
        verbose,
        listener_port,
    ):
        # Record only the arguments worth inspecting afterwards.
        up_calls.append(
            dict(
                file=file,
                foreground=foreground,
                docker=docker,
                listener_port=listener_port,
            )
        )

    from planoai.main import up as up_cmd

    monkeypatch.setattr(up_cmd, "callback", fake_up)

    cli_args = ["claude-desktop", "--config", str(config), "--yes"]
    result = CliRunner().invoke(lc.launch, cli_args)

    assert result.exit_code == 0, result.output
    assert len(up_calls) == 1
    up_call = up_calls[0]
    assert up_call["file"] == str(config)
    assert up_call["foreground"] is False
    assert cd_calls["configure"] == ["http://localhost:12345"]
    # --yes implies we restart Claude Desktop after configuring.
    restart_calls = cd_calls["launch_or_restart"]
    assert restart_calls
    assert restart_calls[0][1] is True
def test_config_path_skips_up_when_plano_already_running(tmp_path, monkeypatch):
    """With Plano already running, ``up`` is not invoked but configure() is."""
    config = tmp_path / "plano_config.yaml"
    config_text = (
        "version: v0.4.0\n"
        "listeners:\n"
        "  - name: llm\n"
        "    type: model\n"
        "    port: 12500\n"
        "model_providers: []\n"
    )
    config.write_text(config_text)

    cd_calls = _stub_cd(monkeypatch)
    monkeypatch.setattr(lc, "_is_plano_running", lambda: True)
    sentinel = []

    def record_unexpected_up(*args, **kwargs):
        sentinel.append("called")

    from planoai.main import up as up_cmd

    monkeypatch.setattr(up_cmd, "callback", record_unexpected_up)

    cli_args = ["claude-desktop", "--config", str(config), "--no-launch"]
    result = CliRunner().invoke(lc.launch, cli_args)

    assert result.exit_code == 0, result.output
    assert sentinel == [], "should not invoke up.callback when Plano is already running"
    assert cd_calls["configure"] == ["http://localhost:12500"]
    # --no-launch skips the restart step.
    assert cd_calls["launch_or_restart"] == []
def test_config_path_must_exist(tmp_path, monkeypatch):
    """A missing ``--config`` file fails with "not found" and skips configure()."""
    cd_calls = _stub_cd(monkeypatch)
    monkeypatch.setattr(lc, "_is_plano_running", lambda: False)

    missing_config = tmp_path / "nope.yaml"
    cli_args = ["claude-desktop", "--config", str(missing_config)]
    result = CliRunner().invoke(lc.launch, cli_args)

    assert result.exit_code != 0
    assert "not found" in result.output.lower()
    assert cd_calls["configure"] == []
def test_no_launch_skips_open(monkeypatch):
    """``--no-launch`` configures with ``--base-url`` but never restarts the app."""
    cd_calls = _stub_cd(monkeypatch)
    monkeypatch.setattr(lc, "_is_plano_running", lambda: True)

    cli_args = ["claude-desktop", "--no-launch", "--base-url", "http://localhost:9999"]
    result = CliRunner().invoke(lc.launch, cli_args)

    assert result.exit_code == 0, result.output
    assert cd_calls["configure"] == ["http://localhost:9999"]
    assert cd_calls["launch_or_restart"] == []
def test_restore_ignores_config_path(tmp_path, monkeypatch):
    """``--restore`` with ``--config`` restores only and reports "ignored"."""
    config = tmp_path / "plano_config.yaml"
    config.write_text("version: v0.4.0\nmodel_providers: []\n")
    cd_calls = _stub_cd(monkeypatch)
    monkeypatch.setattr(lc, "_is_plano_running", lambda: True)

    cli_args = ["claude-desktop", "--restore", "--config", str(config), "--yes"]
    result = CliRunner().invoke(lc.launch, cli_args)

    assert result.exit_code == 0, result.output
    assert cd_calls["restore"] == [True]
    assert cd_calls["configure"] == []
    assert "ignored" in result.output.lower()
def test_base_url_overrides_config_file(tmp_path, monkeypatch):
    """An explicit ``--base-url`` wins over the port in the config file."""
    config = tmp_path / "plano_config.yaml"
    config_text = (
        "version: v0.4.0\n"
        "listeners:\n"
        "  - name: llm\n"
        "    type: model\n"
        "    port: 12345\n"
        "model_providers: []\n"
    )
    config.write_text(config_text)

    cd_calls = _stub_cd(monkeypatch)
    monkeypatch.setattr(lc, "_is_plano_running", lambda: True)

    cli_args = [
        "claude-desktop",
        "--config",
        str(config),
        "--base-url",
        "http://10.0.0.5:8080",
        "--no-launch",
    ]
    result = CliRunner().invoke(lc.launch, cli_args)

    assert result.exit_code == 0, result.output
    assert cd_calls["configure"] == ["http://10.0.0.5:8080"]
def test_unsupported_platform_errors(monkeypatch):
    """A message from ``supported()`` makes the command fail and is shown."""

    def unsupported():
        return "Claude Desktop launch is only supported on macOS and Windows"

    monkeypatch.setattr(cd, "supported", unsupported)

    result = CliRunner().invoke(lc.launch, ["claude-desktop"])

    assert result.exit_code != 0
    assert "macOS" in result.output
def test_help_lists_new_flags(monkeypatch):
    """``--help`` output mentions ``--config``, ``--no-launch`` and ``--restore``."""
    result = CliRunner().invoke(lc.launch, ["claude-desktop", "--help"])

    assert result.exit_code == 0, result.output
    help_text = result.output
    assert "--config" in help_text
    assert "--no-launch" in help_text
    assert "--restore" in help_text

View file

@ -0,0 +1,423 @@
# Frontier Model Routing: Sonnet 4.6 + GPT 5.5 + Opus 4.7
A worked example of using Plano to route across the three current frontier
LLMs from three different providers — without your application caring which
model handled any given request, and with **per-route fallbacks** so a
provider outage never takes the demo down.
| Tier | Primary model | Provider | What it's great at |
| ---------------- | -------------------------------------- | ------------------ | -------------------------------------------------------- |
| `frontier.fast` | `anthropic-claude-sonnet-4-6` | DigitalOcean | Daily driver — chat, summaries, drafts, light reasoning |
| `frontier.smart` | `gpt-5.5` | OpenAI | Multi-step reasoning, math, tool/function calling |
| `frontier.max` | `claude-opus-4-7` | Anthropic | Code, deep analysis, long-context evaluation, refactors |
The same prompt picks the right model automatically — Plano's
preference-aligned router (Plano-Orchestrator) reads the user's intent and dispatches to
the route whose `routing_preferences` description best matches. Each route
is backed by an **ordered candidate pool**, so when the primary provider
returns a `429`/`5xx` the next entry in the pool serves the request.
```
                         ┌────────────────────────────────────┐
client ──── /v1 ───▶     │ Plano gateway (port 12000)         │
(OpenAI / Anthropic /    │ ├── Plano-Orchestrator (router)    │
 Claude Desktop / SDK)   │ └── Envoy + brightstaff            │
                         └────────────────────────────────────┘
                                │           │           │
                    ┌───────────┘           │           └────────────┐
                    ▼                       ▼                        ▼
        DigitalOcean Gradient AI         OpenAI                  Anthropic
       anthropic-claude-sonnet-4-6       gpt-5.5              claude-opus-4-7
       (daily conversation route)  (complex reasoning)    (code + deep analysis)
```
## Why this layout
- **Cost-quality fit per request.** Casual prompts go to Sonnet 4.6 on
DigitalOcean (cheaper inference, still excellent quality); complex
reasoning goes to GPT 5.5; code and deep analysis go to Opus 4.7.
- **Provider diversity = resilience.** Every route lists a fallback model
from a different provider — if Anthropic rate-limits Opus, Plano hands
the next request in that route to GPT 5.5 with no client changes.
- **Zero client changes.** The OpenAI SDK, Anthropic SDK, Claude Desktop,
Codex CLI, and curl all hit the same `:12000` endpoint and use the same
alias names. Switching `frontier.max` from Opus to whatever ships next
is a one-line config change.
## The new routing-preferences architecture (v0.4.0)
This demo uses Plano's **top-level `routing_preferences`** block — the
canonical shape since `v0.4.0`. The older inline form (preferences nested
under each `model_provider`) is auto-migrated by the Plano CLI but emits a
deprecation warning. The top-level shape gives each route an ordered
candidate pool, which is what makes per-route fallbacks possible.
```yaml
routing_preferences:
- name: code generation
description: writing new functions, classes, scripts, or boilerplate; implementing APIs; producing unit tests
models:
- anthropic/claude-opus-4-7 # primary
- openai/gpt-5.5 # fallback on 429/5xx
```
What changes vs. the v0.3.0 inline style:
| Capability | v0.3.0 inline | v0.4.0 top-level |
| ----------------------------------------- | :-----------: | :--------------: |
| Multiple models can serve the same route | no | yes |
| Explicit primary + ranked fallback chain | no | yes |
| Per-request override via request body | no | yes |
| Decision-only endpoint (`/routing/v1/...`)| no | yes |
| `X-Model-Affinity` header for agent loops | no | yes |
## Prerequisites
- **Plano CLI** — `uv tool install planoai` or `pip install planoai`
- API keys for all three providers:
| Env var | Where to get it |
| ------------------- | ------------------------------------------------------------------------ |
| `DO_API_KEY` | <https://cloud.digitalocean.com/account/api/tokens> (Gradient AI access) |
| `OPENAI_API_KEY` | <https://platform.openai.com/api-keys> |
| `ANTHROPIC_API_KEY` | <https://console.anthropic.com/> |
## Quick start
```bash
export DO_API_KEY=...
export OPENAI_API_KEY=...
export ANTHROPIC_API_KEY=...
cd demos/llm_routing/frontier_model_routing
./run_demo.sh
```
`run_demo.sh` writes a local `.env`, then runs `planoai up config.yaml`.
Plano daemonizes and is ready when the script returns.
To shut down:
```bash
./run_demo.sh down
```
## Try it
### Let Plano pick the right tier
```bash
./test.sh
```
The script does two things for each prompt:
1. Calls `POST /routing/v1/chat/completions` — Plano's **decision-only**
endpoint — to print the matched route name and the ranked candidate
pool for that prompt.
2. Calls `POST /v1/chat/completions` to actually run the request and
prints the model that handled it.
A healthy run resolves like this:
```
[daily conversation -> expects DigitalOcean Sonnet 4.6]
matched route: daily conversation
ranked models: ["digitalocean/anthropic-claude-sonnet-4-6","openai/gpt-5.5"]
routed_to: digitalocean/anthropic-claude-sonnet-4-6
[complex reasoning -> expects OpenAI GPT 5.5]
matched route: complex reasoning
ranked models: ["openai/gpt-5.5","anthropic/claude-opus-4-7"]
routed_to: openai/gpt-5.5
[code generation -> expects Anthropic Opus 4.7]
matched route: code generation
ranked models: ["anthropic/claude-opus-4-7","openai/gpt-5.5"]
routed_to: anthropic/claude-opus-4-7
```
The trick: every request is sent with `model: frontier.fast`, but Plano runs
the orchestrator on every chat completion when `routing_preferences` are
configured and overrides the `model` when a preference matches. The
`frontier.fast` value is the explicit fallback used when no preference
matches — so casual prompts stay on the cheap tier and only "real" reasoning
or code work escalates to GPT 5.5 or Opus 4.7.
Want to watch the router decide live? In a second terminal:
```bash
planoai trace
```
You'll see the orchestrator's route selection for each request, including
the matched preference, ranked models, and response time.
### Inspect the routing decision without burning a token
The `/routing/v1/...` endpoint returns the routing decision **without
calling the upstream model**. Useful for previewing classification, building
a UI, or wiring fallback logic into a custom client.
```bash
curl -sS -X POST http://localhost:12000/routing/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "frontier.fast",
"messages": [{"role":"user","content":"refactor this function to remove the global"}]
}' | jq .
```
```json
{
"models": ["anthropic/claude-opus-4-7", "openai/gpt-5.5"],
"route": "code generation",
"trace_id": "4bf92f3577b34da6a3ce929d0e0e4736",
"pinned": false
}
```
Use `models[0]` as the primary; retry with `models[1]` on `429` / `5xx`.
### Pin a route across an agent loop with `X-Model-Affinity`
In a tool-using agent loop a single user task may produce a dozen LLM
calls. Their topics drift (tool selection looks like code, summarising
results looks like analysis), and the router would otherwise route each
turn independently — bouncing between providers and invalidating their
KV caches. Pin the decision once with an arbitrary session id:
```bash
SID=$(uuidgen)
curl -sS -X POST http://localhost:12000/v1/chat/completions \
-H "Content-Type: application/json" \
-H "X-Model-Affinity: $SID" \
-d '{"model":"frontier.fast","messages":[{"role":"user","content":"start a refactor of the auth module"}]}'
# every subsequent call with the same SID skips routing and reuses the
# cached model decision until the session TTL (10 min by default) expires.
curl -sS -X POST http://localhost:12000/v1/chat/completions \
-H "Content-Type: application/json" \
-H "X-Model-Affinity: $SID" \
-d '{"model":"frontier.fast","messages":[{"role":"user","content":"now write the unit tests"}]}'
```
TTL and cache size are configurable under `routing:` in `config.yaml`.
### Override the routing policy per-request
Sometimes one caller needs a different policy without redeploying the
gateway. Send `routing_preferences` inline in the request body — it is
stripped before forwarding upstream:
```bash
curl -sS -X POST http://localhost:12000/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "frontier.fast",
"messages": [{"role":"user","content":"draft me a haiku about Postgres"}],
"routing_preferences": [
{
"name": "creative writing",
"description": "poetry, fiction, lyrical or playful prose",
"models": ["anthropic/claude-opus-4-7", "openai/gpt-5.5"]
}
]
}' | jq .
```
### Pin a request to a specific tier (skip routing)
For prompts that don't match any preference description, the requested
model is what serves the request. Pin to a tier by sending its alias
directly:
```bash
# DigitalOcean Sonnet 4.6 — fast and cheap
curl -sS -X POST http://localhost:12000/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{"model":"frontier.fast","messages":[{"role":"user","content":"hello"}]}' | jq .
# OpenAI GPT 5.5
curl -sS -X POST http://localhost:12000/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{"model":"frontier.smart","messages":[{"role":"user","content":"hello"}]}' | jq .
# Anthropic Opus 4.7
curl -sS -X POST http://localhost:12000/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{"model":"frontier.max","messages":[{"role":"user","content":"hello"}]}' | jq .
```
### From a Claude-native client (Anthropic Messages API)
Plano translates between OpenAI and Anthropic shapes, so the same gateway
serves both client SDKs:
```bash
curl -sS -X POST http://localhost:12000/v1/messages \
-H "Content-Type: application/json" \
-H "anthropic-version: 2023-06-01" \
-H "x-api-key: test-key" \
-d '{
"model": "frontier.max",
"max_tokens": 512,
"messages": [{"role":"user","content":"explain CAP theorem like I have a CS undergrad background"}]
}' | jq .
```
### From Claude Desktop
Once Plano is up, point Claude Desktop at it with one command:
```bash
planoai launch claude-desktop --config config.yaml
```
Claude Desktop will switch into third-party gateway mode pointed at
`http://localhost:12000`, auto-discover the three model aliases via
`/v1/models`, and let you pick `frontier.fast` / `.smart` / `.max` from the
in-app model selector. To revert: `planoai launch claude-desktop --restore`.
### From Codex CLI
```bash
planoai launch codex
codex --model frontier.smart # or frontier.fast / frontier.max
```
### From the Claude Code CLI
```bash
planoai launch claude-cli
```
The CLI will use Plano as its Anthropic endpoint; ask it for code-heavy work
and it'll resolve to Opus 4.7 automatically.
## Config walkthrough
[`config.yaml`](config.yaml) declares each provider once, then declares
**top-level routing preferences** that reference those providers by their
full `<provider>/<model>` name. Each route owns an ordered `models` pool —
primary first, fallbacks next.
```yaml
model_providers:
- model: digitalocean/anthropic-claude-sonnet-4-6
access_key: $DO_API_KEY
default: true # used when no preference matches
- model: openai/gpt-5.5
access_key: $OPENAI_API_KEY
- model: anthropic/claude-opus-4-7
access_key: $ANTHROPIC_API_KEY
routing_preferences:
- name: code generation
description: writing new functions, classes, scripts, or boilerplate; implementing APIs; producing unit tests; refactoring code
models:
- anthropic/claude-opus-4-7 # primary
- openai/gpt-5.5 # fallback on 429 / 5xx
- name: deep analysis
description: long-form analysis, architecture review, security review, evaluating tradeoffs, structured critique
models:
- anthropic/claude-opus-4-7
- openai/gpt-5.5
- name: complex reasoning
description: multi-step reasoning, mathematical problem solving, structured planning, tool and function calling, data extraction
models:
- openai/gpt-5.5
- anthropic/claude-opus-4-7
- name: daily conversation
description: general chat, casual Q&A, summaries, drafting messages, quick rewrites
models:
- digitalocean/anthropic-claude-sonnet-4-6
- openai/gpt-5.5
model_aliases:
frontier.fast: { target: anthropic-claude-sonnet-4-6 }
frontier.smart: { target: gpt-5.5 }
frontier.max: { target: claude-opus-4-7 }
```
A few things to call out:
1. **Preference *descriptions* drive routing accuracy.** They're embedded
into the orchestrator's prompt; vague descriptions = vague routing.
Following the [LLM Routing best practices](../../../docs/source/guides/llm_router.rst):
- keep names specific and non-overlapping,
- prefer noun-centric descriptors over imperative phrasing,
- always include a generic "domain"-style route — here that's
`daily conversation` pinned to the cheapest tier — so unmatched
prompts still land somewhere deliberate.
2. **Ordered `models`** is a candidate pool. `models[0]` is the primary;
anything after it is a fallback that the client (or Plano's retry
logic) tries on `429`/`5xx`. Mix providers across the pool so a single
provider outage doesn't break the route.
3. **The `default: true` provider** is the safety net for prompts the
orchestrator can't confidently classify (e.g. one-word "thanks!").
4. **Aliases** decouple your callers from provider/model strings. When the
next Sonnet ships, change the alias target — every caller picks it up
instantly.
## Tracing
`tracing.random_sampling: 100` in the config enables full OTLP tracing. Open
a second terminal and run:
```bash
planoai trace
```
Each routed call shows up with the matched preference, ranked candidate
pool, selected model, end-to-end latency, and per-stage spans (router
decision, provider call, streaming chunks).
## Cost framing
A rough mix of 60% conversation, 30% reasoning, 10% deep code work — say
1,000 prompts/day at 1k input + 500 output tokens each — illustrates why
this layout pays off. Exact numbers depend on per-provider pricing the day
you read this; the point is that calling Opus 4.7 for casual chat is wasted
spend, and falling back to a small model on complex code is wasted output.
Plano's job is to let each provider do what it's best at, and to fail over
to the next entry in `models` when the primary throttles.
## Customizing
- **Swap a provider:** change the model string and `access_key`. e.g.
point `frontier.smart` at `azure_openai/gpt-5.5` by replacing the OpenAI
block with an Azure block, then update the matching entries inside
`routing_preferences[].models`.
- **Add fallbacks:** append more entries to any route's `models` list.
The orchestrator returns the full ranked pool, and Plano (or your
client) walks it on `429`/`5xx`.
- **Add a new route:** add another entry under `routing_preferences` with
a noun-centric description and its own `models` pool. No code change,
no client change — every existing caller benefits immediately.
- **Per-call policy override:** ship a `routing_preferences` field in the
request body to override the config for that one call (see the curl
example above).
- **Self-host the orchestrator:** see
[`../preference_based_routing/plano_config_local.yaml`](../preference_based_routing/plano_config_local.yaml)
for an Ollama-backed orchestrator. Drop the `overrides.llm_routing_model`
block into this config and you're off the hosted Plano-Orchestrator.
## Files
| File | Purpose |
| --------------------------------------------- | ---------------------------------------------------------------------- |
| [`config.yaml`](config.yaml) | Plano configuration (top-level routing_preferences + aliases) |
| [`run_demo.sh`](run_demo.sh) | Bring the demo up/down (`./run_demo.sh [down]`) |
| [`test.sh`](test.sh) | Per-prompt routing decision + chat completion across all three routes |
| [`test.rest`](test.rest) | REST Client snippets for VS Code / IntelliJ |
## Stopping
```bash
./run_demo.sh down # or: planoai down
```

View file

@ -0,0 +1,103 @@
version: v0.4.0
# Frontier-tier model routing across three providers.
#
# - DigitalOcean Sonnet 4.6 -> daily driver: balanced quality + cost
# - OpenAI GPT 5.5 -> multimodal reasoning, tool use, math
# - Anthropic Opus 4.7 -> top-tier reasoning, long-form analysis, code
#
# Plano's preference-aligned router (Plano-Orchestrator) inspects each prompt
# and dispatches to the model whose top-level `routing_preferences` entry best
# matches the user's intent. Each route owns an ordered `models` list:
# `models[0]` is the primary; subsequent entries are fallbacks the client
# (or Plano's retry logic) can try on `429`/`5xx` errors.
listeners:
- type: model
name: model_listener
port: 12000
# ---------------------------------------------------------------------------
# Model providers (declared once, referenced by every route below)
# ---------------------------------------------------------------------------
# Every model below is served through the `digitalocean/` prefix with a
# single `$DO_API_KEY`. The `digitalocean/`, `openai/`, and `anthropic/`
# prefixes are recognized natively by Plano — no `base_url` or provider
# interface override needed.
model_providers:
- model: digitalocean/anthropic-claude-4.6-sonnet
access_key: $DO_API_KEY
default: true # used when no routing preference matches
- model: digitalocean/openai-gpt-5.5
access_key: $DO_API_KEY
- model: digitalocean/anthropic-claude-opus-4.7
access_key: $DO_API_KEY
# ---------------------------------------------------------------------------
# Routing preferences (v0.4.0 top-level form)
# ---------------------------------------------------------------------------
# Best-practice notes (per the LLM Routing guide):
# - Names should be specific, non-overlapping, and aligned with the
# description so the orchestrator can disambiguate cleanly.
# - Descriptions are noun-centric phrases describing *the work*, not
# conversational instructions.
# - Always include a generic "domain" route so prompts that don't match a
# specific action still land on a deliberate model — here that's
# "daily conversation" pinned to the cheapest tier.
# - `models` is an ordered candidate pool; entry 0 is primary and entries
# 1..n are fallbacks (clients retry on 429/5xx).
routing_preferences:
- name: code generation
description: writing new functions, classes, scripts, or boilerplate; implementing APIs; producing unit tests; refactoring code
models:
- digitalocean/anthropic-claude-opus-4.7 # primary: top-tier code quality
- digitalocean/openai-gpt-5.5 # fallback if Opus is rate-limited / down
- name: deep analysis
description: long-form analysis, architecture review, security review, evaluating tradeoffs, structured critique
models:
- digitalocean/anthropic-claude-opus-4.7
- digitalocean/openai-gpt-5.5
- name: complex reasoning
description: multi-step reasoning, mathematical problem solving, structured planning, tool and function calling, data extraction
models:
- digitalocean/openai-gpt-5.5 # primary: strong reasoning + tool use
- digitalocean/anthropic-claude-opus-4.7
- name: daily conversation
description: general chat, casual Q&A, summaries, drafting messages, quick rewrites, day-to-day requests where speed and cost matter
models:
- digitalocean/anthropic-claude-4.6-sonnet
- digitalocean/openai-gpt-5.5
# ---------------------------------------------------------------------------
# Aliases — stable, human-friendly handles for clients
# ---------------------------------------------------------------------------
# Clients can pin to a tier without thinking about the underlying provider,
# and the underlying model can change without breaking callers.
model_aliases:
# Daily driver -> Claude Sonnet 4.6
frontier.fast:
target: digitalocean/anthropic-claude-4.6-sonnet
# Reasoning + tool calling -> OpenAI GPT 5.5
frontier.smart:
target: digitalocean/openai-gpt-5.5
# Code + deep analysis -> Anthropic Opus 4.7
frontier.max:
target: digitalocean/anthropic-claude-opus-4.7
# ---------------------------------------------------------------------------
# Model affinity for agentic loops
# ---------------------------------------------------------------------------
# In a tool-using agent loop, successive prompts can look like different
# routes (tool selection ~ code, reasoning ~ analysis), causing the router
# to flip between models mid-session. Clients send `X-Model-Affinity: <id>`
# and Plano caches the routing decision for the session TTL below.
routing:
session_ttl_seconds: 600
tracing:
random_sampling: 100

View file

@ -0,0 +1,63 @@
#!/bin/bash
set -e
# ---------------------------------------------------------------------------
# Frontier model routing demo: DigitalOcean Sonnet 4.6 + GPT 5.5 + Opus 4.7
# ---------------------------------------------------------------------------
# Bring the demo up.
#
# On first run, snapshot the caller's API keys into ./.env, then start Plano
# with config.yaml and print next-step hints. An existing .env is left as-is.
#
# config.yaml authenticates every model provider with $DO_API_KEY, so that is
# the only variable that must be set. OPENAI_API_KEY / ANTHROPIC_API_KEY are
# still copied into .env when present (useful after swapping a provider in
# config.yaml) but no longer block startup when absent.
#
# Exits with status 1 if a required variable is missing.
start_demo() {
  local required=(DO_API_KEY)
  local missing=()
  local key

  if [ -f ".env" ]; then
    echo ".env file already exists. Skipping creation."
  else
    for key in "${required[@]}"; do
      # ${!key} is indirect expansion: the value of the variable named by $key.
      if [ -z "${!key}" ]; then
        missing+=("$key")
      fi
    done
    if [ ${#missing[@]} -ne 0 ]; then
      echo "Error: the following environment variables are not set:"
      for key in "${missing[@]}"; do echo " - $key"; done
      echo
      echo "Set them in your shell, then re-run this script. Example:"
      echo " export DO_API_KEY=... # from https://cloud.digitalocean.com/account/api/tokens"
      exit 1
    fi
    echo "Creating .env file..."
    {
      echo "DO_API_KEY=$DO_API_KEY"
      # Optional keys: only written when the caller has them set.
      if [ -n "$OPENAI_API_KEY" ]; then
        echo "OPENAI_API_KEY=$OPENAI_API_KEY"
      fi
      if [ -n "$ANTHROPIC_API_KEY" ]; then
        echo "ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY"
      fi
    } > .env
    echo ".env file created."
  fi
  echo "Starting Plano with config.yaml..."
  planoai up config.yaml
  # Quoted delimiter: the text below is printed verbatim, with no expansion.
  cat <<'EOF'
Plano is up. Try the demo with:
./test.sh # runs four sample prompts and shows which model handled each
planoai trace # live router decisions in a separate terminal
Or call any model directly using its alias:
curl -sS -X POST http://localhost:12000/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{"model":"frontier.max","messages":[{"role":"user","content":"hello"}]}' | jq .
EOF
}
# Tear the demo down by stopping the running Plano instance.
stop_demo() {
  printf '%s\n' "Stopping Plano..."
  planoai down
}
# Entry point: `down` as the first argument stops the demo; any other value
# (including no argument at all) starts it.
case "$1" in
  down)
    stop_demo
    ;;
  *)
    start_demo
    ;;
esac

View file

@ -0,0 +1,212 @@
### Frontier model routing — REST Client / VS Code REST snippets
###
### Plano runs the preference-aligned orchestrator on every chat request
### when top-level `routing_preferences` are configured. The `model` field
### in the body is the *fallback* if no preference matches; pinning it to
### `frontier.fast` gives a cheap default. Each route owns an ordered
### `models` pool — primary first, fallbacks next — that the client (or
### Plano's retry logic) walks on 429/5xx.
@endpoint = http://localhost:12000
### -------------------------------------------------------------------------
### 1. Decision-only endpoint: see what the router would pick (no upstream call)
### Returns: { "models": [...ranked pool...], "route": "...", "trace_id": "..." }
### -------------------------------------------------------------------------
POST {{endpoint}}/routing/v1/chat/completions HTTP/1.1
Content-Type: application/json
{
"model": "frontier.fast",
"messages": [
{
"role": "user",
"content": "Refactor this Rust function to remove the global mutable state and add unit tests."
}
]
}
### -------------------------------------------------------------------------
### 2. Routed by intent: daily conversation -> DigitalOcean Sonnet 4.6
### -------------------------------------------------------------------------
POST {{endpoint}}/v1/chat/completions HTTP/1.1
Content-Type: application/json
{
"model": "frontier.fast",
"max_tokens": 256,
"messages": [
{
"role": "user",
"content": "Hey! Give me three fun facts about octopuses I can drop into a dinner conversation."
}
]
}
### -------------------------------------------------------------------------
### 3. Routed by intent: complex reasoning -> OpenAI GPT 5.5
### -------------------------------------------------------------------------
POST {{endpoint}}/v1/chat/completions HTTP/1.1
Content-Type: application/json
{
"model": "frontier.fast",
"max_tokens": 512,
"messages": [
{
"role": "user",
"content": "A train leaves Chicago at 9:14am traveling 72 mph. Another leaves St Louis at 10:02am traveling 65 mph toward Chicago. The cities are 297 miles apart. Walk through the math step by step and give me the time and place they meet."
}
]
}
### -------------------------------------------------------------------------
### 4. Routed by intent: code generation -> Anthropic Opus 4.7
### -------------------------------------------------------------------------
POST {{endpoint}}/v1/chat/completions HTTP/1.1
Content-Type: application/json
{
"model": "frontier.fast",
"max_tokens": 800,
"messages": [
{
"role": "user",
"content": "Write a Rust function that takes a Vec<u8> of UTF-8 bytes and returns a HashMap<char, usize> with grapheme cluster counts. Include unit tests and handle invalid UTF-8 gracefully."
}
]
}
### -------------------------------------------------------------------------
### 5. Routed by intent: deep analysis -> Anthropic Opus 4.7
### -------------------------------------------------------------------------
POST {{endpoint}}/v1/chat/completions HTTP/1.1
Content-Type: application/json
{
"model": "frontier.fast",
"max_tokens": 600,
"messages": [
{
"role": "user",
"content": "Review this Postgres schema for normalization issues:\nCREATE TABLE orders (id SERIAL PRIMARY KEY, customer_email TEXT, customer_name TEXT, items_json JSONB);"
}
]
}
### -------------------------------------------------------------------------
### 6. Per-request routing override (config-defined preferences are bypassed
### for this single call). The `routing_preferences` field is stripped
### before the upstream provider sees the body.
### -------------------------------------------------------------------------
POST {{endpoint}}/v1/chat/completions HTTP/1.1
Content-Type: application/json
{
"model": "frontier.fast",
"max_tokens": 256,
"messages": [
{ "role": "user", "content": "Draft me a haiku about Postgres replication slots." }
],
"routing_preferences": [
{
"name": "creative writing",
"description": "poetry, fiction, lyrical or playful prose",
"models": ["digitalocean/anthropic-claude-opus-4.7", "digitalocean/openai-gpt-5.5"]
}
]
}
### -------------------------------------------------------------------------
### 7. Pin a routing decision across an agentic loop with X-Model-Affinity.
### The first call routes normally and caches the decision. Subsequent
### calls with the same id reuse the cached model until the session TTL
### (default 10 min) expires.
### -------------------------------------------------------------------------
POST {{endpoint}}/v1/chat/completions HTTP/1.1
Content-Type: application/json
X-Model-Affinity: agent-session-7f3e
{
"model": "frontier.fast",
"max_tokens": 256,
"messages": [
{ "role": "user", "content": "Plan a small refactor of an auth module — order of operations?" }
]
}
### Same affinity id — reuses cached routing decision (no re-classification)
POST {{endpoint}}/v1/chat/completions HTTP/1.1
Content-Type: application/json
X-Model-Affinity: agent-session-7f3e
{
"model": "frontier.fast",
"max_tokens": 256,
"messages": [
{ "role": "user", "content": "Now write the unit tests for step one." }
]
}
### -------------------------------------------------------------------------
### 8. Pin to DigitalOcean Sonnet 4.6 via alias (skip routing entirely)
### -------------------------------------------------------------------------
POST {{endpoint}}/v1/chat/completions HTTP/1.1
Content-Type: application/json
{
"model": "frontier.fast",
"max_tokens": 128,
"messages": [
{ "role": "user", "content": "One sentence: who painted the ceiling of the Sistine Chapel?" }
]
}
### -------------------------------------------------------------------------
### 9. Pin to OpenAI GPT 5.5 via alias
### -------------------------------------------------------------------------
POST {{endpoint}}/v1/chat/completions HTTP/1.1
Content-Type: application/json
{
"model": "frontier.smart",
"max_tokens": 256,
"messages": [
{ "role": "user", "content": "Outline a 30/60/90 day plan for a new platform engineering hire." }
]
}
### -------------------------------------------------------------------------
### 10. Pin to Anthropic Opus 4.7 via alias
### -------------------------------------------------------------------------
POST {{endpoint}}/v1/chat/completions HTTP/1.1
Content-Type: application/json
{
"model": "frontier.max",
"max_tokens": 600,
"messages": [
{ "role": "user", "content": "Review this Postgres schema for normalization issues:\nCREATE TABLE orders (id SERIAL PRIMARY KEY, customer_email TEXT, customer_name TEXT, items_json JSONB);" }
]
}
### -------------------------------------------------------------------------
### 11. Anthropic Messages API (Claude-native client) -> routed by intent
### -------------------------------------------------------------------------
POST {{endpoint}}/v1/messages HTTP/1.1
Content-Type: application/json
anthropic-version: 2023-06-01
x-api-key: test-key
{
"model": "frontier.fast",
"max_tokens": 256,
"messages": [
{ "role": "user", "content": "Recommend a senior engineering reading list with three picks and one sentence each." }
]
}
### -------------------------------------------------------------------------
### 12. Inspect available models (auto-discovered for Claude Desktop / clients)
### -------------------------------------------------------------------------
GET {{endpoint}}/v1/models HTTP/1.1

View file

@ -0,0 +1,119 @@
#!/bin/bash
# ---------------------------------------------------------------------------
# Frontier Model Routing demo — driver script
#
# For each of four intent-biased prompts we:
# 1. Hit POST /routing/v1/chat/completions (Plano's decision-only endpoint)
# to print the matched route name and the ranked candidate pool.
# 2. Hit POST /v1/chat/completions to actually run the request and print
# the model that handled it.
#
# Plano runs the orchestrator on every chat completion when top-level
# `routing_preferences` are configured. The `model` field in the request is
# the *fallback* used when no preference matches — we pin it to
# `frontier.fast` so unmatched prompts land on the cheapest tier.
# ---------------------------------------------------------------------------
# Abort on the first failing command. Note that `pipefail` is not enabled, so
# for the curl | jq | sed pipelines below only the last stage's status counts.
set -e
# Gateway base URL; export GATEWAY before running to target another host/port.
GATEWAY=${GATEWAY:-http://localhost:12000}
# Decision-only endpoint and the regular chat-completions endpoint, both
# derived from GATEWAY.
DECISION_ENDPOINT="$GATEWAY/routing/v1/chat/completions"
CHAT_ENDPOINT="$GATEWAY/v1/chat/completions"
# Run one prompt through both endpoints and print the results.
#   $1  label shown in the banner
#   $2  prompt sent as the single user message
# The same JSON payload (model "frontier.fast", max_tokens 256) is POSTed
# first to $DECISION_ENDPOINT and then to $CHAT_ENDPOINT; each response is
# trimmed with jq and prefixed by sed.
ask() {
  local label="$1"
  local prompt="$2"
  local body
  body="$(jq -n --arg p "$prompt" \
    '{model: "frontier.fast", max_tokens: 256, messages: [{role: "user", content: $p}]}')"

  # Banner: blank line, rule, label, prompt, rule.
  printf '\n%s\n[%s]\nprompt: %s\n%s\n' \
    "==========================================================" \
    "$label" \
    "$prompt" \
    "----------------------------------------------------------"

  # Step 1: decision-only — what would the router pick?
  printf '%s\n' " routing decision:"
  curl -sS -X POST "$DECISION_ENDPOINT" \
    -H "Content-Type: application/json" \
    -d "$body" \
    | jq '{matched_route: .route, ranked_models: .models, pinned: .pinned}' \
    | sed 's/^/ /'

  # Step 2: actually run the request through the chosen model.
  printf '%s\n' " chat completion:"
  curl -sS -X POST "$CHAT_ENDPOINT" \
    -H "Content-Type: application/json" \
    -d "$body" \
    | jq '{routed_to: .model, reply: .choices[0].message.content}' \
    | sed 's/^/ /'
}
# One sample prompt per configured route. The "expects ..." text in each label
# is only a printed hint; nothing in this script asserts on the routed model.
ask "daily conversation -> expects DigitalOcean Sonnet 4.6" \
"Hey! Give me three fun facts about octopuses I can drop into a dinner conversation."
ask "complex reasoning -> expects OpenAI GPT 5.5" \
"A train leaves Chicago at 9:14am traveling 72 mph. Another leaves St Louis at 10:02am traveling 65 mph toward Chicago. The cities are 297 miles apart. Walk through the math step by step and give me the time and place they meet."
ask "code generation -> expects Anthropic Opus 4.7" \
"Write a Rust function that takes a Vec<u8> of UTF-8 bytes and returns a HashMap<char, usize> with grapheme cluster counts. Include unit tests and handle invalid UTF-8 gracefully."
# The prompt below is a multi-line string: the embedded newlines are sent as-is.
ask "deep analysis -> expects Anthropic Opus 4.7" \
"Review this Postgres schema for normalization, indexing, and migration risk. Give me a prioritized list of issues:
CREATE TABLE orders (
id SERIAL PRIMARY KEY,
customer_email TEXT,
customer_name TEXT,
items_json JSONB,
total NUMERIC,
created_at TIMESTAMPTZ DEFAULT now()
);"
# ---------------------------------------------------------------------------
# Bonus: pin a routing decision across an agentic loop with X-Model-Affinity.
# Both calls hit the same gateway with the same affinity id, so the second
# call reuses the first call's routing decision instead of reclassifying.
# ---------------------------------------------------------------------------
# Banner for the affinity section.
echo
echo "=========================================================="
echo "[bonus: model affinity across two turns of an agent loop]"
echo "----------------------------------------------------------"
# Fresh affinity id per run: epoch seconds plus bash's $RANDOM.
SID="demo-$(date +%s)-$RANDOM"
echo " X-Model-Affinity: $SID"
# Send one turn of the affinity demo and print which model answered.
#   $1  label printed before the result
#   $2  prompt sent as the single user message
# Every call carries the script-level $SID in the X-Model-Affinity header and
# POSTs to $CHAT_ENDPOINT with model "frontier.fast" and max_tokens 128.
turn() {
  local turn_label="$1"
  local prompt="$2"

  printf ' %s:\n' "$turn_label"
  curl -sS -X POST "$CHAT_ENDPOINT" \
    -H "Content-Type: application/json" \
    -H "X-Model-Affinity: $SID" \
    -d "$(jq -n --arg p "$prompt" \
      '{model: "frontier.fast", max_tokens: 128, messages: [{role: "user", content: $p}]}')" \
    | jq '{routed_to: .model}' \
    | sed 's/^/ /'
}
# Two consecutive turns sharing the same $SID; compare the printed `routed_to`
# values to see whether the second turn reused the first turn's decision.
turn "turn 1 (sets affinity)" "Plan a small refactor of an auth module — what's the order of operations?"
turn "turn 2 (reuses decision)" "Now write the unit tests for step one."
# Closing banner.
echo
echo "=========================================================="
echo "Done. Want to inspect routing decisions live? Run: planoai trace"
echo "=========================================================="