mirror of
https://github.com/katanemo/plano.git
synced 2026-06-08 14:55:14 +02:00
Merge 151d3a83c5 into 554a3d1f6a
This commit is contained in:
commit
4043c5e5b6
11 changed files with 2500 additions and 76 deletions
625
cli/planoai/claude_desktop.py
Normal file
625
cli/planoai/claude_desktop.py
Normal file
|
|
@ -0,0 +1,625 @@
|
|||
"""Configure Claude Desktop to use the local Plano gateway.
|
||||
|
||||
Python port of Ollama's `cmd/launch/claude_desktop.go` tailored for Plano. The
|
||||
flow is intentionally simpler than Ollama's:
|
||||
|
||||
1. Detect Claude Desktop on macOS / Windows.
|
||||
2. Pick a string to put in Claude's ``inferenceGatewayApiKey`` slot (Claude
|
||||
Desktop requires the field; Plano's local gateway does not enforce bearer
|
||||
auth, so a placeholder is fine — see ``_resolve_api_key`` for precedence).
|
||||
3. Rewrite Claude Desktop config JSON files with ``.bak`` backups to switch
|
||||
Claude into 3rd-party gateway mode pointed at Plano.
|
||||
4. Optionally restart Claude Desktop so the changes take effect immediately.
|
||||
|
||||
Restoring flips ``deploymentMode`` back to ``1p`` and removes the Plano gateway
|
||||
profile + meta entry.
|
||||
|
||||
The Claude Desktop ``deploymentMode = "3p"`` profile structure used here is
|
||||
defined by Anthropic / observed via the Ollama integration; we do not control
|
||||
it. We re-use the same JSON shape so Claude Desktop happily accepts the Plano
|
||||
profile alongside any other third-party profile the user may have.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import glob as _glob
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Callable, Optional
|
||||
|
||||
from planoai.utils import getLogger
|
||||
|
||||
log = getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
INTEGRATION_NAME = "claude-desktop"
|
||||
PROFILE_NAME = "Plano"
|
||||
# Deterministic UUID-v4 distinct from Ollama's `…0114`. The trailing bytes
|
||||
# (50 6C 61 6E) spell "Plan" in ASCII to make it easy to identify the profile in
|
||||
# `_meta.json`.
|
||||
PROFILE_ID = "00000000-0000-4000-8000-0000506C616E"
|
||||
DEFAULT_BASE_URL = "http://localhost:12000"
|
||||
SUCCESS_MESSAGE = "Claude Desktop profile changed to Plano."
|
||||
RESTORE_HINT = (
|
||||
"To restore the usual Claude profile, run: "
|
||||
"planoai launch claude-desktop --restore"
|
||||
)
|
||||
RESTORED_MESSAGE = "Claude Desktop restored to the usual Claude profile."
|
||||
|
||||
# Placeholder written into Claude Desktop's gateway profile when the user
|
||||
# hasn't overridden it. Plano's local gateway does not enforce a bearer
|
||||
# token; this string only exists so Claude Desktop has a non-empty value to
|
||||
# attach to outbound requests.
|
||||
DEFAULT_API_KEY = "plano"
|
||||
|
||||
# How long we wait for Claude Desktop to fully exit on restart.
|
||||
_QUIT_TIMEOUT_SECONDS = 30
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test seams: replace these in tests instead of monkey-patching os/subprocess.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
# Platform identifier. ``"darwin"``, ``"windows"``, or anything else (which
|
||||
# is treated as unsupported). Module-level so tests can override it.
|
||||
def _detect_goos() -> str:
    """Return a Go-style platform name for the current interpreter.

    Returns:
        ``"windows"`` when ``os.name`` is ``"nt"``; otherwise ``sys.platform``
        unchanged (which is already ``"darwin"`` on macOS).
    """
    # sys.platform already reads "darwin" on macOS, so only Windows needs
    # translating to the Go-style spelling.
    return "windows" if os.name == "nt" else sys.platform
|
||||
|
||||
|
||||
_GOOS: str = _detect_goos()
|
||||
|
||||
_user_home: Callable[[str], str] = os.path.expanduser  # called as _user_home("~")
|
||||
|
||||
|
||||
def _is_running() -> bool:
    """Report whether a Claude Desktop process can be found.

    On macOS this asks ``pgrep -f`` for the app bundle's main executable; on
    Windows it asks PowerShell for a ``claude`` process that owns a main
    window. Any other platform yields ``False``.

    Returns:
        ``True`` only when the probe exits with status 0 and prints something.
        A missing probe binary (``FileNotFoundError``) counts as not running.
    """
    if _GOOS == "darwin":
        probe = ["pgrep", "-f", "Claude.app/Contents/MacOS/Claude"]
    elif _GOOS == "windows":
        # Only processes with a main window handle are considered.
        ps_script = (
            "(Get-Process claude -ErrorAction SilentlyContinue "
            "| Where-Object { $_.MainWindowHandle -ne 0 } "
            "| Select-Object -First 1).Id"
        )
        probe = ["powershell.exe", "-NoProfile", "-Command", ps_script]
    else:
        return False

    try:
        result = subprocess.run(
            probe,
            capture_output=True,
            text=True,
            check=False,
        )
    except FileNotFoundError:
        return False
    return result.returncode == 0 and result.stdout.strip() != ""
|
||||
|
||||
|
||||
def _quit() -> None:
    """Request a graceful shutdown of Claude Desktop.

    macOS sends an AppleScript ``quit``; Windows asks every ``claude`` process
    that owns a main window to close it. The command's exit status is ignored
    (``check=False``), and unsupported platforms are a no-op.
    """
    if _GOOS == "windows":
        close_windows = (
            "Get-Process claude -ErrorAction SilentlyContinue "
            "| Where-Object { $_.MainWindowHandle -ne 0 } "
            "| ForEach-Object { [void]$_.CloseMainWindow() }"
        )
        argv = ["powershell.exe", "-NoProfile", "-Command", close_windows]
    elif _GOOS == "darwin":
        argv = ["osascript", "-e", 'tell application "Claude" to quit']
    else:
        return
    subprocess.run(argv, check=False)
|
||||
|
||||
|
||||
def _open() -> None:
    """Start Claude Desktop on the current platform.

    Raises:
        RuntimeError: on Windows, when no Claude executable can be located.
    """
    if _GOOS == "darwin":
        subprocess.run(["open", "-a", "Claude"], check=False)
    elif _GOOS == "windows":
        exe = _claude_app_path()
        if not exe:
            raise RuntimeError(
                "Claude Desktop executable was not found; open Claude Desktop "
                "manually once and re-run 'planoai launch claude-desktop'"
            )
        # PowerShell single-quoted literal: embedded quotes are doubled.
        quoted = "'{}'".format(exe.replace("'", "''"))
        command = f"Start-Process -FilePath {quoted}"
        subprocess.run(
            ["powershell.exe", "-NoProfile", "-Command", command],
            check=False,
        )
|
||||
|
||||
|
||||
def _sleep(seconds: float) -> None:
    """Block for ``seconds`` via ``time.sleep``; kept as a separate function so it can be replaced in tests."""
    time.sleep(seconds)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Path discovery
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class _ThirdPartyPaths:
    """File paths that make up one third-party ("3p") Claude Desktop profile root."""

    # <root>/claude_desktop_config.json
    desktop_config: str
    # <root>/configLibrary/_meta.json
    meta: str
    # <root>/configLibrary/<PROFILE_ID>.json
    profile: str
|
||||
|
||||
|
||||
@dataclass
class _Targets:
    """All config files that configure/restore read or rewrite."""

    # claude_desktop_config.json paths under the regular profile roots.
    normal_configs: list[str] = field(default_factory=list)
    # One entry per third-party profile root.
    third_party_profiles: list[_ThirdPartyPaths] = field(default_factory=list)
|
||||
|
||||
|
||||
def supported() -> Optional[str]:
    """Check whether this platform can drive Claude Desktop.

    Returns:
        ``None`` on macOS and Windows; otherwise a human-readable error string.
    """
    if _GOOS not in ("darwin", "windows"):
        return "Claude Desktop launch is only supported on macOS and Windows"
    return None
|
||||
|
||||
|
||||
def _home() -> str:
    """Return the user's home directory.

    Raises:
        RuntimeError: when ``~`` expands to an empty string or is left
            unexpanded.
    """
    resolved = _user_home("~")
    if resolved and resolved != "~":
        return resolved
    raise RuntimeError("could not resolve user home directory")
|
||||
|
||||
|
||||
def _local_app_data() -> str:
    """Return the Windows per-user local application-data directory.

    Precedence: ``%LOCALAPPDATA%``, then ``%USERPROFILE%\\AppData\\Local``,
    then ``<home>\\AppData\\Local``. Blank or whitespace-only environment
    values are treated as unset.
    """
    local = os.environ.get("LOCALAPPDATA", "").strip()
    if local:
        return local
    profile = os.environ.get("USERPROFILE", "").strip()
    base = profile if profile else _home()
    return os.path.join(base, "AppData", "Local")
|
||||
|
||||
|
||||
def _darwin_profile_roots() -> tuple[list[str], list[str]]:
    """Return ``(normal_roots, third_party_roots)`` for macOS.

    Both lists hold a single directory under
    ``~/Library/Application Support``: ``Claude`` and ``Claude-3p``.
    """
    support_dir = os.path.join(_home(), "Library", "Application Support")
    normal = [os.path.join(support_dir, "Claude")]
    third_party = [os.path.join(support_dir, "Claude-3p")]
    return (normal, third_party)
|
||||
|
||||
|
||||
def _windows_profile_roots() -> tuple[list[str], list[str]]:
    """Return ``(normal_roots, third_party_roots)`` for Windows.

    Two app directory names are covered (``Claude`` and ``Claude Nest``); the
    third-party root of each is the same name with a ``-3p`` suffix.
    """
    local = _local_app_data()
    app_names = ("Claude", "Claude Nest")
    normal = [os.path.join(local, name) for name in app_names]
    third_party = [os.path.join(local, f"{name}-3p") for name in app_names]
    return normal, third_party
|
||||
|
||||
|
||||
def _dedupe_paths(paths: list[str]) -> list[str]:
    """Drop blank entries and case-insensitive duplicates, keeping order.

    Args:
        paths: candidate paths; empty or whitespace-only entries are skipped.

    Returns:
        A new list holding the first occurrence of each path, where two paths
        are duplicates when their lowercased forms are equal.
    """
    # dicts preserve insertion order, and setdefault keeps the first spelling.
    unique: dict[str, str] = {}
    for candidate in paths:
        if candidate and candidate.strip():
            unique.setdefault(candidate.lower(), candidate)
    return list(unique.values())
|
||||
|
||||
|
||||
def _target_paths() -> _Targets:
    """Compute every Claude Desktop config file this module manages.

    Returns:
        A ``_Targets`` holding ``claude_desktop_config.json`` for each normal
        profile root and a ``_ThirdPartyPaths`` for each third-party root.

    Raises:
        RuntimeError: when the platform is unsupported.
    """
    unsupported = supported()
    if unsupported is not None:
        raise RuntimeError(unsupported)

    # Anything that is not macOS is Windows here; supported() already
    # rejected every other platform.
    if _GOOS == "darwin":
        normal_roots, third_party_roots = _darwin_profile_roots()
    else:
        normal_roots, third_party_roots = _windows_profile_roots()

    config_name = "claude_desktop_config.json"
    return _Targets(
        normal_configs=[
            os.path.join(root, config_name) for root in _dedupe_paths(normal_roots)
        ],
        third_party_profiles=[
            _ThirdPartyPaths(
                desktop_config=os.path.join(root, config_name),
                meta=os.path.join(root, "configLibrary", "_meta.json"),
                profile=os.path.join(root, "configLibrary", f"{PROFILE_ID}.json"),
            )
            for root in _dedupe_paths(third_party_roots)
        ],
    )
|
||||
|
||||
|
||||
def _claude_app_path() -> str:
    """Locate the Claude Desktop application.

    Returns:
        The first existing candidate: an ``.app`` bundle on macOS, a
        ``Claude.exe`` on Windows. Returns ``""`` when nothing is found or the
        platform is unsupported.
    """
    if _GOOS == "darwin":
        candidates = [
            "/Applications/Claude.app",
            os.path.join(_home(), "Applications", "Claude.app"),
        ]
    elif _GOOS == "windows":
        local = _local_app_data()
        exe = "Claude.exe"
        fixed_dirs = (
            ("Programs", "Claude"),
            ("Programs", "Claude Desktop"),
            ("Claude",),
            ("Claude Nest",),
            ("Claude Desktop",),
            ("AnthropicClaude",),
        )
        # Directories that may contain versioned "app-*" subfolders.
        versioned_dirs = (
            ("AnthropicClaude",),
            ("Programs", "Claude"),
            ("Programs", "Claude Desktop"),
        )
        found = [os.path.join(local, *parts, exe) for parts in fixed_dirs]
        for parts in versioned_dirs:
            found.extend(_glob.glob(os.path.join(local, *parts, "app-*", exe)))
        candidates = _dedupe_paths(found)
    else:
        return ""

    return next((path for path in candidates if os.path.exists(path)), "")
|
||||
|
||||
|
||||
def is_installed() -> bool:
    """Best-effort check for a Claude Desktop installation.

    Claude Desktop counts as installed when its application is found, when
    (on Windows) it is currently running, or when any known profile directory
    exists. Unsupported platforms return ``False`` unless the app is found.
    """
    if _claude_app_path():
        return True

    if _GOOS == "windows":
        if _is_running():
            return True
        normal, third = _windows_profile_roots()
    elif _GOOS == "darwin":
        normal, third = _darwin_profile_roots()
    else:
        return False

    return any(os.path.isdir(root) for root in normal + third)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# JSON IO with atomic write + .bak backup
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _read_json(path: str) -> dict:
    """Load a JSON object from ``path``.

    Args:
        path: file to read; decoded as UTF-8, tolerating a leading BOM.

    Returns:
        The parsed object. An empty or whitespace-only file, or a document
        whose top level is not an object, yields ``{}``.

    Raises:
        FileNotFoundError: when ``path`` does not exist.
        json.JSONDecodeError: when the content is not valid JSON.
    """
    # "utf-8-sig" strips a UTF-8 byte-order mark if one is present (some
    # Windows editors add it); json.loads would otherwise reject the document.
    with open(path, "r", encoding="utf-8-sig") as f:
        data = f.read()
    if not data.strip():
        return {}
    parsed = json.loads(data)
    return parsed if isinstance(parsed, dict) else {}
|
||||
|
||||
|
||||
def _read_json_allow_missing(path: str) -> dict:
    """Like ``_read_json``, but a missing file reads as ``{}``.

    Only ``FileNotFoundError`` is absorbed; malformed JSON still raises.
    """
    try:
        loaded = _read_json(path)
    except FileNotFoundError:
        loaded = {}
    return loaded
|
||||
|
||||
|
||||
def _atomic_write_with_backup(path: str, payload: bytes) -> None:
    """Write ``payload`` to ``path`` atomically, keeping a ``.bak`` copy.

    The parent directory is created if needed. When ``path`` already exists
    it is first copied to ``path + ".bak"``; a failed backup is logged at
    debug level and does not abort the write. The payload goes to a temporary
    file in the same directory, which is then moved over ``path``.

    Args:
        path: destination file.
        payload: raw bytes to write.

    Raises:
        OSError: when the directory, temporary file, or final replace fails.
    """
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    if os.path.exists(path):
        try:
            shutil.copy2(path, path + ".bak")
        except OSError as e:
            log.debug("could not write backup for %s: %s", path, e)

    # The temp file must live next to the destination: os.replace is only
    # atomic (and only guaranteed to work) within one filesystem. For a bare
    # filename that means the current directory, not the system temp dir.
    fd, tmp_path = tempfile.mkstemp(
        prefix=".plano_", suffix=".tmp", dir=parent or "."
    )
    try:
        with os.fdopen(fd, "wb") as f:
            f.write(payload)
        os.replace(tmp_path, path)
    except BaseException:
        # BaseException so an interrupt (Ctrl-C) also cleans up the temp file.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise
|
||||
|
||||
|
||||
def _write_json(path: str, value: dict) -> None:
    """Serialize ``value`` as 2-space-indented JSON plus a trailing newline and write it atomically with a backup."""
    text = json.dumps(value, indent=2)
    _atomic_write_with_backup(path, f"{text}\n".encode("utf-8"))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# JSON shape mutators (1:1 with Ollama)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _write_deployment_mode(path: str, mode: str) -> None:
    """Set ``deploymentMode`` to ``mode`` in the config at ``path``.

    Other keys are preserved; a missing file is treated as an empty config
    and created.
    """
    config = _read_json_allow_missing(path)
    config.update(deploymentMode=mode)
    _write_json(path, config)
|
||||
|
||||
|
||||
def _write_meta(path: str, profile_id: str, name: str) -> None:
    """Mark ``profile_id`` as the applied profile in ``_meta.json``.

    Sets ``appliedId`` and ensures ``entries`` ends with exactly one
    ``{"id", "name"}`` record for the profile. Entries for other ids are
    kept in their original order; a non-list ``entries`` value is discarded.
    """
    meta = _read_json_allow_missing(path)
    meta["appliedId"] = profile_id

    existing = meta.get("entries")
    if isinstance(existing, list):
        kept = [
            item
            for item in existing
            if not (isinstance(item, dict) and item.get("id") == profile_id)
        ]
    else:
        kept = []

    meta["entries"] = [*kept, {"id": profile_id, "name": name}]
    _write_json(path, meta)
|
||||
|
||||
|
||||
def _write_gateway_profile(
    path: str, api_key: str, base_url: str, force_chooser: bool
) -> None:
    """Write the gateway settings into the profile JSON at ``path``.

    Args:
        path: profile file; a missing file is created.
        api_key: value stored as ``inferenceGatewayApiKey``.
        base_url: value stored as ``inferenceGatewayBaseUrl``.
        force_chooser: stored verbatim as ``disableDeploymentModeChooser``.

    Any ``inferenceModels`` entry is removed; unrelated keys are preserved.
    """
    profile = _read_json_allow_missing(path)
    profile.pop("inferenceModels", None)
    profile.update(
        {
            "inferenceProvider": "gateway",
            "inferenceGatewayBaseUrl": base_url,
            "inferenceGatewayApiKey": api_key,
            "inferenceGatewayAuthScheme": "bearer",
            "disableDeploymentModeChooser": force_chooser,
        }
    )
    _write_json(path, profile)
|
||||
|
||||
|
||||
def _restore_meta(path: str) -> None:
    """Remove the Plano profile from ``_meta.json``.

    Clears ``appliedId`` when it points at ``PROFILE_ID`` and drops any
    ``entries`` record with that id. The file is rewritten only when
    something changed; a missing or empty file is left alone.
    """
    meta = _read_json_allow_missing(path)
    if not meta:
        return

    dirty = meta.get("appliedId") == PROFILE_ID
    if dirty:
        del meta["appliedId"]

    entries = meta.get("entries")
    if isinstance(entries, list):
        survivors = [
            item
            for item in entries
            if not (isinstance(item, dict) and item.get("id") == PROFILE_ID)
        ]
        dirty = dirty or len(survivors) != len(entries)
        meta["entries"] = survivors

    if dirty:
        _write_json(path, meta)
|
||||
|
||||
|
||||
def _restore_profile(path: str) -> None:
    """Strip the gateway settings from the profile JSON at ``path``.

    Removes the provider, base URL, auth scheme and model keys, and sets
    ``disableDeploymentModeChooser`` to ``False``. ``inferenceGatewayApiKey``
    is not touched. A missing or empty profile is left alone.
    """
    profile = _read_json_allow_missing(path)
    if not profile:
        return

    gateway_keys = (
        "inferenceProvider",
        "inferenceGatewayBaseUrl",
        "inferenceGatewayAuthScheme",
        "inferenceModels",
    )
    for name in gateway_keys:
        profile.pop(name, None)
    profile["disableDeploymentModeChooser"] = False
    _write_json(path, profile)
|
||||
|
||||
|
||||
def _read_applied_id(path: str) -> str:
    """Return ``appliedId`` from the meta file at ``path``.

    Returns:
        The stored id, or ``""`` when the file is missing, cannot be decoded
        or parsed, or the value is not a string.
    """
    try:
        meta = _read_json(path)
    except (FileNotFoundError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError, so a
        # file with invalid bytes is treated like any other unreadable file.
        return ""
    val = meta.get("appliedId")
    return val if isinstance(val, str) else ""
|
||||
|
||||
|
||||
def _read_deployment_mode(path: str) -> str:
    """Return ``deploymentMode`` from the config at ``path``.

    Returns:
        The stored mode, or ``""`` when the file is missing, cannot be
        decoded or parsed, or the value is not a string.
    """
    try:
        cfg = _read_json(path)
    except (FileNotFoundError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError, so a
        # file with invalid bytes is treated like any other unreadable file.
        return ""
    val = cfg.get("deploymentMode")
    return val if isinstance(val, str) else ""
|
||||
|
||||
|
||||
def _read_gateway_api_key(path: str) -> str:
    """Return the stripped ``inferenceGatewayApiKey`` from the profile at ``path``.

    Returns:
        The key with surrounding whitespace removed, or ``""`` when the file
        is missing, cannot be decoded or parsed, or the value is not a string.
    """
    try:
        cfg = _read_json(path)
    except (FileNotFoundError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError, so a
        # file with invalid bytes is treated like any other unreadable file.
        return ""
    val = cfg.get("inferenceGatewayApiKey")
    return val.strip() if isinstance(val, str) else ""
|
||||
|
||||
|
||||
def _third_party_profile_ok(t: _ThirdPartyPaths) -> bool:
    """Check that one third-party root is fully pointed at the Plano profile.

    Requires ``appliedId`` to equal ``PROFILE_ID`` and the profile file to
    declare the ``gateway`` provider with a non-blank base URL and API key.
    A missing, undecodable or unparsable profile counts as not configured.
    """
    if _read_applied_id(t.meta) != PROFILE_ID:
        return False
    try:
        cfg = _read_json(t.profile)
    except (FileNotFoundError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return False
    if cfg.get("inferenceProvider") != "gateway":
        return False
    for key in ("inferenceGatewayBaseUrl", "inferenceGatewayApiKey"):
        value = cfg.get(key)
        if not isinstance(value, str) or not value.strip():
            return False
    return True
|
||||
|
||||
|
||||
def is_configured() -> bool:
    """Report whether Claude Desktop is already switched to the Plano profile.

    True only when every normal config and every third-party desktop config
    has ``deploymentMode == "3p"`` and every third-party root passes
    ``_third_party_profile_ok``. Unsupported platforms return ``False``.
    """
    try:
        targets = _target_paths()
    except RuntimeError:
        return False

    if not (targets.normal_configs and targets.third_party_profiles):
        return False

    if any(_read_deployment_mode(cfg) != "3p" for cfg in targets.normal_configs):
        return False

    return all(
        _read_deployment_mode(tp.desktop_config) == "3p"
        and _third_party_profile_ok(tp)
        for tp in targets.third_party_profiles
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# API key resolution
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# Plano's local gateway does not enforce bearer auth — there's no such thing
|
||||
# as a "Plano API key". Claude Desktop's third-party profile schema, however,
|
||||
# requires ``inferenceGatewayApiKey`` to be a non-empty string before it will
|
||||
# treat the profile as configured. We therefore pick *some* string to write
|
||||
# into that slot, with the following precedence so users running Plano behind
|
||||
# their own auth proxy can opt-in:
|
||||
#
|
||||
# 1. ``$PLANO_API_KEY`` — explicit override (e.g. an internal auth token).
|
||||
# 2. The existing ``inferenceGatewayApiKey`` already in Claude's 3p profile,
|
||||
# so re-running ``planoai launch claude-desktop`` does not clobber a
|
||||
# value the user manually set.
|
||||
# 3. The fixed placeholder ``DEFAULT_API_KEY`` ("plano").
|
||||
#
|
||||
# We do not validate this string against the gateway. The gateway's
|
||||
# reachability is already surfaced by ``launch_cmd._is_plano_running()``
|
||||
# before this module is invoked.
|
||||
|
||||
|
||||
def _resolve_api_key(profile_paths: list[str]) -> str:
    """Pick the string to store as ``inferenceGatewayApiKey``.

    Precedence: a non-blank ``$PLANO_API_KEY``, then the first non-empty key
    already stored in any of ``profile_paths`` (in order), then
    ``DEFAULT_API_KEY``.
    """
    override = os.environ.get("PLANO_API_KEY", "").strip()
    if override:
        return override

    # Generator: profile files are read lazily, stopping at the first hit.
    stored = (_read_gateway_api_key(path) for path in profile_paths)
    return next((key for key in stored if key), DEFAULT_API_KEY)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public configure / restore / launch
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def configure(base_url: str = DEFAULT_BASE_URL, *, force_chooser: bool = True) -> None:
    """Switch Claude Desktop into 3p mode pointed at the local Plano gateway.

    Args:
        base_url: gateway URL written into each Plano profile.
        force_chooser: stored as ``disableDeploymentModeChooser`` in each
            Plano profile.

    Raises:
        RuntimeError: when the platform is unsupported.
    """
    unsupported = supported()
    if unsupported is not None:
        raise RuntimeError(unsupported)

    targets = _target_paths()
    # Resolve the key before any file is rewritten so an existing value in a
    # profile can still be read back.
    api_key = _resolve_api_key([tp.profile for tp in targets.third_party_profiles])

    for desktop_config in targets.normal_configs:
        _write_deployment_mode(desktop_config, "3p")

    for tp in targets.third_party_profiles:
        _write_deployment_mode(tp.desktop_config, "3p")
        _write_meta(tp.meta, PROFILE_ID, PROFILE_NAME)
        _write_gateway_profile(tp.profile, api_key, base_url, force_chooser)
|
||||
|
||||
|
||||
def restore() -> None:
    """Flip Claude Desktop back to the default Anthropic profile.

    Every managed config gets ``deploymentMode = "1p"``; the Plano entry is
    removed from each ``_meta.json`` and the gateway keys from each profile.

    Raises:
        RuntimeError: when the platform is unsupported.
    """
    unsupported = supported()
    if unsupported is not None:
        raise RuntimeError(unsupported)

    targets = _target_paths()

    for desktop_config in targets.normal_configs:
        _write_deployment_mode(desktop_config, "1p")

    for tp in targets.third_party_profiles:
        _write_deployment_mode(tp.desktop_config, "1p")
        _restore_meta(tp.meta)
        _restore_profile(tp.profile)
|
||||
|
||||
|
||||
def _can_prompt() -> bool:
    """Return True when both stdin and stderr are attached to a terminal."""
    return all(stream.isatty() for stream in (sys.stdin, sys.stderr))
|
||||
|
||||
|
||||
def _confirm(prompt: str, yes: bool) -> bool:
    """Ask the user a yes/no question that defaults to yes.

    Args:
        prompt: question text; ``" [Y/n] "`` is appended.
        yes: when true, skip the question and approve.

    Returns:
        ``True`` for an empty answer, ``y`` or ``yes`` (case-insensitive).
        ``False`` for any other answer, when no terminal is available, or
        when input ends or is interrupted.
    """
    if yes:
        return True
    if not _can_prompt():
        return False

    try:
        reply = input(f"{prompt} [Y/n] ")
    except (EOFError, KeyboardInterrupt):
        # Finish the unanswered prompt line before returning.
        sys.stderr.write("\n")
        return False
    return reply.strip().lower() in ("", "y", "yes")
|
||||
|
||||
|
||||
def launch_or_restart(prompt: str, yes: bool) -> None:
    """Open Claude Desktop, restarting it first if it is already running.

    Args:
        prompt: confirmation question shown before quitting a running app.
        yes: when true, restart without asking.

    Raises:
        RuntimeError: when the platform is unsupported, when the app does not
            exit within ``_QUIT_TIMEOUT_SECONDS``, or when it cannot be
            opened.
    """
    err = supported()
    if err is not None:
        raise RuntimeError(err)

    if not _is_running():
        _open()
        return

    if not _confirm(prompt, yes):
        sys.stderr.write(
            "Quit and reopen Claude Desktop when you're ready for the "
            "profile change to take effect.\n"
        )
        return

    _quit()
    # A monotonic clock keeps the timeout correct even if the wall clock is
    # adjusted (NTP sync, manual change) while we wait.
    deadline = time.monotonic() + _QUIT_TIMEOUT_SECONDS
    while time.monotonic() < deadline:
        if not _is_running():
            break
        _sleep(0.2)
    else:
        # Loop ended without a break: the app was still running at the deadline.
        raise RuntimeError(
            "Claude Desktop did not quit; quit it manually and re-run the command"
        )
    _open()
|
||||
331
cli/planoai/launch_cmd.py
Normal file
331
cli/planoai/launch_cmd.py
Normal file
|
|
@ -0,0 +1,331 @@
|
|||
"""``planoai launch`` command group.
|
||||
|
||||
Launches CLI agents (Claude Code, Codex) or the Claude Desktop app against the
|
||||
local Plano gateway. This replaces the old ``planoai cli-agent`` command.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from typing import Optional
|
||||
|
||||
import rich_click as click
|
||||
import yaml
|
||||
|
||||
from planoai import claude_desktop as _cd
|
||||
from planoai.consts import NATIVE_PID_FILE, PLANO_DOCKER_NAME
|
||||
from planoai.core import _resolve_cli_agent_endpoint, start_cli_agent
|
||||
from planoai.docker_cli import docker_container_status
|
||||
from planoai.defaults import DEFAULT_LLM_LISTENER_PORT
|
||||
from planoai.utils import find_config_file, getLogger
|
||||
|
||||
log = getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _is_native_plano_running() -> bool:
    """Report whether both native Plano processes recorded in the PID file are alive.

    Reads ``NATIVE_PID_FILE`` (JSON) and probes ``envoy_pid`` and
    ``brightstaff_pid`` with signal 0.

    Returns:
        ``False`` when the file is missing, unreadable, not a JSON object,
        holds a pid that is not a positive integer, or either process is
        gone. A process we lack permission to signal counts as alive.
    """
    if not os.path.exists(NATIVE_PID_FILE):
        return False
    try:
        with open(NATIVE_PID_FILE, "r") as f:
            pids = json.load(f)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return False
    if not isinstance(pids, dict):
        return False

    tracked = (pids.get("envoy_pid"), pids.get("brightstaff_pid"))
    for pid in tracked:
        # bool is an int subclass, and os.kill treats pid 0 / negative pids
        # as process groups, which would report a false "running".
        if isinstance(pid, bool) or not isinstance(pid, int) or pid <= 0:
            return False

    # NOTE(review): on Windows, signal 0 is CTRL_C_EVENT rather than a
    # liveness probe — confirm native mode is never used there.
    for pid in tracked:
        try:
            os.kill(pid, 0)
        except (ProcessLookupError, OverflowError):
            # OverflowError: the pid is too large to be a real process id.
            return False
        except PermissionError:
            continue
    return True
|
||||
|
||||
|
||||
def _is_plano_running() -> bool:
    """Return True when Plano runs natively or its Docker container status is ``"running"``.

    The Docker status is only queried when the native check fails.
    """
    return (
        _is_native_plano_running()
        or docker_container_status(PLANO_DOCKER_NAME) == "running"
    )
|
||||
|
||||
|
||||
def _require_plano_running(console) -> None:
    """Exit with status 1, after printing guidance to ``console``, unless Plano is running."""
    if not _is_plano_running():
        console.print("[red]✗[/red] Plano is not running.")
        console.print(
            "[dim]Start Plano first using 'planoai up <config.yaml>' "
            "(native or --docker mode).[/dim]"
        )
        sys.exit(1)
|
||||
|
||||
|
||||
def _start_plano_with_config(config_path: str, console) -> None:
    """Start Plano by invoking the ``planoai up`` command callback.

    The callback is called with ``foreground=False`` and ``docker=False``.
    Per the original author's note, ``up`` only returns once Plano is
    healthy, so callers can continue straight on to configuring Claude
    Desktop.

    Args:
        config_path: Plano config file; made absolute before use.
        console: rich-style console used for status output.

    Exits with status 1 when the config file does not exist.
    """
    # Lazy import: ``planoai.main`` pulls in heavy modules (rich, native
    # runner, etc.) and would create a circular import at module-load time.
    from planoai.main import up

    abs_path = os.path.abspath(config_path)
    if not os.path.exists(abs_path):
        console.print(f"[red]✗[/red] Config file not found: {abs_path}")
        sys.exit(1)

    console.print(
        f"[dim]Starting Plano with config [cyan]{abs_path}[/cyan]...[/dim]"
    )
    up_kwargs = {
        "file": abs_path,
        "path": ".",
        "foreground": False,
        "with_tracing": False,
        "tracing_port": 4317,
        "docker": False,
        "verbose": False,
        "listener_port": DEFAULT_LLM_LISTENER_PORT,
    }
    up.callback(**up_kwargs)
|
||||
|
||||
|
||||
def _base_url_from_config_file(config_path: str) -> Optional[str]:
    """Derive ``http://localhost:<port>`` from a Plano config file.

    Returns:
        The URL built from the port reported by
        ``_resolve_cli_agent_endpoint``, or ``None`` when the file cannot be
        opened or is not valid YAML. An empty document is treated as ``{}``.
    """
    try:
        with open(config_path, "r") as handle:
            parsed = yaml.safe_load(handle)
    except (OSError, yaml.YAMLError):
        return None

    # The host part is ignored: the URL always points at localhost.
    _, listener_port = _resolve_cli_agent_endpoint(parsed or {})
    return f"http://localhost:{listener_port}"
|
||||
|
||||
|
||||
def _resolve_plano_config(file: Optional[str], path: str, console) -> str:
    """Locate the Plano config via ``find_config_file`` and return its path.

    Prints an error to ``console`` and exits with status 1 when the resolved
    path does not exist.
    """
    config_file = find_config_file(path, file)
    missing = not os.path.exists(config_file)
    if missing:
        console.print(f"[red]✗[/red] Config file not found: {config_file}")
        sys.exit(1)
    return config_file
|
||||
|
||||
|
||||
def _run_cli_agent(agent_type: str, file, path, settings) -> None:
    """Shared body of the CLI-agent subcommands.

    Requires a running Plano, resolves the config file, then hands off to
    ``start_cli_agent``. Any ``Exception`` is reported as ``Error: ...`` and
    turned into exit status 1.
    """
    from rich.console import Console

    console = Console()
    _require_plano_running(console)
    config_file = _resolve_plano_config(file, path, console)

    try:
        start_cli_agent(config_file, agent_type, settings)
    except Exception as exc:
        # SystemExit is not an Exception subclass, so it propagates untouched.
        click.echo(f"Error: {exc}")
        sys.exit(1)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Group + subcommands
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
# Root ``planoai launch`` click group; the subcommands below attach to it via
# ``@launch.command(...)``. The docstring doubles as the CLI help text.
@click.group()
def launch():
    """Launch a CLI agent or desktop app against the local Plano gateway."""
|
||||
|
||||
|
||||
# ``planoai launch claude-cli [FILE]``: thin wrapper over _run_cli_agent.
@launch.command("claude-cli")
@click.argument("file", required=False)
@click.option(
    "--path", default=".", help="Path to the directory containing plano_config.yaml"
)
@click.option(
    "--settings",
    default="{}",
    help="Additional settings as JSON string for the CLI agent.",
)
def claude_cli(file, path, settings):
    """Launch the Claude Code CLI connected to Plano."""
    # "claude" is the agent type passed through to start_cli_agent.
    _run_cli_agent("claude", file, path, settings)
|
||||
|
||||
|
||||
# ``planoai launch codex [FILE]``: thin wrapper over _run_cli_agent.
@launch.command("codex")
@click.argument("file", required=False)
@click.option(
    "--path", default=".", help="Path to the directory containing plano_config.yaml"
)
@click.option(
    "--settings",
    default="{}",
    help="Additional settings as JSON string for the CLI agent.",
)
def codex(file, path, settings):
    """Launch the Codex CLI connected to Plano."""
    # "codex" is the agent type passed through to start_cli_agent.
    _run_cli_agent("codex", file, path, settings)
|
||||
|
||||
|
||||
@launch.command("claude-desktop")
@click.option(
    "--config",
    "config_path",
    type=click.Path(dir_okay=False),
    default=None,
    help="Path to a Plano config; if Plano isn't already running, "
    "`planoai up <config>` is invoked first so the gateway is ready before "
    "Claude Desktop is configured.",
)
@click.option(
    "--no-launch",
    "no_launch",
    is_flag=True,
    default=False,
    help="Configure Claude Desktop but do not (re)open the app afterwards.",
)
@click.option(
    "--restore",
    "restore_flag",
    is_flag=True,
    default=False,
    help="Switch Claude Desktop back to its usual Anthropic Claude profile.",
)
@click.option(
    "--yes",
    "-y",
    "yes_flag",
    is_flag=True,
    default=False,
    help="Auto-approve restart prompts.",
)
@click.option(
    "--base-url",
    default=None,
    help="Plano LLM listener URL (default: derived from --config or running Plano, falling back to http://localhost:12000).",
)
def claude_desktop_cmd(config_path, no_launch, restore_flag, yes_flag, base_url):
    """Configure Claude Desktop to use the local Plano gateway.

    Mirrors `ollama launch claude-desktop`: rewrites Claude Desktop's profile
    JSONs (with `.bak` backups) to switch into third-party gateway mode pointed
    at Plano, then optionally restarts Claude Desktop so the change takes
    effect. When `--config <path>` is supplied and Plano is not already
    running, this command also starts Plano with that config first, so the
    end-to-end flow is a single command.
    """
    from rich.console import Console

    console = Console()

    def _reopen(question: str) -> None:
        # Best-effort (re)launch: a failure is reported as a warning, never
        # as a non-zero exit, because the config change already succeeded.
        try:
            _cd.launch_or_restart(question, yes_flag)
        except Exception as exc:
            console.print(
                f"[yellow]⚠[/yellow] Could not restart Claude Desktop: {exc}"
            )

    unsupported = _cd.supported()
    if unsupported is not None:
        console.print(f"[red]✗[/red] {unsupported}")
        sys.exit(1)

    # --- Restore path: undo the Plano profile and stop. ---
    if restore_flag:
        if config_path is not None:
            console.print(
                "[yellow]⚠[/yellow] --config is ignored when --restore is set."
            )
        try:
            _cd.restore()
        except Exception as exc:
            console.print(f"[red]✗[/red] Failed to restore Claude Desktop: {exc}")
            sys.exit(1)
        console.print(f"[green]✓[/green] {_cd.RESTORED_MESSAGE}")
        if not no_launch:
            _reopen("Restart Claude Desktop to use the usual Claude profile?")
        return

    # --- Configure path. ---
    # Auto-start Plano if --config was provided and nothing is running yet.
    if config_path is not None:
        abs_config = os.path.abspath(config_path)
        if not os.path.exists(abs_config):
            console.print(f"[red]✗[/red] Config file not found: {abs_config}")
            sys.exit(1)
        if not _is_plano_running():
            _start_plano_with_config(abs_config, console)
        else:
            console.print(
                "[dim]Plano already running; skipping startup. Using listener "
                f"from [cyan]{abs_config}[/cyan] for the gateway URL.[/dim]"
            )

    # Base URL precedence: --base-url > --config file > config found in the
    # current directory > default. Each source is consulted only when the
    # previous one produced nothing.
    resolved_url = base_url
    if not resolved_url and config_path is not None:
        resolved_url = _base_url_from_config_file(os.path.abspath(config_path))
    if not resolved_url:
        resolved_url = _resolve_base_url_from_running_plano()
    if not resolved_url:
        resolved_url = _cd.DEFAULT_BASE_URL

    if not _is_plano_running():
        console.print(
            "[yellow]⚠[/yellow] Plano does not appear to be running. "
            "Start it with [cyan]planoai up[/cyan] (or pass [cyan]--config "
            "<path>[/cyan]) before using Claude Desktop."
        )

    console.print(
        f"[dim]Configuring Claude Desktop to use Plano at "
        f"[cyan]{resolved_url}[/cyan][/dim]"
    )
    try:
        _cd.configure(resolved_url)
    except Exception as exc:
        console.print(f"[red]✗[/red] Failed to configure Claude Desktop: {exc}")
        sys.exit(1)

    console.print(f"[green]✓[/green] {_cd.SUCCESS_MESSAGE}")
    console.print(f"[dim]{_cd.RESTORE_HINT}[/dim]")

    if no_launch:
        return
    _reopen("Restart Claude Desktop to use Plano?")
|
||||
|
||||
|
||||
def _resolve_base_url_from_running_plano() -> Optional[str]:
    """Build a ``http://localhost:<port>`` URL from the locally discovered Plano config.

    The config file is located with ``find_config_file(".", None)``, parsed as
    YAML, and handed to ``_resolve_cli_agent_endpoint`` to obtain the port.

    Returns:
        The localhost URL, or ``None`` when the config cannot be located,
        does not exist, or cannot be read/parsed — letting the caller fall
        back to ``DEFAULT_BASE_URL``.
    """
    try:
        config_path = find_config_file(".", None)
    except Exception:
        # Best-effort lookup: a failed discovery just means "no URL known".
        return None

    if not (config_path and os.path.exists(config_path)):
        return None

    try:
        with open(config_path, "r") as handle:
            parsed = yaml.safe_load(handle)
    except (OSError, yaml.YAMLError):
        return None

    # An empty document parses to a falsy value; treat it as an empty mapping.
    _host, port = _resolve_cli_agent_endpoint(parsed or {})
    return f"http://localhost:{port}"
|
||||
|
|
@ -1,4 +1,3 @@
|
|||
import json
|
||||
import os
|
||||
import multiprocessing
|
||||
import subprocess
|
||||
|
|
@ -19,7 +18,6 @@ PLANO_COLOR = "#969FF4"
|
|||
from planoai.docker_cli import (
|
||||
docker_validate_plano_schema,
|
||||
stream_gateway_logs,
|
||||
docker_container_status,
|
||||
)
|
||||
from planoai.utils import (
|
||||
getLogger,
|
||||
|
|
@ -33,19 +31,17 @@ from planoai.utils import (
|
|||
from planoai.core import (
|
||||
start_plano,
|
||||
stop_docker_container,
|
||||
start_cli_agent,
|
||||
)
|
||||
from planoai.init_cmd import init as init_cmd
|
||||
from planoai.launch_cmd import launch as launch_cmd
|
||||
from planoai.trace_cmd import trace as trace_cmd, start_trace_listener_background
|
||||
from planoai.chatgpt_cmd import chatgpt as chatgpt_cmd
|
||||
from planoai.obs_cmd import obs as obs_cmd
|
||||
from planoai.consts import (
|
||||
DEFAULT_OTEL_TRACING_GRPC_ENDPOINT,
|
||||
DEFAULT_NATIVE_OTEL_TRACING_GRPC_ENDPOINT,
|
||||
NATIVE_PID_FILE,
|
||||
PLANO_RUN_DIR,
|
||||
PLANO_DOCKER_IMAGE,
|
||||
PLANO_DOCKER_NAME,
|
||||
)
|
||||
from planoai.rich_click_config import configure_rich_click
|
||||
from planoai.versioning import check_version_status, get_latest_version, get_version
|
||||
|
|
@ -53,30 +49,6 @@ from planoai.versioning import check_version_status, get_latest_version, get_ver
|
|||
log = getLogger(__name__)
|
||||
|
||||
|
||||
def _is_native_plano_running() -> bool:
    """Report whether both native Plano processes listed in the PID file are alive.

    Reads ``NATIVE_PID_FILE`` as JSON, expects integer ``envoy_pid`` and
    ``brightstaff_pid`` entries, and probes each PID with signal ``0``.

    Returns:
        ``True`` only when the file is readable, holds a JSON object with two
        positive integer PIDs, and no probe reports the process as missing.
        A missing, unreadable, or malformed PID file yields ``False`` rather
        than raising.
    """
    try:
        with open(NATIVE_PID_FILE, "r") as f:
            pids = json.load(f)
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file; ValueError covers both
        # json.JSONDecodeError and undecodable bytes.
        return False

    # Valid JSON that is not an object (e.g. ``[]`` or ``null``) has no
    # ``.get`` and must be treated as a corrupt PID file.
    if not isinstance(pids, dict):
        return False

    candidates = (pids.get("envoy_pid"), pids.get("brightstaff_pid"))
    for pid in candidates:
        # ``bool`` is a subclass of ``int``; non-positive values would make
        # ``os.kill`` address process groups instead of a single process.
        if isinstance(pid, bool) or not isinstance(pid, int) or pid <= 0:
            return False

    for pid in candidates:
        try:
            os.kill(pid, 0)  # signal 0: existence probe, nothing is delivered
        except ProcessLookupError:
            return False
        except PermissionError:
            # The process exists but belongs to another user.
            continue
    return True
|
||||
|
||||
|
||||
def _is_port_in_use(port: int) -> bool:
|
||||
"""Check if a TCP port is already bound on localhost."""
|
||||
import socket
|
||||
|
|
@ -690,57 +662,12 @@ def logs(debug, follow, docker):
|
|||
plano_process.terminate()
|
||||
|
||||
|
||||
@click.command()
@click.argument("type", type=click.Choice(["claude", "codex"]), required=True)
@click.argument("file", required=False)  # Optional file argument
@click.option(
    "--path", default=".", help="Path to the directory containing plano_config.yaml"
)
@click.option(
    "--settings",
    default="{}",
    help="Additional settings as JSON string for the CLI agent.",
)
def cli_agent(type, file, path, settings):
    """Start a CLI agent connected to Plano.

    CLI_AGENT: The type of CLI agent to start ('claude' or 'codex')
    """
    # A native instance is checked first; Docker is only queried when no
    # native Plano is detected.
    plano_is_up = (
        _is_native_plano_running()
        or docker_container_status(PLANO_DOCKER_NAME) == "running"
    )
    if not plano_is_up:
        log.error("Plano is not running.")
        log.error(
            "Start Plano first using 'planoai up <config.yaml>' (native or --docker mode)."
        )
        sys.exit(1)

    # Locate the config the agent should be wired against.
    config_file = find_config_file(path, file)
    if not os.path.exists(config_file):
        log.error(f"Config file not found: {config_file}")
        sys.exit(1)

    try:
        start_cli_agent(config_file, type, settings)
    except SystemExit:
        # Let explicit exits through untouched so their exit codes survive.
        raise
    except Exception as exc:
        click.echo(f"Error: {exc}")
        sys.exit(1)
|
||||
|
||||
|
||||
# add commands to the main group
|
||||
main.add_command(up)
|
||||
main.add_command(down)
|
||||
main.add_command(build)
|
||||
main.add_command(logs)
|
||||
main.add_command(cli_agent)
|
||||
main.add_command(launch_cmd, name="launch")
|
||||
main.add_command(generate_prompt_targets)
|
||||
main.add_command(init_cmd, name="init")
|
||||
main.add_command(trace_cmd, name="trace")
|
||||
|
|
|
|||
|
|
@ -46,6 +46,20 @@ def configure_rich_click(plano_color: str) -> None:
|
|||
"options": ["--debug", "--follow"],
|
||||
},
|
||||
],
|
||||
"planoai launch claude-desktop": [
|
||||
{
|
||||
"name": "Plano gateway",
|
||||
"options": ["--config", "--base-url"],
|
||||
},
|
||||
{
|
||||
"name": "Mode",
|
||||
"options": ["--no-launch", "--restore"],
|
||||
},
|
||||
{
|
||||
"name": "Confirmation",
|
||||
"options": ["--yes"],
|
||||
},
|
||||
],
|
||||
}
|
||||
|
||||
# Command groups for main help.
|
||||
|
|
@ -57,7 +71,7 @@ def configure_rich_click(plano_color: str) -> None:
|
|||
},
|
||||
{
|
||||
"name": "Agent Commands",
|
||||
"commands": ["cli-agent"],
|
||||
"commands": ["launch"],
|
||||
},
|
||||
{
|
||||
"name": "Observability",
|
||||
|
|
@ -68,4 +82,14 @@ def configure_rich_click(plano_color: str) -> None:
|
|||
"commands": ["generate-prompt-targets"],
|
||||
},
|
||||
],
|
||||
"planoai launch": [
|
||||
{
|
||||
"name": "CLI Agents",
|
||||
"commands": ["claude-cli", "codex"],
|
||||
},
|
||||
{
|
||||
"name": "Desktop Apps",
|
||||
"commands": ["claude-desktop"],
|
||||
},
|
||||
],
|
||||
}
|
||||
|
|
|
|||
366
cli/test/test_claude_desktop.py
Normal file
366
cli/test/test_claude_desktop.py
Normal file
|
|
@ -0,0 +1,366 @@
|
|||
"""Tests for `planoai launch claude-desktop` configuration logic."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from planoai import claude_desktop as cd
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
def fake_home(tmp_path, monkeypatch):
    """Simulate a pristine macOS home directory rooted at ``tmp_path``.

    ``$PLANO_API_KEY`` is removed from the environment so every test starts
    without an env override; tests covering that path set it themselves.
    """
    home_dir = str(tmp_path)
    monkeypatch.delenv("PLANO_API_KEY", raising=False)
    monkeypatch.setattr(cd, "_user_home", lambda _: home_dir)
    monkeypatch.setattr(cd, "_GOOS", "darwin")
    return tmp_path
|
||||
|
||||
|
||||
def _normal_config_path(home: Path) -> Path:
    """Return the regular Claude Desktop config path beneath ``home``."""
    claude_dir = home.joinpath("Library", "Application Support", "Claude")
    return claude_dir / "claude_desktop_config.json"
|
||||
|
||||
|
||||
def _third_party_root(home: Path) -> Path:
    """Return the ``Claude-3p`` application-support directory beneath ``home``."""
    return home.joinpath("Library", "Application Support", "Claude-3p")
|
||||
|
||||
|
||||
def _third_party_config_path(home: Path) -> Path:
    """Return the config file path inside the third-party root for ``home``."""
    root = _third_party_root(home)
    return root / "claude_desktop_config.json"
|
||||
|
||||
|
||||
def _meta_path(home: Path) -> Path:
    """Return the ``configLibrary/_meta.json`` path inside the third-party root."""
    library_dir = _third_party_root(home) / "configLibrary"
    return library_dir / "_meta.json"
|
||||
|
||||
|
||||
def _profile_path(home: Path) -> Path:
    """Return the profile JSON path (named after ``cd.PROFILE_ID``) for ``home``."""
    library_dir = _third_party_root(home) / "configLibrary"
    return library_dir / f"{cd.PROFILE_ID}.json"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# configure() / restore()
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_configure_writes_all_four_files_with_default_api_key(fake_home):
    """configure() on a fresh home writes both configs, the meta file and the profile."""
    cd.configure("http://localhost:12000")

    def load(path):
        return json.loads(path.read_text())

    # Both the regular and the third-party config must be flipped to 3p mode.
    for cfg_path in (
        _normal_config_path(fake_home),
        _third_party_config_path(fake_home),
    ):
        assert load(cfg_path)["deploymentMode"] == "3p"

    meta = load(_meta_path(fake_home))
    assert meta["appliedId"] == cd.PROFILE_ID
    plano_entries = [
        entry
        for entry in meta["entries"]
        if isinstance(entry, dict) and entry.get("id") == cd.PROFILE_ID
    ]
    assert plano_entries

    profile = load(_profile_path(fake_home))
    expected_values = {
        "inferenceProvider": "gateway",
        "inferenceGatewayBaseUrl": "http://localhost:12000",
        # No env override and no pre-existing profile -> placeholder is written.
        "inferenceGatewayApiKey": cd.DEFAULT_API_KEY,
        "inferenceGatewayAuthScheme": "bearer",
    }
    for key, value in expected_values.items():
        assert profile[key] == value
    assert profile["disableDeploymentModeChooser"] is True
    assert "inferenceModels" not in profile
|
||||
|
||||
|
||||
def test_configure_uses_env_override_when_set(fake_home, monkeypatch):
    """A non-blank ``$PLANO_API_KEY`` ends up in the written profile."""
    monkeypatch.setenv("PLANO_API_KEY", "from-env")
    cd.configure("http://localhost:12000")

    written = json.loads(_profile_path(fake_home).read_text())
    assert written["inferenceGatewayApiKey"] == "from-env"
|
||||
|
||||
|
||||
def test_configure_preserves_existing_profile_api_key(fake_home):
    """An API key already stored in the profile survives a re-configure."""
    profile_file = _profile_path(fake_home)
    profile_file.parent.mkdir(parents=True, exist_ok=True)
    seeded = {"inferenceGatewayApiKey": "from-profile"}
    profile_file.write_text(json.dumps(seeded))

    cd.configure("http://localhost:12000")

    rewritten = json.loads(profile_file.read_text())
    assert rewritten["inferenceGatewayApiKey"] == "from-profile"
|
||||
|
||||
|
||||
def test_configure_does_not_call_network(fake_home, monkeypatch):
    """configure() must work offline.

    The gateway is not validated at configure time, so any attempt to open a
    URL is turned into a test failure here.
    """

    def fail_on_request(*_args, **_kwargs):
        raise AssertionError("configure() must not perform network calls")

    monkeypatch.setattr("urllib.request.urlopen", fail_on_request)
    cd.configure("http://localhost:12000")

    written = json.loads(_profile_path(fake_home).read_text())
    assert written["inferenceProvider"] == "gateway"
|
||||
|
||||
|
||||
def test_configure_preserves_existing_unrelated_keys(fake_home):
    """Keys configure() does not own are left alone in the regular config."""
    config_file = _normal_config_path(fake_home)
    config_file.parent.mkdir(parents=True, exist_ok=True)
    initial = {"someOtherSetting": 123, "deploymentMode": "1p"}
    config_file.write_text(json.dumps(initial))

    cd.configure("http://localhost:12000")

    updated = json.loads(config_file.read_text())
    assert updated["someOtherSetting"] == 123
    assert updated["deploymentMode"] == "3p"
|
||||
|
||||
|
||||
def test_configure_writes_backup_of_existing_files(fake_home):
    """An existing config is copied to a ``.bak`` sibling before being rewritten."""
    config_file = _normal_config_path(fake_home)
    config_file.parent.mkdir(parents=True, exist_ok=True)
    config_file.write_text('{"deploymentMode":"1p"}')

    cd.configure("http://localhost:12000")

    # Append ".bak" after the existing ".json" suffix.
    backup_file = config_file.with_name(config_file.name + ".bak")
    assert backup_file.exists()
    saved = json.loads(backup_file.read_text())
    assert saved["deploymentMode"] == "1p"
|
||||
|
||||
|
||||
def test_restore_reverts_deployment_mode_and_strips_gateway_keys(fake_home):
    """restore() undoes configure(): 1p mode, no Plano meta entry, no gateway keys."""
    cd.configure("http://localhost:12000")
    cd.restore()

    def load(path):
        return json.loads(path.read_text())

    assert load(_normal_config_path(fake_home))["deploymentMode"] == "1p"
    assert load(_third_party_config_path(fake_home))["deploymentMode"] == "1p"

    meta = load(_meta_path(fake_home))
    assert meta.get("appliedId") != cd.PROFILE_ID
    remaining_plano_entries = [
        entry
        for entry in meta.get("entries", [])
        if isinstance(entry, dict) and entry.get("id") == cd.PROFILE_ID
    ]
    assert not remaining_plano_entries

    profile = load(_profile_path(fake_home))
    assert profile["disableDeploymentModeChooser"] is False
    gateway_keys = (
        "inferenceProvider",
        "inferenceGatewayBaseUrl",
        "inferenceGatewayAuthScheme",
        "inferenceModels",
    )
    for key in gateway_keys:
        assert key not in profile
|
||||
|
||||
|
||||
def test_restore_meta_keeps_unrelated_entries(fake_home):
    """_restore_meta() drops only the Plano entry and clears the applied id."""
    other_id = "00000000-0000-0000-0000-000000000001"
    seeded_meta = {
        "appliedId": cd.PROFILE_ID,
        "entries": [
            {"id": cd.PROFILE_ID, "name": "Plano"},
            {"id": other_id, "name": "Other"},
        ],
    }
    meta_file = _meta_path(fake_home)
    meta_file.parent.mkdir(parents=True, exist_ok=True)
    meta_file.write_text(json.dumps(seeded_meta))

    cd._restore_meta(str(meta_file))

    restored = json.loads(meta_file.read_text())
    assert restored.get("appliedId") in (None, "")
    surviving_ids = [
        entry["id"] for entry in restored["entries"] if isinstance(entry, dict)
    ]
    assert surviving_ids == [other_id]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# is_configured()
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_is_configured_false_on_fresh_home(fake_home):
    """With no config files present, is_configured() reports False."""
    configured = cd.is_configured()
    assert configured is False
|
||||
|
||||
|
||||
def test_is_configured_true_after_configure(fake_home):
    """A successful configure() makes is_configured() report True."""
    cd.configure("http://localhost:12000")
    configured = cd.is_configured()
    assert configured is True
|
||||
|
||||
|
||||
def test_is_configured_false_when_only_normal_config_set(fake_home):
    """Reverting just the third-party config to 1p makes is_configured() False."""
    cd.configure("http://localhost:12000")

    third_party_file = _third_party_config_path(fake_home)
    contents = json.loads(third_party_file.read_text())
    contents["deploymentMode"] = "1p"
    third_party_file.write_text(json.dumps(contents))

    configured = cd.is_configured()
    assert configured is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# API key resolution (placeholder by default; env override; profile preserve)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_resolve_api_key_returns_placeholder_when_no_inputs(fake_home):
    """Without env override or profile candidates the placeholder key is used."""
    resolved = cd._resolve_api_key([])
    assert resolved == cd.DEFAULT_API_KEY
|
||||
|
||||
|
||||
def test_resolve_api_key_uses_env_when_set(fake_home, monkeypatch):
    """The environment override takes precedence over a key stored in the profile."""
    monkeypatch.setenv("PLANO_API_KEY", "from-env")
    profile_file = _profile_path(fake_home)
    profile_file.parent.mkdir(parents=True, exist_ok=True)
    profile_file.write_text(json.dumps({"inferenceGatewayApiKey": "from-profile"}))

    resolved = cd._resolve_api_key([str(profile_file)])

    # Env wins over profile.
    assert resolved == "from-env"
|
||||
|
||||
|
||||
def test_resolve_api_key_falls_back_to_existing_profile(fake_home):
    """With no env override, the key already stored in the profile is reused."""
    profile_file = _profile_path(fake_home)
    profile_file.parent.mkdir(parents=True, exist_ok=True)
    profile_file.write_text(json.dumps({"inferenceGatewayApiKey": "from-profile"}))

    resolved = cd._resolve_api_key([str(profile_file)])
    assert resolved == "from-profile"
|
||||
|
||||
|
||||
def test_resolve_api_key_skips_blank_env(fake_home, monkeypatch):
    """A whitespace-only ``$PLANO_API_KEY`` is ignored in favour of the placeholder."""
    monkeypatch.setenv("PLANO_API_KEY", " ")
    resolved = cd._resolve_api_key([])
    assert resolved == cd.DEFAULT_API_KEY
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Atomic write
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_atomic_write_creates_backup_of_existing_file(tmp_path):
    """Overwriting an existing file leaves the old bytes in a ``.bak`` sibling."""
    nested_dir = tmp_path / "deep" / "nested"
    nested_dir.mkdir(parents=True, exist_ok=True)
    target = nested_dir / "file.json"
    target.write_text("ORIGINAL")

    cd._atomic_write_with_backup(str(target), b"NEW")

    assert target.read_text() == "NEW"
    assert (nested_dir / "file.json.bak").read_text() == "ORIGINAL"
|
||||
|
||||
|
||||
def test_atomic_write_skips_backup_when_no_existing_file(tmp_path):
    """Writing a brand-new file produces no ``.bak`` sibling."""
    target = tmp_path / "fresh.json"
    backup = tmp_path / "fresh.json.bak"

    cd._atomic_write_with_backup(str(target), b"DATA")

    assert target.read_text() == "DATA"
    assert not backup.exists()
|
||||
|
||||
|
||||
def test_atomic_write_does_not_truncate_on_failure(tmp_path, monkeypatch):
    """A failing ``os.replace`` leaves the target intact and no temp file behind."""
    target = tmp_path / "file.json"
    target.write_text("ORIGINAL")

    original_replace = os.replace

    def failing_replace(_src, _dst):
        raise OSError("disk full")

    monkeypatch.setattr(os, "replace", failing_replace)
    with pytest.raises(OSError):
        cd._atomic_write_with_backup(str(target), b"NEW")
    # Put the real implementation back before inspecting the filesystem.
    monkeypatch.setattr(os, "replace", original_replace)

    assert target.read_text() == "ORIGINAL"
    stray_temp_files = list(tmp_path.glob(".plano_*.tmp"))
    assert not stray_temp_files
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Platform support
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_supported_returns_error_on_linux(monkeypatch):
    """On Linux, supported() returns a message naming the supported platforms."""
    monkeypatch.setattr(cd, "_GOOS", "linux")
    message = cd.supported()
    assert message is not None
    assert "macOS" in message
    assert "Windows" in message
|
||||
|
||||
|
||||
def test_supported_returns_none_on_darwin(monkeypatch):
    """On macOS, supported() signals success by returning None."""
    monkeypatch.setattr(cd, "_GOOS", "darwin")
    message = cd.supported()
    assert message is None
|
||||
|
||||
|
||||
def test_configure_raises_on_unsupported_platform(monkeypatch):
    """configure() refuses to run on Linux with a RuntimeError mentioning macOS."""
    monkeypatch.setattr(cd, "_GOOS", "linux")
    expectation = pytest.raises(RuntimeError, match="macOS")
    with expectation:
        cd.configure()
|
||||
|
||||
|
||||
def test_restore_raises_on_unsupported_platform(monkeypatch):
    """restore() refuses to run on Linux with a RuntimeError mentioning macOS."""
    monkeypatch.setattr(cd, "_GOOS", "linux")
    expectation = pytest.raises(RuntimeError, match="macOS")
    with expectation:
        cd.restore()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# launch_or_restart()
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_launch_or_restart_opens_when_not_running(monkeypatch):
    """When Claude Desktop is not running it is opened without a quit attempt."""
    open_calls = []

    def record_open():
        return open_calls.append(True)

    def unexpected_quit():
        return pytest.fail("should not quit when not running")

    monkeypatch.setattr(cd, "_GOOS", "darwin")
    monkeypatch.setattr(cd, "_is_running", lambda: False)
    monkeypatch.setattr(cd, "_open", record_open)
    monkeypatch.setattr(cd, "_quit", unexpected_quit)

    cd.launch_or_restart("prompt", yes=True)
    assert open_calls == [True]
|
||||
|
||||
|
||||
def test_launch_or_restart_with_yes_quits_then_opens(monkeypatch):
    """With ``yes=True`` a running Claude Desktop is quit once and reopened once."""
    state = {"running": True}
    quit_calls = []
    open_calls = []

    def mark_stopped():
        state["running"] = False

    monkeypatch.setattr(cd, "_GOOS", "darwin")
    monkeypatch.setattr(cd, "_is_running", lambda: state["running"])
    # Record the quit and flip the fake process state in a single call.
    monkeypatch.setattr(
        cd,
        "_quit",
        lambda: (quit_calls.append(True), mark_stopped()),
    )
    monkeypatch.setattr(cd, "_open", lambda: open_calls.append(True))
    # Skip real waiting while the command polls for shutdown.
    monkeypatch.setattr(cd, "_sleep", lambda _: None)

    cd.launch_or_restart("Restart?", yes=True)

    assert quit_calls == [True]
    assert open_calls == [True]
|
||||
231
cli/test/test_launch_cmd.py
Normal file
231
cli/test/test_launch_cmd.py
Normal file
|
|
@ -0,0 +1,231 @@
|
|||
"""Tests for the `planoai launch claude-desktop` click command.
|
||||
|
||||
Focused on the wiring between the CLI flags and the underlying
|
||||
`claude_desktop` module / `up` invocation. The actual JSON-rewriting and key
|
||||
validation are covered in `test_claude_desktop.py`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from click.testing import CliRunner
|
||||
|
||||
from planoai import claude_desktop as cd
|
||||
from planoai import launch_cmd as lc
|
||||
|
||||
|
||||
def _stub_cd(monkeypatch):
    """Swap ``claude_desktop`` side effects for recorders and return the call log.

    The returned dict maps ``"configure"``, ``"restore"`` and
    ``"launch_or_restart"`` to the list of recorded invocations.
    """
    calls: dict[str, list] = {
        name: [] for name in ("configure", "restore", "launch_or_restart")
    }

    def record_configure(base_url, **_kw):
        return calls["configure"].append(base_url)

    def record_restore():
        return calls["restore"].append(True)

    def record_launch(prompt, yes):
        return calls["launch_or_restart"].append((prompt, yes))

    # Pretend the platform is always supported.
    monkeypatch.setattr(cd, "supported", lambda: None)
    monkeypatch.setattr(cd, "configure", record_configure)
    monkeypatch.setattr(cd, "restore", record_restore)
    monkeypatch.setattr(cd, "launch_or_restart", record_launch)
    return calls
|
||||
|
||||
|
||||
def test_config_path_starts_plano_when_not_running(tmp_path, monkeypatch):
    """``--config`` with no running Plano invokes ``up`` once, then configures Claude."""
    config_file = tmp_path / "plano_config.yaml"
    config_file.write_text(
        "version: v0.4.0\n"
        "listeners:\n"
        " - name: llm\n"
        " type: model\n"
        " port: 12345\n"
        " address: 0.0.0.0\n"
        "model_providers: []\n"
    )

    recorded = _stub_cd(monkeypatch)
    monkeypatch.setattr(lc, "_is_plano_running", lambda: False)

    up_invocations = []

    # The explicit parameter list doubles as a check on the keyword
    # arguments the launch command passes to ``up``.
    def fake_up(
        file,
        path,
        foreground,
        with_tracing,
        tracing_port,
        docker,
        verbose,
        listener_port,
    ):
        captured = {
            "file": file,
            "foreground": foreground,
            "docker": docker,
            "listener_port": listener_port,
        }
        up_invocations.append(captured)

    from planoai.main import up as up_cmd

    monkeypatch.setattr(up_cmd, "callback", fake_up)

    cli_args = ["claude-desktop", "--config", str(config_file), "--yes"]
    result = CliRunner().invoke(lc.launch, cli_args)

    assert result.exit_code == 0, result.output
    assert len(up_invocations) == 1
    first_up = up_invocations[0]
    assert first_up["file"] == str(config_file)
    assert first_up["foreground"] is False
    assert recorded["configure"] == ["http://localhost:12345"]
    # --yes implies we restart Claude Desktop after configuring.
    assert recorded["launch_or_restart"]
    assert recorded["launch_or_restart"][0][1] is True
|
||||
|
||||
|
||||
def test_config_path_skips_up_when_plano_already_running(tmp_path, monkeypatch):
    """With Plano already running, ``--config`` only supplies the gateway URL."""
    config_file = tmp_path / "plano_config.yaml"
    config_file.write_text(
        "version: v0.4.0\n"
        "listeners:\n"
        " - name: llm\n"
        " type: model\n"
        " port: 12500\n"
        "model_providers: []\n"
    )

    recorded = _stub_cd(monkeypatch)
    monkeypatch.setattr(lc, "_is_plano_running", lambda: True)

    up_invocations = []

    def record_unexpected_up(*args, **kwargs):
        up_invocations.append("called")

    from planoai.main import up as up_cmd

    monkeypatch.setattr(up_cmd, "callback", record_unexpected_up)

    cli_args = ["claude-desktop", "--config", str(config_file), "--no-launch"]
    result = CliRunner().invoke(lc.launch, cli_args)

    assert result.exit_code == 0, result.output
    assert (
        up_invocations == []
    ), "should not invoke up.callback when Plano is already running"
    assert recorded["configure"] == ["http://localhost:12500"]
    # --no-launch skips the restart step.
    assert recorded["launch_or_restart"] == []
|
||||
|
||||
|
||||
def test_config_path_must_exist(tmp_path, monkeypatch):
    """A ``--config`` path that does not exist aborts before configure() runs."""
    recorded = _stub_cd(monkeypatch)
    monkeypatch.setattr(lc, "_is_plano_running", lambda: False)

    missing_config = tmp_path / "nope.yaml"
    cli_args = ["claude-desktop", "--config", str(missing_config)]
    result = CliRunner().invoke(lc.launch, cli_args)

    assert result.exit_code != 0
    assert "not found" in result.output.lower()
    assert recorded["configure"] == []
|
||||
|
||||
|
||||
def test_no_launch_skips_open(monkeypatch):
    """``--no-launch`` configures Claude Desktop but never restarts it."""
    recorded = _stub_cd(monkeypatch)
    monkeypatch.setattr(lc, "_is_plano_running", lambda: True)

    cli_args = ["claude-desktop", "--no-launch", "--base-url", "http://localhost:9999"]
    result = CliRunner().invoke(lc.launch, cli_args)

    assert result.exit_code == 0, result.output
    assert recorded["configure"] == ["http://localhost:9999"]
    assert recorded["launch_or_restart"] == []
|
||||
|
||||
|
||||
def test_restore_ignores_config_path(tmp_path, monkeypatch):
    """``--restore`` runs restore() only and reports that ``--config`` was ignored."""
    config_file = tmp_path / "plano_config.yaml"
    config_file.write_text("version: v0.4.0\nmodel_providers: []\n")

    recorded = _stub_cd(monkeypatch)
    monkeypatch.setattr(lc, "_is_plano_running", lambda: True)

    cli_args = ["claude-desktop", "--restore", "--config", str(config_file), "--yes"]
    result = CliRunner().invoke(lc.launch, cli_args)

    assert result.exit_code == 0, result.output
    assert recorded["restore"] == [True]
    assert recorded["configure"] == []
    assert "ignored" in result.output.lower()
|
||||
|
||||
|
||||
def test_base_url_overrides_config_file(tmp_path, monkeypatch):
    """An explicit ``--base-url`` wins over the listener port in ``--config``."""
    config_file = tmp_path / "plano_config.yaml"
    config_file.write_text(
        "version: v0.4.0\n"
        "listeners:\n"
        " - name: llm\n"
        " type: model\n"
        " port: 12345\n"
        "model_providers: []\n"
    )

    recorded = _stub_cd(monkeypatch)
    monkeypatch.setattr(lc, "_is_plano_running", lambda: True)

    cli_args = [
        "claude-desktop",
        "--config",
        str(config_file),
        "--base-url",
        "http://10.0.0.5:8080",
        "--no-launch",
    ]
    result = CliRunner().invoke(lc.launch, cli_args)

    assert result.exit_code == 0, result.output
    assert recorded["configure"] == ["http://10.0.0.5:8080"]
|
||||
|
||||
|
||||
def test_unsupported_platform_errors(monkeypatch):
    """The command exits non-zero and shows the message returned by supported()."""

    def unsupported():
        return "Claude Desktop launch is only supported on macOS and Windows"

    monkeypatch.setattr(cd, "supported", unsupported)

    result = CliRunner().invoke(lc.launch, ["claude-desktop"])

    assert result.exit_code != 0
    assert "macOS" in result.output
|
||||
|
||||
|
||||
def test_help_lists_new_flags(monkeypatch):
    """``claude-desktop --help`` mentions the config, no-launch and restore flags."""
    result = CliRunner().invoke(lc.launch, ["claude-desktop", "--help"])

    assert result.exit_code == 0, result.output
    for flag in ("--config", "--no-launch", "--restore"):
        assert flag in result.output
|
||||
423
demos/llm_routing/frontier_model_routing/README.md
Normal file
423
demos/llm_routing/frontier_model_routing/README.md
Normal file
|
|
@ -0,0 +1,423 @@
|
|||
# Frontier Model Routing: Sonnet 4.6 + GPT 5.5 + Opus 4.7
|
||||
|
||||
A worked example of using Plano to route across the three current frontier
|
||||
LLMs from three different providers — without your application caring which
|
||||
model handled any given request, and with **per-route fallbacks** so a
|
||||
provider outage never takes the demo down.
|
||||
|
||||
| Tier | Primary model | Provider | What it's great at |
|
||||
| ---------------- | -------------------------------------- | ------------------ | -------------------------------------------------------- |
|
||||
| `frontier.fast` | `anthropic-claude-sonnet-4-6` | DigitalOcean | Daily driver — chat, summaries, drafts, light reasoning |
|
||||
| `frontier.smart` | `gpt-5.5` | OpenAI | Multi-step reasoning, math, tool/function calling |
|
||||
| `frontier.max` | `claude-opus-4-7` | Anthropic | Code, deep analysis, long-context evaluation, refactors |
|
||||
|
||||
The same prompt picks the right model automatically — Plano's
|
||||
preference-aligned router (Plano-Orchestrator) reads the user's intent and dispatches to
|
||||
the route whose `routing_preferences` description best matches. Each route
|
||||
is backed by an **ordered candidate pool**, so when the primary provider
|
||||
returns a `429`/`5xx` the next entry in the pool serves the request.
|
||||
|
||||
```
|
||||
┌────────────────────────────────────┐
|
||||
client ──── /v1 ───▶ │ Plano gateway (port 12000) │
|
||||
(OpenAI / Anthropic / │ ├── Plano-Orchestrator (router) │
|
||||
Claude Desktop / SDK) │ └── Envoy + brightstaff │
|
||||
└────────────────────────────────────┘
|
||||
│ │ │
|
||||
┌───────────┘ │ └────────────┐
|
||||
▼ ▼ ▼
|
||||
DigitalOcean Gradient AI OpenAI Anthropic
|
||||
anthropic-claude-sonnet-4-6 gpt-5.5 claude-opus-4-7
|
||||
(daily conversation route) (complex reasoning) (code + deep analysis)
|
||||
```
|
||||
|
||||
## Why this layout
|
||||
|
||||
- **Cost-quality fit per request.** Casual prompts go to Sonnet 4.6 on
|
||||
DigitalOcean (cheaper inference, still excellent quality); complex
|
||||
reasoning goes to GPT 5.5; code and deep analysis go to Opus 4.7.
|
||||
- **Provider diversity = resilience.** Every route lists a fallback model
|
||||
from a different provider — if Anthropic rate-limits Opus, Plano hands
|
||||
the next request in that route to GPT 5.5 with no client changes.
|
||||
- **Zero client changes.** The OpenAI SDK, Anthropic SDK, Claude Desktop,
|
||||
Codex CLI, and curl all hit the same `:12000` endpoint and use the same
|
||||
alias names. Switching `frontier.max` from Opus to whatever ships next
|
||||
is a one-line config change.
|
||||
|
||||
## The new routing-preferences architecture (v0.4.0)
|
||||
|
||||
This demo uses Plano's **top-level `routing_preferences`** block — the
|
||||
canonical shape since `v0.4.0`. The older inline form (preferences nested
|
||||
under each `model_provider`) is auto-migrated by the Plano CLI but emits a
|
||||
deprecation warning. The top-level shape gives each route an ordered
|
||||
candidate pool, which is what makes per-route fallbacks possible.
|
||||
|
||||
```yaml
|
||||
routing_preferences:
|
||||
- name: code generation
|
||||
description: writing new functions, classes, scripts, or boilerplate; implementing APIs; producing unit tests
|
||||
models:
|
||||
- anthropic/claude-opus-4-7 # primary
|
||||
- openai/gpt-5.5 # fallback on 429/5xx
|
||||
```
|
||||
|
||||
What changes vs. the v0.3.0 inline style:
|
||||
|
||||
| Capability | v0.3.0 inline | v0.4.0 top-level |
|
||||
| ----------------------------------------- | :-----------: | :--------------: |
|
||||
| Multiple models can serve the same route | no | yes |
|
||||
| Explicit primary + ranked fallback chain | no | yes |
|
||||
| Per-request override via request body | no | yes |
|
||||
| Decision-only endpoint (`/routing/v1/...`)| no | yes |
|
||||
| `X-Model-Affinity` header for agent loops | no | yes |
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- **Plano CLI** — `uv tool install planoai` or `pip install planoai`
|
||||
- API keys for all three providers:
|
||||
|
||||
| Env var | Where to get it |
|
||||
| ------------------- | ------------------------------------------------------------------------ |
|
||||
| `DO_API_KEY` | <https://cloud.digitalocean.com/account/api/tokens> (Gradient AI access) |
|
||||
| `OPENAI_API_KEY` | <https://platform.openai.com/api-keys> |
|
||||
| `ANTHROPIC_API_KEY` | <https://console.anthropic.com/> |
|
||||
|
||||
## Quick start
|
||||
|
||||
```bash
|
||||
export DO_API_KEY=...
|
||||
export OPENAI_API_KEY=...
|
||||
export ANTHROPIC_API_KEY=...
|
||||
|
||||
cd demos/llm_routing/frontier_model_routing
|
||||
./run_demo.sh
|
||||
```
|
||||
|
||||
`run_demo.sh` writes a local `.env`, then runs `planoai up config.yaml`.
|
||||
Plano daemonizes and is ready when the script returns.
|
||||
|
||||
To shut down:
|
||||
|
||||
```bash
|
||||
./run_demo.sh down
|
||||
```
|
||||
|
||||
## Try it
|
||||
|
||||
### Let Plano pick the right tier
|
||||
|
||||
```bash
|
||||
./test.sh
|
||||
```
|
||||
|
||||
The script does two things for each prompt:
|
||||
|
||||
1. Calls `POST /routing/v1/chat/completions` — Plano's **decision-only**
|
||||
endpoint — to print the matched route name and the ranked candidate
|
||||
pool for that prompt.
|
||||
2. Calls `POST /v1/chat/completions` to actually run the request and
|
||||
prints the model that handled it.
|
||||
|
||||
A healthy run resolves like this:
|
||||
|
||||
```
|
||||
[daily conversation -> expects DigitalOcean Sonnet 4.6]
|
||||
matched route: daily conversation
|
||||
ranked models: ["digitalocean/anthropic-claude-sonnet-4-6","openai/gpt-5.5"]
|
||||
routed_to: digitalocean/anthropic-claude-sonnet-4-6
|
||||
|
||||
[complex reasoning -> expects OpenAI GPT 5.5]
|
||||
matched route: complex reasoning
|
||||
ranked models: ["openai/gpt-5.5","anthropic/claude-opus-4-7"]
|
||||
routed_to: openai/gpt-5.5
|
||||
|
||||
[code generation -> expects Anthropic Opus 4.7]
|
||||
matched route: code generation
|
||||
ranked models: ["anthropic/claude-opus-4-7","openai/gpt-5.5"]
|
||||
routed_to: anthropic/claude-opus-4-7
|
||||
```
|
||||
|
||||
The trick: every request is sent with `model: frontier.fast`, but Plano runs
|
||||
the orchestrator on every chat completion when `routing_preferences` are
|
||||
configured and overrides the `model` when a preference matches. The
|
||||
`frontier.fast` value is the explicit fallback used when no preference
|
||||
matches — so casual prompts stay on the cheap tier and only "real" reasoning
|
||||
or code work escalates to GPT 5.5 or Opus 4.7.
|
||||
|
||||
Want to watch the router decide live? In a second terminal:
|
||||
|
||||
```bash
|
||||
planoai trace
|
||||
```
|
||||
|
||||
You'll see the orchestrator's route selection for each request, including
|
||||
the matched preference, ranked models, and response time.
|
||||
|
||||
### Inspect the routing decision without burning a token
|
||||
|
||||
The `/routing/v1/...` endpoint returns the routing decision **without
|
||||
calling the upstream model**. Useful for previewing classification, building
|
||||
a UI, or wiring fallback logic into a custom client.
|
||||
|
||||
```bash
|
||||
curl -sS -X POST http://localhost:12000/routing/v1/chat/completions \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "frontier.fast",
|
||||
"messages": [{"role":"user","content":"refactor this function to remove the global"}]
|
||||
}' | jq .
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"models": ["anthropic/claude-opus-4-7", "openai/gpt-5.5"],
|
||||
"route": "code generation",
|
||||
"trace_id": "4bf92f3577b34da6a3ce929d0e0e4736",
|
||||
"pinned": false
|
||||
}
|
||||
```
|
||||
|
||||
Use `models[0]` as the primary; retry with `models[1]` on `429` / `5xx`.
|
||||
|
||||
### Pin a route across an agent loop with `X-Model-Affinity`
|
||||
|
||||
In a tool-using agent loop a single user task may produce a dozen LLM
|
||||
calls. Their topics drift (tool selection looks like code, summarising
|
||||
results looks like analysis), and the router would otherwise route each
|
||||
turn independently — bouncing between providers and invalidating their
|
||||
KV caches. Pin the decision once with an arbitrary session id:
|
||||
|
||||
```bash
|
||||
SID=$(uuidgen)
|
||||
|
||||
curl -sS -X POST http://localhost:12000/v1/chat/completions \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "X-Model-Affinity: $SID" \
|
||||
-d '{"model":"frontier.fast","messages":[{"role":"user","content":"start a refactor of the auth module"}]}'
|
||||
|
||||
# every subsequent call with the same SID skips routing and reuses the
|
||||
# cached model decision until the session TTL (10 min by default) expires.
|
||||
curl -sS -X POST http://localhost:12000/v1/chat/completions \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "X-Model-Affinity: $SID" \
|
||||
-d '{"model":"frontier.fast","messages":[{"role":"user","content":"now write the unit tests"}]}'
|
||||
```
|
||||
|
||||
TTL and cache size are configurable under `routing:` in `config.yaml`.
|
||||
|
||||
### Override the routing policy per-request
|
||||
|
||||
Sometimes one caller needs a different policy without redeploying the
|
||||
gateway. Send `routing_preferences` inline in the request body — it is
|
||||
stripped before forwarding upstream:
|
||||
|
||||
```bash
|
||||
curl -sS -X POST http://localhost:12000/v1/chat/completions \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "frontier.fast",
|
||||
"messages": [{"role":"user","content":"draft me a haiku about Postgres"}],
|
||||
"routing_preferences": [
|
||||
{
|
||||
"name": "creative writing",
|
||||
"description": "poetry, fiction, lyrical or playful prose",
|
||||
"models": ["anthropic/claude-opus-4-7", "openai/gpt-5.5"]
|
||||
}
|
||||
]
|
||||
}' | jq .
|
||||
```
|
||||
|
||||
### Pin a request to a specific tier (skip routing)
|
||||
|
||||
For prompts that don't match any preference description, the requested
|
||||
model is what serves the request. Pin to a tier by sending its alias
|
||||
directly:
|
||||
|
||||
```bash
|
||||
# DigitalOcean Sonnet 4.6 — fast and cheap
|
||||
curl -sS -X POST http://localhost:12000/v1/chat/completions \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"model":"frontier.fast","messages":[{"role":"user","content":"hello"}]}' | jq .
|
||||
|
||||
# OpenAI GPT 5.5
|
||||
curl -sS -X POST http://localhost:12000/v1/chat/completions \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"model":"frontier.smart","messages":[{"role":"user","content":"hello"}]}' | jq .
|
||||
|
||||
# Anthropic Opus 4.7
|
||||
curl -sS -X POST http://localhost:12000/v1/chat/completions \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"model":"frontier.max","messages":[{"role":"user","content":"hello"}]}' | jq .
|
||||
```
|
||||
|
||||
### From a Claude-native client (Anthropic Messages API)
|
||||
|
||||
Plano translates between OpenAI and Anthropic shapes, so the same gateway
|
||||
serves both client SDKs:
|
||||
|
||||
```bash
|
||||
curl -sS -X POST http://localhost:12000/v1/messages \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "anthropic-version: 2023-06-01" \
|
||||
-H "x-api-key: test-key" \
|
||||
-d '{
|
||||
"model": "frontier.max",
|
||||
"max_tokens": 512,
|
||||
"messages": [{"role":"user","content":"explain CAP theorem like I have a CS undergrad background"}]
|
||||
}' | jq .
|
||||
```
|
||||
|
||||
### From Claude Desktop
|
||||
|
||||
Once Plano is up, point Claude Desktop at it with one command:
|
||||
|
||||
```bash
|
||||
planoai launch claude-desktop --config config.yaml
|
||||
```
|
||||
|
||||
Claude Desktop will switch into third-party gateway mode pointed at
|
||||
`http://localhost:12000`, auto-discover the three model aliases via
|
||||
`/v1/models`, and let you pick `frontier.fast` / `.smart` / `.max` from the
|
||||
in-app model selector. To revert: `planoai launch claude-desktop --restore`.
|
||||
|
||||
### From Codex CLI
|
||||
|
||||
```bash
|
||||
planoai launch codex
|
||||
codex --model frontier.smart # or frontier.fast / frontier.max
|
||||
```
|
||||
|
||||
### From the Claude Code CLI
|
||||
|
||||
```bash
|
||||
planoai launch claude-cli
|
||||
```
|
||||
|
||||
The CLI will use Plano as its Anthropic endpoint; ask it for code-heavy work
|
||||
and it'll resolve to Opus 4.7 automatically.
|
||||
|
||||
## Config walkthrough
|
||||
|
||||
[`config.yaml`](config.yaml) declares each provider once, then declares
|
||||
**top-level routing preferences** that reference those providers by their
|
||||
full `<provider>/<model>` name. Each route owns an ordered `models` pool —
|
||||
primary first, fallbacks next.
|
||||
|
||||
```yaml
|
||||
model_providers:
|
||||
- model: digitalocean/anthropic-claude-sonnet-4-6
|
||||
access_key: $DO_API_KEY
|
||||
default: true # used when no preference matches
|
||||
- model: openai/gpt-5.5
|
||||
access_key: $OPENAI_API_KEY
|
||||
- model: anthropic/claude-opus-4-7
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
routing_preferences:
|
||||
- name: code generation
|
||||
description: writing new functions, classes, scripts, or boilerplate; implementing APIs; producing unit tests; refactoring code
|
||||
models:
|
||||
- anthropic/claude-opus-4-7 # primary
|
||||
- openai/gpt-5.5 # fallback on 429 / 5xx
|
||||
|
||||
- name: deep analysis
|
||||
description: long-form analysis, architecture review, security review, evaluating tradeoffs, structured critique
|
||||
models:
|
||||
- anthropic/claude-opus-4-7
|
||||
- openai/gpt-5.5
|
||||
|
||||
- name: complex reasoning
|
||||
description: multi-step reasoning, mathematical problem solving, structured planning, tool and function calling, data extraction
|
||||
models:
|
||||
- openai/gpt-5.5
|
||||
- anthropic/claude-opus-4-7
|
||||
|
||||
- name: daily conversation
|
||||
description: general chat, casual Q&A, summaries, drafting messages, quick rewrites
|
||||
models:
|
||||
- digitalocean/anthropic-claude-sonnet-4-6
|
||||
- openai/gpt-5.5
|
||||
|
||||
model_aliases:
|
||||
frontier.fast: { target: anthropic-claude-sonnet-4-6 }
|
||||
frontier.smart: { target: gpt-5.5 }
|
||||
frontier.max: { target: claude-opus-4-7 }
|
||||
```
|
||||
|
||||
A few things to call out:
|
||||
|
||||
1. **Preference *descriptions* drive routing accuracy.** They're embedded
|
||||
into the orchestrator's prompt; vague descriptions = vague routing.
|
||||
Following the [LLM Routing best practices](../../../docs/source/guides/llm_router.rst):
|
||||
- keep names specific and non-overlapping,
|
||||
- prefer noun-centric descriptors over imperative phrasing,
|
||||
- always include a generic "domain"-style route — here that's
|
||||
`daily conversation` pinned to the cheapest tier — so unmatched
|
||||
prompts still land somewhere deliberate.
|
||||
2. **Ordered `models`** is a candidate pool. `models[0]` is the primary;
|
||||
anything after it is a fallback that the client (or Plano's retry
|
||||
logic) tries on `429`/`5xx`. Mix providers across the pool so a single
|
||||
provider outage doesn't break the route.
|
||||
3. **The `default: true` provider** is the safety net for prompts the
|
||||
orchestrator can't confidently classify (e.g. one-word "thanks!").
|
||||
4. **Aliases** decouple your callers from provider/model strings. When the
|
||||
next Sonnet ships, change the alias target — every caller picks it up
|
||||
instantly.
|
||||
|
||||
## Tracing
|
||||
|
||||
`tracing.random_sampling: 100` in the config enables full OTLP tracing. Open
|
||||
a second terminal and run:
|
||||
|
||||
```bash
|
||||
planoai trace
|
||||
```
|
||||
|
||||
Each routed call shows up with the matched preference, ranked candidate
|
||||
pool, selected model, end-to-end latency, and per-stage spans (router
|
||||
decision, provider call, streaming chunks).
|
||||
|
||||
## Cost framing
|
||||
|
||||
A rough mix of 60% conversation, 30% reasoning, 10% deep code work — say
|
||||
1,000 prompts/day at 1k input + 500 output tokens each — illustrates why
|
||||
this layout pays off. Exact numbers depend on per-provider pricing the day
|
||||
you read this; the point is that calling Opus 4.7 for casual chat is wasted
|
||||
spend, and falling back to a small model on complex code is wasted output.
|
||||
Plano's job is to let each provider do what it's best at, and to fail over
|
||||
to the next entry in `models` when the primary throttles.
|
||||
|
||||
## Customizing
|
||||
|
||||
- **Swap a provider:** change the model string and `access_key`. e.g.
|
||||
point `frontier.smart` at `azure_openai/gpt-5.5` by replacing the OpenAI
|
||||
block with an Azure block, then update the matching entries inside
|
||||
`routing_preferences[].models`.
|
||||
- **Add fallbacks:** append more entries to any route's `models` list.
|
||||
The orchestrator returns the full ranked pool, and Plano (or your
|
||||
client) walks it on `429`/`5xx`.
|
||||
- **Add a new route:** add another entry under `routing_preferences` with
|
||||
a noun-centric description and its own `models` pool. No code change,
|
||||
no client change — every existing caller benefits immediately.
|
||||
- **Per-call policy override:** ship a `routing_preferences` field in the
|
||||
request body to override the config for that one call (see the curl
|
||||
example above).
|
||||
- **Self-host the orchestrator:** see
|
||||
[`../preference_based_routing/plano_config_local.yaml`](../preference_based_routing/plano_config_local.yaml)
|
||||
for an Ollama-backed orchestrator. Drop the `overrides.llm_routing_model`
|
||||
block into this config and you're off the hosted Plano-Orchestrator.
|
||||
|
||||
## Files
|
||||
|
||||
| File | Purpose |
|
||||
| --------------------------------------------- | ---------------------------------------------------------------------- |
|
||||
| [`config.yaml`](config.yaml) | Plano configuration (top-level routing_preferences + aliases) |
|
||||
| [`run_demo.sh`](run_demo.sh) | Bring the demo up/down (`./run_demo.sh [down]`) |
|
||||
| [`test.sh`](test.sh) | Per-prompt routing decision + chat completion across all three routes |
|
||||
| [`test.rest`](test.rest) | REST Client snippets for VS Code / IntelliJ |
|
||||
|
||||
## Stopping
|
||||
|
||||
```bash
|
||||
./run_demo.sh down # or: planoai down
|
||||
```
|
||||
103
demos/llm_routing/frontier_model_routing/config.yaml
Normal file
103
demos/llm_routing/frontier_model_routing/config.yaml
Normal file
|
|
@ -0,0 +1,103 @@
|
|||
version: v0.4.0
|
||||
|
||||
# Frontier-tier model routing across three providers.
|
||||
#
|
||||
# - DigitalOcean Sonnet 4.6 -> daily driver: balanced quality + cost
|
||||
# - OpenAI GPT 5.5 -> multimodal reasoning, tool use, math
|
||||
# - Anthropic Opus 4.7 -> top-tier reasoning, long-form analysis, code
|
||||
#
|
||||
# Plano's preference-aligned router (Plano-Orchestrator) inspects each prompt
|
||||
# and dispatches to the model whose top-level `routing_preferences` entry best
|
||||
# matches the user's intent. Each route owns an ordered `models` list:
|
||||
# `models[0]` is the primary; subsequent entries are fallbacks the client
|
||||
# (or Plano's retry logic) can try on `429`/`5xx` errors.
|
||||
|
||||
listeners:
|
||||
- type: model
|
||||
name: model_listener
|
||||
port: 12000
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Model providers (declared once, referenced by every route below)
|
||||
# ---------------------------------------------------------------------------
|
||||
# The `digitalocean/`, `openai/`, and `anthropic/` prefixes are recognized
|
||||
# natively by Plano — no `base_url` or provider interface override needed.
|
||||
model_providers:
|
||||
- model: digitalocean/anthropic-claude-4.6-sonnet
|
||||
access_key: $DO_API_KEY
|
||||
default: true # used when no routing preference matches
|
||||
|
||||
- model: digitalocean/openai-gpt-5.5
|
||||
access_key: $DO_API_KEY
|
||||
|
||||
- model: digitalocean/anthropic-claude-opus-4.7
|
||||
access_key: $DO_API_KEY
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Routing preferences (v0.4.0 top-level form)
|
||||
# ---------------------------------------------------------------------------
|
||||
# Best-practice notes (per the LLM Routing guide):
|
||||
# - Names should be specific, non-overlapping, and aligned with the
|
||||
# description so the orchestrator can disambiguate cleanly.
|
||||
# - Descriptions are noun-centric phrases describing *the work*, not
|
||||
# conversational instructions.
|
||||
# - Always include a generic "domain" route so prompts that don't match a
|
||||
# specific action still land on a deliberate model — here that's
|
||||
# "daily conversation" pinned to the cheapest tier.
|
||||
# - `models` is an ordered candidate pool; entry 0 is primary and entries
|
||||
# 1..n are fallbacks (clients retry on 429/5xx).
|
||||
routing_preferences:
|
||||
- name: code generation
|
||||
description: writing new functions, classes, scripts, or boilerplate; implementing APIs; producing unit tests; refactoring code
|
||||
models:
|
||||
- digitalocean/anthropic-claude-opus-4.7 # primary: top-tier code quality
|
||||
- digitalocean/openai-gpt-5.5 # fallback if Opus is rate-limited / down
|
||||
|
||||
- name: deep analysis
|
||||
description: long-form analysis, architecture review, security review, evaluating tradeoffs, structured critique
|
||||
models:
|
||||
- digitalocean/anthropic-claude-opus-4.7
|
||||
- digitalocean/openai-gpt-5.5
|
||||
|
||||
- name: complex reasoning
|
||||
description: multi-step reasoning, mathematical problem solving, structured planning, tool and function calling, data extraction
|
||||
models:
|
||||
- digitalocean/openai-gpt-5.5 # primary: strong reasoning + tool use
|
||||
- digitalocean/anthropic-claude-opus-4.7
|
||||
|
||||
- name: daily conversation
|
||||
description: general chat, casual Q&A, summaries, drafting messages, quick rewrites, day-to-day requests where speed and cost matter
|
||||
models:
|
||||
- digitalocean/anthropic-claude-4.6-sonnet
|
||||
- digitalocean/openai-gpt-5.5
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Aliases — stable, human-friendly handles for clients
|
||||
# ---------------------------------------------------------------------------
|
||||
# Clients can pin to a tier without thinking about the underlying provider,
|
||||
# and the underlying model can change without breaking callers.
|
||||
model_aliases:
|
||||
# Daily driver -> Claude Sonnet 4.6
|
||||
frontier.fast:
|
||||
target: digitalocean/anthropic-claude-4.6-sonnet
|
||||
|
||||
# Reasoning + tool calling -> OpenAI GPT 5.5
|
||||
frontier.smart:
|
||||
target: digitalocean/openai-gpt-5.5
|
||||
|
||||
# Code + deep analysis -> Anthropic Opus 4.7
|
||||
frontier.max:
|
||||
target: digitalocean/anthropic-claude-opus-4.7
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Model affinity for agentic loops
|
||||
# ---------------------------------------------------------------------------
|
||||
# In a tool-using agent loop, successive prompts can look like different
|
||||
# routes (tool selection ~ code, reasoning ~ analysis), causing the router
|
||||
# to flip between models mid-session. Clients send `X-Model-Affinity: <id>`
|
||||
# and Plano caches the routing decision for the session TTL below.
|
||||
routing:
|
||||
session_ttl_seconds: 600
|
||||
|
||||
tracing:
|
||||
random_sampling: 100
|
||||
63
demos/llm_routing/frontier_model_routing/run_demo.sh
Executable file
63
demos/llm_routing/frontier_model_routing/run_demo.sh
Executable file
|
|
@ -0,0 +1,63 @@
|
|||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Frontier model routing demo: DigitalOcean Sonnet 4.6 + GPT 5.5 + Opus 4.7
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
start_demo() {
|
||||
if [ -f ".env" ]; then
|
||||
echo ".env file already exists. Skipping creation."
|
||||
else
|
||||
missing=()
|
||||
[ -z "$DO_API_KEY" ] && missing+=("DO_API_KEY")
|
||||
[ -z "$OPENAI_API_KEY" ] && missing+=("OPENAI_API_KEY")
|
||||
[ -z "$ANTHROPIC_API_KEY" ] && missing+=("ANTHROPIC_API_KEY")
|
||||
|
||||
if [ ${#missing[@]} -ne 0 ]; then
|
||||
echo "Error: the following environment variables are not set:"
|
||||
for key in "${missing[@]}"; do echo " - $key"; done
|
||||
echo
|
||||
echo "Set them in your shell, then re-run this script. Example:"
|
||||
echo " export DO_API_KEY=... # from https://cloud.digitalocean.com/account/api/tokens"
|
||||
echo " export OPENAI_API_KEY=... # from https://platform.openai.com/api-keys"
|
||||
echo " export ANTHROPIC_API_KEY=... # from https://console.anthropic.com/"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Creating .env file..."
|
||||
{
|
||||
echo "DO_API_KEY=$DO_API_KEY"
|
||||
echo "OPENAI_API_KEY=$OPENAI_API_KEY"
|
||||
echo "ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY"
|
||||
} > .env
|
||||
echo ".env file created."
|
||||
fi
|
||||
|
||||
echo "Starting Plano with config.yaml..."
|
||||
planoai up config.yaml
|
||||
|
||||
cat <<'EOF'
|
||||
|
||||
Plano is up. Try the demo with:
|
||||
./test.sh # runs three sample prompts and shows which model handled each
|
||||
planoai trace # live router decisions in a separate terminal
|
||||
|
||||
Or call any model directly using its alias:
|
||||
curl -sS -X POST http://localhost:12000/v1/chat/completions \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"model":"frontier.max","messages":[{"role":"user","content":"hello"}]}' | jq .
|
||||
|
||||
EOF
|
||||
}
|
||||
|
||||
stop_demo() {
|
||||
echo "Stopping Plano..."
|
||||
planoai down
|
||||
}
|
||||
|
||||
if [ "$1" == "down" ]; then
|
||||
stop_demo
|
||||
else
|
||||
start_demo
|
||||
fi
|
||||
212
demos/llm_routing/frontier_model_routing/test.rest
Normal file
212
demos/llm_routing/frontier_model_routing/test.rest
Normal file
|
|
@ -0,0 +1,212 @@
|
|||
### Frontier model routing — REST Client / VS Code REST snippets
|
||||
###
|
||||
### Plano runs the preference-aligned orchestrator on every chat request
|
||||
### when top-level `routing_preferences` are configured. The `model` field
|
||||
### in the body is the *fallback* if no preference matches; pinning it to
|
||||
### `frontier.fast` gives a cheap default. Each route owns an ordered
|
||||
### `models` pool — primary first, fallbacks next — that the client (or
|
||||
### Plano's retry logic) walks on 429/5xx.
|
||||
|
||||
@endpoint = http://localhost:12000
|
||||
|
||||
### -------------------------------------------------------------------------
|
||||
### 1. Decision-only endpoint: see what the router would pick (no upstream call)
|
||||
### Returns: { "models": [...ranked pool...], "route": "...", "trace_id": "...", "pinned": false }
|
||||
### -------------------------------------------------------------------------
|
||||
POST {{endpoint}}/routing/v1/chat/completions HTTP/1.1
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "frontier.fast",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Refactor this Rust function to remove the global mutable state and add unit tests."
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
### -------------------------------------------------------------------------
|
||||
### 2. Routed by intent: daily conversation -> DigitalOcean Sonnet 4.6
|
||||
### -------------------------------------------------------------------------
|
||||
POST {{endpoint}}/v1/chat/completions HTTP/1.1
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "frontier.fast",
|
||||
"max_tokens": 256,
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Hey! Give me three fun facts about octopuses I can drop into a dinner conversation."
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
### -------------------------------------------------------------------------
|
||||
### 3. Routed by intent: complex reasoning -> OpenAI GPT 5.5
|
||||
### -------------------------------------------------------------------------
|
||||
POST {{endpoint}}/v1/chat/completions HTTP/1.1
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "frontier.fast",
|
||||
"max_tokens": 512,
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "A train leaves Chicago at 9:14am traveling 72 mph. Another leaves St Louis at 10:02am traveling 65 mph toward Chicago. The cities are 297 miles apart. Walk through the math step by step and give me the time and place they meet."
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
### -------------------------------------------------------------------------
|
||||
### 4. Routed by intent: code generation -> Anthropic Opus 4.7
|
||||
### -------------------------------------------------------------------------
|
||||
POST {{endpoint}}/v1/chat/completions HTTP/1.1
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "frontier.fast",
|
||||
"max_tokens": 800,
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Write a Rust function that takes a Vec<u8> of UTF-8 bytes and returns a HashMap<char, usize> with grapheme cluster counts. Include unit tests and handle invalid UTF-8 gracefully."
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
### -------------------------------------------------------------------------
|
||||
### 5. Routed by intent: deep analysis -> Anthropic Opus 4.7
|
||||
### -------------------------------------------------------------------------
|
||||
POST {{endpoint}}/v1/chat/completions HTTP/1.1
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "frontier.fast",
|
||||
"max_tokens": 600,
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Review this Postgres schema for normalization issues:\nCREATE TABLE orders (id SERIAL PRIMARY KEY, customer_email TEXT, customer_name TEXT, items_json JSONB);"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
### -------------------------------------------------------------------------
|
||||
### 6. Per-request routing override (config-defined preferences are bypassed
|
||||
### for this single call). The `routing_preferences` field is stripped
|
||||
### before the upstream provider sees the body.
|
||||
### -------------------------------------------------------------------------
|
||||
POST {{endpoint}}/v1/chat/completions HTTP/1.1
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "frontier.fast",
|
||||
"max_tokens": 256,
|
||||
"messages": [
|
||||
{ "role": "user", "content": "Draft me a haiku about Postgres replication slots." }
|
||||
],
|
||||
"routing_preferences": [
|
||||
{
|
||||
"name": "creative writing",
|
||||
"description": "poetry, fiction, lyrical or playful prose",
|
||||
"models": ["anthropic/claude-opus-4-7", "openai/gpt-5.5"]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
### -------------------------------------------------------------------------
|
||||
### 7. Pin a routing decision across an agentic loop with X-Model-Affinity.
|
||||
### The first call routes normally and caches the decision. Subsequent
|
||||
### calls with the same id reuse the cached model until the session TTL
|
||||
### (default 10 min) expires.
|
||||
### -------------------------------------------------------------------------
|
||||
POST {{endpoint}}/v1/chat/completions HTTP/1.1
|
||||
Content-Type: application/json
|
||||
X-Model-Affinity: agent-session-7f3e
|
||||
|
||||
{
|
||||
"model": "frontier.fast",
|
||||
"max_tokens": 256,
|
||||
"messages": [
|
||||
{ "role": "user", "content": "Plan a small refactor of an auth module — order of operations?" }
|
||||
]
|
||||
}
|
||||
|
||||
### Same affinity id — reuses cached routing decision (no re-classification)
|
||||
POST {{endpoint}}/v1/chat/completions HTTP/1.1
|
||||
Content-Type: application/json
|
||||
X-Model-Affinity: agent-session-7f3e
|
||||
|
||||
{
|
||||
"model": "frontier.fast",
|
||||
"max_tokens": 256,
|
||||
"messages": [
|
||||
{ "role": "user", "content": "Now write the unit tests for step one." }
|
||||
]
|
||||
}
|
||||
|
||||
### -------------------------------------------------------------------------
|
||||
### 8. Request DigitalOcean Sonnet 4.6 via alias (serves the call when no routing preference matches)
|
||||
### -------------------------------------------------------------------------
|
||||
POST {{endpoint}}/v1/chat/completions HTTP/1.1
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "frontier.fast",
|
||||
"max_tokens": 128,
|
||||
"messages": [
|
||||
{ "role": "user", "content": "One sentence: who painted the ceiling of the Sistine Chapel?" }
|
||||
]
|
||||
}
|
||||
|
||||
### -------------------------------------------------------------------------
|
||||
### 9. Pin to OpenAI GPT 5.5 via alias
|
||||
### -------------------------------------------------------------------------
|
||||
POST {{endpoint}}/v1/chat/completions HTTP/1.1
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "frontier.smart",
|
||||
"max_tokens": 256,
|
||||
"messages": [
|
||||
{ "role": "user", "content": "Outline a 30/60/90 day plan for a new platform engineering hire." }
|
||||
]
|
||||
}
|
||||
|
||||
### -------------------------------------------------------------------------
|
||||
### 10. Pin to Anthropic Opus 4.7 via alias
|
||||
### -------------------------------------------------------------------------
|
||||
POST {{endpoint}}/v1/chat/completions HTTP/1.1
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "frontier.max",
|
||||
"max_tokens": 600,
|
||||
"messages": [
|
||||
{ "role": "user", "content": "Review this Postgres schema for normalization issues:\nCREATE TABLE orders (id SERIAL PRIMARY KEY, customer_email TEXT, customer_name TEXT, items_json JSONB);" }
|
||||
]
|
||||
}
|
||||
|
||||
### -------------------------------------------------------------------------
|
||||
### 11. Anthropic Messages API (Claude-native client) -> routed by intent
|
||||
### -------------------------------------------------------------------------
|
||||
POST {{endpoint}}/v1/messages HTTP/1.1
|
||||
Content-Type: application/json
|
||||
anthropic-version: 2023-06-01
|
||||
x-api-key: test-key
|
||||
|
||||
{
|
||||
"model": "frontier.fast",
|
||||
"max_tokens": 256,
|
||||
"messages": [
|
||||
{ "role": "user", "content": "Recommend a senior engineering reading list with three picks and one sentence each." }
|
||||
]
|
||||
}
|
||||
|
||||
### -------------------------------------------------------------------------
|
||||
### 12. Inspect available models (auto-discovered for Claude Desktop / clients)
|
||||
### -------------------------------------------------------------------------
|
||||
GET {{endpoint}}/v1/models HTTP/1.1
|
||||
119
demos/llm_routing/frontier_model_routing/test.sh
Executable file
119
demos/llm_routing/frontier_model_routing/test.sh
Executable file
|
|
@ -0,0 +1,119 @@
|
|||
#!/bin/bash
|
||||
# ---------------------------------------------------------------------------
|
||||
# Frontier Model Routing demo — driver script
|
||||
#
|
||||
# For each of four intent-biased prompts we:
|
||||
# 1. Hit POST /routing/v1/chat/completions (Plano's decision-only endpoint)
|
||||
# to print the matched route name and the ranked candidate pool.
|
||||
# 2. Hit POST /v1/chat/completions to actually run the request and print
|
||||
# the model that handled it.
|
||||
#
|
||||
# Plano runs the orchestrator on every chat completion when top-level
|
||||
# `routing_preferences` are configured. The `model` field in the request is
|
||||
# the *fallback* used when no preference matches — we pin it to
|
||||
# `frontier.fast` so unmatched prompts land on the cheapest tier.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Abort on the first failing command. `pipefail` is required for that to mean
# anything here: every request below runs as a `curl | jq | sed` pipeline, and
# without it the pipeline's status is that of the trailing `sed`, so a failed
# curl or jq would go unnoticed and the script would still report "Done.".
set -eo pipefail

# Base URL of the gateway; override by exporting GATEWAY before running.
GATEWAY=${GATEWAY:-http://localhost:12000}
# Decision-only endpoint and the regular chat-completions endpoint.
DECISION_ENDPOINT="$GATEWAY/routing/v1/chat/completions"
CHAT_ENDPOINT="$GATEWAY/v1/chat/completions"
|
||||
|
||||
# ask LABEL PROMPT
#
# Prints a banner for LABEL, then sends the same JSON body built from PROMPT
# to two endpoints:
#   1. $DECISION_ENDPOINT - prints the matched route, ranked models and the
#      `pinned` field from the response.
#   2. $CHAT_ENDPOINT     - prints the model that answered and its reply.
ask() {
  local heading="$1"
  local question="$2"

  # Build the request body with jq so the prompt is JSON-escaped correctly.
  # Declaration and assignment stay separate so a jq failure is not masked
  # by `local`.
  local payload
  payload="$(jq -n --arg content "$question" '{
    model: "frontier.fast",
    max_tokens: 256,
    messages: [{role: "user", content: $content}]
  }')"

  # jq projections applied to each response.
  local decision_filter='{
    matched_route: .route,
    ranked_models: .models,
    pinned: .pinned
  }'
  local completion_filter='{
    routed_to: .model,
    reply: .choices[0].message.content
  }'

  printf '%s\n' \
    "" \
    "==========================================================" \
    "[$heading]" \
    "prompt: $question" \
    "----------------------------------------------------------"

  # Step 1: decision-only - what would the router pick?
  printf '%s\n' " routing decision:"
  curl --silent --show-error --request POST "$DECISION_ENDPOINT" \
    --header "Content-Type: application/json" \
    --data "$payload" \
    | jq "$decision_filter" \
    | sed 's/^/ /'

  # Step 2: actually run the request through the chosen model.
  printf '%s\n' " chat completion:"
  curl --silent --show-error --request POST "$CHAT_ENDPOINT" \
    --header "Content-Type: application/json" \
    --data "$payload" \
    | jq "$completion_filter" \
    | sed 's/^/ /'
}
|
||||
|
||||
# One prompt per routing intent. Each label passed to `ask` names the model
# the demo expects the router to select for that prompt.
casual_prompt="Hey! Give me three fun facts about octopuses I can drop into a dinner conversation."

reasoning_prompt="A train leaves Chicago at 9:14am traveling 72 mph. Another leaves St Louis at 10:02am traveling 65 mph toward Chicago. The cities are 297 miles apart. Walk through the math step by step and give me the time and place they meet."

codegen_prompt="Write a Rust function that takes a Vec<u8> of UTF-8 bytes and returns a HashMap<char, usize> with grapheme cluster counts. Include unit tests and handle invalid UTF-8 gracefully."

# Multi-line prompt: the embedded newlines are part of the message content.
analysis_prompt="Review this Postgres schema for normalization, indexing, and migration risk. Give me a prioritized list of issues:
CREATE TABLE orders (
id SERIAL PRIMARY KEY,
customer_email TEXT,
customer_name TEXT,
items_json JSONB,
total NUMERIC,
created_at TIMESTAMPTZ DEFAULT now()
);"

ask "daily conversation -> expects DigitalOcean Sonnet 4.6" "$casual_prompt"
ask "complex reasoning -> expects OpenAI GPT 5.5" "$reasoning_prompt"
ask "code generation -> expects Anthropic Opus 4.7" "$codegen_prompt"
ask "deep analysis -> expects Anthropic Opus 4.7" "$analysis_prompt"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bonus: pin a routing decision across an agentic loop with X-Model-Affinity.
|
||||
# Both calls hit the same gateway with the same affinity id, so the second
|
||||
# call reuses the first call's routing decision instead of reclassifying.
|
||||
# ---------------------------------------------------------------------------
|
||||
printf '%s\n' \
  "" \
  "==========================================================" \
  "[bonus: model affinity across two turns of an agent loop]" \
  "----------------------------------------------------------"

# Affinity id shared by both turns below: epoch seconds plus $RANDOM
# (presumably so repeated runs do not reuse an earlier id).
SID="demo-$(date +%s)-$RANDOM"
printf '%s\n' " X-Model-Affinity: $SID"
|
||||
|
||||
# turn LABEL PROMPT
#
# Prints LABEL, posts PROMPT to $CHAT_ENDPOINT with the X-Model-Affinity
# header set to the global $SID, and prints only the model that served the
# request.
turn() {
  local heading="$1"
  local question="$2"

  printf '%s\n' " $heading:"

  # The payload is built inline, in the argument position, exactly as the
  # request is issued; jq handles JSON-escaping of the prompt.
  curl --silent --show-error --request POST "$CHAT_ENDPOINT" \
    --header "Content-Type: application/json" \
    --header "X-Model-Affinity: $SID" \
    --data "$(jq -n --arg content "$question" '{
      model: "frontier.fast",
      max_tokens: 128,
      messages: [{role: "user", content: $content}]
    }')" \
    | jq '{routed_to: .model}' \
    | sed 's/^/ /'
}
|
||||
|
||||
# Two requests carrying the same affinity id: the first establishes the
# routing decision, the second is expected to reuse it.
turn "turn 1 (sets affinity)" \
  "Plan a small refactor of an auth module — what's the order of operations?"
turn "turn 2 (reuses decision)" \
  "Now write the unit tests for step one."

# Closing banner with a pointer to live trace inspection.
printf '%s\n' \
  "" \
  "==========================================================" \
  "Done. Want to inspect routing decisions live? Run: planoai trace" \
  "=========================================================="
|
||||
Loading…
Add table
Add a link
Reference in a new issue