feat(claude-cli): add local Claude Code CLI provider bridge

Spawn the local `claude` binary as a subprocess and expose it as an
Anthropic Messages-compatible provider. Hosted in brightstaff
(`CLAUDE_CLI_LISTEN_ADDR`), with session reuse, idle TTL, and watchdog.

User-facing surface is `model_providers: [{ model: claude-cli/* }]` —
the Python CLI auto-fills name/provider_interface/base_url/access_key
and the launcher (native + supervisord) enables the bridge listener
only when at least one claude-cli provider is present.
This commit is contained in:
Spherrrical 2026-05-04 12:57:53 -07:00
parent b71a555f19
commit 9fdfeb7cbf
26 changed files with 2847 additions and 2 deletions

View file

@ -39,11 +39,64 @@ CHATGPT_API_BASE = "https://chatgpt.com/backend-api/codex"
CHATGPT_DEFAULT_ORIGINATOR = "codex_cli_rs"
CHATGPT_DEFAULT_USER_AGENT = "codex_cli_rs/0.0.0 (Unknown 0; unknown) unknown"
# Local-only bridge that runs Claude Code CLI as a subprocess. Hosted by
# brightstaff on this loopback address; the Python CLI auto-fills the matching
# provider fields below and tells the launcher to enable the bridge.
#
# NOTE: the host:port literal is repeated in native_runner.py
# (CLAUDE_CLI_DEFAULT_LISTEN_ADDR) and in the supervisord brightstaff command;
# keep all of them in sync when changing the default port.

# URL written into `base_url` of a claude-cli provider that does not set one.
CLAUDE_CLI_DEFAULT_BASE_URL = "http://127.0.0.1:14001"
# host:port form of the same default address.
CLAUDE_CLI_DEFAULT_LISTEN_ADDR = "127.0.0.1:14001"
# Fallback provider `name` when the entry has neither `name` nor `model`.
CLAUDE_CLI_DEFAULT_NAME = "claude-cli/*"
# Dummy `access_key` injected when the entry has no key and no passthrough_auth.
CLAUDE_CLI_DEFAULT_ACCESS_KEY_PLACEHOLDER = "claude-cli-local"
SUPPORTED_PROVIDERS = (
SUPPORTED_PROVIDERS_WITHOUT_BASE_URL + SUPPORTED_PROVIDERS_WITH_BASE_URL
)
def _is_claude_cli_provider(model_provider):
    """Tell whether a provider entry targets the local claude-cli bridge.

    An entry qualifies when its `model` or `name` lives in the
    `claude-cli/...` namespace, or when its `provider_interface` is exactly
    `claude-cli`. Missing/None fields are treated as empty strings and
    surrounding whitespace is ignored.
    """
    # Normalize all three fields up front (same eager evaluation as a
    # field-by-field read) so a malformed value fails consistently.
    model, name, interface = [
        (model_provider.get(field) or "").strip()
        for field in ("model", "name", "provider_interface")
    ]
    if any(value.startswith("claude-cli/") for value in (model, name)):
        return True
    return interface == "claude-cli"
def _apply_claude_cli_autofill(model_provider):
    """Populate the implicit fields of a `claude-cli/*` provider entry in place.

    Lets the user write only `model: claude-cli/*` (or any `claude-cli/...`
    model): the entry then gets a `name`, the `claude-cli`
    provider_interface, a `base_url` pointing at the local brightstaff
    bridge, and a placeholder `access_key` so downstream validation does not
    reject it. Fields the user already set (to a truthy value) are never
    overwritten, and `model` itself is left untouched.

    Returns True iff the entry was recognised as a claude-cli provider, so
    the caller can flip the launcher's `needs_claude_cli_runtime` flag;
    returns False (and changes nothing) otherwise.
    """
    if not _is_claude_cli_provider(model_provider):
        return False

    implicit_fields = (
        ("name", model_provider.get("model") or CLAUDE_CLI_DEFAULT_NAME),
        ("provider_interface", "claude-cli"),
        ("base_url", CLAUDE_CLI_DEFAULT_BASE_URL),
    )
    for field, fallback in implicit_fields:
        if not model_provider.get(field):
            model_provider[field] = fallback

    # Leave passthrough_auth users alone. The bridge ignores the access key
    # anyway (it relies on the host's `claude auth login` keychain), so a
    # placeholder is harmless for everyone else.
    has_credentials = model_provider.get("access_key") or model_provider.get(
        "passthrough_auth"
    )
    if not has_credentials:
        model_provider["access_key"] = CLAUDE_CLI_DEFAULT_ACCESS_KEY_PLACEHOLDER

    return True
def get_endpoint_and_port(endpoint, protocol):
endpoint_tokens = endpoint.split(":")
if len(endpoint_tokens) > 1:
@ -329,6 +382,12 @@ def validate_and_render_schema():
name = listener.get("name", None)
for model_provider in listener.get("model_providers", []):
# Auto-fill the implicit fields for `claude-cli/*` providers
# before the rest of the loop runs validation. This makes
# `model_providers: [{model: claude-cli/*}]` a fully-formed
# entry by the time we reach the wildcard checks below.
_apply_claude_cli_autofill(model_provider)
if model_provider.get("usage", None):
llms_with_usage.append(model_provider["name"])
if model_provider.get("name") in model_provider_name_set:

View file

@ -22,6 +22,61 @@ from planoai.utils import find_repo_root, getLogger
log = getLogger(__name__)
# Listen address injected as CLAUDE_CLI_LISTEN_ADDR when a claude-cli provider
# is present and no address was supplied.
CLAUDE_CLI_DEFAULT_LISTEN_ADDR = "127.0.0.1:14001"
# Env vars the user can set to customize the bridge. We always honor a
# pre-set CLAUDE_CLI_LISTEN_ADDR (so power users can move the listener)
# but otherwise inject the default whenever a claude-cli provider is
# detected in the rendered config.
# Each name listed here is copied from os.environ into the brightstaff
# environment by _apply_claude_cli_env when it is not already present there.
CLAUDE_CLI_PASSTHROUGH_ENV = (
"CLAUDE_CLI_LISTEN_ADDR",
"CLAUDE_CLI_BIN",
"CLAUDE_CLI_PERMISSION_MODE",
"CLAUDE_CLI_SESSION_TTL_SECS",
"CLAUDE_CLI_WATCHDOG_SECS",
"CLAUDE_CLI_MAX_SESSIONS",
)
def _needs_claude_cli_runtime(plano_config_rendered_path) -> bool:
    """Report whether the rendered config uses the claude-cli bridge.

    Returns True iff at least one top-level `model_providers` entry has
    `provider_interface: claude-cli`. The config generator auto-fills that
    field for every `claude-cli/*` model entry, so checking this single
    field is enough regardless of how the user wrote the provider.

    A missing config file is treated as "bridge not needed" (False).
    """
    import yaml

    try:
        with open(plano_config_rendered_path, "r") as config_file:
            config = yaml.safe_load(config_file) or {}
    except FileNotFoundError:
        return False

    providers = config.get("model_providers") or []
    # `entry or {}` tolerates null list items in the YAML.
    return any(
        (entry or {}).get("provider_interface") == "claude-cli"
        for entry in providers
    )
def _apply_claude_cli_env(brightstaff_env, plano_config_rendered_path):
    """Enable the claude-cli bridge in brightstaff's environment when needed.

    If the rendered config opts into the claude-cli bridge, make sure
    `CLAUDE_CLI_LISTEN_ADDR` is set in `brightstaff_env` (mutated in place)
    so the bridge listener actually starts.

    Precedence for every `CLAUDE_CLI_*` variable, highest first:
      1. a value already present in `brightstaff_env`,
      2. a value from the caller's process environment (`os.environ`),
      3. for `CLAUDE_CLI_LISTEN_ADDR` only, the built-in default.

    Returns True iff the bridge was enabled; False (and no mutation) when
    the rendered config has no claude-cli provider.
    """
    if not _needs_claude_cli_runtime(plano_config_rendered_path):
        return False

    # Copy user overrides from the process environment *before* falling back
    # to the default address. Doing it the other way round would put the
    # default into `brightstaff_env` first and make the passthrough skip a
    # user-provided CLAUDE_CLI_LISTEN_ADDR.
    for key in CLAUDE_CLI_PASSTHROUGH_ENV:
        if key in os.environ and key not in brightstaff_env:
            brightstaff_env[key] = os.environ[key]

    # Missing or empty address: inject the default so the listener starts.
    if not brightstaff_env.get("CLAUDE_CLI_LISTEN_ADDR"):
        brightstaff_env["CLAUDE_CLI_LISTEN_ADDR"] = CLAUDE_CLI_DEFAULT_LISTEN_ADDR

    log.info(
        "claude-cli bridge enabled: brightstaff will listen on %s",
        brightstaff_env["CLAUDE_CLI_LISTEN_ADDR"],
    )
    return True
def _find_config_dir():
"""Locate the directory containing plano_config_schema.yaml and envoy.template.yaml.
@ -197,6 +252,11 @@ def start_native(
for key, value in env.items():
brightstaff_env[key] = value
# Enable the claude-cli bridge if the rendered config asks for it. Done
# after `env.items()` is merged so user-set CLAUDE_CLI_* env vars take
# precedence over the auto-injected defaults.
_apply_claude_cli_env(brightstaff_env, plano_config_rendered_path)
brightstaff_pid = _daemon_exec(
[brightstaff_path],
brightstaff_env,

View file

@ -3,8 +3,11 @@ import pytest
import yaml
from unittest import mock
from planoai.config_generator import (
validate_and_render_schema,
CLAUDE_CLI_DEFAULT_BASE_URL,
_apply_claude_cli_autofill,
_is_claude_cli_provider,
migrate_inline_routing_preferences,
validate_and_render_schema,
)
@ -738,3 +741,64 @@ model_providers:
migrate_inline_routing_preferences(config_yaml)
assert config_yaml["version"] == "v0.5.0"
def test_claude_cli_autofill_wildcard_provider():
    """A bare `model: claude-cli/*` entry is detected and fully auto-filled."""
    entry = {"model": "claude-cli/*"}

    assert _is_claude_cli_provider(entry) is True
    assert _apply_claude_cli_autofill(entry) is True

    assert entry["name"] == "claude-cli/*"
    assert entry["provider_interface"] == "claude-cli"
    assert entry["base_url"] == CLAUDE_CLI_DEFAULT_BASE_URL
    assert entry["access_key"] == "claude-cli-local"
    # `model` itself must not be rewritten — the wildcard expansion happens
    # downstream and we want to preserve the user's intent.
    assert entry["model"] == "claude-cli/*"
def test_claude_cli_autofill_specific_model():
    """A concrete `claude-cli/<model>` entry is filled and keeps extra keys."""
    entry = {"model": "claude-cli/sonnet", "default": True}

    applied = _apply_claude_cli_autofill(entry)

    assert applied is True
    assert entry["name"] == "claude-cli/sonnet"
    assert entry["provider_interface"] == "claude-cli"
    assert entry["base_url"] == CLAUDE_CLI_DEFAULT_BASE_URL
    # Existing fields like `default` survive.
    assert entry["default"] is True
def test_claude_cli_autofill_does_not_override_user_fields():
    """Values the user set explicitly win over the auto-filled defaults."""
    entry = dict(
        model="claude-cli/*",
        name="custom-name",
        base_url="http://192.0.2.10:9000",
        access_key="do-not-touch",
    )

    assert _apply_claude_cli_autofill(entry) is True

    assert entry["name"] == "custom-name"
    assert entry["base_url"] == "http://192.0.2.10:9000"
    assert entry["access_key"] == "do-not-touch"
    # provider_interface still gets injected because it was missing.
    assert entry["provider_interface"] == "claude-cli"
def test_claude_cli_autofill_skips_non_matching_providers():
    """Providers outside the claude-cli namespace are left untouched."""
    entry = {"model": "openai/gpt-4o"}

    assert _is_claude_cli_provider(entry) is False
    assert _apply_claude_cli_autofill(entry) is False
    assert "provider_interface" not in entry
def test_claude_cli_autofill_passthrough_auth_skips_access_key():
    """With passthrough_auth set, no placeholder access_key is injected."""
    entry = {"model": "claude-cli/*", "passthrough_auth": True}

    applied = _apply_claude_cli_autofill(entry)

    assert applied is True
    # Honor passthrough_auth: do not inject a placeholder access_key.
    assert "access_key" not in entry
    assert entry["passthrough_auth"] is True
def test_claude_cli_autofill_detects_via_provider_interface_only():
    """`provider_interface: claude-cli` alone is enough to trigger autofill."""
    entry = {"model": "sonnet", "provider_interface": "claude-cli"}

    assert _is_claude_cli_provider(entry) is True
    assert _apply_claude_cli_autofill(entry) is True

    # The name falls back to the (un-prefixed) model the user wrote.
    assert entry["name"] == "sonnet"
    assert entry["base_url"] == CLAUDE_CLI_DEFAULT_BASE_URL

View file

@ -0,0 +1,112 @@
"""Unit tests for the claude-cli env wiring in native_runner.py."""
import os
import textwrap
from planoai.native_runner import (
CLAUDE_CLI_DEFAULT_LISTEN_ADDR,
_apply_claude_cli_env,
_needs_claude_cli_runtime,
)
def _write(path, body):
    """Write `body` (dedented, leading whitespace stripped) to `path`.

    Returns the path as a string so it can be passed straight to the
    functions under test.
    """
    content = textwrap.dedent(body).lstrip()
    path.write_text(content)
    return str(path)
def test_needs_claude_cli_runtime_detects_provider(tmp_path):
    """A rendered config with a claude-cli provider requires the bridge."""
    config_yaml = """
        version: v0.4.0
        listeners: []
        model_providers:
          - name: claude-cli/*
            model: '*'
            provider_interface: claude-cli
            base_url: http://127.0.0.1:14001
        """
    config_path = _write(tmp_path / "rendered.yaml", config_yaml)

    assert _needs_claude_cli_runtime(config_path) is True
def test_needs_claude_cli_runtime_skips_other_providers(tmp_path):
    """A config with only non-claude-cli providers does not need the bridge."""
    config_yaml = """
        version: v0.4.0
        model_providers:
          - name: openai/gpt-4o
            model: gpt-4o
            provider_interface: openai
        """
    config_path = _write(tmp_path / "rendered.yaml", config_yaml)

    assert _needs_claude_cli_runtime(config_path) is False
def test_needs_claude_cli_runtime_handles_missing_file(tmp_path):
    """A missing rendered config is treated as "bridge not needed"."""
    missing_path = tmp_path / "does-not-exist.yaml"

    assert _needs_claude_cli_runtime(str(missing_path)) is False
def test_apply_claude_cli_env_injects_default_addr(tmp_path, monkeypatch):
    """With nothing pre-set, the default listen address is injected."""
    config_yaml = """
        model_providers:
          - provider_interface: claude-cli
            model: '*'
        """
    config_path = _write(tmp_path / "rendered.yaml", config_yaml)
    for var in ("CLAUDE_CLI_LISTEN_ADDR", "CLAUDE_CLI_BIN"):
        monkeypatch.delenv(var, raising=False)

    child_env = {}
    enabled = _apply_claude_cli_env(child_env, config_path)

    assert enabled is True
    assert child_env["CLAUDE_CLI_LISTEN_ADDR"] == CLAUDE_CLI_DEFAULT_LISTEN_ADDR
def test_apply_claude_cli_env_honors_user_override(tmp_path, monkeypatch):
    """A listen address already in the child env is never overwritten."""
    config_yaml = """
        model_providers:
          - provider_interface: claude-cli
            model: '*'
        """
    config_path = _write(tmp_path / "rendered.yaml", config_yaml)
    monkeypatch.delenv("CLAUDE_CLI_LISTEN_ADDR", raising=False)

    child_env = {"CLAUDE_CLI_LISTEN_ADDR": "127.0.0.1:25000"}
    enabled = _apply_claude_cli_env(child_env, config_path)

    assert enabled is True
    assert child_env["CLAUDE_CLI_LISTEN_ADDR"] == "127.0.0.1:25000"
def test_apply_claude_cli_env_passes_through_user_env(tmp_path, monkeypatch):
    """CLAUDE_CLI_* variables from the process env reach the child env."""
    config_yaml = """
        model_providers:
          - provider_interface: claude-cli
            model: '*'
        """
    config_path = _write(tmp_path / "rendered.yaml", config_yaml)
    monkeypatch.delenv("CLAUDE_CLI_LISTEN_ADDR", raising=False)
    monkeypatch.setenv("CLAUDE_CLI_BIN", "/usr/local/bin/claude-test")
    monkeypatch.setenv("CLAUDE_CLI_PERMISSION_MODE", "default")

    child_env = {}
    enabled = _apply_claude_cli_env(child_env, config_path)

    assert enabled is True
    assert child_env["CLAUDE_CLI_BIN"] == "/usr/local/bin/claude-test"
    assert child_env["CLAUDE_CLI_PERMISSION_MODE"] == "default"
def test_apply_claude_cli_env_noop_for_other_configs(tmp_path):
    """Without a claude-cli provider the child env is left untouched."""
    config_yaml = """
        model_providers:
          - provider_interface: openai
            model: gpt-4o
        """
    config_path = _write(tmp_path / "rendered.yaml", config_yaml)

    child_env = {}
    enabled = _apply_claude_cli_env(child_env, config_path)

    assert enabled is False
    assert "CLAUDE_CLI_LISTEN_ADDR" not in child_env

View file

@ -184,6 +184,7 @@ properties:
enum:
- plano
- claude
- claude-cli
- deepseek
- groq
- mistral
@ -242,6 +243,7 @@ properties:
enum:
- plano
- claude
- claude-cli
- deepseek
- groq
- mistral

View file

@ -18,8 +18,16 @@ stdout_logfile_maxbytes=0
stderr_logfile_maxbytes=0
[program:brightstaff]
# CLAUDE_CLI_LISTEN_ADDR is set automatically when the rendered config has at
# least one provider with `provider_interface: claude-cli` (the Python config
# generator auto-fills that field for any `model: claude-cli/*` entry). The
# bridge listener stays off otherwise — matches native_runner.py behavior.
command=sh -c "\
while [ ! -f /tmp/config_ready ]; do echo '[brightstaff] Waiting for config generation...'; sleep 0.5; done && \
if grep -q 'provider_interface: claude-cli' /app/plano_config_rendered.env_sub.yaml 2>/dev/null; then \
export CLAUDE_CLI_LISTEN_ADDR=${CLAUDE_CLI_LISTEN_ADDR:-127.0.0.1:14001}; \
echo '[brightstaff] claude-cli bridge enabled on '$CLAUDE_CLI_LISTEN_ADDR; \
fi; \
RUST_LOG=${LOG_LEVEL:-info} \
PLANO_CONFIG_PATH_RENDERED=/app/plano_config_rendered.env_sub.yaml \
/app/brightstaff 2>&1 | \

View file

@ -0,0 +1,22 @@
//! Bridge that exposes the local `claude` CLI as an Anthropic Messages API
//! endpoint on a localhost port, allowing it to be used as just another
//! `model_provider` in Plano.
//!
//! Wire-up:
//! - `process` — spawns and manages the `claude -p --output-format stream-json
//! --input-format stream-json` subprocess.
//! - `session` — keys long-lived processes by session id (header or hash) and
//! enforces idle TTL / cap.
//! - `server` — hyper listener that speaks `POST /v1/messages` and bridges
//! between Anthropic SSE and the CLI's NDJSON.
//!
//! Translation between the two wire formats lives in
//! `hermesllm::apis::claude_cli`; this module only owns runtime concerns.
pub mod process;
pub mod server;
pub mod session;
pub use process::{ClaudeCliConfig, ClaudeProcess, ProcessError};
pub use server::run_listener;
pub use session::{SessionManager, SessionManagerConfig, SESSION_HEADER};

View file

@ -0,0 +1,330 @@
//! Manages the lifetime of one `claude -p` child process for a single
//! conversation session. Spawning, env scrubbing, NDJSON line reading and the
//! per-line watchdog all live here. Translation between Anthropic Messages
//! and stream-json lives in `hermesllm::apis::claude_cli`.
use std::process::Stdio;
use std::sync::Arc;
use std::time::Duration;
use hermesllm::apis::claude_cli::{parse_ndjson_line, ClaudeCliEvent, ClaudeCliInputEvent};
use thiserror::Error;
use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader};
use tokio::process::{Child, ChildStdin, Command};
use tokio::sync::{mpsc, Mutex, OwnedMutexGuard};
use tokio::time::{self, Instant};
use tracing::{debug, info, warn};
/// Tunables for one `ClaudeProcess`. Defaults match the OpenClaw reference
/// configuration: `bypassPermissions`, ~120 s watchdog window, ~10 min idle TTL.
///
/// The concrete default values live in the `Default` impl: binary `claude`,
/// a 600 s session TTL and a 120 s watchdog.
#[derive(Debug, Clone)]
pub struct ClaudeCliConfig {
/// Path or name of the `claude` binary (looked up via `$PATH`).
pub binary: String,
/// Value passed to `--permission-mode`. The CLI accepts `default`,
/// `acceptEdits`, `plan`, `auto`, `dontAsk`, `bypassPermissions`.
/// The value is forwarded verbatim; it is not validated here.
pub permission_mode: String,
/// Idle session TTL — after this long without a request the
/// session manager kills the child.
pub session_ttl: Duration,
/// Per-line watchdog: if no NDJSON line arrives for this long during a
/// turn, kill the child. Reset on every line (not every byte).
/// NOTE(review): `TurnStream::next` only reports `WatchdogTimeout`; the
/// actual kill is presumably done by the caller — confirm.
pub watchdog: Duration,
}
impl Default for ClaudeCliConfig {
fn default() -> Self {
Self {
binary: "claude".to_string(),
permission_mode: "bypassPermissions".to_string(),
session_ttl: Duration::from_secs(600),
watchdog: Duration::from_secs(120),
}
}
}
/// Errors produced while interacting with the child process.
#[derive(Debug, Error)]
pub enum ProcessError {
/// The configured binary could not be started; carries the binary name
/// and the underlying OS error.
#[error("failed to spawn `{binary}`: {source}")]
Spawn {
binary: String,
#[source]
source: std::io::Error,
},
/// Writing (or flushing) a turn to the child's stdin failed.
#[error("failed to write to claude stdin: {0}")]
StdinWrite(#[source] std::io::Error),
/// A stdio handle was unavailable or the event channel closed before the
/// turn produced its terminal event.
#[error("claude process exited unexpectedly")]
ExitedEarly,
/// No event arrived within the watchdog window; carries that window.
#[error("claude watchdog fired after {0:?} of silence")]
WatchdogTimeout(Duration),
/// An input event could not be serialized to JSON.
#[error("failed to serialize stdin payload: {0}")]
Serialize(#[from] serde_json::Error),
/// Another turn currently holds this session's event receiver.
#[error("turn already in progress for this session")]
TurnInProgress,
}
/// Reduce a Plano model name to the alias / id accepted by the CLI's
/// `--model` flag. Models registered through the `claude-cli/*` wildcard
/// arrive either prefixed with `claude-cli/` or bare (e.g. `sonnet`); a
/// single leading `claude-cli/` is removed and anything else is returned
/// unchanged.
pub fn normalize_model_arg(model: &str) -> &str {
    match model.strip_prefix("claude-cli/") {
        Some(bare) => bare,
        None => model,
    }
}
/// Environment variables that must be removed before exec'ing `claude` so the
/// child uses its own login keychain rather than picking up server-side
/// credentials. The list mirrors the OpenClaw scrub list.
const SCRUB_ENV_PREFIXES: &[&str] = &["ANTHROPIC_", "CLAUDE_CODE_", "OTEL_"];

/// Snapshot of the current process environment with every variable whose
/// name starts with one of [`SCRUB_ENV_PREFIXES`] removed.
///
/// Reads the environment through `vars_os` rather than `vars`: `vars`
/// panics as soon as it meets a key or value that is not valid Unicode,
/// which would take down every spawn attempt. Such entries cannot be
/// represented in the `(String, String)` result and are skipped instead.
fn scrubbed_env_for_spawn() -> Vec<(String, String)> {
    std::env::vars_os()
        .filter_map(|(key, value)| {
            let key = key.into_string().ok()?;
            let value = value.into_string().ok()?;
            Some((key, value))
        })
        .filter(|(key, _)| {
            !SCRUB_ENV_PREFIXES
                .iter()
                .any(|prefix| key.starts_with(prefix))
        })
        .collect()
}
/// One running `claude -p` subprocess plus the channels we use to talk to it.
/// Each `ClaudeProcess` is owned by exactly one session.
pub struct ClaudeProcess {
/// Handle to the child; `None` once `shutdown` has taken and killed it.
child: Mutex<Option<Child>>,
/// The child's stdin; `None` once `shutdown` has dropped it.
stdin: Mutex<Option<ChildStdin>>,
/// The receiver of `ClaudeCliEvent`s parsed from the child's stdout.
/// Wrapped in `Arc<Mutex>` so a `TurnStream` can hold an owned guard for
/// the duration of one turn (which serializes turns within a session).
event_rx: Arc<Mutex<mpsc::Receiver<ClaudeCliEvent>>>,
/// Tunables captured at spawn time; the watchdog is copied into each turn.
config: ClaudeCliConfig,
/// Last time a request was served on this session — used by the session
/// manager to enforce the idle TTL.
last_used: Mutex<Instant>,
/// Identifier passed to the CLI via `--session-id` and used in log fields.
pub session_id: String,
}
impl ClaudeProcess {
/// Spawn a new child for `session_id`. The first turn for a new session
/// should be the user's Anthropic request body — see
/// [`ClaudeProcess::send_user_turn`] for that.
///
/// `model` is normalized with `normalize_model_arg` before being passed to
/// `--model`; `system_prompt`, when present, is appended to the CLI's own
/// system prompt; `cwd`, when present, becomes the child's working
/// directory. The child's environment is the current process environment
/// minus the scrubbed prefixes.
///
/// Two background tasks are started: one parses stdout NDJSON lines into
/// `ClaudeCliEvent`s and feeds the per-session channel, the other logs
/// stderr lines.
///
/// Returns `ProcessError::Spawn` when the binary cannot be started and
/// `ProcessError::ExitedEarly` when a piped stdio handle is unavailable.
pub async fn spawn(
session_id: String,
model: &str,
system_prompt: Option<&str>,
cwd: Option<&std::path::Path>,
config: ClaudeCliConfig,
) -> Result<Arc<Self>, ProcessError> {
let mut cmd = Command::new(&config.binary);
cmd.arg("-p")
.arg("--output-format")
.arg("stream-json")
.arg("--input-format")
.arg("stream-json")
.arg("--verbose")
.arg("--include-partial-messages")
.arg("--permission-mode")
.arg(&config.permission_mode)
.arg("--model")
.arg(normalize_model_arg(model))
.arg("--session-id")
.arg(&session_id)
.arg("--no-session-persistence");
if let Some(prompt) = system_prompt {
// Append (don't replace) so Claude Code's built-in system prompt
// — which carries tool definitions — is preserved.
cmd.arg("--append-system-prompt").arg(prompt);
}
if let Some(dir) = cwd {
cmd.current_dir(dir);
}
// Start from an empty environment and re-add only the variables that
// survive scrubbing, so server-side credentials never reach the child.
cmd.env_clear();
for (k, v) in scrubbed_env_for_spawn() {
cmd.env(k, v);
}
// All three stdio streams are piped; `kill_on_drop` makes sure the child
// does not outlive its handle.
cmd.stdin(Stdio::piped())
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.kill_on_drop(true);
let mut child = cmd.spawn().map_err(|e| ProcessError::Spawn {
binary: config.binary.clone(),
source: e,
})?;
let stdin = child.stdin.take().ok_or(ProcessError::ExitedEarly)?;
let stdout = child.stdout.take().ok_or(ProcessError::ExitedEarly)?;
let stderr = child.stderr.take().ok_or(ProcessError::ExitedEarly)?;
// Bounded channel — backpressure if the consumer is slow, but large
// enough that bursts of small text deltas do not block stdout drain.
let (tx, rx) = mpsc::channel::<ClaudeCliEvent>(256);
let session_for_log = session_id.clone();
// Stdout reader: one NDJSON line per event. The task ends when the
// receiver is dropped, stdout reaches EOF, or a read error occurs.
tokio::spawn(async move {
let mut reader = BufReader::new(stdout).lines();
loop {
match reader.next_line().await {
Ok(Some(line)) => {
// `None` from the parser means the line carries no event and is
// skipped silently.
if let Some(parsed) = parse_ndjson_line(&line) {
match parsed {
Ok(ev) => {
if tx.send(ev).await.is_err() {
break;
}
}
// A malformed line is logged and skipped; it does not end the stream.
Err(err) => {
warn!(
session = %session_for_log,
error = %err,
line = %line,
"failed to parse claude NDJSON line"
);
}
}
}
}
Ok(None) => {
debug!(session = %session_for_log, "claude stdout closed");
break;
}
Err(err) => {
warn!(
session = %session_for_log,
error = %err,
"claude stdout read error"
);
break;
}
}
}
});
let session_for_stderr = session_id.clone();
// Stderr reader: surface every non-blank line in the logs at warn level.
tokio::spawn(async move {
let mut reader = BufReader::new(stderr).lines();
while let Ok(Some(line)) = reader.next_line().await {
if !line.trim().is_empty() {
warn!(session = %session_for_stderr, line = %line, "claude stderr");
}
}
});
info!(
session = %session_id,
model = %normalize_model_arg(model),
"spawned claude-cli"
);
Ok(Arc::new(Self {
child: Mutex::new(Some(child)),
stdin: Mutex::new(Some(stdin)),
event_rx: Arc::new(Mutex::new(rx)),
config,
last_used: Mutex::new(Instant::now()),
session_id,
}))
}
/// Send one user turn to the child and hand back the stream of its events.
///
/// Each input event is serialized as one JSON line on the child's stdin,
/// which is flushed once all lines are written. The returned
/// [`TurnStream`] yields parsed CLI events until the terminal `result`
/// event (or the watchdog) ends the turn.
///
/// The event receiver is claimed exclusively for the lifetime of the
/// turn, so a concurrent call fails with [`ProcessError::TurnInProgress`].
/// Also refreshes the session's last-used timestamp.
pub async fn send_user_turn(
    &self,
    events: &[ClaudeCliInputEvent],
) -> Result<TurnStream, ProcessError> {
    {
        let mut stamp = self.last_used.lock().await;
        *stamp = Instant::now();
    }

    // Claim the event receiver before touching stdin so a second caller
    // is rejected without writing anything.
    let receiver = match Arc::clone(&self.event_rx).try_lock_owned() {
        Ok(guard) => guard,
        Err(_) => return Err(ProcessError::TurnInProgress),
    };

    let mut stdin_slot = self.stdin.lock().await;
    let writer = match stdin_slot.as_mut() {
        Some(handle) => handle,
        None => return Err(ProcessError::ExitedEarly),
    };

    for event in events.iter() {
        let mut line = serde_json::to_vec(event)?;
        line.push(b'\n');
        if let Err(io_err) = writer.write_all(&line).await {
            return Err(ProcessError::StdinWrite(io_err));
        }
    }
    if let Err(io_err) = writer.flush().await {
        return Err(ProcessError::StdinWrite(io_err));
    }

    let watchdog = self.config.watchdog;
    Ok(TurnStream {
        rx: receiver,
        watchdog,
        done: false,
    })
}
/// Timestamp of the most recent turn on this session; read by the session
/// manager's reaper.
pub async fn last_used(&self) -> Instant {
    let stamp = self.last_used.lock().await;
    *stamp
}
/// Kill the child and wait for it to exit. Calling this again is a no-op
/// because the child and stdin handles are taken on the first call.
pub async fn shutdown(&self) {
    // The child lock stays held across the kill + wait.
    let mut child_slot = self.child.lock().await;
    if let Some(mut proc) = child_slot.take() {
        let _ = proc.start_kill();
        let _ = proc.wait().await;
    }
    drop(child_slot);

    // Dropping stdin signals the child if it survived `start_kill`.
    let mut stdin_slot = self.stdin.lock().await;
    drop(stdin_slot.take());
}
}
/// One-shot stream of CLI events for a single user turn. Yields events until
/// the terminal `result` event is observed (or the watchdog fires). Drops the
/// owned receiver lock when finished, allowing the next turn to start.
pub struct TurnStream {
/// Owned guard on the session's event receiver; held for the whole turn.
rx: OwnedMutexGuard<mpsc::Receiver<ClaudeCliEvent>>,
/// Maximum silence tolerated between two events.
watchdog: Duration,
/// Set once the turn has ended (result seen, channel closed, or timeout).
done: bool,
}
impl TurnStream {
    /// Wait for the next CLI event of this turn, bounded by the watchdog.
    ///
    /// Returns `Ok(Some(event))` for every event up to and including the
    /// terminal `result` event, and `Ok(None)` on every call after the turn
    /// has ended. Fails with `ExitedEarly` when the event channel closes
    /// and with `WatchdogTimeout` when no event arrives in time; both
    /// failures also end the turn.
    pub async fn next(&mut self) -> Result<Option<ClaudeCliEvent>, ProcessError> {
        if self.done {
            return Ok(None);
        }

        let received = match time::timeout(self.watchdog, self.rx.recv()).await {
            Ok(received) => received,
            Err(_elapsed) => {
                self.done = true;
                return Err(ProcessError::WatchdogTimeout(self.watchdog));
            }
        };

        let event = match received {
            Some(event) => event,
            None => {
                self.done = true;
                return Err(ProcessError::ExitedEarly);
            }
        };

        // The `result` event is still delivered; it just marks the end.
        if let ClaudeCliEvent::Result { .. } = &event {
            self.done = true;
        }
        Ok(Some(event))
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Prefixed names lose `claude-cli/`; bare names pass through.
    #[test]
    fn normalize_model_arg_strips_prefix() {
        let cases = [
            ("claude-cli/sonnet", "sonnet"),
            ("claude-cli/claude-opus-4-7", "claude-opus-4-7"),
            ("sonnet", "sonnet"),
        ];
        for (input, expected) in cases.iter() {
            assert_eq!(normalize_model_arg(input), *expected);
        }
    }

    // Note: cannot mutate process env in unit tests safely since tests run
    // in parallel; spawn integration tests cover env behavior end-to-end via
    // the fake_claude.sh fixture.
}

View file

@ -0,0 +1,335 @@
//! HTTP server fronting the claude-cli bridge. Speaks Anthropic Messages API
//! (`POST /v1/messages`) on a localhost port; everything inside this module
//! delegates to `hermesllm::apis::claude_cli` for translation and to
//! `super::session::SessionManager` for subprocess lifecycle.
use std::convert::Infallible;
use std::net::SocketAddr;
use std::sync::Arc;
use bytes::Bytes;
use futures::stream;
use hermesllm::apis::anthropic::MessagesRequest;
use hermesllm::apis::claude_cli::{
cli_error_to_anthropic_error_body, cli_event_to_messages_stream_event,
collect_to_messages_response, extract_system_prompt, messages_request_to_stdin_payload,
synthetic_message_start, ClaudeCliEvent,
};
use http_body_util::combinators::BoxBody;
use http_body_util::{BodyExt, Full, StreamBody};
use hyper::body::{Frame, Incoming};
use hyper::header::{self, HeaderValue};
use hyper::server::conn::http1;
use hyper::service::service_fn;
use hyper::{Method, Request, Response, StatusCode};
use hyper_util::rt::TokioIo;
use tokio::net::TcpListener;
use tokio::sync::mpsc;
use tokio_stream::wrappers::ReceiverStream;
use tracing::{debug, error, info, warn};
use super::session::{SessionManager, SESSION_HEADER};
/// Run the claude-cli bridge listener on `addr`.
///
/// Binds a TCP listener and serves every accepted connection as HTTP/1 on
/// its own task, dispatching requests to `handle`. The future resolves
/// with `Ok(())` once `shutdown` completes, after all active sessions have
/// been shut down through the manager. It resolves with `Err` only when
/// the initial bind fails.
///
/// Accept errors are logged and retried after a short pause. Without the
/// pause, a persistent error such as file-descriptor exhaustion would make
/// `accept` fail immediately on every iteration and spin this loop.
pub async fn run_listener<F>(
    addr: SocketAddr,
    manager: Arc<SessionManager>,
    shutdown: F,
) -> Result<(), Box<dyn std::error::Error + Send + Sync>>
where
    F: std::future::Future<Output = ()> + Send + 'static,
{
    let listener = TcpListener::bind(addr).await?;
    info!(%addr, "claude-cli bridge listening");
    let manager_for_shutdown = Arc::clone(&manager);
    tokio::pin!(shutdown);
    loop {
        tokio::select! {
            accept = listener.accept() => {
                let (stream, peer) = match accept {
                    Ok(s) => s,
                    Err(err) => {
                        warn!(error = ?err, "claude-cli accept error");
                        // Back off briefly so a persistent accept failure
                        // does not turn into a busy loop.
                        tokio::time::sleep(std::time::Duration::from_millis(100)).await;
                        continue;
                    }
                };
                debug!(peer = ?peer, "claude-cli accepted connection");
                let manager = Arc::clone(&manager);
                let io = TokioIo::new(stream);
                tokio::task::spawn(async move {
                    let svc = service_fn(move |req| {
                        let manager = Arc::clone(&manager);
                        async move { handle(req, manager).await }
                    });
                    if let Err(err) = http1::Builder::new().serve_connection(io, svc).await {
                        warn!(error = ?err, "claude-cli connection error");
                    }
                });
            }
            _ = &mut shutdown => {
                info!("claude-cli bridge shutting down");
                manager_for_shutdown.shutdown_all().await;
                return Ok(());
            }
        }
    }
}
/// Serve one HTTP request for the bridge.
///
/// Routes:
/// - `GET /healthz` → `200 ok`.
/// - `POST /v1/messages` → an Anthropic Messages request run against the
///   session's `claude` child.
/// - anything else → `404`.
///
/// For `/v1/messages` the body is parsed, the session id is resolved from
/// the optional session header and the request, the stdin payload is
/// built, and only then is the child fetched or spawned. Building the
/// payload first means a request that cannot be translated is rejected
/// with `400` without leaving a freshly spawned child behind.
///
/// Unreadable or invalid requests yield `400`; spawn, send and turn
/// failures yield `502` with an Anthropic-style error body. With
/// `stream: true` the response is an SSE stream; otherwise the whole turn
/// is drained and returned as a single JSON message.
async fn handle(
    req: Request<Incoming>,
    manager: Arc<SessionManager>,
) -> Result<Response<BoxBody<Bytes, Infallible>>, hyper::Error> {
    let path = req.uri().path();
    let method = req.method();
    if method == Method::GET && path == "/healthz" {
        return Ok(text_response(StatusCode::OK, "ok"));
    }
    if method != Method::POST || path != "/v1/messages" {
        return Ok(text_response(StatusCode::NOT_FOUND, "not found"));
    }

    // Pull out the optional session header up front so we can drop the
    // request after consuming the body.
    let session_header = req
        .headers()
        .get(SESSION_HEADER)
        .and_then(|h| h.to_str().ok())
        .map(|s| s.to_string());
    let body_bytes = match req.collect().await {
        Ok(c) => c.to_bytes(),
        Err(err) => {
            warn!(error = %err, "failed to read claude-cli request body");
            return Ok(json_error(StatusCode::BAD_REQUEST, "failed to read body"));
        }
    };
    let parsed: MessagesRequest = match serde_json::from_slice(&body_bytes) {
        Ok(p) => p,
        Err(err) => {
            warn!(error = %err, "failed to parse Anthropic MessagesRequest");
            return Ok(json_error(
                StatusCode::BAD_REQUEST,
                &format!("invalid Anthropic MessagesRequest: {err}"),
            ));
        }
    };

    let session_id = SessionManager::resolve_session_id(session_header.as_deref(), &parsed);
    let system_prompt = extract_system_prompt(&parsed);

    // Translate the request before spawning: a request we cannot translate
    // must not cost a subprocess.
    let stdin_payload = match messages_request_to_stdin_payload(&parsed, Some(&session_id)) {
        Ok(p) => p,
        Err(err) => {
            warn!(error = %err, "failed to build claude-cli stdin payload");
            return Ok(json_error(
                StatusCode::BAD_REQUEST,
                &format!("failed to build claude-cli stdin payload: {err}"),
            ));
        }
    };

    let process = match manager
        .get_or_spawn(&session_id, &parsed.model, system_prompt.as_deref(), None)
        .await
    {
        Ok(p) => p,
        Err(err) => {
            error!(session = %session_id, error = %err, "failed to spawn claude-cli");
            return Ok(json_error(
                StatusCode::BAD_GATEWAY,
                &format!("failed to spawn claude-cli: {err}"),
            ));
        }
    };

    let streaming = parsed.stream.unwrap_or(false);
    let model = parsed.model.clone();
    let mut turn = match process.send_user_turn(&stdin_payload).await {
        Ok(t) => t,
        Err(err) => {
            error!(session = %session_id, error = %err, "failed to send user turn");
            return Ok(json_error(
                StatusCode::BAD_GATEWAY,
                &format!("failed to send user turn: {err}"),
            ));
        }
    };

    if streaming {
        Ok(stream_response(turn, model, session_id))
    } else {
        // Drain the entire turn before answering.
        let mut events: Vec<ClaudeCliEvent> = Vec::new();
        loop {
            match turn.next().await {
                Ok(Some(ev)) => events.push(ev),
                Ok(None) => break,
                Err(err) => {
                    warn!(session = %session_id, error = %err, "claude-cli turn failed");
                    let body = cli_error_to_anthropic_error_body(&err.to_string());
                    return Ok(json_response(StatusCode::BAD_GATEWAY, &body));
                }
            }
        }
        match collect_to_messages_response(&model, events) {
            Ok(resp) => Ok(json_response(StatusCode::OK, &resp)),
            Err(err) => {
                let body = cli_error_to_anthropic_error_body(&err.to_string());
                Ok(json_response(StatusCode::BAD_GATEWAY, &body))
            }
        }
    }
}
/// Build the SSE response for a streaming turn.
///
/// A background task pulls events from `turn`, translates each one into an
/// Anthropic stream event and pushes the resulting SSE frame into a bounded
/// channel that backs the response body. The task stops on the terminal
/// `result` event, on a turn error (after emitting an `error` frame), or
/// when the client side of the channel is gone.
///
/// The response itself is always `200` with `text/event-stream`; failures
/// are reported in-band as `error` events.
fn stream_response(
mut turn: super::process::TurnStream,
model: String,
session_id: String,
) -> Response<BoxBody<Bytes, Infallible>> {
let (tx, rx) = mpsc::channel::<Result<Frame<Bytes>, Infallible>>(64);
tokio::spawn(async move {
// Some short turns skip MessageStart; emit a synthetic one so the
// client always sees a complete stream.
let mut emitted_message_start = false;
loop {
let ev = match turn.next().await {
Ok(Some(ev)) => ev,
Ok(None) => break,
Err(err) => {
warn!(session = %session_id, error = %err, "claude-cli streaming turn failed");
let body = cli_error_to_anthropic_error_body(&err.to_string());
let frame =
Frame::data(format_sse("error", &serde_json::to_string(&body).unwrap()));
let _ = tx.send(Ok(frame)).await;
break;
}
};
// NOTE(review): the synthetic MessageStart is only produced when the
// `result` event arrives before any real MessageStart; other event
// kinds seen first are forwarded as-is — confirm that is intended.
if !emitted_message_start {
if let ClaudeCliEvent::StreamEvent {
event: hermesllm::apis::anthropic::MessagesStreamEvent::MessageStart { .. },
} = &ev
{
emitted_message_start = true;
} else if matches!(&ev, ClaudeCliEvent::Result { .. }) {
// No actual content was streamed; synthesize a
// MessageStart so the SSE stream is well-formed.
let synthetic = synthetic_message_start(&model, Some(&session_id));
if let Some(frame) = sse_frame_for_event(&synthetic) {
let _ = tx.send(Ok(frame)).await;
}
emitted_message_start = true;
}
}
// Events without an Anthropic equivalent translate to `None` and are
// dropped; a failed send means the client disconnected.
if let Some(translated) = cli_event_to_messages_stream_event(&ev) {
if let Some(frame) = sse_frame_for_event(&translated) {
if tx.send(Ok(frame)).await.is_err() {
break;
}
}
}
// The `result` event always ends the stream; when it reports an error,
// an extra `error` frame is sent first.
if let ClaudeCliEvent::Result {
is_error, result, ..
} = &ev
{
if *is_error {
let msg = result
.clone()
.unwrap_or_else(|| "claude-cli returned an error".to_string());
let body = cli_error_to_anthropic_error_body(&msg);
let frame =
Frame::data(format_sse("error", &serde_json::to_string(&body).unwrap()));
let _ = tx.send(Ok(frame)).await;
}
break;
}
}
});
let body = StreamBody::new(ReceiverStream::new(rx));
let mut resp = Response::new(body.boxed());
*resp.status_mut() = StatusCode::OK;
resp.headers_mut().insert(
header::CONTENT_TYPE,
HeaderValue::from_static("text/event-stream"),
);
resp.headers_mut()
.insert(header::CACHE_CONTROL, HeaderValue::from_static("no-cache"));
// Sent alongside `no-cache`; presumably asks reverse proxies not to buffer
// the stream — verify against the deployment's proxy.
resp.headers_mut()
.insert("X-Accel-Buffering", HeaderValue::from_static("no"));
resp
}
/// Encode one Anthropic stream event as an SSE data frame.
///
/// The SSE `event:` name is derived from the variant and the `data:` line
/// is the event's JSON form. Returns `None` if the event cannot be
/// serialized.
fn sse_frame_for_event(
    event: &hermesllm::apis::anthropic::MessagesStreamEvent,
) -> Option<Frame<Bytes>> {
    use hermesllm::apis::anthropic::MessagesStreamEvent as Ev;

    let payload = serde_json::to_string(event).ok()?;
    let label = match event {
        Ev::MessageStart { .. } => "message_start",
        Ev::ContentBlockStart { .. } => "content_block_start",
        Ev::ContentBlockDelta { .. } => "content_block_delta",
        Ev::ContentBlockStop { .. } => "content_block_stop",
        Ev::MessageDelta { .. } => "message_delta",
        Ev::MessageStop => "message_stop",
        Ev::Ping => "ping",
    };
    Some(Frame::data(format_sse(label, &payload)))
}
/// Render a single Server-Sent Events message with an `event:` line, one
/// `data:` line, and the blank line that terminates the message.
///
/// `data` is written verbatim on one line; callers pass single-line JSON.
fn format_sse(event: &str, data: &str) -> Bytes {
    // 16 = len("event: ") + len("\ndata: ") + len("\n\n")
    let mut wire = String::with_capacity(event.len() + data.len() + 16);
    wire.push_str("event: ");
    wire.push_str(event);
    wire.push_str("\ndata: ");
    wire.push_str(data);
    wire.push_str("\n\n");
    Bytes::from(wire)
}
/// Build an `application/json` response with the given status.
///
/// `body` is serialized with `serde_json`; if serialization fails the
/// response body falls back to the literal `{}` rather than erroring.
fn json_response<T: serde::Serialize>(
    status: StatusCode,
    body: &T,
) -> Response<BoxBody<Bytes, Infallible>> {
    let payload = match serde_json::to_vec(body) {
        Ok(encoded) => encoded,
        Err(_) => b"{}".to_vec(),
    };
    // `Full`'s error type is uninhabited, so the empty match is exhaustive.
    let boxed = Full::new(Bytes::from(payload))
        .map_err(|never| match never {})
        .boxed();
    let mut resp = Response::new(boxed);
    resp.headers_mut().insert(
        header::CONTENT_TYPE,
        HeaderValue::from_static("application/json"),
    );
    *resp.status_mut() = status;
    resp
}
/// Build a JSON error response: `message` is wrapped by
/// `cli_error_to_anthropic_error_body` and sent with the given status.
fn json_error(status: StatusCode, message: &str) -> Response<BoxBody<Bytes, Infallible>> {
    json_response(status, &cli_error_to_anthropic_error_body(message))
}
/// Build a `text/plain` response whose body is the static string `message`.
fn text_response(
    status: StatusCode,
    message: &'static str,
) -> Response<BoxBody<Bytes, Infallible>> {
    let payload = Bytes::from_static(message.as_bytes());
    // `Full`'s error type is uninhabited, so the empty match is exhaustive.
    let boxed = Full::new(payload).map_err(|never| match never {}).boxed();
    let mut resp = Response::new(boxed);
    resp.headers_mut()
        .insert(header::CONTENT_TYPE, HeaderValue::from_static("text/plain"));
    *resp.status_mut() = status;
    resp
}
// Ensure a no-op import so that `stream` (re-exported from futures) is
// considered used in case future expansion needs it. Avoids accidental
// deletion when running `cargo fix`.
#[allow(dead_code)]
fn _touch_stream_module() {
let _: stream::Empty<u32> = stream::empty();
}

View file

@ -0,0 +1,341 @@
//! Session manager for the claude-cli bridge. Maps a stable session id (taken
//! from a client-provided header or hashed from the conversation prefix) to a
//! long-lived `ClaudeProcess`. Enforces an idle TTL and a hard cap on the
//! number of concurrent sessions.
use std::collections::{hash_map::DefaultHasher, HashMap};
use std::hash::{Hash, Hasher};
use std::sync::Arc;
use std::time::Duration;
use hermesllm::apis::anthropic::{
MessagesContentBlock, MessagesMessageContent, MessagesRequest, MessagesRole,
MessagesSystemPrompt,
};
use tokio::sync::Mutex;
use tokio::time::Instant;
use tracing::{debug, info};
use super::process::{ClaudeCliConfig, ClaudeProcess, ProcessError};
/// Optional client header that pins a request to a specific session id.
/// NOTE(review): the header lookup is not in this module; presumably the
/// request handler reads it and passes the value to
/// `SessionManager::resolve_session_id` as `client_header` — confirm.
pub const SESSION_HEADER: &str = "x-arch-claude-cli-session";
/// Default cap. The bridge is local and per-developer; this is a guard
/// against runaway memory if a client bug churns through unique session ids.
/// Used as `max_sessions` by `SessionManagerConfig::default()`.
pub const DEFAULT_MAX_SESSIONS: usize = 64;
/// Tunables for the session manager.
#[derive(Debug, Clone)]
pub struct SessionManagerConfig {
/// Cap on the session table: once it holds this many entries,
/// `get_or_spawn` evicts the least-recently-used session before spawning.
pub max_sessions: usize,
/// Settings cloned into every `ClaudeProcess::spawn` call. Its
/// `session_ttl` also drives idle eviction (a zero TTL disables it).
pub process: ClaudeCliConfig,
}
impl Default for SessionManagerConfig {
    /// `DEFAULT_MAX_SESSIONS` sessions and the default process settings.
    fn default() -> Self {
        let process = ClaudeCliConfig::default();
        Self {
            max_sessions: DEFAULT_MAX_SESSIONS,
            process,
        }
    }
}
/// Holds active `ClaudeProcess` handles keyed by session id.
pub struct SessionManager {
/// Session table. Guarded by a `tokio::sync::Mutex`, whose guard is held
/// across `.await` points (e.g. while reading each process's `last_used`).
inner: Mutex<HashMap<String, Arc<ClaudeProcess>>>,
/// Settings captured at construction; never mutated afterwards.
config: SessionManagerConfig,
}
impl SessionManager {
    /// Create a manager with an empty session table. Returned inside an
    /// `Arc` so the same manager can be shared between tasks.
    pub fn new(config: SessionManagerConfig) -> Arc<Self> {
        Arc::new(Self {
            inner: Mutex::new(HashMap::new()),
            config,
        })
    }

    /// Pick (or fabricate) the session id for a given request.
    ///
    /// Strategy (in order):
    /// 1. If the `x-arch-claude-cli-session` header is present and non-empty
    ///    after trimming: return it unchanged when it parses as a UUID,
    ///    otherwise hash the opaque token into a deterministic UUID-shaped id
    ///    (the CLI requires UUID format).
    /// 2. Otherwise hash `(model, system_prompt_text, first_user_message_text)`
    ///    and produce a deterministic UUID-shaped id so retries of the same
    ///    conversation reuse the same process.
    pub fn resolve_session_id(client_header: Option<&str>, req: &MessagesRequest) -> String {
        let pinned = client_header
            .map(str::trim)
            .filter(|token| !token.is_empty());
        if let Some(token) = pinned {
            return if uuid::Uuid::parse_str(token).is_ok() {
                token.to_string()
            } else {
                uuid_from_seed(token)
            };
        }

        let mut hasher = DefaultHasher::new();
        req.model.hash(&mut hasher);
        if let Some(system) = &req.system {
            system_text(system).hash(&mut hasher);
        }
        if let Some(first) = first_user_message_text(req) {
            first.hash(&mut hasher);
        }
        uuid_from_seed(&hasher.finish().to_string())
    }

    /// Get the existing session's process or spawn a new one.
    ///
    /// Idle sessions are reaped first. If the table is at `max_sessions`,
    /// least-recently-used sessions are shut down until there is room.
    ///
    /// # Errors
    /// Propagates the `ProcessError` from `ClaudeProcess::spawn`.
    pub async fn get_or_spawn(
        &self,
        session_id: &str,
        model: &str,
        system_prompt: Option<&str>,
        cwd: Option<&std::path::Path>,
    ) -> Result<Arc<ClaudeProcess>, ProcessError> {
        // Reap idle sessions on the read path so we don't need a separate
        // background task for the common one-developer-one-laptop deployment.
        self.evict_idle().await;

        let mut map = self.inner.lock().await;
        loop {
            // Re-checked on every pass: the lock is released while a victim
            // shuts down, and another task may have registered this id in the
            // meantime. Spawning again would overwrite (and orphan) its entry.
            if let Some(existing) = map.get(session_id) {
                debug!(session = %session_id, "reusing claude-cli session");
                return Ok(Arc::clone(existing));
            }
            if map.len() < self.config.max_sessions {
                break;
            }
            // Evict the least-recently-used session to keep the cap honest.
            let victim = match lru_session_id(&map).await {
                Some(key) => map.remove(&key).map(|proc| (key, proc)),
                None => None,
            };
            let (victim_key, victim_proc) = match victim {
                Some(pair) => pair,
                // Empty table (e.g. `max_sessions == 0`): nothing to evict.
                None => break,
            };
            info!(session = %victim_key, "evicting LRU claude-cli session to make room");
            // Don't hold the table lock while the victim shuts down.
            drop(map);
            victim_proc.shutdown().await;
            map = self.inner.lock().await;
        }

        // The lock stays held across the spawn so two concurrent callers for
        // the same id cannot both start a process.
        let process = ClaudeProcess::spawn(
            session_id.to_string(),
            model,
            system_prompt,
            cwd,
            self.config.process.clone(),
        )
        .await?;
        map.insert(session_id.to_string(), Arc::clone(&process));
        Ok(process)
    }

    /// Drop and kill all sessions. Called on graceful shutdown.
    pub async fn shutdown_all(&self) {
        let drained: Vec<_> = {
            let mut map = self.inner.lock().await;
            map.drain().collect()
        };
        info!(count = drained.len(), "draining claude-cli sessions");
        for (_, proc) in drained {
            proc.shutdown().await;
        }
    }

    /// Shut down every session whose `last_used` is older than the
    /// configured `session_ttl`. A zero TTL disables idle eviction.
    async fn evict_idle(&self) {
        let ttl = self.config.process.session_ttl;
        if ttl.is_zero() {
            return;
        }
        let now = Instant::now();

        // Scan and remove under one lock acquisition. With separate scan and
        // remove phases, a key could be evicted and re-spawned in between,
        // and the removal would then drop the fresh process from the table.
        let expired: Vec<(String, Arc<ClaudeProcess>)> = {
            let mut map = self.inner.lock().await;
            let mut stale_keys: Vec<String> = Vec::new();
            for (key, proc) in map.iter() {
                if now.duration_since(proc.last_used().await) > ttl {
                    stale_keys.push(key.clone());
                }
            }
            stale_keys
                .into_iter()
                .filter_map(|key| map.remove(&key).map(|proc| (key, proc)))
                .collect()
        };

        // Shut down outside the lock so other requests are not blocked.
        for (key, proc) in expired {
            info!(session = %key, "evicting idle claude-cli session");
            proc.shutdown().await;
        }
    }
}
/// Return the key of the session with the earliest `last_used` instant, or
/// `None` for an empty table. On equal instants the entry visited later wins.
async fn lru_session_id(map: &HashMap<String, Arc<ClaudeProcess>>) -> Option<String> {
    let mut candidate: Option<(&String, Instant)> = None;
    for (key, proc) in map {
        let stamp = proc.last_used().await;
        // Keep the current candidate only when it is strictly older.
        let keep_current = matches!(candidate, Some((_, best)) if best < stamp);
        if !keep_current {
            candidate = Some((key, stamp));
        }
    }
    candidate.map(|(key, _)| key.clone())
}
/// Text of the first `user` message in the request, or `None` when there is
/// no user message. For block content, only text blocks are kept and joined
/// with newlines.
fn first_user_message_text(req: &MessagesRequest) -> Option<String> {
    let first_user = req
        .messages
        .iter()
        .find(|msg| msg.role == MessagesRole::User)?;
    let text = match &first_user.content {
        MessagesMessageContent::Single(s) => s.clone(),
        MessagesMessageContent::Blocks(blocks) => {
            let mut parts: Vec<&str> = Vec::new();
            for block in blocks.iter() {
                if let MessagesContentBlock::Text { text, .. } = block {
                    parts.push(text.as_str());
                }
            }
            parts.join("\n")
        }
    };
    Some(text)
}
/// Flatten a system prompt to plain text: a single string is returned as-is;
/// for block prompts the text blocks are joined with newlines and every other
/// block kind is ignored.
fn system_text(system: &MessagesSystemPrompt) -> String {
    let blocks = match system {
        MessagesSystemPrompt::Single(s) => return s.clone(),
        MessagesSystemPrompt::Blocks(blocks) => blocks,
    };
    let mut parts: Vec<&str> = Vec::new();
    for block in blocks.iter() {
        if let MessagesContentBlock::Text { text, .. } = block {
            parts.push(text.as_str());
        }
    }
    parts.join("\n")
}
/// Deterministic UUID-shaped id derived from an arbitrary seed string.
///
/// The `claude` CLI requires `--session-id` to be a valid UUID. Two 64-bit
/// `DefaultHasher` digests (the second chained on the first) are laid out
/// big-endian into 16 bytes and passed to `uuid::Builder::from_random_bytes`.
/// No namespace UUID is involved and the `uuid` crate's v5 feature is not
/// needed.
///
/// NOTE(review): std does not promise that `DefaultHasher` output is stable
/// across Rust releases, so ids may differ between toolchain versions —
/// confirm this is acceptable for session reuse.
fn uuid_from_seed(seed: &str) -> String {
    let hi = {
        let mut hasher = DefaultHasher::new();
        seed.hash(&mut hasher);
        hasher.finish()
    };
    // Second digest mixes in the first so the two halves differ.
    let lo = {
        let mut hasher = DefaultHasher::new();
        hi.hash(&mut hasher);
        seed.hash(&mut hasher);
        hasher.finish()
    };
    let mut bytes = [0u8; 16];
    bytes[..8].copy_from_slice(&hi.to_be_bytes());
    bytes[8..].copy_from_slice(&lo.to_be_bytes());
    uuid::Builder::from_random_bytes(bytes)
        .into_uuid()
        .to_string()
}
/// Indirection for "is this duration zero?", kept so call sites stay terse
/// if the implementation ever needs to change. Currently unused.
#[allow(dead_code)]
fn is_zero(d: Duration) -> bool {
    d.as_nanos() == 0
}
#[cfg(test)]
mod tests {
    use super::*;
    use hermesllm::apis::anthropic::MessagesMessage;

    /// Minimal streaming request with a single user turn and an optional
    /// plain-string system prompt.
    fn req(model: &str, user: &str, system: Option<&str>) -> MessagesRequest {
        let user_turn = MessagesMessage {
            role: MessagesRole::User,
            content: MessagesMessageContent::Single(user.to_string()),
        };
        MessagesRequest {
            model: model.to_string(),
            messages: vec![user_turn],
            max_tokens: 1024,
            container: None,
            mcp_servers: None,
            system: system.map(|s| MessagesSystemPrompt::Single(s.to_string())),
            metadata: None,
            service_tier: None,
            thinking: None,
            temperature: None,
            top_p: None,
            top_k: None,
            stream: Some(true),
            stop_sequences: None,
            tools: None,
            tool_choice: None,
        }
    }

    /// Shorthand for the function under test.
    fn resolve(header: Option<&str>, request: &MessagesRequest) -> String {
        SessionManager::resolve_session_id(header, request)
    }

    #[test]
    fn header_uuid_is_used_as_is() {
        let id = "550e8400-e29b-41d4-a716-446655440000";
        let request = req("sonnet", "hi", None);
        assert_eq!(resolve(Some(id), &request), id);
    }

    #[test]
    fn header_non_uuid_is_normalized_to_uuid() {
        let request = req("sonnet", "hi", None);
        let first = resolve(Some("my-token"), &request);
        assert!(uuid::Uuid::parse_str(&first).is_ok());
        // Same token must map to the same id.
        let second = resolve(Some("my-token"), &request);
        assert_eq!(first, second);
    }

    #[test]
    fn empty_header_falls_back_to_hash() {
        let request = req("sonnet", "hi", Some("you are helpful"));
        let from_empty = resolve(Some(""), &request);
        assert!(uuid::Uuid::parse_str(&from_empty).is_ok());
        // An empty header behaves exactly like a missing one.
        let from_missing = resolve(None, &request);
        assert_eq!(from_empty, from_missing);
    }

    #[test]
    fn hash_is_stable_across_repeats_and_distinct_across_inputs() {
        let baseline = resolve(None, &req("sonnet", "hello", None));
        let repeat = resolve(None, &req("sonnet", "hello", None));
        let other_text = resolve(None, &req("sonnet", "different", None));
        let other_model = resolve(None, &req("opus", "hello", None));
        assert_eq!(baseline, repeat);
        assert_ne!(baseline, other_text);
        assert_ne!(baseline, other_model);
    }
}

View file

@ -1,4 +1,5 @@
pub mod agents;
pub mod claude_cli;
pub mod debug;
pub mod function_calling;
pub mod llm;

View file

@ -4,6 +4,9 @@ static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
use brightstaff::app_state::AppState;
use brightstaff::handlers::agents::orchestrator::agent_chat;
use brightstaff::handlers::claude_cli::{
self, ClaudeCliConfig, SessionManager, SessionManagerConfig,
};
use brightstaff::handlers::debug;
use brightstaff::handlers::empty;
use brightstaff::handlers::function_calling::function_calling_chat_handler;
@ -37,6 +40,7 @@ use opentelemetry::trace::FutureExt;
use opentelemetry_http::HeaderExtractor;
use std::collections::HashMap;
use std::sync::Arc;
use std::time::Duration;
use std::{env, fs};
use tokio::net::TcpListener;
use tokio::sync::RwLock;
@ -575,6 +579,57 @@ async fn run_server(state: Arc<AppState>) -> Result<(), Box<dyn std::error::Erro
Ok(())
}
// ---------------------------------------------------------------------------
// claude-cli bridge wiring
// ---------------------------------------------------------------------------
/// Build the [`SessionManagerConfig`] from environment variables. Returns
/// `None` when `CLAUDE_CLI_LISTEN_ADDR` is unset or is not a valid socket
/// address, signaling that the bridge should not start at all (zero-cost
/// when no claude-cli provider exists).
///
/// Optional variables and their defaults:
/// - `CLAUDE_CLI_BIN` — `claude`
/// - `CLAUDE_CLI_PERMISSION_MODE` — `bypassPermissions`
/// - `CLAUDE_CLI_SESSION_TTL_SECS` — `600` (a value of `0` disables idle
///   eviction in the session manager)
/// - `CLAUDE_CLI_WATCHDOG_SECS` — `120`
/// - `CLAUDE_CLI_MAX_SESSIONS` — `DEFAULT_MAX_SESSIONS`
///
/// A numeric variable that is set but unparsable falls back to its default
/// and logs a warning, so a typo does not silently change behavior.
fn claude_cli_config_from_env() -> Option<(std::net::SocketAddr, SessionManagerConfig)> {
    /// Read `name` and parse it as `T`. Unset yields `default`; a value that
    /// fails to parse also yields `default`, after logging a warning.
    fn parsed_env_or<T>(name: &str, default: T) -> T
    where
        T: std::str::FromStr,
        T::Err: std::fmt::Display,
    {
        let raw = match env::var(name) {
            Ok(raw) => raw,
            Err(_) => return default,
        };
        match raw.trim().parse::<T>() {
            Ok(value) => value,
            Err(err) => {
                warn!(
                    variable = %name,
                    value = %raw,
                    error = %err,
                    "ignoring unparsable claude-cli setting — using default"
                );
                default
            }
        }
    }

    let addr_str = env::var("CLAUDE_CLI_LISTEN_ADDR").ok()?;
    let addr: std::net::SocketAddr = match addr_str.trim().parse() {
        Ok(a) => a,
        Err(err) => {
            warn!(
                value = %addr_str,
                error = %err,
                "invalid CLAUDE_CLI_LISTEN_ADDR — claude-cli bridge disabled"
            );
            return None;
        }
    };

    let binary = env::var("CLAUDE_CLI_BIN").unwrap_or_else(|_| "claude".to_string());
    let permission_mode =
        env::var("CLAUDE_CLI_PERMISSION_MODE").unwrap_or_else(|_| "bypassPermissions".to_string());
    let session_ttl = Duration::from_secs(parsed_env_or("CLAUDE_CLI_SESSION_TTL_SECS", 600u64));
    let watchdog = Duration::from_secs(parsed_env_or("CLAUDE_CLI_WATCHDOG_SECS", 120u64));
    let max_sessions = parsed_env_or(
        "CLAUDE_CLI_MAX_SESSIONS",
        claude_cli::session::DEFAULT_MAX_SESSIONS,
    );

    Some((
        addr,
        SessionManagerConfig {
            max_sessions,
            process: ClaudeCliConfig {
                binary,
                permission_mode,
                session_ttl,
                watchdog,
            },
        },
    ))
}
// ---------------------------------------------------------------------------
// Entry point
// ---------------------------------------------------------------------------
@ -586,5 +641,31 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
bs_metrics::init();
info!("loaded plano_config.yaml");
let state = Arc::new(init_app_state(&config).await?);
run_server(state).await
// Optional claude-cli bridge listener. Started iff CLAUDE_CLI_LISTEN_ADDR
// is set in the environment (the Python CLI sets this when it detects a
// `model: claude-cli/*` provider entry).
let bridge_handle = if let Some((addr, cfg)) = claude_cli_config_from_env() {
let manager = SessionManager::new(cfg);
let shutdown = async {
let _ = tokio::signal::ctrl_c().await;
};
Some(tokio::spawn(async move {
if let Err(err) = claude_cli::run_listener(addr, manager, shutdown).await {
warn!(error = ?err, "claude-cli bridge listener exited with error");
}
}))
} else {
None
};
let result = run_server(state).await;
if let Some(handle) = bridge_handle {
// Ctrl-C already triggered the bridge's own shutdown; join briefly to
// give in-flight session drains a chance to finish.
let _ = tokio::time::timeout(Duration::from_secs(5), handle).await;
}
result
}

View file

@ -0,0 +1,190 @@
//! Integration test for the claude-cli bridge. Spins up the listener with a
//! fake `claude` shell script that emits a canned NDJSON sequence, then
//! verifies both the streaming SSE and non-streaming JSON code paths produce
//! the expected Anthropic Messages output.
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
use brightstaff::handlers::claude_cli::{
self, ClaudeCliConfig, SessionManager, SessionManagerConfig,
};
use serde_json::{json, Value};
use tokio::net::TcpListener;
use tokio::sync::oneshot;
/// Absolute path of the stand-in `claude` script under this crate's
/// `tests/fixtures` directory.
fn fake_claude_path() -> PathBuf {
    let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    for segment in ["tests", "fixtures", "fake_claude.sh"].iter() {
        path.push(segment);
    }
    path
}
/// Ask the OS for an ephemeral loopback port and return its address. The
/// probe listener is a temporary, dropped at the end of the statement, so the
/// port is free again when this returns.
async fn pick_free_addr() -> std::net::SocketAddr {
    TcpListener::bind("127.0.0.1:0")
        .await
        .unwrap()
        .local_addr()
        .unwrap()
}
/// Test harness that owns one running claude-cli bridge listener.
struct BridgeFixture {
/// Loopback address the listener was started on.
addr: std::net::SocketAddr,
/// One-shot sender that completes the listener's shutdown future; wrapped
/// in `Option` so `stop` can `take()` it.
shutdown: Option<oneshot::Sender<()>>,
/// Join handle of the spawned listener task, awaited with a timeout in
/// `stop`.
handle: Option<tokio::task::JoinHandle<()>>,
}
impl BridgeFixture {
    /// Start a bridge listener on a free loopback port, backed by the fake
    /// `claude` script, and wait until it accepts TCP connections.
    ///
    /// # Panics
    /// Panics if the listener is still unreachable after roughly one second
    /// (50 attempts, 20 ms apart). Without this check the test would continue
    /// and fail later with a less specific request error.
    async fn start() -> Self {
        let addr = pick_free_addr().await;
        let (shutdown_tx, shutdown_rx) = oneshot::channel::<()>();
        let manager = SessionManager::new(SessionManagerConfig {
            max_sessions: 4,
            process: ClaudeCliConfig {
                binary: fake_claude_path().to_string_lossy().to_string(),
                permission_mode: "bypassPermissions".to_string(),
                session_ttl: Duration::from_secs(60),
                watchdog: Duration::from_secs(5),
            },
        });
        let manager_for_listener = Arc::clone(&manager);
        let handle = tokio::spawn(async move {
            let shutdown = async move {
                let _ = shutdown_rx.await;
            };
            if let Err(err) = claude_cli::run_listener(addr, manager_for_listener, shutdown).await {
                eprintln!("listener exited with error: {err}");
            }
        });

        // Wait for the listener to bind. Loop until we can connect.
        let mut reachable = false;
        for _ in 0..50 {
            if tokio::net::TcpStream::connect(addr).await.is_ok() {
                reachable = true;
                break;
            }
            tokio::time::sleep(Duration::from_millis(20)).await;
        }
        assert!(
            reachable,
            "claude-cli bridge listener never became reachable on {addr}"
        );

        Self {
            addr,
            shutdown: Some(shutdown_tx),
            handle: Some(handle),
        }
    }

    /// Signal shutdown and wait up to three seconds for the listener task to
    /// finish. Send and join errors are ignored: this is best-effort cleanup.
    async fn stop(mut self) {
        if let Some(tx) = self.shutdown.take() {
            let _ = tx.send(());
        }
        if let Some(h) = self.handle.take() {
            let _ = tokio::time::timeout(Duration::from_secs(3), h).await;
        }
    }
}
/// Minimal Anthropic Messages request body with one user turn ("say hi");
/// `stream` selects the SSE or the plain-JSON code path.
fn anthropic_request(stream: bool) -> Value {
    let user_turn = json!({"role": "user", "content": "say hi"});
    json!({
        "model": "claude-cli/sonnet",
        "max_tokens": 64,
        "stream": stream,
        "messages": [user_turn]
    })
}
#[tokio::test]
async fn streaming_request_emits_anthropic_sse() {
let fixture = BridgeFixture::start().await;
let url = format!("http://{}/v1/messages", fixture.addr);
let client = reqwest::Client::new();
let resp = client
.post(&url)
.json(&anthropic_request(true))
.send()
.await
.expect("send request");
assert_eq!(resp.status(), 200);
let ct = resp
.headers()
.get("content-type")
.and_then(|v| v.to_str().ok())
.unwrap_or("")
.to_string();
assert!(
ct.starts_with("text/event-stream"),
"expected text/event-stream, got {ct}"
);
let body = resp.text().await.expect("read body");
// SSE event names should mirror Anthropic's wire format, in order.
let events: Vec<&str> = body
.lines()
.filter_map(|l| l.strip_prefix("event: "))
.collect();
assert_eq!(
events,
vec![
"message_start",
"content_block_start",
"content_block_delta",
"content_block_delta",
"content_block_stop",
"message_delta",
"message_stop",
],
"unexpected SSE event sequence:\n{body}"
);
// The two text deltas should reconstruct "Hello, world!".
let mut combined = String::new();
for line in body.lines() {
if let Some(payload) = line.strip_prefix("data: ") {
if let Ok(v) = serde_json::from_str::<Value>(payload) {
if v.get("type").and_then(|t| t.as_str()) == Some("content_block_delta") {
if let Some(text) = v
.get("delta")
.and_then(|d| d.get("text"))
.and_then(|t| t.as_str())
{
combined.push_str(text);
}
}
}
}
}
assert_eq!(combined, "Hello, world!");
fixture.stop().await;
}
/// Non-streaming path: the same canned CLI output must be aggregated into a
/// single Anthropic `message` JSON body.
#[tokio::test]
async fn non_streaming_request_returns_messages_response() {
    let fixture = BridgeFixture::start().await;
    let endpoint = format!("http://{}/v1/messages", fixture.addr);
    let resp = reqwest::Client::new()
        .post(&endpoint)
        .json(&anthropic_request(false))
        .send()
        .await
        .expect("send request");
    assert_eq!(resp.status(), 200);
    let body: Value = resp.json().await.expect("parse json");

    // Envelope fields.
    assert_eq!(body["type"], "message");
    assert_eq!(body["role"], "assistant");
    assert_eq!(body["stop_reason"], "end_turn");

    // Token usage reported by the fake CLI.
    let usage = &body["usage"];
    assert_eq!(usage["input_tokens"], 3);
    assert_eq!(usage["output_tokens"], 4);

    // Exactly one text block carrying the full reply.
    let content = body["content"].as_array().expect("content array");
    assert_eq!(content.len(), 1);
    let only_block = &content[0];
    assert_eq!(only_block["type"], "text");
    assert_eq!(only_block["text"], "Hello, world!");

    fixture.stop().await;
}

View file

@ -0,0 +1,26 @@
#!/usr/bin/env bash
# Stand-in for the real `claude` CLI used by the brightstaff integration test.
# Reads stdin (so it does not exit early when the bridge writes the user
# JSONL turn) and emits a canned `--output-format stream-json` NDJSON
# sequence that mirrors a one-turn "Hello, world!" response.
#
# All CLI flags are accepted and ignored; only the NDJSON output matters for
# the bridge-side translation.
set -euo pipefail
# Drain any stdin the parent writes so it does not see EPIPE.
# NOTE(review): in a non-interactive bash (job control off), a command started
# with `&` gets /dev/null as stdin unless stdin is redirected explicitly, so
# this subshell may exit at once without reading the parent's pipe. Confirm
# whether an explicit `<&0` is wanted here.
( cat > /dev/null ) &
DRAIN_PID=$!
# Reap the drain helper on exit; `|| true` keeps `set -e` from failing the
# script when the helper has already gone away.
trap 'kill ${DRAIN_PID} 2>/dev/null || true' EXIT
# Quoted delimiter: the NDJSON below is emitted verbatim, with no shell
# expansion. Sequence: one `system` init event, seven `stream_event` lines
# (message_start through message_stop), then the terminal `result` event.
cat <<'EOF'
{"type":"system","subtype":"init","session_id":"fake-session","model":"sonnet","cwd":"/tmp","tools":[]}
{"type":"stream_event","event":{"type":"message_start","message":{"id":"msg_fake","type":"message","role":"assistant","content":[],"model":"claude-sonnet-4-6","stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":3,"output_tokens":0}}}}
{"type":"stream_event","event":{"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}}
{"type":"stream_event","event":{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello"}}}
{"type":"stream_event","event":{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":", world!"}}}
{"type":"stream_event","event":{"type":"content_block_stop","index":0}}
{"type":"stream_event","event":{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":4}}}
{"type":"stream_event","event":{"type":"message_stop"}}
{"type":"result","subtype":"success","is_error":false,"duration_ms":12,"num_turns":1,"result":"Hello, world!","total_cost_usd":0.0001,"usage":{"input_tokens":3,"output_tokens":4},"session_id":"fake-session"}
EOF

View file

@ -400,6 +400,10 @@ pub enum LlmProviderType {
Vercel,
#[serde(rename = "openrouter")]
OpenRouter,
/// Claude Code CLI invoked as a local subprocess. The bridge runs inside
/// brightstaff (`CLAUDE_CLI_LISTEN_ADDR`) and exposes Anthropic Messages.
#[serde(rename = "claude-cli")]
ClaudeCli,
}
impl Display for LlmProviderType {
@ -425,6 +429,7 @@ impl Display for LlmProviderType {
LlmProviderType::DigitalOcean => write!(f, "digitalocean"),
LlmProviderType::Vercel => write!(f, "vercel"),
LlmProviderType::OpenRouter => write!(f, "openrouter"),
LlmProviderType::ClaudeCli => write!(f, "claude-cli"),
}
}
}
@ -772,6 +777,7 @@ mod test {
for (yaml_value, expected) in [
("vercel", LlmProviderType::Vercel),
("openrouter", LlmProviderType::OpenRouter),
("claude-cli", LlmProviderType::ClaudeCli),
] {
let parsed: LlmProviderType =
serde_yaml::from_str(yaml_value).expect("variant should deserialize");

View file

@ -0,0 +1,955 @@
//! Translation between Anthropic Messages API and Claude Code CLI's
//! `--output-format stream-json` / `--input-format stream-json` wire format.
//!
//! Claude Code CLI is invoked as a subprocess by `brightstaff` with flags such
//! as `claude -p --output-format stream-json --input-format stream-json
//! --include-partial-messages --verbose`. Each line on stdout is one JSON event
//! (NDJSON), and each line on stdin is a user-message JSON. This module owns
//! the pure (no-I/O) types and conversions; the runtime layer in brightstaff
//! does the actual spawning and streaming.
use serde::{Deserialize, Serialize};
use serde_json::{json, Value};
use serde_with::skip_serializing_none;
use thiserror::Error;
use uuid::Uuid;
use crate::apis::anthropic::{
MessagesContentBlock, MessagesContentDelta, MessagesMessage, MessagesMessageContent,
MessagesMessageDelta, MessagesRequest, MessagesResponse, MessagesRole, MessagesStopReason,
MessagesStreamEvent, MessagesStreamMessage, MessagesSystemPrompt, MessagesUsage,
};
/// Errors produced by translation between Anthropic Messages and Claude Code
/// stream-json.
#[derive(Debug, Error)]
pub enum ClaudeCliTranslationError {
/// The CLI itself reported a failure; `message` carries its text.
#[error("Claude CLI returned an error: {message}")]
CliError { message: String },
/// JSON serialization of a stdin payload failed. `#[from]` lets `?`
/// convert a `serde_json::Error` into this variant.
#[error("Failed to serialize stdin payload: {0}")]
SerializeStdin(#[from] serde_json::Error),
/// The event sequence finished without a terminal `result` event.
#[error("Claude CLI stream ended before a terminal `result` event")]
UnexpectedEnd,
}
// ---------------------------------------------------------------------------
// Wire types — output (Claude CLI -> us)
// ---------------------------------------------------------------------------
/// One line of NDJSON emitted on stdout by `claude -p --output-format
/// stream-json`. The CLI tags variants with a top-level `type` field, and
/// `system`/`result` carry an additional `subtype`.
///
/// Deserialization is internally tagged on `type` with snake_case variant
/// names (`system`, `assistant`, `user`, `stream_event`, `result`); any other
/// tag lands in `Unknown`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ClaudeCliEvent {
/// `type=system` events. The actual classification lives in `subtype`
/// (e.g. `init`, `api_retry`, `rate_limit_event`). We keep the raw fields
/// rather than enumerating subtypes so a new CLI release that adds a
/// subtype does not break parsing.
System {
#[serde(default)]
subtype: Option<String>,
#[serde(default)]
session_id: Option<String>,
#[serde(default)]
model: Option<String>,
#[serde(default)]
cwd: Option<String>,
// Remaining top-level fields not named above are collected here.
#[serde(flatten)]
extra: Value,
},
/// A complete assistant message (emitted after the corresponding
/// `stream_event` deltas finish). Useful for non-streaming consumers.
Assistant { message: ClaudeCliAssistantMessage },
/// A complete user message echoed back (when `--replay-user-messages` is
/// set). We currently ignore these in translation but keep the variant so
/// stray events do not cause deserialization failures.
User {
#[serde(default)]
message: Value,
},
/// Wrapped Anthropic SSE event. The CLI re-emits the raw streaming-API
/// shape here when `--include-partial-messages` is enabled. This is the
/// only variant `cli_event_to_messages_stream_event` translates.
StreamEvent { event: MessagesStreamEvent },
/// Terminal event marking the end of one CLI turn. `is_error == true`
/// means the underlying API call failed; `result` typically holds the
/// final assistant text or an error message.
Result {
#[serde(default)]
subtype: Option<String>,
#[serde(default)]
is_error: bool,
#[serde(default)]
duration_ms: Option<u64>,
#[serde(default)]
num_turns: Option<u32>,
#[serde(default)]
result: Option<String>,
#[serde(default)]
total_cost_usd: Option<f64>,
#[serde(default)]
usage: Option<ClaudeCliUsage>,
#[serde(default)]
session_id: Option<String>,
},
/// Catch-all for events the CLI may add in the future. We surface them in
/// logs but do not translate them to Anthropic events.
#[serde(other)]
Unknown,
}
/// Subset of the Anthropic message shape the CLI emits inside `assistant`
/// events. `content` is a list of `ClaudeCliContentBlock`, which decodes
/// known Anthropic blocks (text, tool_use, ...) and keeps anything else as
/// raw JSON. Every field is optional on input; `None` fields are omitted on
/// output (`skip_serializing_none`).
#[skip_serializing_none]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ClaudeCliAssistantMessage {
pub id: Option<String>,
#[serde(default)]
pub model: Option<String>,
#[serde(default)]
pub role: Option<String>,
// Defaults to an empty list when the field is absent.
#[serde(default)]
pub content: Vec<ClaudeCliContentBlock>,
#[serde(default)]
pub stop_reason: Option<String>,
#[serde(default)]
pub stop_sequence: Option<String>,
#[serde(default)]
pub usage: Option<ClaudeCliUsage>,
}
/// The CLI's `assistant.message.content[]` entries are a subset of Anthropic's
/// content blocks. This is an untagged enum: serde tries the variants in
/// declaration order, so an entry decodes as a `MessagesContentBlock` when it
/// matches one and otherwise falls back to raw JSON.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ClaudeCliContentBlock {
/// Anthropic-shaped content block (text, tool_use, thinking, ...).
Anthropic(MessagesContentBlock),
/// Anything we do not recognize is preserved as raw JSON so we can still
/// surface it in the `result` aggregation.
Unknown(Value),
}
/// Token accounting as reported by the CLI. Every counter is optional on
/// input and omitted on output when `None` (`skip_serializing_none`).
/// Converting into `MessagesUsage` treats missing input/output counts as 0.
#[skip_serializing_none]
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ClaudeCliUsage {
#[serde(default)]
pub input_tokens: Option<u32>,
#[serde(default)]
pub output_tokens: Option<u32>,
#[serde(default)]
pub cache_creation_input_tokens: Option<u32>,
#[serde(default)]
pub cache_read_input_tokens: Option<u32>,
}
impl From<ClaudeCliUsage> for MessagesUsage {
fn from(u: ClaudeCliUsage) -> Self {
MessagesUsage {
input_tokens: u.input_tokens.unwrap_or(0),
output_tokens: u.output_tokens.unwrap_or(0),
cache_creation_input_tokens: u.cache_creation_input_tokens,
cache_read_input_tokens: u.cache_read_input_tokens,
}
}
}
// ---------------------------------------------------------------------------
// Wire types — input (us -> Claude CLI)
// ---------------------------------------------------------------------------
/// One line of NDJSON written to the CLI's stdin when invoked with
/// `--input-format stream-json`. Serialized with an internal `type` tag, so
/// the `User` variant is emitted with `"type":"user"`.
#[derive(Debug, Clone, Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ClaudeCliInputEvent {
User {
message: ClaudeCliUserMessage,
/// The session id assigned by the CLI on first turn. Optional on the
/// first message; required (and must match) on subsequent turns.
/// Omitted from the JSON entirely when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
session_id: Option<String>,
},
}
/// The `message` object inside a stdin `user` event.
#[derive(Debug, Clone, Serialize)]
pub struct ClaudeCliUserMessage {
/// Always `"user"` as built by `messages_request_to_stdin_payload`.
pub role: &'static str,
/// A JSON string for plain-text content, or the JSON array of content
/// blocks for structured content.
pub content: Value,
}
// ---------------------------------------------------------------------------
// Conversions
// ---------------------------------------------------------------------------
/// Convert a `MessagesRequest` into the events to write, one JSON line each,
/// to the CLI's stdin.
///
/// Produces one `User` event per user-role message, in request order.
/// Non-user messages are skipped because the CLI manages assistant turns
/// itself. Callers may replay the whole list on first spawn or send only the
/// latest turn to a hot session.
///
/// `session_id`, when given, is copied onto every event so the CLI can verify
/// the turn belongs to the expected session.
///
/// # Errors
/// The current implementation always returns `Ok`.
pub fn messages_request_to_stdin_payload(
    req: &MessagesRequest,
    session_id: Option<&str>,
) -> Result<Vec<ClaudeCliInputEvent>, ClaudeCliTranslationError> {
    let events = req
        .messages
        .iter()
        .filter(|msg| msg.role == MessagesRole::User)
        .map(|msg| ClaudeCliInputEvent::User {
            message: ClaudeCliUserMessage {
                role: "user",
                content: message_content_to_cli_value(&msg.content),
            },
            session_id: session_id.map(str::to_owned),
        })
        .collect();
    Ok(events)
}
/// Produce the text to pass as `--append-system-prompt` when spawning the CLI
/// for this request, or `None` when the request carries no system prompt.
///
/// A plain-string prompt is returned as-is. For a block prompt the text
/// blocks are joined with newlines and other block kinds are dropped.
pub fn extract_system_prompt(req: &MessagesRequest) -> Option<String> {
    let system = req.system.as_ref()?;
    let text = match system {
        MessagesSystemPrompt::Single(text) => text.clone(),
        MessagesSystemPrompt::Blocks(blocks) => {
            let mut parts: Vec<&str> = Vec::new();
            for block in blocks.iter() {
                if let MessagesContentBlock::Text { text, .. } = block {
                    parts.push(text.as_str());
                }
            }
            parts.join("\n")
        }
    };
    Some(text)
}
/// Convert message content to the JSON value placed in a stdin `user` event:
/// a JSON string for plain text, or the serialized block array for
/// structured content. An empty array is used if serialization fails.
fn message_content_to_cli_value(content: &MessagesMessageContent) -> Value {
    match content {
        MessagesMessageContent::Single(text) => Value::from(text.as_str()),
        // Keep the structured block array so tool_result / image blocks
        // cross the stdin boundary intact.
        MessagesMessageContent::Blocks(blocks) => match serde_json::to_value(blocks) {
            Ok(array) => array,
            Err(_) => Value::Array(Vec::new()),
        },
    }
}
/// Map one CLI event to the Anthropic `MessagesStreamEvent` it wraps, if any.
///
/// Only `StreamEvent` has an SSE counterpart; its inner event is cloned and
/// returned. Every other variant (`system`, `assistant`, `user`, `result`,
/// unknown) yields `None`.
pub fn cli_event_to_messages_stream_event(ev: &ClaudeCliEvent) -> Option<MessagesStreamEvent> {
    if let ClaudeCliEvent::StreamEvent { event } = ev {
        Some(event.clone())
    } else {
        None
    }
}
/// Aggregate a sequence of CLI events into a single non-streaming
/// `MessagesResponse`. Used by the bridge when the client did not request
/// streaming.
///
/// Text and tool-use blocks are rebuilt from their `content_block_start` /
/// `content_block_delta` events; any other block kind is taken verbatim from
/// its `content_block_start`. All blocks are emitted ordered by their stream
/// index. Thinking/signature deltas are not folded into the aggregate.
///
/// If the stream events produced no content, the content of the last
/// `assistant` event (if any) is used instead, together with its id, model
/// and usage where those are still unset.
///
/// # Arguments
/// * `model` - fallback model name, used unless a `message_start` (or the
///   fallback `assistant` message) reports a non-empty model.
/// * `events` - CLI events for one turn, in emission order.
///
/// # Errors
/// * [`ClaudeCliTranslationError::CliError`] if a `result` event has
///   `is_error` set (checked before the missing-result case).
/// * [`ClaudeCliTranslationError::UnexpectedEnd`] if the iterator ends
///   without a terminal `result` event.
pub fn collect_to_messages_response<I>(
    model: &str,
    events: I,
) -> Result<MessagesResponse, ClaudeCliTranslationError>
where
    I: IntoIterator<Item = ClaudeCliEvent>,
{
    // Blocks that arrive fully formed in `content_block_start` (anything that
    // is not text or tool_use). They keep their stream index so they can be
    // merged back in position instead of jumping ahead of earlier blocks.
    let mut passthrough_blocks: Vec<(u32, MessagesContentBlock)> = Vec::new();
    // Accumulate per-index text deltas + tool-use input deltas as the CLI
    // emits content_block_start -> content_block_delta(s) -> content_block_stop.
    let mut text_accum: std::collections::HashMap<u32, String> = std::collections::HashMap::new();
    // index -> (tool id, tool name, raw JSON input assembled from deltas)
    let mut tool_accum: std::collections::HashMap<u32, (String, String, String)> =
        std::collections::HashMap::new();
    let mut block_order: Vec<(u32, BlockKind)> = Vec::new();
    let mut stop_reason = MessagesStopReason::EndTurn;
    let mut stop_sequence: Option<String> = None;
    let mut usage = MessagesUsage {
        input_tokens: 0,
        output_tokens: 0,
        cache_creation_input_tokens: None,
        cache_read_input_tokens: None,
    };
    let mut id = String::new();
    let mut model_out = model.to_string();
    let mut last_assistant_message: Option<ClaudeCliAssistantMessage> = None;
    let mut saw_result = false;
    let mut error_message: Option<String> = None;

    for ev in events {
        match ev {
            ClaudeCliEvent::StreamEvent { event } => match event {
                MessagesStreamEvent::MessageStart { message } => {
                    // The first message id wins; model and usage follow the
                    // most recent message_start.
                    if id.is_empty() {
                        id = message.id.clone();
                    }
                    if !message.model.is_empty() {
                        model_out = message.model.clone();
                    }
                    usage = message.usage.clone();
                }
                MessagesStreamEvent::ContentBlockStart {
                    index,
                    content_block,
                } => match content_block {
                    MessagesContentBlock::Text { text, .. } => {
                        text_accum.insert(index, text);
                        block_order.push((index, BlockKind::Text));
                    }
                    MessagesContentBlock::ToolUse {
                        id: tool_id, name, ..
                    } => {
                        // Anthropic streaming always starts a tool_use block
                        // with an empty `input` placeholder (`{}` or `null`);
                        // the real arguments arrive via `input_json_delta`s.
                        // Always start with an empty buffer so deltas
                        // assemble into valid JSON.
                        tool_accum.insert(index, (tool_id, name, String::new()));
                        block_order.push((index, BlockKind::ToolUse));
                    }
                    other => {
                        // Unknown block kind — preserve verbatim. We do not
                        // expect deltas for this index, but remember the
                        // index so the block keeps its place in the output.
                        passthrough_blocks.push((index, other));
                    }
                },
                MessagesStreamEvent::ContentBlockDelta { index, delta } => match delta {
                    MessagesContentDelta::TextDelta { text } => {
                        text_accum.entry(index).or_default().push_str(&text);
                    }
                    MessagesContentDelta::InputJsonDelta { partial_json } => {
                        if let Some((_, _, buf)) = tool_accum.get_mut(&index) {
                            buf.push_str(&partial_json);
                        }
                    }
                    // Thinking/signature deltas are surfaced to streaming
                    // clients but dropped from the non-streaming aggregate.
                    _ => {}
                },
                MessagesStreamEvent::ContentBlockStop { .. } => {}
                MessagesStreamEvent::MessageDelta {
                    delta,
                    usage: msg_usage,
                } => {
                    let MessagesMessageDelta {
                        stop_reason: sr,
                        stop_sequence: ss,
                    } = delta;
                    stop_reason = sr;
                    stop_sequence = ss;
                    // The MessageDelta usage carries final output_tokens.
                    usage.output_tokens = msg_usage.output_tokens;
                }
                MessagesStreamEvent::MessageStop | MessagesStreamEvent::Ping => {}
            },
            ClaudeCliEvent::Assistant { message } => {
                last_assistant_message = Some(message);
            }
            ClaudeCliEvent::Result {
                is_error,
                result,
                usage: result_usage,
                ..
            } => {
                saw_result = true;
                if is_error {
                    error_message = Some(result.unwrap_or_else(|| "Claude CLI failed".to_string()));
                }
                if let Some(u) = result_usage {
                    // Only overwrite counters the result actually reports, so
                    // a sparse result does not zero out streamed usage.
                    let merged: MessagesUsage = u.into();
                    if merged.input_tokens > 0 {
                        usage.input_tokens = merged.input_tokens;
                    }
                    if merged.output_tokens > 0 {
                        usage.output_tokens = merged.output_tokens;
                    }
                    if merged.cache_creation_input_tokens.is_some() {
                        usage.cache_creation_input_tokens = merged.cache_creation_input_tokens;
                    }
                    if merged.cache_read_input_tokens.is_some() {
                        usage.cache_read_input_tokens = merged.cache_read_input_tokens;
                    }
                }
            }
            ClaudeCliEvent::System { .. }
            | ClaudeCliEvent::User { .. }
            | ClaudeCliEvent::Unknown => {}
        }
    }

    if let Some(msg) = error_message {
        return Err(ClaudeCliTranslationError::CliError { message: msg });
    }
    if !saw_result {
        return Err(ClaudeCliTranslationError::UnexpectedEnd);
    }

    // Materialize accumulated blocks next to the verbatim ones, then order
    // everything by stream index. `sort_by_key` is stable, so entries that
    // share an index keep their relative insertion order.
    let mut indexed_blocks = passthrough_blocks;
    for (idx, kind) in block_order {
        match kind {
            BlockKind::Text => {
                if let Some(text) = text_accum.remove(&idx) {
                    indexed_blocks.push((
                        idx,
                        MessagesContentBlock::Text {
                            text,
                            cache_control: None,
                        },
                    ));
                }
            }
            BlockKind::ToolUse => {
                if let Some((tool_id, name, raw_input)) = tool_accum.remove(&idx) {
                    // No deltas means "no arguments"; unparseable input is
                    // kept as a raw string rather than silently discarded.
                    let input_value = if raw_input.is_empty() {
                        Value::Object(Default::default())
                    } else {
                        serde_json::from_str(&raw_input)
                            .unwrap_or_else(|_| Value::String(raw_input))
                    };
                    indexed_blocks.push((
                        idx,
                        MessagesContentBlock::ToolUse {
                            id: tool_id,
                            name,
                            input: input_value,
                            cache_control: None,
                        },
                    ));
                }
            }
        }
    }
    indexed_blocks.sort_by_key(|(idx, _)| *idx);
    let mut content_blocks: Vec<MessagesContentBlock> = indexed_blocks
        .into_iter()
        .map(|(_, block)| block)
        .collect();

    // If the streaming events did not include any content but the CLI sent a
    // final `assistant` message (common for short responses), use that as the
    // body of the response.
    if content_blocks.is_empty() {
        if let Some(msg) = last_assistant_message {
            for block in msg.content {
                if let ClaudeCliContentBlock::Anthropic(b) = block {
                    content_blocks.push(b);
                }
            }
            if id.is_empty() {
                if let Some(msg_id) = msg.id {
                    id = msg_id;
                }
            }
            if let Some(m) = msg.model {
                if !m.is_empty() {
                    model_out = m;
                }
            }
            if let Some(u) = msg.usage {
                // Fill only the counters nothing else has populated.
                let merged: MessagesUsage = u.into();
                if usage.input_tokens == 0 {
                    usage.input_tokens = merged.input_tokens;
                }
                if usage.output_tokens == 0 {
                    usage.output_tokens = merged.output_tokens;
                }
                if usage.cache_creation_input_tokens.is_none() {
                    usage.cache_creation_input_tokens = merged.cache_creation_input_tokens;
                }
                if usage.cache_read_input_tokens.is_none() {
                    usage.cache_read_input_tokens = merged.cache_read_input_tokens;
                }
            }
        }
    }

    // No id from the stream or the assistant message: synthesize one.
    if id.is_empty() {
        id = format!("msg_cli_{}", Uuid::new_v4().simple());
    }

    Ok(MessagesResponse {
        id,
        obj_type: "message".to_string(),
        role: MessagesRole::Assistant,
        content: content_blocks,
        model: model_out,
        stop_reason,
        stop_sequence,
        usage,
        container: None,
    })
}
/// Tags a started content block with the accumulator that holds its data in
/// `collect_to_messages_response`.
#[derive(Clone, Copy)]
enum BlockKind {
/// A text block; its content is gathered in `text_accum`.
Text,
/// A tool_use block; its id, name and raw input are gathered in `tool_accum`.
ToolUse,
}
/// Wrap a CLI-level failure message in an Anthropic-style error envelope.
///
/// The result has the shape
/// `{"type": "error", "error": {"type": "claude_cli_error", "message": ...}}`.
/// The brightstaff bridge serializes it and returns it with a 502/500 status
/// so the existing `llm_gateway` error handling sees a familiar shape.
pub fn cli_error_to_anthropic_error_body(message: &str) -> Value {
    let detail = json!({
        "type": "claude_cli_error",
        "message": message,
    });
    json!({
        "type": "error",
        "error": detail,
    })
}
/// Build a stand-in `message_start` event for streaming clients when the CLI
/// did not emit one (it usually does, but very small turns can skip straight
/// to `assistant`/`result`).
///
/// The message id is `msg_cli_<session_id>` when a session id is supplied and
/// `msg_cli_<random uuid>` otherwise. Content is empty, stop fields are unset
/// and all usage counters start at zero.
pub fn synthetic_message_start(model: &str, session_id: Option<&str>) -> MessagesStreamEvent {
    let id = match session_id {
        Some(session) => format!("msg_cli_{}", session),
        None => format!("msg_cli_{}", Uuid::new_v4().simple()),
    };
    let zero_usage = MessagesUsage {
        input_tokens: 0,
        output_tokens: 0,
        cache_creation_input_tokens: None,
        cache_read_input_tokens: None,
    };
    let message = MessagesStreamMessage {
        id,
        obj_type: "message".to_string(),
        role: MessagesRole::Assistant,
        content: Vec::new(),
        model: model.to_string(),
        stop_reason: None,
        stop_sequence: None,
        usage: zero_usage,
    };
    MessagesStreamEvent::MessageStart { message }
}
/// Parse one NDJSON line into a `ClaudeCliEvent`.
///
/// The line is trimmed first. Empty or whitespace-only lines yield `None` so
/// callers can simply skip them; anything else yields `Some` with the
/// deserialization result.
pub fn parse_ndjson_line(line: &str) -> Option<Result<ClaudeCliEvent, serde_json::Error>> {
    match line.trim() {
        "" => None,
        payload => Some(serde_json::from_str(payload)),
    }
}
// Unused helper to keep MessagesMessage in scope in case future tool_result
// translation needs to reach into the message shape directly.
// The body is intentionally empty; the function exists only to mention the
// type in a signature, and `allow(dead_code)` silences the unused warning.
#[allow(dead_code)]
fn _touch_messages_message_type(_m: MessagesMessage) {}
#[cfg(test)]
mod tests {
use super::*;
use crate::apis::anthropic::{MessagesMessage, MessagesMessageContent};
/// Test helper: a streaming `claude-cli/sonnet` request holding a single user
/// turn with `text` as its content and every optional field unset.
fn user_request(text: &str) -> MessagesRequest {
    let opening_turn = MessagesMessage {
        role: MessagesRole::User,
        content: MessagesMessageContent::Single(text.to_string()),
    };
    MessagesRequest {
        model: "claude-cli/sonnet".to_string(),
        messages: vec![opening_turn],
        max_tokens: 1024,
        container: None,
        mcp_servers: None,
        system: None,
        metadata: None,
        service_tier: None,
        thinking: None,
        temperature: None,
        top_p: None,
        top_k: None,
        stream: Some(true),
        stop_sequences: None,
        tools: None,
        tool_choice: None,
    }
}
/// A `system`/`init` line deserializes into `ClaudeCliEvent::System` with its
/// subtype, session id and model populated.
#[test]
fn parses_system_init_event() {
    let line = r#"{"type":"system","subtype":"init","session_id":"s1","model":"sonnet","cwd":"/tmp","tools":[]}"#;
    let parsed = parse_ndjson_line(line).expect("non-empty").expect("ok");
    let (subtype, session_id, model) = match parsed {
        ClaudeCliEvent::System {
            subtype,
            session_id,
            model,
            ..
        } => (subtype, session_id, model),
        other => panic!("expected System, got {other:?}"),
    };
    assert_eq!(subtype.as_deref(), Some("init"));
    assert_eq!(session_id.as_deref(), Some("s1"));
    assert_eq!(model.as_deref(), Some("sonnet"));
}
/// A `stream_event` line wrapping a `text_delta` parses and then translates
/// into the matching `MessagesStreamEvent::ContentBlockDelta`.
#[test]
fn parses_text_stream_event() {
    let line = r#"{"type":"stream_event","event":{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"hi"}}}"#;
    let parsed = parse_ndjson_line(line).unwrap().unwrap();
    let translated = cli_event_to_messages_stream_event(&parsed)
        .expect("text_delta should translate to MessagesStreamEvent");
    let (index, delta) = match translated {
        MessagesStreamEvent::ContentBlockDelta { index, delta } => (index, delta),
        other => panic!("expected ContentBlockDelta, got {other:?}"),
    };
    assert_eq!(index, 0);
    let text = match delta {
        MessagesContentDelta::TextDelta { text } => text,
        other => panic!("expected TextDelta, got {other:?}"),
    };
    assert_eq!(text, "hi");
}
/// A successful `result` line deserializes with its error flag, result text
/// and usage counters intact.
#[test]
fn parses_result_success_event() {
    let line = r#"{"type":"result","subtype":"success","is_error":false,"duration_ms":12,"num_turns":1,"result":"hi","total_cost_usd":0.001,"usage":{"input_tokens":4,"output_tokens":2},"session_id":"s1"}"#;
    let parsed = parse_ndjson_line(line).unwrap().unwrap();
    let (is_error, result, usage) = match parsed {
        ClaudeCliEvent::Result {
            is_error,
            result,
            usage,
            ..
        } => (is_error, result, usage),
        other => panic!("expected Result, got {other:?}"),
    };
    assert!(!is_error);
    assert_eq!(result.as_deref(), Some("hi"));
    assert_eq!(usage.unwrap().output_tokens, Some(2));
}
/// An unrecognized `type` tag parses successfully and lands in the
/// `ClaudeCliEvent::Unknown` catch-all instead of failing.
#[test]
fn unknown_event_type_does_not_break_parser() {
    let line = r#"{"type":"future_event_kind","data":{"foo":"bar"},"another":42}"#;
    let outcome = parse_ndjson_line(line).unwrap();
    let parsed = outcome.unwrap();
    assert!(matches!(parsed, ClaudeCliEvent::Unknown));
}
/// With a user / assistant / user history, the stdin payload contains two
/// events, each a user event tagged with the supplied session id.
#[test]
fn stdin_payload_skips_assistant_turns() {
    let mut req = user_request("hello");
    let assistant_reply = MessagesMessage {
        role: MessagesRole::Assistant,
        content: MessagesMessageContent::Single("hi back".to_string()),
    };
    let follow_up = MessagesMessage {
        role: MessagesRole::User,
        content: MessagesMessageContent::Single("how are you?".to_string()),
    };
    req.messages.push(assistant_reply);
    req.messages.push(follow_up);

    let payload = messages_request_to_stdin_payload(&req, Some("s1")).unwrap();
    assert_eq!(payload.len(), 2);
    for ev in &payload {
        match ev {
            ClaudeCliInputEvent::User {
                message,
                session_id,
            } => {
                assert_eq!(message.role, "user");
                assert_eq!(session_id.as_deref(), Some("s1"));
            }
        }
    }
}
// Feeds a complete streamed text turn (message_start, one text block built
// from two deltas, message_delta, message_stop, result) through the
// aggregator and checks id, model, usage, stop reason and the joined text.
#[test]
fn collect_to_messages_response_aggregates_text() {
let events = vec![
// message_start supplies the response id, model and input token count.
ClaudeCliEvent::StreamEvent {
event: MessagesStreamEvent::MessageStart {
message: MessagesStreamMessage {
id: "msg_1".to_string(),
obj_type: "message".to_string(),
role: MessagesRole::Assistant,
content: vec![],
model: "claude-sonnet-4-6".to_string(),
stop_reason: None,
stop_sequence: None,
usage: MessagesUsage {
input_tokens: 7,
output_tokens: 0,
cache_creation_input_tokens: None,
cache_read_input_tokens: None,
},
},
},
},
// Text block 0 starts empty; its content arrives in the two deltas below.
ClaudeCliEvent::StreamEvent {
event: MessagesStreamEvent::ContentBlockStart {
index: 0,
content_block: MessagesContentBlock::Text {
text: String::new(),
cache_control: None,
},
},
},
ClaudeCliEvent::StreamEvent {
event: MessagesStreamEvent::ContentBlockDelta {
index: 0,
delta: MessagesContentDelta::TextDelta {
text: "Hello ".to_string(),
},
},
},
ClaudeCliEvent::StreamEvent {
event: MessagesStreamEvent::ContentBlockDelta {
index: 0,
delta: MessagesContentDelta::TextDelta {
text: "world".to_string(),
},
},
},
ClaudeCliEvent::StreamEvent {
event: MessagesStreamEvent::ContentBlockStop { index: 0 },
},
// message_delta carries the stop reason and the final output token count.
ClaudeCliEvent::StreamEvent {
event: MessagesStreamEvent::MessageDelta {
delta: MessagesMessageDelta {
stop_reason: MessagesStopReason::EndTurn,
stop_sequence: None,
},
usage: MessagesUsage {
input_tokens: 0,
output_tokens: 12,
cache_creation_input_tokens: None,
cache_read_input_tokens: None,
},
},
},
ClaudeCliEvent::StreamEvent {
event: MessagesStreamEvent::MessageStop,
},
// Terminal result event; the aggregator returns UnexpectedEnd without one.
ClaudeCliEvent::Result {
subtype: Some("success".to_string()),
is_error: false,
duration_ms: Some(123),
num_turns: Some(1),
result: Some("Hello world".to_string()),
total_cost_usd: Some(0.001),
usage: Some(ClaudeCliUsage {
input_tokens: Some(7),
output_tokens: Some(12),
cache_creation_input_tokens: None,
cache_read_input_tokens: None,
}),
session_id: Some("s1".to_string()),
},
];
let resp = collect_to_messages_response("claude-cli/sonnet", events).unwrap();
// The model reported by message_start replaces the requested alias.
assert_eq!(resp.id, "msg_1");
assert_eq!(resp.model, "claude-sonnet-4-6");
assert_eq!(resp.usage.input_tokens, 7);
assert_eq!(resp.usage.output_tokens, 12);
assert!(matches!(resp.stop_reason, MessagesStopReason::EndTurn));
// Both deltas must be concatenated into a single text block.
match &resp.content[..] {
[MessagesContentBlock::Text { text, .. }] => assert_eq!(text, "Hello world"),
other => panic!("expected single Text block, got {other:?}"),
}
}
// Feeds a streamed tool_use turn whose JSON input is split across two
// `input_json_delta`s and checks that the aggregator reassembles and parses
// the input and reports the `ToolUse` stop reason.
#[test]
fn collect_to_messages_response_aggregates_tool_use() {
let events = vec![
ClaudeCliEvent::StreamEvent {
event: MessagesStreamEvent::MessageStart {
message: MessagesStreamMessage {
id: "msg_2".to_string(),
obj_type: "message".to_string(),
role: MessagesRole::Assistant,
content: vec![],
model: "sonnet".to_string(),
stop_reason: None,
stop_sequence: None,
usage: MessagesUsage {
input_tokens: 1,
output_tokens: 0,
cache_creation_input_tokens: None,
cache_read_input_tokens: None,
},
},
},
},
// The block starts with a `null` input placeholder; the aggregator ignores
// it and builds the input from the deltas that follow.
ClaudeCliEvent::StreamEvent {
event: MessagesStreamEvent::ContentBlockStart {
index: 0,
content_block: MessagesContentBlock::ToolUse {
id: "toolu_1".to_string(),
name: "get_weather".to_string(),
input: Value::Null,
cache_control: None,
},
},
},
// Neither fragment is valid JSON on its own; only the concatenation is.
ClaudeCliEvent::StreamEvent {
event: MessagesStreamEvent::ContentBlockDelta {
index: 0,
delta: MessagesContentDelta::InputJsonDelta {
partial_json: "{\"loc\":\"".to_string(),
},
},
},
ClaudeCliEvent::StreamEvent {
event: MessagesStreamEvent::ContentBlockDelta {
index: 0,
delta: MessagesContentDelta::InputJsonDelta {
partial_json: "SF\"}".to_string(),
},
},
},
ClaudeCliEvent::StreamEvent {
event: MessagesStreamEvent::ContentBlockStop { index: 0 },
},
ClaudeCliEvent::StreamEvent {
event: MessagesStreamEvent::MessageDelta {
delta: MessagesMessageDelta {
stop_reason: MessagesStopReason::ToolUse,
stop_sequence: None,
},
usage: MessagesUsage {
input_tokens: 0,
output_tokens: 5,
cache_creation_input_tokens: None,
cache_read_input_tokens: None,
},
},
},
// Terminal result without usage or result text.
ClaudeCliEvent::Result {
subtype: Some("success".to_string()),
is_error: false,
duration_ms: None,
num_turns: Some(1),
result: None,
total_cost_usd: None,
usage: None,
session_id: None,
},
];
let resp = collect_to_messages_response("sonnet", events).unwrap();
assert!(matches!(resp.stop_reason, MessagesStopReason::ToolUse));
match &resp.content[..] {
[MessagesContentBlock::ToolUse {
id, name, input, ..
}] => {
assert_eq!(id, "toolu_1");
assert_eq!(name, "get_weather");
// Input must be parsed JSON, not the raw concatenated string.
assert_eq!(input["loc"], "SF");
}
other => panic!("expected ToolUse block, got {other:?}"),
}
}
/// A lone `result` event flagged `is_error` surfaces as
/// `ClaudeCliTranslationError::CliError` carrying the CLI's result text.
#[test]
fn collect_to_messages_response_propagates_cli_error() {
    let failure = ClaudeCliEvent::Result {
        subtype: Some("error".to_string()),
        is_error: true,
        duration_ms: Some(5),
        num_turns: Some(0),
        result: Some("auth failed".to_string()),
        total_cost_usd: None,
        usage: None,
        session_id: None,
    };
    let outcome = collect_to_messages_response("sonnet", vec![failure]);
    match outcome.unwrap_err() {
        ClaudeCliTranslationError::CliError { message } => {
            assert!(message.contains("auth failed"));
        }
        other => panic!("expected CliError, got {other:?}"),
    }
}
/// An event sequence that never reaches a terminal `result` is rejected with
/// `ClaudeCliTranslationError::UnexpectedEnd`.
#[test]
fn collect_to_messages_response_unexpected_end() {
    let ping = ClaudeCliEvent::StreamEvent {
        event: MessagesStreamEvent::Ping,
    };
    let events: Vec<ClaudeCliEvent> = vec![ping];
    let err = collect_to_messages_response("sonnet", events).unwrap_err();
    assert!(matches!(err, ClaudeCliTranslationError::UnexpectedEnd));
}
/// With no stream events at all, the aggregate falls back to the final
/// `assistant` message for its id, usage and content.
#[test]
fn collect_to_messages_response_uses_assistant_when_no_deltas() {
    let reported_usage = ClaudeCliUsage {
        input_tokens: Some(2),
        output_tokens: Some(1),
        cache_creation_input_tokens: None,
        cache_read_input_tokens: None,
    };
    let reply_block = MessagesContentBlock::Text {
        text: "ok".to_string(),
        cache_control: None,
    };
    let assistant_event = ClaudeCliEvent::Assistant {
        message: ClaudeCliAssistantMessage {
            id: Some("msg_3".to_string()),
            model: Some("sonnet".to_string()),
            role: Some("assistant".to_string()),
            content: vec![ClaudeCliContentBlock::Anthropic(reply_block)],
            stop_reason: Some("end_turn".to_string()),
            stop_sequence: None,
            usage: Some(reported_usage),
        },
    };
    let result_event = ClaudeCliEvent::Result {
        subtype: Some("success".to_string()),
        is_error: false,
        duration_ms: None,
        num_turns: Some(1),
        result: None,
        total_cost_usd: None,
        usage: None,
        session_id: None,
    };

    let resp =
        collect_to_messages_response("sonnet", vec![assistant_event, result_event]).unwrap();
    assert_eq!(resp.id, "msg_3");
    assert_eq!(resp.usage.input_tokens, 2);
    assert_eq!(resp.usage.output_tokens, 1);
    match &resp.content[..] {
        [MessagesContentBlock::Text { text, .. }] => assert_eq!(text, "ok"),
        other => panic!("expected Text, got {other:?}"),
    }
}
/// A block-style system prompt with two text blocks is flattened into one
/// string with the block texts joined by a newline.
#[test]
fn extract_system_prompt_blocks_join_text() {
    let first = MessagesContentBlock::Text {
        text: "line 1".to_string(),
        cache_control: None,
    };
    let second = MessagesContentBlock::Text {
        text: "line 2".to_string(),
        cache_control: None,
    };
    let req = MessagesRequest {
        system: Some(MessagesSystemPrompt::Blocks(vec![first, second])),
        ..user_request("ignored")
    };
    let prompt = extract_system_prompt(&req);
    assert_eq!(prompt.as_deref(), Some("line 1\nline 2"));
}
/// Link-check only: constructs a `ToolResultContent` value so the type (used
/// by future tool_result translation) stays linkable as the surface evolves.
/// Despite the name, nothing is translated or round-tripped here.
#[test]
fn tool_result_content_round_trips_through_translation() {
    use crate::apis::anthropic::ToolResultContent;
    let placeholder = ToolResultContent::Text("noop".to_string());
    let _ = placeholder;
}
}

View file

@ -1,5 +1,6 @@
pub mod amazon_bedrock;
pub mod anthropic;
pub mod claude_cli;
pub mod openai;
pub mod openai_responses;
pub mod streaming_shapes;

View file

@ -92,6 +92,19 @@ providers:
- mistralai/mistral-embed
- mistralai/codestral-embed
- mistralai/codestral-embed-2505
claude-cli:
# Family aliases (always resolve to the latest model in the family).
- claude-cli/sonnet
- claude-cli/opus
- claude-cli/haiku
# Dated full ids (sourced from the Claude Code model configuration article;
# refresh by re-fetching that doc whenever Anthropic ships new models).
- claude-cli/claude-opus-4-7
- claude-cli/claude-sonnet-4-6
- claude-cli/claude-opus-4-6
- claude-cli/claude-opus-4-5-20251101
- claude-cli/claude-haiku-4-5-20251001
- claude-cli/claude-sonnet-4-5-20250929
anthropic:
- anthropic/claude-sonnet-4-6
- anthropic/claude-opus-4-6

View file

@ -48,6 +48,11 @@ pub enum ProviderId {
DigitalOcean,
Vercel,
OpenRouter,
/// Claude Code CLI invoked as a local subprocess by brightstaff. On the
/// wire it speaks the Anthropic Messages API exactly like
/// [`ProviderId::Anthropic`]; the difference is that no Anthropic API key
/// or network call is involved — the local `claude` binary is.
ClaudeCli,
}
impl TryFrom<&str> for ProviderId {
@ -81,6 +86,8 @@ impl TryFrom<&str> for ProviderId {
"do_ai" => Ok(ProviderId::DigitalOcean), // alias
"vercel" => Ok(ProviderId::Vercel),
"openrouter" => Ok(ProviderId::OpenRouter),
"claude-cli" => Ok(ProviderId::ClaudeCli),
"claude_cli" => Ok(ProviderId::ClaudeCli), // alias
_ => Err(format!("Unknown provider: {}", value)),
}
}
@ -107,6 +114,7 @@ impl ProviderId {
ProviderId::Qwen => "qwen",
ProviderId::ChatGPT => "chatgpt",
ProviderId::DigitalOcean => "digitalocean",
ProviderId::ClaudeCli => "claude-cli",
_ => return Vec::new(),
};
@ -144,6 +152,14 @@ impl ProviderId {
SupportedUpstreamAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions)
}
// ClaudeCli speaks the same wire protocol as Anthropic — the
// brightstaff bridge always presents itself as an Anthropic
// Messages API endpoint, so client requests in any shape get
// converted to AnthropicMessagesAPI on the way out.
(ProviderId::ClaudeCli, _) => {
SupportedUpstreamAPIs::AnthropicMessagesAPI(AnthropicApi::Messages)
}
// Vercel AI Gateway natively supports all three API types
(ProviderId::Vercel, SupportedAPIsFromClient::AnthropicMessagesAPI(_)) => {
SupportedUpstreamAPIs::AnthropicMessagesAPI(AnthropicApi::Messages)
@ -267,6 +283,7 @@ impl Display for ProviderId {
ProviderId::DigitalOcean => write!(f, "digitalocean"),
ProviderId::Vercel => write!(f, "vercel"),
ProviderId::OpenRouter => write!(f, "openrouter"),
ProviderId::ClaudeCli => write!(f, "claude-cli"),
}
}
}

View file

@ -0,0 +1,114 @@
//! End-to-end fixture tests for `apis::claude_cli`. Each NDJSON file under
//! `tests/fixtures/claude_cli/` represents one canned subprocess output. We
//! parse it line-by-line and feed it through the same translation entry points
//! the brightstaff bridge uses at runtime.
use std::fs;
use std::path::PathBuf;
use hermesllm::apis::anthropic::{
MessagesContentBlock, MessagesContentDelta, MessagesStopReason, MessagesStreamEvent,
};
use hermesllm::apis::claude_cli::{
cli_event_to_messages_stream_event, collect_to_messages_response, parse_ndjson_line,
ClaudeCliEvent, ClaudeCliTranslationError,
};
/// Resolve `name` to `<crate root>/tests/fixtures/claude_cli/<name>`, where
/// the crate root comes from `CARGO_MANIFEST_DIR` at compile time.
fn fixture_path(name: &str) -> PathBuf {
    let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    for segment in ["tests", "fixtures", "claude_cli", name] {
        path.push(segment);
    }
    path
}
/// Read the named fixture and parse every non-blank line into a
/// `ClaudeCliEvent`. Panics if the file cannot be read or a line fails to
/// deserialize.
fn load_events(name: &str) -> Vec<ClaudeCliEvent> {
    let body = match fs::read_to_string(fixture_path(name)) {
        Ok(text) => text,
        Err(e) => panic!("read fixture {name}: {e}"),
    };
    let mut events = Vec::new();
    for line in body.lines() {
        // Blank lines come back as `None` and are skipped.
        if let Some(parsed) = parse_ndjson_line(line) {
            match parsed {
                Ok(event) => events.push(event),
                Err(e) => panic!("{e}"),
            }
        }
    }
    events
}
/// The plain-text fixture aggregates into the expected `MessagesResponse`,
/// and its streaming projection starts with `message_start`, ends with
/// `message_stop`, and its text deltas concatenate to the same text.
#[test]
fn text_response_aggregates_into_messages_response() {
    let events = load_events("text_response.ndjson");

    let resp = collect_to_messages_response("claude-cli/sonnet", events.clone()).unwrap();
    assert_eq!(resp.id, "msg_01ABC");
    assert_eq!(resp.model, "claude-sonnet-4-6");
    assert_eq!(resp.usage.input_tokens, 12);
    assert_eq!(resp.usage.output_tokens, 4);
    assert!(matches!(resp.stop_reason, MessagesStopReason::EndTurn));
    match &resp.content[..] {
        [MessagesContentBlock::Text { text, .. }] => assert_eq!(text, "Hello, world!"),
        other => panic!("expected single Text, got {other:?}"),
    }

    // Verify the streaming projection emits exactly the events the Anthropic
    // SSE wire protocol expects, in order.
    let mut stream: Vec<MessagesStreamEvent> = Vec::new();
    for ev in events.iter() {
        if let Some(translated) = cli_event_to_messages_stream_event(ev) {
            stream.push(translated);
        }
    }
    assert!(matches!(
        stream[0],
        MessagesStreamEvent::MessageStart { .. }
    ));
    let final_event = stream.last().unwrap();
    assert!(matches!(final_event, MessagesStreamEvent::MessageStop));

    let mut text_deltas = String::new();
    for ev in &stream {
        if let MessagesStreamEvent::ContentBlockDelta {
            delta: MessagesContentDelta::TextDelta { text },
            ..
        } = ev
        {
            text_deltas.push_str(text);
        }
    }
    assert_eq!(text_deltas, "Hello, world!");
}
/// The tool-use fixture aggregates into a single `ToolUse` block whose input
/// was reassembled from the partial JSON deltas.
#[test]
fn tool_use_response_assembles_partial_json() {
    let events = load_events("tool_use_response.ndjson");
    let resp = collect_to_messages_response("sonnet", events).unwrap();
    assert!(matches!(resp.stop_reason, MessagesStopReason::ToolUse));
    let (id, name, input) = match &resp.content[..] {
        [MessagesContentBlock::ToolUse {
            id, name, input, ..
        }] => (id, name, input),
        other => panic!("expected single ToolUse block, got {other:?}"),
    };
    assert_eq!(id, "toolu_W");
    assert_eq!(name, "get_weather");
    assert_eq!(input["city"], "Seattle");
}
/// The error fixture ends in an `is_error` result, so aggregation fails with
/// a `CliError` whose message mentions the upstream 529.
#[test]
fn error_response_returns_cli_error() {
    let events = load_events("error_response.ndjson");
    let outcome = collect_to_messages_response("sonnet", events);
    let message = match outcome.unwrap_err() {
        ClaudeCliTranslationError::CliError { message } => message,
        other => panic!("expected CliError, got {other:?}"),
    };
    assert!(
        message.contains("529"),
        "expected 529 in error message, got: {message}"
    );
}
/// `system` retry and rate-limit notices ahead of a successful turn do not
/// affect the aggregate: the response is still the plain "ok" text.
#[test]
fn retry_then_success_is_treated_as_success() {
    let events = load_events("retry_then_success.ndjson");
    let outcome = collect_to_messages_response("sonnet", events);
    let resp = outcome.unwrap();
    assert!(matches!(resp.stop_reason, MessagesStopReason::EndTurn));
    match &resp.content[..] {
        [MessagesContentBlock::Text { text, .. }] => assert_eq!(text, "ok"),
        other => panic!("expected Text block, got {other:?}"),
    }
}

View file

@ -0,0 +1,3 @@
{"type":"system","subtype":"init","session_id":"err-1","model":"sonnet","cwd":"/tmp","tools":[]}
{"type":"system","subtype":"api_retry","attempt":1,"reason":"529 overloaded"}
{"type":"result","subtype":"error","is_error":true,"duration_ms":1200,"num_turns":0,"result":"Anthropic API returned 529 after 3 retries","total_cost_usd":0,"session_id":"err-1"}

View file

@ -0,0 +1,10 @@
{"type":"system","subtype":"init","session_id":"retry-1","model":"sonnet","cwd":"/tmp","tools":[]}
{"type":"system","subtype":"api_retry","attempt":1,"reason":"529 overloaded"}
{"type":"system","subtype":"rate_limit_event","reset_at":"2026-05-04T18:30:00Z"}
{"type":"stream_event","event":{"type":"message_start","message":{"id":"msg_retry","type":"message","role":"assistant","content":[],"model":"sonnet","stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":3,"output_tokens":0}}}}
{"type":"stream_event","event":{"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}}
{"type":"stream_event","event":{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"ok"}}}
{"type":"stream_event","event":{"type":"content_block_stop","index":0}}
{"type":"stream_event","event":{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":1}}}
{"type":"stream_event","event":{"type":"message_stop"}}
{"type":"result","subtype":"success","is_error":false,"duration_ms":2100,"num_turns":1,"result":"ok","total_cost_usd":0.00009,"usage":{"input_tokens":3,"output_tokens":1},"session_id":"retry-1"}

View file

@ -0,0 +1,10 @@
{"type":"system","subtype":"init","session_id":"a1b2c3","model":"claude-sonnet-4-6","cwd":"/tmp","tools":["Bash","Read"]}
{"type":"stream_event","event":{"type":"message_start","message":{"id":"msg_01ABC","type":"message","role":"assistant","content":[],"model":"claude-sonnet-4-6","stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":12,"output_tokens":0}}}}
{"type":"stream_event","event":{"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}}
{"type":"stream_event","event":{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello"}}}
{"type":"stream_event","event":{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":", world!"}}}
{"type":"stream_event","event":{"type":"content_block_stop","index":0}}
{"type":"stream_event","event":{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":4}}}
{"type":"stream_event","event":{"type":"message_stop"}}
{"type":"assistant","message":{"id":"msg_01ABC","type":"message","role":"assistant","model":"claude-sonnet-4-6","content":[{"type":"text","text":"Hello, world!"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":12,"output_tokens":4}}}
{"type":"result","subtype":"success","is_error":false,"duration_ms":521,"num_turns":1,"result":"Hello, world!","total_cost_usd":0.00012,"usage":{"input_tokens":12,"output_tokens":4},"session_id":"a1b2c3"}

View file

@ -0,0 +1,9 @@
{"type":"system","subtype":"init","session_id":"tool-1","model":"sonnet","cwd":"/tmp","tools":[]}
{"type":"stream_event","event":{"type":"message_start","message":{"id":"msg_tool","type":"message","role":"assistant","content":[],"model":"sonnet","stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":20,"output_tokens":0}}}}
{"type":"stream_event","event":{"type":"content_block_start","index":0,"content_block":{"type":"tool_use","id":"toolu_W","name":"get_weather","input":{}}}}
{"type":"stream_event","event":{"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":"{\"city\":\""}}}
{"type":"stream_event","event":{"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":"Seattle\"}"}}}
{"type":"stream_event","event":{"type":"content_block_stop","index":0}}
{"type":"stream_event","event":{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":7}}}
{"type":"stream_event","event":{"type":"message_stop"}}
{"type":"result","subtype":"success","is_error":false,"duration_ms":701,"num_turns":1,"result":null,"total_cost_usd":0.00021,"usage":{"input_tokens":20,"output_tokens":7},"session_id":"tool-1"}

View file

@ -0,0 +1,49 @@
# Claude Code CLI as a Plano provider
This demo wires the locally installed `claude` binary as a Plano
`model_provider`. The single line under `model_providers:`
```yaml
model_providers:
- model: claude-cli/*
default: true
```
is enough to:
1. Auto-fill `provider_interface: claude-cli`, `base_url: http://127.0.0.1:14001`
and a placeholder `access_key` (the CLI uses its own login keychain).
2. Start a localhost bridge inside `brightstaff` that spawns `claude -p
--output-format stream-json --input-format stream-json` for each
conversation.
3. Expose every Claude Code model — `claude-cli/sonnet`, `claude-cli/opus`,
`claude-cli/haiku`, plus dated full ids — at `GET /v1/models`.
## Running
```bash
# Make sure the CLI is logged in. You can use API key billing or a paid Claude subscription.
claude auth login
# Start Plano in native mode.
planoai up demos/integrations/claude_cli/config.yaml
```
Then point any OpenAI- or Anthropic-style client at `http://localhost:12000`
and pick any `claude-cli/...` model. Plano routes the request through Envoy
to the brightstaff bridge, which asks the local `claude` binary to handle
it.
## Optional overrides
Set these env vars before `planoai up` if you need to tweak the bridge:
| Env var | Default | Meaning |
| ----------------------------- | ------------------- | -------------------------------------- |
| `CLAUDE_CLI_BIN` | `claude` | Path to the CLI binary. |
| `CLAUDE_CLI_PERMISSION_MODE` | `bypassPermissions` | `--permission-mode` flag value. |
| `CLAUDE_CLI_LISTEN_ADDR` | `127.0.0.1:14001` | Bridge listen address. |
| `CLAUDE_CLI_SESSION_TTL_SECS` | `600` | Idle TTL before a child is killed. |
| `CLAUDE_CLI_WATCHDOG_SECS` | `120` | Per-line watchdog inside one CLI turn. |
| `CLAUDE_CLI_MAX_SESSIONS` | `64` | Hard cap on concurrent CLI children. |

View file

@ -0,0 +1,27 @@
version: v0.4.0
# Claude Code CLI as a Plano model_provider.
#
# The single line below is everything you need: Plano detects the
# `claude-cli/*` namespace, auto-fills the provider_interface, base_url and
# placeholder access_key, and starts a localhost bridge inside brightstaff
# that shells out to the `claude` binary on your $PATH for each request.
#
# Requirements:
# - `claude --version` must work in the same shell as `planoai up`.
# - Auth happens via the CLI's own `claude auth login` (no API key needed
# in Plano).
#
# Optional overrides via env (set before `planoai up`):
# CLAUDE_CLI_BIN=/custom/path/to/claude
# CLAUDE_CLI_PERMISSION_MODE=default
# CLAUDE_CLI_LISTEN_ADDR=127.0.0.1:14001
listeners:
- type: model
name: model_listener
port: 12000
model_providers:
- model: claude-cli/*
default: true