ci(code-mode): cross-platform smoke matrix + one-shot diagnose script

- x-code-mode-smoke.yml: on apps/x PRs, package the app on mac/linux/windows
  and run acp-smoke.mjs, which asserts (1) adapters staged + native engines
  stripped, (2) each staged adapter boots from the packaged app via the
  packaged Electron binary and answers ACP initialize, (3) a fake engine that
  launches but never responds is converted into a clear startup-timeout error
  instead of hanging forever (the silent-hang class)
- diagnose-code-mode.sh: colleagues run one command and send one blob
  (engine versions/paths/types, login-shell vs GUI PATH, auth presence,
  stream-json probe, newest SDK debug log) — one round trip instead of five
- forge.config.cjs: only sign/notarize when APPLE_ID is set, so unsigned
  local mac builds and the CI smoke matrix can package deterministically
- client.ts: startup timeout overridable via ROWBOAT_ACP_STARTUP_TIMEOUT_MS
  (CI uses 10s; also an escape hatch for slow MCP-heavy setups)

Verified on Windows: all smoke checks pass, including the end-to-end
fake-hanging-engine timeout (fails in 10.0s with the stderr-enriched error).
This commit is contained in:
Gagancreates 2026-06-13 00:24:14 +05:30
parent ae362f50f4
commit d2d1814bea
5 changed files with 371 additions and 13 deletions

49
.github/workflows/x-code-mode-smoke.yml vendored Normal file
View file

@ -0,0 +1,49 @@
name: Code Mode Smoke
# Packages the Electron app on all three OSes and smoke-tests code mode's
# cross-platform plumbing (adapter staging, engine stripping, ACP handshake,
# silent-hang startup timeout) — the failure class that previously only
# surfaced on a colleague's machine. See apps/x/apps/main/scripts/acp-smoke.mjs.
on:
  pull_request:
    paths:
      - 'apps/x/**'
      - '.github/workflows/x-code-mode-smoke.yml'
  workflow_dispatch:

# The job only checks out and builds; it never writes back to the repository.
permissions:
  contents: read

jobs:
  smoke:
    strategy:
      # Keep the other OS legs running when one fails: the point of the matrix
      # is to see which platforms are affected.
      fail-fast: false
      matrix:
        os: [macos-latest, ubuntu-latest, windows-latest]
    runs-on: ${{ matrix.os }}
    # This workflow exists to catch hangs; do not let a hung packaging or smoke
    # step sit on a runner for the 6-hour default job timeout.
    timeout-minutes: 45
    steps:
      - name: Checkout code
        uses: actions/checkout@v6
      - name: Setup pnpm
        uses: pnpm/action-setup@v6
        with:
          version: 10
      - name: Setup Node.js
        uses: actions/setup-node@v6
        with:
          node-version: 24.15.0
          cache: 'pnpm'
          cache-dependency-path: 'apps/x/pnpm-lock.yaml'
      - name: Install dependencies
        run: pnpm install --frozen-lockfile
        working-directory: apps/x
      # No APPLE_ID is set here, so forge.config.cjs skips signing/notarization.
      - name: Package app (unsigned)
        run: npm run package
        working-directory: apps/x/apps/main
      - name: Run code-mode smoke test
        run: node scripts/acp-smoke.mjs
        working-directory: apps/x/apps/main

View file

@ -105,18 +105,24 @@ module.exports = {
extendInfo: {
NSAudioCaptureUsageDescription: 'Rowboat needs access to system audio to transcribe meetings from other apps (Zoom, Meet, etc.)',
},
osxSign: {
batchCodesignCalls: true,
optionsForFile: () => ({
entitlements: path.join(__dirname, 'entitlements.plist'),
'entitlements-inherit': path.join(__dirname, 'entitlements.plist'),
}),
},
osxNotarize: {
appleId: process.env.APPLE_ID,
appleIdPassword: process.env.APPLE_PASSWORD,
teamId: process.env.APPLE_TEAM_ID
},
// Sign/notarize only when release credentials are present (the release
// workflow sets APPLE_ID). Local mac dev builds and the CI smoke matrix
// have no signing identity — leaving these unconditional makes
// `npm run package` fail there.
...(process.env.APPLE_ID ? {
osxSign: {
batchCodesignCalls: true,
optionsForFile: () => ({
entitlements: path.join(__dirname, 'entitlements.plist'),
'entitlements-inherit': path.join(__dirname, 'entitlements.plist'),
}),
},
osxNotarize: {
appleId: process.env.APPLE_ID,
appleIdPassword: process.env.APPLE_PASSWORD,
teamId: process.env.APPLE_TEAM_ID
},
} : {}),
// Since we bundle the main process with esbuild, we don't need the workspace
// node_modules. These settings prevent Forge's dependency walker (flora-colossus)
// from trying to analyze/copy node_modules, which fails with pnpm's symlinked

View file

@ -0,0 +1,210 @@
#!/usr/bin/env node
// Code-mode smoke test, run by .github/workflows/x-code-mode-smoke.yml on
// mac/linux/windows after `npm run package`. Catches the cross-platform failure
// modes that previously only surfaced on a colleague's machine:
//
// 1. staging — ACP adapters present in the packaged app, native engines stripped
// 2. handshake — each staged adapter boots from the packaged app via the packaged
// Electron binary (ELECTRON_RUN_AS_NODE) and answers ACP initialize
// 3. timeout — an engine that launches but never responds (the silent-hang class,
// e.g. an outdated local CLI) is converted into a clear error by
// AcpClient's startup deadline instead of pending forever
//
// Usage: node scripts/acp-smoke.mjs (cwd: apps/x/apps/main, after npm run package)
import { spawn } from 'child_process';
import { mkdtempSync, writeFileSync, readFileSync, readdirSync, statSync, existsSync } from 'fs';
import { tmpdir } from 'os';
import * as path from 'path';
import { fileURLToPath, pathToFileURL } from 'url';
// This script lives in <mainDir>/scripts, so the app directory is two
// dirname() hops above the script file itself.
const thisFile = fileURLToPath(import.meta.url);
const mainDir = path.dirname(path.dirname(thisFile));
// Where `npm run package` leaves the packaged app.
const outDir = path.join(mainDir, 'out');

// Number of failed checks so far; decides the process exit code at the end.
let failures = 0;

// Report a passing check.
const ok = (msg) => {
  console.log(` PASS ${msg}`);
};

// Report a failing check and count it; the run keeps going so that every
// failure is listed in a single pass.
const fail = (msg) => {
  failures += 1;
  console.error(` FAIL ${msg}`);
};
// ---------------------------------------------------------------------------
// Locate the packaged app for this platform.
// ---------------------------------------------------------------------------
/**
 * Find the packaged app that `npm run package` produced for this platform.
 *
 * Takes the first `Rowboat-*` directory under `outDir`, then resolves the two
 * paths the remaining checks need.
 *
 * @returns {{ appRoot: string, electronBin: string }} `appRoot` is the app
 *   directory inside the package's resources; `electronBin` is the packaged
 *   executable.
 * @throws {Error} when nothing is packaged, or the expected bundle/binary is missing.
 */
function findPackagedApp() {
  let candidates = [];
  if (existsSync(outDir)) {
    candidates = readdirSync(outDir).filter((name) => {
      if (!name.startsWith('Rowboat-')) return false;
      return statSync(path.join(outDir, name)).isDirectory();
    });
  }
  if (candidates.length === 0) {
    throw new Error(`no packaged app under ${outDir} — run npm run package first`);
  }
  const [firstDir] = candidates;
  const packageRoot = path.join(outDir, firstDir);

  if (process.platform === 'darwin') {
    // macOS layout: <Name>.app/Contents/{MacOS/<binary>, Resources/app}
    const bundle = readdirSync(packageRoot).find((name) => name.endsWith('.app'));
    if (!bundle) throw new Error(`no .app bundle in ${packageRoot}`);
    const contents = path.join(packageRoot, bundle, 'Contents');
    const macOsDir = path.join(contents, 'MacOS');
    const [firstBinary] = readdirSync(macOsDir);
    return {
      appRoot: path.join(contents, 'Resources', 'app'),
      electronBin: path.join(macOsDir, firstBinary),
    };
  }

  // Windows / Linux layout: the executable sits next to resources/app.
  const binaryPattern = process.platform === 'win32' ? /^rowboat\.exe$/i : /^rowboat$/i;
  const binName = readdirSync(packageRoot).find((file) => binaryPattern.test(file));
  if (!binName) throw new Error(`no rowboat binary in ${packageRoot}`);
  return {
    appRoot: path.join(packageRoot, 'resources', 'app'),
    electronBin: path.join(packageRoot, binName),
  };
}
// ---------------------------------------------------------------------------
// 1. Staging assertions
// ---------------------------------------------------------------------------
// ACP adapter packages that must be staged inside the packaged app.
const ADAPTERS = ['claude-agent-acp', 'codex-acp'].map((name) => `@agentclientprotocol/${name}`);
// Directory paths of the per-platform native engine packages, with either
// path separator; any match under the staged tree is a leak.
const ENGINE_DIR_RE = /@anthropic-ai[\\/]claude-agent-sdk-(win32|darwin|linux)|@openai[\\/]codex-(win32|darwin|linux)/;
// 10 MiB per-file ceiling: engines are ~230MB; nothing legit comes close.
const MAX_FILE_BYTES = 10 * 1024 ** 2;
/**
 * Check 1/3: the ACP adapters are staged in the packaged app and no native
 * engine leaked in alongside them.
 *
 * Every problem is reported through fail(); nothing is thrown for a failed
 * assertion, so later checks still run.
 *
 * @param {string} appRoot - app directory inside the packaged application.
 */
function checkStaging(appRoot) {
  console.log('\n[1/3] staging');
  const acpRoot = path.join(appRoot, '.package', 'acp', 'node_modules');
  if (!existsSync(acpRoot)) {
    fail(`staged adapters missing: ${acpRoot}`);
    return;
  }

  ADAPTERS.forEach((pkg) => {
    const manifest = path.join(acpRoot, ...pkg.split('/'), 'package.json');
    if (!existsSync(manifest)) {
      fail(`${pkg} NOT staged (${manifest})`);
      return;
    }
    ok(`${pkg} staged`);
  });

  const tally = { engines: 0, oversized: 0, bytes: 0 };

  // Depth-first walk of the staged tree. A directory that looks like a native
  // engine is reported and not descended into; every other entry is sized.
  function scan(dir) {
    readdirSync(dir, { withFileTypes: true }).forEach((entry) => {
      const full = path.join(dir, entry.name);
      if (!entry.isDirectory()) {
        const { size } = statSync(full);
        tally.bytes += size;
        if (size > MAX_FILE_BYTES) {
          tally.oversized += 1;
          fail(`oversized file (${(size / 1e6).toFixed(0)}MB): ${full}`);
        }
        return;
      }
      if (ENGINE_DIR_RE.test(full)) {
        tally.engines += 1;
        fail(`native engine leaked into package: ${full}`);
        return;
      }
      scan(full);
    });
  }
  scan(acpRoot);

  if (tally.engines === 0) ok('native engines stripped');
  if (tally.oversized === 0) {
    ok(`no file over ${Math.round(MAX_FILE_BYTES / 1e6)}MB (acp total: ${(tally.bytes / 1e6).toFixed(0)}MB)`);
  }
}
// ---------------------------------------------------------------------------
// 2. Packaged-adapter ACP initialize round-trip
// ---------------------------------------------------------------------------
/**
 * Resolve the entry script of a staged ACP adapter from its package.json.
 *
 * `bin` may be a single path string or a `{ name: path }` map; for a map the
 * first entry is used.
 *
 * @param {string} appRoot - app directory inside the packaged application.
 * @param {string} pkg - adapter package name, e.g. `@scope/name`.
 * @returns {string} path of the adapter's entry script inside the staged package.
 * @throws {Error} when the manifest cannot be read/parsed, or declares no usable `bin`.
 */
function adapterEntry(appRoot, pkg) {
  const pkgDir = path.join(appRoot, '.package', 'acp', 'node_modules', ...pkg.split('/'));
  const manifestPath = path.join(pkgDir, 'package.json');
  const { bin } = JSON.parse(readFileSync(manifestPath, 'utf8'));
  const rel = typeof bin === 'string' ? bin : Object.values(bin ?? {})[0];
  // A missing or empty `bin` used to surface as an opaque TypeError from
  // Object.values()/path.join(); name the package and manifest instead.
  if (typeof rel !== 'string' || rel === '') {
    throw new Error(`${pkg}: no "bin" entry in ${manifestPath} — cannot locate the adapter entry script`);
  }
  return path.join(pkgDir, rel);
}
// allowEngineError: codex-acp spawns its engine DURING initialize (claude-acp only
// at session creation). With the fake engine it answers a structured engine error —
// which still proves what this check is for: the adapter is staged, its dependency
// closure loads, it boots and speaks JSON-RPC. Only a non-response (crash, missing
// module, early exit) is a staging failure.
/**
 * Check 2/3 for one adapter: spawn it from the packaged app with the packaged
 * Electron binary (in ELECTRON_RUN_AS_NODE mode), send a JSON-RPC `initialize`
 * request on stdin and wait for the reply with id 1 on stdout.
 *
 * The outcome is recorded through ok()/fail(); the returned promise always
 * resolves and never rejects.
 *
 * @param {string} electronBin - packaged Electron executable.
 * @param {string} entry - adapter entry script to run.
 * @param {string} label - name used in the PASS/FAIL line.
 * @param {string} fakeEngine - path handed to the adapter as its engine executable.
 * @param {boolean} allowEngineError - treat a JSON-RPC error reply as a pass.
 * @returns {Promise<void>} settles once a reply arrived, the adapter exited,
 *   the spawn failed, or 30s elapsed.
 */
function initializeRoundTrip(electronBin, entry, label, fakeEngine, allowEngineError) {
  return new Promise((resolve) => {
    const child = spawn(electronBin, [entry], {
      stdio: ['pipe', 'pipe', 'pipe'],
      env: {
        ...process.env,
        ELECTRON_RUN_AS_NODE: '1',
        // Adapters must boot without a real engine installed (CI has none), so
        // both are pointed at the fake one. claude-acp only spawns its engine
        // at session creation; codex-acp spawns it during initialize, which is
        // why callers pass allowEngineError for it.
        CLAUDE_CODE_EXECUTABLE: fakeEngine,
        CODEX_PATH: fakeEngine,
      },
    });
    let stdout = '';
    let stderr = '';
    let done = false;
    // Settle exactly once: stop the deadline, stop the adapter, record the result.
    const finish = (err) => {
      if (done) return;
      done = true;
      clearTimeout(deadline);
      child.kill();
      if (err) fail(`${label}: ${err}${stderr.trim() ? `\n stderr: ${stderr.trim().slice(-600)}` : ''}`);
      else ok(`${label}: ACP initialize answered`);
      resolve();
    };
    const deadline = setTimeout(() => finish('no initialize response within 30s'), 30_000);
    child.on('error', (e) => finish(`spawn failed: ${e.message}`));
    child.on('exit', (code) => finish(`adapter exited early (code ${code})`));
    // An adapter that dies before consuming the request makes the stdin write
    // fail with EPIPE. Without a listener that 'error' event is an uncaught
    // exception that aborts the whole smoke run; the 'error'/'exit' handlers
    // above already record the real cause, so the write failure is ignored.
    child.stdin.on('error', () => {});
    // Keep only the tail of stderr; it is attached to the failure message.
    child.stderr.on('data', (d) => { stderr = (stderr + d).slice(-4000); });
    child.stdout.on('data', (d) => {
      stdout += d;
      // Re-scan everything received so far: a reply may arrive split across chunks.
      for (const line of stdout.split('\n')) {
        try {
          const msg = JSON.parse(line);
          if (msg.id === 1 && ('result' in msg || 'error' in msg)) {
            if (msg.error && allowEngineError) {
              console.log(` (engine error expected with fake engine: ${msg.error.message})`);
              return finish(undefined);
            }
            return finish(msg.error ? `initialize error: ${JSON.stringify(msg.error)}` : undefined);
          }
        } catch { /* partial or non-JSON line — keep waiting for more data */ }
      }
    });
    child.stdin.write(JSON.stringify({
      jsonrpc: '2.0', id: 1, method: 'initialize',
      params: { protocolVersion: 1, clientCapabilities: { fs: { readTextFile: true, writeTextFile: true } } },
    }) + '\n');
  });
}
// ---------------------------------------------------------------------------
// 3. Silent-hang → startup-timeout test (the Arjun scenario, end to end)
// Real workspace adapter + a fake engine that launches and then never speaks.
// AcpClient.newSession() must reject with the startup-timeout error.
// ---------------------------------------------------------------------------
/**
 * Check 3/3: with an engine that launches and then never speaks,
 * AcpClient.newSession() must reject with the startup-timeout error instead
 * of staying pending.
 *
 * Uses the built workspace core package (packages/core/dist), not the
 * packaged app.
 *
 * @param {string} fakeEngine - path of the never-responding engine script.
 * @returns {Promise<void>}
 */
async function checkStartupTimeout(fakeEngine) {
  console.log('\n[3/3] startup timeout (fake hanging engine)');
  const acpDist = path.join(mainDir, '..', '..', 'packages', 'core', 'dist', 'code-mode', 'acp');
  const loadFromDist = (file) => import(pathToFileURL(path.join(acpDist, file)).href);

  // Both variables are set before client.js is imported, so they are already
  // in place when that module is evaluated.
  process.env.CLAUDE_CODE_EXECUTABLE = fakeEngine;
  process.env.ROWBOAT_ACP_STARTUP_TIMEOUT_MS = '10000';

  const { AcpClient } = await loadFromDist('client.js');
  const { PermissionBroker } = await loadFromDist('permission-broker.js');
  const broker = new PermissionBroker({ policy: 'yolo', ask: async () => 'allow_once' });
  const cwd = mkdtempSync(path.join(tmpdir(), 'acp-smoke-'));
  const client = new AcpClient({ agent: 'claude', cwd, broker, onEvent: () => {} });

  try {
    await client.start(); // real adapter boots fine — the ENGINE is what hangs
    const startedAt = Date.now();

    let rejected = false;
    let reason;
    try {
      await client.newSession();
    } catch (err) {
      rejected = true;
      reason = err;
    }

    if (!rejected) {
      fail('newSession resolved against a hanging engine — timeout never fired');
    } else {
      const secs = ((Date.now() - startedAt) / 1000).toFixed(1);
      const msg = reason instanceof Error ? reason.message : String(reason);
      if (msg.includes('timed out')) {
        ok(`newSession failed fast (${secs}s): ${msg.split('\n')[0].slice(0, 160)}`);
      } else {
        fail(`newSession rejected but not with the startup timeout: ${msg.slice(0, 300)}`);
      }
    }
  } finally {
    client.dispose();
  }
}
// ---------------------------------------------------------------------------
/**
 * Run the three checks in order and exit with status 1 if any check recorded
 * a failure, 0 otherwise.
 */
async function main() {
  // A fake engine: launches, swallows stdin, never answers — models an
  // outdated/incompatible local CLI, the silent-hang failure mode.
  const engineDir = mkdtempSync(path.join(tmpdir(), 'fake-engine-'));
  const fakeEngine = path.join(engineDir, 'fake-claude.js');
  writeFileSync(fakeEngine, 'process.stdin.resume(); /* never respond */\n');

  const packaged = findPackagedApp();
  console.log(`packaged app: ${packaged.appRoot}`);

  checkStaging(packaged.appRoot);

  console.log('\n[2/3] packaged adapter handshake');
  // One adapter at a time, so each PASS/FAIL line follows its own output.
  for (const pkg of ADAPTERS) {
    const entry = adapterEntry(packaged.appRoot, pkg);
    // Only the codex adapter is allowed to answer initialize with an engine error.
    const tolerateEngineError = pkg.includes('codex');
    await initializeRoundTrip(packaged.electronBin, entry, pkg, fakeEngine, tolerateEngineError);
  }

  await checkStartupTimeout(fakeEngine);

  const allPassed = failures === 0;
  console.log(allPassed ? '\nall checks passed' : `\n${failures} check(s) FAILED`);
  process.exit(allPassed ? 0 : 1);
}
// Entry point. Anything that rejects out of main() (for example no packaged
// app found, or an unreadable adapter manifest) is printed and exits with 1.
main().catch((e) => { console.error(e); process.exit(1); });

View file

@ -33,7 +33,12 @@ export interface AcpClientOptions {
// never answers the handshake). Without a deadline that failure mode is an infinite
// "(pending...)" with zero feedback. Prompts are intentionally NOT time-limited:
// turns legitimately run for many minutes and may wait on user permission asks.
const STARTUP_TIMEOUT_MS = 60_000;
// Overridable via ROWBOAT_ACP_STARTUP_TIMEOUT_MS — used by the CI smoke test to
// avoid waiting the full minute, and an escape hatch for genuinely slow setups
// (e.g. many MCP servers configured in the engine's user settings).
const STARTUP_TIMEOUT_MS = (() => {
    // Number(undefined) and Number('not a number') are NaN, and NaN > 0 is
    // false, so an unset, non-numeric, zero or negative override all fall back
    // to the 60s default.
    const override = Number(process.env.ROWBOAT_ACP_STARTUP_TIMEOUT_MS);
    return override > 0 ? override : 60_000;
})();
// Map a raw ACP session/update notification onto our small CodeRunEvent union.
function toEvent(update: SessionUpdate): CodeRunEvent {

View file

@ -0,0 +1,88 @@
#!/usr/bin/env bash
# One-shot code-mode diagnostics (macOS / Linux).
#
# When code mode misbehaves on your machine (stuck runs, "CLI not found",
# startup timeouts), run this and send the FULL output back — it collects
# everything needed to diagnose in one round trip:
#
# bash apps/x/scripts/diagnose-code-mode.sh
#
# Read-only except for one tiny `claude -p` probe (a single short API call).
# section TITLE — print a blank line followed by an "=== TITLE ===" banner.
section() {
  local title="$1"
  printf '\n=== %s ===\n' "$title"
}
# Portable timeout: mac has no `timeout` binary by default.
#
# Usage: run_with_timeout SECONDS COMMAND [ARGS...]
# Runs COMMAND in the background and sends it SIGTERM if it is still running
# after SECONDS. Returns COMMAND's exit status as reported by `wait`
# (143 = 128 + SIGTERM when the watchdog had to kill it).
run_with_timeout() {
  local secs="$1"; shift
  "$@" &
  local pid=$!
  # Watchdog subshell. Its stdout/stderr are detached: callers use this
  # function inside pipes and $(...), and a watchdog that inherits the pipe
  # keeps the write end open, so the reader would not see EOF until the full
  # timeout elapsed even though COMMAND finished long ago. The TERM trap kills
  # the inner sleep so it does not linger after an early finish.
  (
    trap 'kill "$sleeper" 2>/dev/null; exit 0' TERM
    sleep "$secs" &
    sleeper=$!
    wait "$sleeper" && kill "$pid" 2>/dev/null
  ) >/dev/null 2>&1 &
  local killer=$!
  wait "$pid" 2>/dev/null
  local rc=$?
  # COMMAND is done (or was killed): stop and reap the watchdog.
  kill "$killer" 2>/dev/null
  wait "$killer" 2>/dev/null
  return "$rc"
}
# describe_binary NAME
# Prints where NAME resolves on the login-shell PATH, whether that path is a
# symlink, whether the file starts with a shebang (script) or not (reported as
# native binary), and the first line of its `--version` output.
describe_binary() { # $1 = name
  local p
  p="$(/bin/sh -lc "command -v $1" 2>/dev/null)"
  if [ -z "$p" ]; then
    echo "$1: NOT on login-shell PATH"
    return
  fi
  echo "$1: $p"
  [ -L "$p" ] && echo " symlink -> $(readlink "$p")"
  # Node-shebang script vs native binary — the distinction that matters for
  # GUI launches (shebang scripts need `node` on the SPAWNING process's PATH).
  local head1
  head1="$(head -c 64 "$p" 2>/dev/null | head -n 1 | tr -d '\0')"
  case "$head1" in
    '#!'*) echo " type: script ($head1)" ;;
    *) echo " type: native binary" ;;
  esac
  # Ask the binary that was just reported for its version. Running the bare
  # name would resolve it again on this script's own PATH, which can differ
  # from the login-shell PATH and so describe a different install. Fall back
  # to the name when `command -v` did not yield an executable path.
  local target="$1"
  [ -x "$p" ] && target="$p"
  echo " version: $(run_with_timeout 15 "$target" --version 2>&1 | head -n 1)"
}
# ---------------------------------------------------------------------------
# Report body: each section prints one group of facts; nothing is written
# except by the `claude -p` probe itself.
# ---------------------------------------------------------------------------
section "system"
echo "os: $(uname -sr) ($(uname -m))"
echo "shell: ${SHELL:-unset}"
echo "date: $(date)"

section "engines"
describe_binary claude
describe_binary codex
describe_binary node

section "PATH: login shell vs GUI"
echo "login-shell PATH:"
/bin/sh -lc 'echo " $PATH"'
if [ "$(uname -s)" = "Darwin" ]; then
  echo "launchd (GUI) PATH:"
  echo " $(launchctl getenv PATH 2>/dev/null || echo '(unset — GUI apps get the system default)')"
fi

section "auth presence (no secrets printed)"
if [ "$(uname -s)" = "Darwin" ]; then
  # Only checks that the keychain item exists; its contents are never printed.
  if security find-generic-password -s "Claude Code-credentials" >/dev/null 2>&1; then
    echo "claude: keychain credential present"
  else
    echo "claude: NO keychain credential (signed in?)"
  fi
fi
[ -f "$HOME/.claude/.credentials.json" ] && echo "claude: ~/.claude/.credentials.json present"
[ -f "$HOME/.codex/auth.json" ] && echo "codex: ~/.codex/auth.json present" || echo "codex: NO ~/.codex/auth.json"

section "claude stream-json probe (what the app does under the hood)"
# A healthy claude prints a `system`/`init` JSON line within seconds. A hang or
# error here reproduces the in-app failure WITHOUT the app.
run_with_timeout 45 claude -p "reply with exactly: ok" --output-format stream-json --verbose 2>&1 | head -n 3
# `$?` after a pipeline is the status of `head`, not of the probe, so the
# advertised 143 could never be reported. PIPESTATUS[0] is the probe's own
# status; it must be read immediately after the pipeline.
probe_rc=${PIPESTATUS[0]}
echo "(probe exit: $probe_rc — 143 means it hung and was killed after 45s)"

section "newest SDK debug log (~/.claude/debug)"
latest="$(ls -t "$HOME/.claude/debug"/sdk-*.txt 2>/dev/null | head -n 1)"
if [ -n "$latest" ]; then
  echo "$latest:"
  tail -n 40 "$latest"
else
  echo "(none found)"
fi
printf '\ndone — send everything above.\n'