mirror of
https://github.com/rowboatlabs/rowboat.git
synced 2026-06-15 20:05:16 +02:00
ci(code-mode): cross-platform smoke matrix + one-shot diagnose script
- x-code-mode-smoke.yml: on apps/x PRs, package the app on mac/linux/windows and run acp-smoke.mjs, which asserts (1) adapters staged + native engines stripped, (2) each staged adapter boots from the packaged app via the packaged Electron binary and answers ACP initialize, (3) a fake engine that launches but never responds is converted into a clear startup-timeout error instead of hanging forever (the silent-hang class) - diagnose-code-mode.sh: colleagues run one command and send one blob (engine versions/paths/types, login-shell vs GUI PATH, auth presence, stream-json probe, newest SDK debug log) — one round trip instead of five - forge.config.cjs: only sign/notarize when APPLE_ID is set, so unsigned local mac builds and the CI smoke matrix can package deterministically - client.ts: startup timeout overridable via ROWBOAT_ACP_STARTUP_TIMEOUT_MS (CI uses 10s; also an escape hatch for slow MCP-heavy setups) Verified on Windows: all smoke checks pass, including the end-to-end fake-hanging-engine timeout (fails in 10.0s with the stderr-enriched error).
This commit is contained in:
parent
ae362f50f4
commit
d2d1814bea
5 changed files with 371 additions and 13 deletions
49
.github/workflows/x-code-mode-smoke.yml
vendored
Normal file
49
.github/workflows/x-code-mode-smoke.yml
vendored
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
name: Code Mode Smoke

# Packages the Electron app on all three OSes and smoke-tests code mode's
# cross-platform plumbing (adapter staging, engine stripping, ACP handshake,
# silent-hang startup timeout) — the failure class that previously only
# surfaced on a colleague's machine. See apps/x/apps/main/scripts/acp-smoke.mjs.

on:
  pull_request:
    paths:
      - 'apps/x/**'
      - '.github/workflows/x-code-mode-smoke.yml'
  workflow_dispatch:

# The smoke matrix only needs to read the repository; keep the token least-privilege.
permissions:
  contents: read

jobs:
  smoke:
    strategy:
      # Let every OS report: a failure on one platform must not hide the others.
      fail-fast: false
      matrix:
        os: [macos-latest, ubuntu-latest, windows-latest]
    runs-on: ${{ matrix.os }}
    # Without an explicit limit a hung package/smoke step would occupy the
    # runner until the 360-minute default job timeout.
    timeout-minutes: 45

    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      - name: Setup pnpm
        uses: pnpm/action-setup@v6
        with:
          version: 10

      - name: Setup Node.js
        uses: actions/setup-node@v6
        with:
          node-version: 24.15.0
          cache: 'pnpm'
          cache-dependency-path: 'apps/x/pnpm-lock.yaml'

      - name: Install dependencies
        run: pnpm install --frozen-lockfile
        working-directory: apps/x

      - name: Package app (unsigned)
        run: npm run package
        working-directory: apps/x/apps/main

      - name: Run code-mode smoke test
        run: node scripts/acp-smoke.mjs
        working-directory: apps/x/apps/main
|
||||
|
|
@ -105,18 +105,24 @@ module.exports = {
|
|||
extendInfo: {
|
||||
NSAudioCaptureUsageDescription: 'Rowboat needs access to system audio to transcribe meetings from other apps (Zoom, Meet, etc.)',
|
||||
},
|
||||
osxSign: {
|
||||
batchCodesignCalls: true,
|
||||
optionsForFile: () => ({
|
||||
entitlements: path.join(__dirname, 'entitlements.plist'),
|
||||
'entitlements-inherit': path.join(__dirname, 'entitlements.plist'),
|
||||
}),
|
||||
},
|
||||
osxNotarize: {
|
||||
appleId: process.env.APPLE_ID,
|
||||
appleIdPassword: process.env.APPLE_PASSWORD,
|
||||
teamId: process.env.APPLE_TEAM_ID
|
||||
},
|
||||
// Sign/notarize only when release credentials are present (the release
|
||||
// workflow sets APPLE_ID). Local mac dev builds and the CI smoke matrix
|
||||
// have no signing identity — leaving these unconditional makes
|
||||
// `npm run package` fail there.
|
||||
...(process.env.APPLE_ID ? {
|
||||
osxSign: {
|
||||
batchCodesignCalls: true,
|
||||
optionsForFile: () => ({
|
||||
entitlements: path.join(__dirname, 'entitlements.plist'),
|
||||
'entitlements-inherit': path.join(__dirname, 'entitlements.plist'),
|
||||
}),
|
||||
},
|
||||
osxNotarize: {
|
||||
appleId: process.env.APPLE_ID,
|
||||
appleIdPassword: process.env.APPLE_PASSWORD,
|
||||
teamId: process.env.APPLE_TEAM_ID
|
||||
},
|
||||
} : {}),
|
||||
// Since we bundle the main process with esbuild, we don't need the workspace
|
||||
// node_modules. These settings prevent Forge's dependency walker (flora-colossus)
|
||||
// from trying to analyze/copy node_modules, which fails with pnpm's symlinked
|
||||
|
|
|
|||
210
apps/x/apps/main/scripts/acp-smoke.mjs
Normal file
210
apps/x/apps/main/scripts/acp-smoke.mjs
Normal file
|
|
@ -0,0 +1,210 @@
|
|||
#!/usr/bin/env node
|
||||
// Code-mode smoke test, run by .github/workflows/x-code-mode-smoke.yml on
|
||||
// mac/linux/windows after `npm run package`. Catches the cross-platform failure
|
||||
// modes that previously only surfaced on a colleague's machine:
|
||||
//
|
||||
// 1. staging — ACP adapters present in the packaged app, native engines stripped
|
||||
// 2. handshake — each staged adapter boots from the packaged app via the packaged
|
||||
// Electron binary (ELECTRON_RUN_AS_NODE) and answers ACP initialize
|
||||
// 3. timeout — an engine that launches but never responds (the silent-hang class,
|
||||
// e.g. an outdated local CLI) is converted into a clear error by
|
||||
// AcpClient's startup deadline instead of pending forever
|
||||
//
|
||||
// Usage: node scripts/acp-smoke.mjs (cwd: apps/x/apps/main, after npm run package)
|
||||
|
||||
import { spawn } from 'child_process';
|
||||
import { mkdtempSync, writeFileSync, readFileSync, readdirSync, statSync, existsSync } from 'fs';
|
||||
import { tmpdir } from 'os';
|
||||
import * as path from 'path';
|
||||
import { fileURLToPath, pathToFileURL } from 'url';
|
||||
|
||||
// This script lives in <main>/scripts/, so the main package directory is two
// dirname() hops above the script file itself.
const scriptFile = fileURLToPath(import.meta.url);
const mainDir = path.dirname(path.dirname(scriptFile));
// Directory that is searched for the packaged app.
const outDir = path.join(mainDir, 'out');

// Number of failed checks so far; main() derives the exit code from it.
let failures = 0;

/** Report a passing check on stdout. */
function ok(msg) {
  console.log(` PASS ${msg}`);
}

/** Report a failing check on stderr and count it. */
function fail(msg) {
  failures++;
  console.error(` FAIL ${msg}`);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Locate the packaged app for this platform.
|
||||
// ---------------------------------------------------------------------------
|
||||
/**
 * Find the packaged app produced under `outDir` for the current platform.
 *
 * @returns {{ appRoot: string, electronBin: string }} the packaged `app`
 *   resources directory and the executable used to launch it.
 * @throws {Error} when no `Rowboat-*` directory, `.app` bundle or rowboat
 *   binary can be found.
 */
function findPackagedApp() {
  const candidates = [];
  if (existsSync(outDir)) {
    for (const name of readdirSync(outDir)) {
      if (name.startsWith('Rowboat-') && statSync(path.join(outDir, name)).isDirectory()) {
        candidates.push(name);
      }
    }
  }
  if (candidates.length === 0) {
    throw new Error(`no packaged app under ${outDir} — run npm run package first`);
  }
  const [firstDir] = candidates;
  const root = path.join(outDir, firstDir);

  // Windows / Linux: the executable sits next to a `resources` directory.
  if (process.platform !== 'darwin') {
    const binPattern = process.platform === 'win32' ? /^rowboat\.exe$/i : /^rowboat$/i;
    const binName = readdirSync(root).find((f) => binPattern.test(f));
    if (!binName) throw new Error(`no rowboat binary in ${root}`);
    return {
      appRoot: path.join(root, 'resources', 'app'),
      electronBin: path.join(root, binName),
    };
  }

  // macOS: everything lives inside the .app bundle.
  const bundle = readdirSync(root).find((d) => d.endsWith('.app'));
  if (!bundle) throw new Error(`no .app bundle in ${root}`);
  const contents = path.join(root, bundle, 'Contents');
  const macOS = path.join(contents, 'MacOS');
  // The first entry of Contents/MacOS is taken as the launcher binary.
  const [launcher] = readdirSync(macOS);
  return {
    appRoot: path.join(contents, 'Resources', 'app'),
    electronBin: path.join(macOS, launcher),
  };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 1. Staging assertions
|
||||
// ---------------------------------------------------------------------------
|
||||
// ACP adapter packages that must be present in the packaged app.
const ADAPTERS = ['@agentclientprotocol/claude-agent-acp', '@agentclientprotocol/codex-acp'];
// Platform-specific native engine packages that must NOT be shipped.
const ENGINE_DIR_RE = /@anthropic-ai[\\/]claude-agent-sdk-(win32|darwin|linux)|@openai[\\/]codex-(win32|darwin|linux)/;
const MAX_FILE_BYTES = 10 * 1024 * 1024; // engines are ~230MB; nothing legit comes close

/**
 * Check 1/3: the ACP adapters are staged inside the packaged app and no native
 * engine (or any other oversized file) leaked into the staged tree.
 *
 * Results are reported through ok()/fail(); nothing is thrown for a failed
 * assertion.
 *
 * @param {string} appRoot packaged `app` resources directory.
 */
function checkStaging(appRoot) {
  console.log('\n[1/3] staging');
  const acpRoot = path.join(appRoot, '.package', 'acp', 'node_modules');
  if (!existsSync(acpRoot)) return fail(`staged adapters missing: ${acpRoot}`);

  for (const pkg of ADAPTERS) {
    const pkgJson = path.join(acpRoot, ...pkg.split('/'), 'package.json');
    if (existsSync(pkgJson)) ok(`${pkg} staged`);
    else fail(`${pkg} NOT staged (${pkgJson})`);
  }

  let engineHits = 0;
  let oversize = 0;
  let totalBytes = 0;
  const walk = (dir) => {
    for (const entry of readdirSync(dir, { withFileTypes: true })) {
      const p = path.join(dir, entry.name);
      // A symlinked engine directory is still a leaked engine, but
      // Dirent.isDirectory() is false for symlinks — test both kinds.
      if ((entry.isDirectory() || entry.isSymbolicLink()) && ENGINE_DIR_RE.test(p)) {
        engineHits++;
        fail(`native engine leaked into package: ${p}`);
        continue;
      }
      if (entry.isDirectory()) {
        walk(p);
        continue;
      }
      // statSync follows symlinks; a dangling link must not abort the whole
      // smoke run with ENOENT, so ask for `undefined` instead of a throw.
      const info = statSync(p, { throwIfNoEntry: false });
      if (!info) {
        console.log(` note: skipping dangling symlink ${p}`);
        continue;
      }
      totalBytes += info.size;
      if (info.size > MAX_FILE_BYTES) {
        oversize++;
        fail(`oversized file (${(info.size / 1e6).toFixed(0)}MB): ${p}`);
      }
    }
  };
  walk(acpRoot);
  if (engineHits === 0) ok('native engines stripped');
  if (oversize === 0) ok(`no file over ${Math.round(MAX_FILE_BYTES / 1e6)}MB (acp total: ${(totalBytes / 1e6).toFixed(0)}MB)`);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 2. Packaged-adapter ACP initialize round-trip
|
||||
// ---------------------------------------------------------------------------
|
||||
/**
 * Resolve the executable entry script of a staged adapter from the `bin`
 * field of its package.json (a string, or the first value of the bin map).
 *
 * @param {string} appRoot packaged `app` resources directory.
 * @param {string} pkg adapter package name, e.g. `@scope/name`.
 * @returns {string} absolute path of the adapter's entry script.
 * @throws {Error} when package.json cannot be read/parsed or declares no
 *   usable `bin` entry.
 */
function adapterEntry(appRoot, pkg) {
  const pkgDir = path.join(appRoot, '.package', 'acp', 'node_modules', ...pkg.split('/'));
  const manifestPath = path.join(pkgDir, 'package.json');
  const { bin } = JSON.parse(readFileSync(manifestPath, 'utf8'));
  // `bin` may be absent or an empty map; name the package instead of letting
  // Object.values()/path.join() fail with an opaque TypeError.
  const rel = typeof bin === 'string' ? bin : Object.values(bin ?? {})[0];
  if (typeof rel !== 'string' || rel.length === 0) {
    throw new Error(`${pkg}: no "bin" entry in ${manifestPath}`);
  }
  return path.join(pkgDir, rel);
}
|
||||
|
||||
// allowEngineError: codex-acp spawns its engine DURING initialize (claude-acp only
|
||||
// at session creation). With the fake engine it answers a structured engine error —
|
||||
// which still proves what this check is for: the adapter is staged, its dependency
|
||||
// closure loads, it boots and speaks JSON-RPC. Only a non-response (crash, missing
|
||||
// module, early exit) is a staging failure.
|
||||
/**
 * Check 2/3 for one adapter: spawn it through the packaged Electron binary in
 * ELECTRON_RUN_AS_NODE mode, send an ACP `initialize` request (id 1) on stdin
 * and wait for the matching JSON-RPC response on stdout.
 *
 * The outcome is reported through ok()/fail(); the returned promise always
 * resolves and never rejects.
 *
 * @param {string} electronBin packaged Electron executable.
 * @param {string} entry adapter entry script passed as the only argument.
 * @param {string} label name used in PASS/FAIL output.
 * @param {string} fakeEngine path exported as CLAUDE_CODE_EXECUTABLE / CODEX_PATH.
 * @param {boolean} allowEngineError treat a JSON-RPC error response as success.
 * @returns {Promise<void>}
 */
function initializeRoundTrip(electronBin, entry, label, fakeEngine, allowEngineError) {
  return new Promise((resolve) => {
    const child = spawn(electronBin, [entry], {
      stdio: ['pipe', 'pipe', 'pipe'],
      env: {
        ...process.env,
        ELECTRON_RUN_AS_NODE: '1',
        // Adapters must boot without a real engine installed (CI has none),
        // so both engine overrides point at the fake engine.
        CLAUDE_CODE_EXECUTABLE: fakeEngine,
        CODEX_PATH: fakeEngine,
      },
    });
    // Decode as UTF-8 streams so a multi-byte character split across two
    // chunks is not corrupted by per-chunk Buffer→string conversion.
    child.stdout.setEncoding('utf8');
    child.stderr.setEncoding('utf8');

    let buffered = '';
    let stderr = '';
    let done = false;

    const finish = (err) => {
      if (done) return;
      done = true;
      clearTimeout(deadline);
      child.kill();
      if (err) fail(`${label}: ${err}${stderr.trim() ? `\n stderr: ${stderr.trim().slice(-600)}` : ''}`);
      else ok(`${label}: ACP initialize answered`);
      resolve();
    };
    const deadline = setTimeout(() => finish('no initialize response within 30s'), 30_000);

    child.on('error', (e) => finish(`spawn failed: ${e.message}`));
    child.on('exit', (code, signal) =>
      finish(`adapter exited early (${signal ? `signal ${signal}` : `code ${code}`})`));
    // An adapter that dies before/while we write makes stdin emit EPIPE; with
    // no listener that is an uncaught exception that kills the smoke script.
    child.stdin.on('error', (e) => finish(`stdin write failed: ${e.message}`));
    child.stderr.on('data', (d) => { stderr = (stderr + d).slice(-4000); });

    child.stdout.on('data', (chunk) => {
      buffered += chunk;
      const lines = buffered.split('\n');
      // Complete lines are examined once and dropped; the (possibly partial)
      // tail stays buffered and is re-tried when more data arrives.
      buffered = lines[lines.length - 1];
      for (const line of lines) {
        try {
          const msg = JSON.parse(line);
          if (msg.id === 1 && ('result' in msg || 'error' in msg)) {
            if (msg.error && allowEngineError) {
              console.log(` (engine error expected with fake engine: ${msg.error.message})`);
              return finish(undefined);
            }
            return finish(msg.error ? `initialize error: ${JSON.stringify(msg.error)}` : undefined);
          }
        } catch { /* partial line or non-JSON output */ }
      }
    });

    child.stdin.write(`${JSON.stringify({
      jsonrpc: '2.0', id: 1, method: 'initialize',
      params: { protocolVersion: 1, clientCapabilities: { fs: { readTextFile: true, writeTextFile: true } } },
    })}\n`);
  });
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 3. Silent-hang → startup-timeout test (the Arjun scenario, end to end)
|
||||
// Real workspace adapter + a fake engine that launches and then never speaks.
|
||||
// AcpClient.newSession() must reject with the startup-timeout error.
|
||||
// ---------------------------------------------------------------------------
|
||||
/**
 * Check 3/3: drive the built AcpClient from packages/core/dist against a fake
 * engine that never answers and verify that newSession() rejects with a
 * "timed out" error instead of staying pending.
 *
 * @param {string} fakeEngine path exported as CLAUDE_CODE_EXECUTABLE.
 * @returns {Promise<void>}
 */
async function checkStartupTimeout(fakeEngine) {
  console.log('\n[3/3] startup timeout (fake hanging engine)');
  const coreDist = path.join(mainDir, '..', '..', 'packages', 'core', 'dist', 'code-mode', 'acp');
  // Both variables are set before the client module is imported below.
  process.env.CLAUDE_CODE_EXECUTABLE = fakeEngine;
  process.env.ROWBOAT_ACP_STARTUP_TIMEOUT_MS = '10000';

  const loadCore = (file) => import(pathToFileURL(path.join(coreDist, file)).href);
  const { AcpClient } = await loadCore('client.js');
  const { PermissionBroker } = await loadCore('permission-broker.js');

  const broker = new PermissionBroker({ policy: 'yolo', ask: async () => 'allow_once' });
  const cwd = mkdtempSync(path.join(tmpdir(), 'acp-smoke-'));
  const client = new AcpClient({ agent: 'claude', cwd, broker, onEvent: () => {} });

  try {
    await client.start(); // real adapter boots fine — the ENGINE is what hangs
    const startedAt = Date.now();

    let rejection;
    let rejected = false;
    try {
      await client.newSession();
    } catch (e) {
      rejected = true;
      rejection = e;
    }

    if (!rejected) {
      fail('newSession resolved against a hanging engine — timeout never fired');
      return;
    }

    const secs = ((Date.now() - startedAt) / 1000).toFixed(1);
    const msg = rejection instanceof Error ? rejection.message : String(rejection);
    if (!msg.includes('timed out')) {
      fail(`newSession rejected but not with the startup timeout: ${msg.slice(0, 300)}`);
      return;
    }
    ok(`newSession failed fast (${secs}s): ${msg.split('\n')[0].slice(0, 160)}`);
  } finally {
    client.dispose();
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
/**
 * Run the three smoke checks in order and exit with status 1 when any of
 * them recorded a failure, 0 otherwise.
 */
async function main() {
  // A fake engine: launches, swallows stdin, never answers — models an
  // outdated/incompatible local CLI, the silent-hang failure mode.
  const fakeEngine = path.join(mkdtempSync(path.join(tmpdir(), 'fake-engine-')), 'fake-claude.js');
  writeFileSync(fakeEngine, 'process.stdin.resume(); /* never respond */\n');

  const { appRoot, electronBin } = findPackagedApp();
  console.log(`packaged app: ${appRoot}`);

  checkStaging(appRoot);

  console.log('\n[2/3] packaged adapter handshake');
  for (const pkg of ADAPTERS) {
    const allowEngineError = pkg.includes('codex');
    // A missing or broken adapter is recorded as a FAIL by the staging check,
    // which then continues. Do the same here, so one unresolvable adapter
    // does not abort the run and hide the remaining checks.
    let entry;
    try {
      entry = adapterEntry(appRoot, pkg);
    } catch (e) {
      fail(`${pkg}: cannot resolve adapter entry: ${e instanceof Error ? e.message : String(e)}`);
      continue;
    }
    await initializeRoundTrip(electronBin, entry, pkg, fakeEngine, allowEngineError);
  }

  await checkStartupTimeout(fakeEngine);

  console.log(failures ? `\n${failures} check(s) FAILED` : '\nall checks passed');
  process.exit(failures ? 1 : 0);
}
|
||||
|
||||
// Entry point: any error escaping main() is printed and turned into exit code 1.
main().catch((e) => { console.error(e); process.exit(1); });
|
||||
|
|
@ -33,7 +33,12 @@ export interface AcpClientOptions {
|
|||
// never answers the handshake). Without a deadline that failure mode is an infinite
|
||||
// "(pending...)" with zero feedback. Prompts are intentionally NOT time-limited:
|
||||
// turns legitimately run for many minutes and may wait on user permission asks.
|
||||
const STARTUP_TIMEOUT_MS = 60_000;
|
||||
// Overridable via ROWBOAT_ACP_STARTUP_TIMEOUT_MS — used by the CI smoke test to
|
||||
// avoid waiting the full minute, and an escape hatch for genuinely slow setups
|
||||
// (e.g. many MCP servers configured in the engine's user settings).
|
||||
// Falls back to 60s unless the override is a finite, positive number. The
// value is capped at 2^31-1 ms: Node's setTimeout replaces any larger delay
// (including Infinity) with 1 ms, which would turn a "very long" override into
// an immediate timeout.
// NOTE(review): assumes this constant is used as a setTimeout delay — confirm.
const STARTUP_TIMEOUT_MS = (() => {
    const maxTimerDelayMs = 2_147_483_647;
    const requested = Number(process.env.ROWBOAT_ACP_STARTUP_TIMEOUT_MS);
    if (!Number.isFinite(requested) || requested <= 0) return 60_000;
    return Math.min(requested, maxTimerDelayMs);
})();
|
||||
|
||||
// Map a raw ACP session/update notification onto our small CodeRunEvent union.
|
||||
function toEvent(update: SessionUpdate): CodeRunEvent {
|
||||
|
|
|
|||
88
apps/x/scripts/diagnose-code-mode.sh
Normal file
88
apps/x/scripts/diagnose-code-mode.sh
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
#!/usr/bin/env bash
|
||||
# One-shot code-mode diagnostics (macOS / Linux).
|
||||
#
|
||||
# When code mode misbehaves on your machine (stuck runs, "CLI not found",
|
||||
# startup timeouts), run this and send the FULL output back — it collects
|
||||
# everything needed to diagnose in one round trip:
|
||||
#
|
||||
# bash apps/x/scripts/diagnose-code-mode.sh
|
||||
#
|
||||
# Read-only except for one tiny `claude -p` probe (a single short API call).
|
||||
|
||||
# Print a "=== title ===" banner preceded by a blank line. $1 = title
section() {
  printf '\n=== %s ===\n' "$1"
}
|
||||
|
||||
# Portable timeout: mac has no `timeout` binary by default.
|
||||
# Run a command and kill it if it is still running after N seconds.
#   $1   = timeout in seconds
#   $2.. = command and its arguments
# Returns the command's exit status (143 when it was killed by the watchdog's
# SIGTERM).
run_with_timeout() {
  local secs="$1"; shift
  "$@" &
  local pid=$!
  # Watchdog. Its stdout/stderr are detached on purpose: killing the subshell
  # below leaves its `sleep` child running, and if that child still held the
  # caller's pipe open, `run_with_timeout … | head -n N` would block for the
  # full timeout whenever the command prints fewer than N lines.
  ( sleep "$secs" && kill "$pid" 2>/dev/null ) >/dev/null 2>&1 &
  local killer=$!
  wait "$pid" 2>/dev/null
  local rc=$?
  kill "$killer" 2>/dev/null
  return $rc
}
|
||||
|
||||
# Describe one CLI as seen from a login shell: resolved path, symlink target,
# script-vs-native type and the first line of `--version`.
#   $1 = command name
describe_binary() {
  local p
  # Pass the name as a positional argument rather than splicing it into the
  # command string.
  p="$(/bin/sh -lc 'command -v "$1"' sh "$1" 2>/dev/null)"
  if [ -z "$p" ]; then
    echo "$1: NOT on login-shell PATH"
    return
  fi
  echo "$1: $p"
  [ -L "$p" ] && echo " symlink -> $(readlink "$p")"
  # Node-shebang script vs native binary — the distinction that matters for
  # GUI launches (shebang scripts need `node` on the SPAWNING process's PATH).
  local head1
  head1="$(head -c 64 "$p" 2>/dev/null | head -n 1 | tr -d '\0')"
  case "$head1" in
    '#!'*) echo " type: script ($head1)" ;;
    *) echo " type: native binary" ;;
  esac
  # Query the binary that was just resolved: this script's own PATH may differ
  # from the login shell's, in which case the bare name would be missing or a
  # different install. Fall back to the name when `command -v` did not yield
  # an executable path.
  local runner="$1"
  [ -x "$p" ] && runner="$p"
  echo " version: $(run_with_timeout 15 "$runner" --version 2>&1 | head -n 1)"
}
|
||||
|
||||
# --- host information -------------------------------------------------------
section "system"
os_name="$(uname -s)"
printf 'os: %s (%s)\n' "$(uname -sr)" "$(uname -m)"
printf 'shell: %s\n' "${SHELL:-unset}"
printf 'date: %s\n' "$(date)"

# --- engine CLIs as a login shell resolves them -------------------------------
section "engines"
for engine in claude codex node; do
  describe_binary "$engine"
done

# --- PATH of a login shell vs. PATH handed to GUI apps (macOS launchd) --------
section "PATH: login shell vs GUI"
echo "login-shell PATH:"
/bin/sh -lc 'echo " $PATH"'
if [ "$os_name" = "Darwin" ]; then
  echo "launchd (GUI) PATH:"
  gui_path="$(launchctl getenv PATH 2>/dev/null || echo '(unset — GUI apps get the system default)')"
  echo " $gui_path"
fi

# --- credentials: existence checks only, contents are never read --------------
section "auth presence (no secrets printed)"
if [ "$os_name" = "Darwin" ]; then
  if security find-generic-password -s "Claude Code-credentials" >/dev/null 2>&1; then
    echo "claude: keychain credential present"
  else
    echo "claude: NO keychain credential (signed in?)"
  fi
fi
if [ -f "$HOME/.claude/.credentials.json" ]; then
  echo "claude: ~/.claude/.credentials.json present"
fi
if [ -f "$HOME/.codex/auth.json" ]; then
  echo "codex: ~/.codex/auth.json present"
else
  echo "codex: NO ~/.codex/auth.json"
fi
|
||||
|
||||
section "claude stream-json probe (what the app does under the hood)"
# A healthy claude prints a `system`/`init` JSON line within seconds. A hang or
# error here reproduces the in-app failure WITHOUT the app.
run_with_timeout 45 claude -p "reply with exactly: ok" --output-format stream-json --verbose 2>&1 | head -n 3
# `$?` after a pipeline is the status of its LAST command (`head`, practically
# always 0); the probe's own status is the first PIPESTATUS entry. It has to be
# captured immediately, before any other command overwrites PIPESTATUS.
probe_rc=${PIPESTATUS[0]}
echo "(probe exit: $probe_rc — 143 means it hung and was killed after 45s)"
|
||||
|
||||
section "newest SDK debug log (~/.claude/debug)"
# Most recently modified sdk-*.txt, or empty when none exists.
latest="$(ls -t "$HOME/.claude/debug"/sdk-*.txt 2>/dev/null | head -n 1)"
if [ -z "$latest" ]; then
  echo "(none found)"
else
  echo "$latest:"
  tail -n 40 "$latest"
fi

printf '\ndone — send everything above.\n'
|
||||
Loading…
Add table
Add a link
Reference in a new issue