From d2d1814beaf43f8825c3654cc00e1b09cfb77500 Mon Sep 17 00:00:00 2001 From: Gagancreates Date: Sat, 13 Jun 2026 00:24:14 +0530 Subject: [PATCH] ci(code-mode): cross-platform smoke matrix + one-shot diagnose script MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - x-code-mode-smoke.yml: on apps/x PRs, package the app on mac/linux/windows and run acp-smoke.mjs, which asserts (1) adapters staged + native engines stripped, (2) each staged adapter boots from the packaged app via the packaged Electron binary and answers ACP initialize, (3) a fake engine that launches but never responds is converted into a clear startup-timeout error instead of hanging forever (the silent-hang class) - diagnose-code-mode.sh: colleagues run one command and send one blob (engine versions/paths/types, login-shell vs GUI PATH, auth presence, stream-json probe, newest SDK debug log) — one round trip instead of five - forge.config.cjs: only sign/notarize when APPLE_ID is set, so unsigned local mac builds and the CI smoke matrix can package deterministically - client.ts: startup timeout overridable via ROWBOAT_ACP_STARTUP_TIMEOUT_MS (CI uses 10s; also an escape hatch for slow MCP-heavy setups) Verified on Windows: all smoke checks pass, including the end-to-end fake-hanging-engine timeout (fails in 10.0s with the stderr-enriched error). 
name: Code Mode Smoke

# Packages the Electron app on all three OSes and smoke-tests code mode's
# cross-platform plumbing (adapter staging, engine stripping, ACP handshake,
# silent-hang startup timeout) — the failure class that previously only
# surfaced on a colleague's machine. See apps/x/apps/main/scripts/acp-smoke.mjs.

on:
  pull_request:
    paths:
      - 'apps/x/**'
      - '.github/workflows/x-code-mode-smoke.yml'
  workflow_dispatch:

# The job only checks out and builds; it never pushes, comments or releases.
permissions:
  contents: read

# A newer push to the same ref supersedes a run that is still in flight.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  smoke:
    strategy:
      # Keep the other OS legs running: a failure on one platform says nothing
      # about the others, and the point is to see all three.
      fail-fast: false
      matrix:
        os: [macos-latest, ubuntu-latest, windows-latest]
    runs-on: ${{ matrix.os }}
    # This workflow exists to catch hangs. Without an explicit cap, a step that
    # hangs would hold the runner until the 360-minute job default.
    timeout-minutes: 30

    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      - name: Setup pnpm
        uses: pnpm/action-setup@v6
        with:
          version: 10

      - name: Setup Node.js
        uses: actions/setup-node@v6
        with:
          node-version: 24.15.0
          cache: 'pnpm'
          cache-dependency-path: 'apps/x/pnpm-lock.yaml'

      - name: Install dependencies
        run: pnpm install --frozen-lockfile
        working-directory: apps/x

      # forge.config.cjs skips signing/notarization when APPLE_ID is unset,
      # which is always the case here.
      - name: Package app (unsigned)
        run: npm run package
        working-directory: apps/x/apps/main

      - name: Run code-mode smoke test
        run: node scripts/acp-smoke.mjs
        working-directory: apps/x/apps/main
{ + osxSign: { + batchCodesignCalls: true, + optionsForFile: () => ({ + entitlements: path.join(__dirname, 'entitlements.plist'), + 'entitlements-inherit': path.join(__dirname, 'entitlements.plist'), + }), + }, + osxNotarize: { + appleId: process.env.APPLE_ID, + appleIdPassword: process.env.APPLE_PASSWORD, + teamId: process.env.APPLE_TEAM_ID + }, + } : {}), // Since we bundle the main process with esbuild, we don't need the workspace // node_modules. These settings prevent Forge's dependency walker (flora-colossus) // from trying to analyze/copy node_modules, which fails with pnpm's symlinked diff --git a/apps/x/apps/main/scripts/acp-smoke.mjs b/apps/x/apps/main/scripts/acp-smoke.mjs new file mode 100644 index 00000000..e909d11c --- /dev/null +++ b/apps/x/apps/main/scripts/acp-smoke.mjs @@ -0,0 +1,210 @@ +#!/usr/bin/env node +// Code-mode smoke test, run by .github/workflows/x-code-mode-smoke.yml on +// mac/linux/windows after `npm run package`. Catches the cross-platform failure +// modes that previously only surfaced on a colleague's machine: +// +// 1. staging — ACP adapters present in the packaged app, native engines stripped +// 2. handshake — each staged adapter boots from the packaged app via the packaged +// Electron binary (ELECTRON_RUN_AS_NODE) and answers ACP initialize +// 3. timeout — an engine that launches but never responds (the silent-hang class, +// e.g. 
// ---------------------------------------------------------------------------
// Locate the packaged app for this platform.
// ---------------------------------------------------------------------------
/**
 * Find the packaged app produced by `npm run package`.
 *
 * @param {string} [searchDir=outDir] Directory holding the `Rowboat-*` build
 *   folders. Defaults to this package's `out/` directory.
 * @returns {{ appRoot: string, electronBin: string }} `appRoot` is the app's
 *   resources directory; `electronBin` is the packaged executable.
 * @throws {Error} When no build folder, `.app` bundle or executable is found.
 */
function findPackagedApp(searchDir = outDir) {
  const builds = existsSync(searchDir)
    ? readdirSync(searchDir).filter((d) => d.startsWith('Rowboat-') && statSync(path.join(searchDir, d)).isDirectory())
    : [];
  if (builds.length === 0) throw new Error(`no packaged app under ${searchDir} — run npm run package first`);

  // A local out/ can hold several builds (e.g. x64 and arm64). Prefer the one
  // for this host instead of whatever readdir happens to list first.
  const chosen = builds.find((d) => d.endsWith(`-${process.platform}-${process.arch}`))
    ?? builds.find((d) => d.includes(`-${process.platform}-`))
    ?? builds[0];
  const root = path.join(searchDir, chosen);

  if (process.platform === 'darwin') {
    const app = readdirSync(root).find((d) => d.endsWith('.app'));
    if (!app) throw new Error(`no .app bundle in ${root}`);
    const macOS = path.join(root, app, 'Contents', 'MacOS');
    const executables = readdirSync(macOS);
    if (executables.length === 0) throw new Error(`no executable in ${macOS}`);
    // Prefer the executable named after the bundle; fall back to the first
    // entry (the previous behaviour) when no such file exists.
    const bundleName = app.slice(0, -'.app'.length);
    const macBin = executables.find((f) => f === bundleName) ?? executables[0];
    return { appRoot: path.join(root, app, 'Contents', 'Resources', 'app'), electronBin: path.join(macOS, macBin) };
  }

  const binPattern = process.platform === 'win32' ? /^rowboat\.exe$/i : /^rowboat$/i;
  const binName = readdirSync(root).find((f) => binPattern.test(f));
  if (!binName) throw new Error(`no rowboat binary in ${root}`);
  return { appRoot: path.join(root, 'resources', 'app'), electronBin: path.join(root, binName) };
}

// ---------------------------------------------------------------------------
// 1. Staging assertions
// ---------------------------------------------------------------------------
const ADAPTERS = ['@agentclientprotocol/claude-agent-acp', '@agentclientprotocol/codex-acp'];
const ENGINE_DIR_RE = /@anthropic-ai[\\/]claude-agent-sdk-(win32|darwin|linux)|@openai[\\/]codex-(win32|darwin|linux)/;
const MAX_FILE_BYTES = 10 * 1024 * 1024; // engines are ~230MB; nothing legit comes close

/**
 * Assert that the packaged app contains the staged ACP adapters and none of
 * the native engines. Results are reported through `ok` / `fail`; nothing is
 * returned.
 *
 * @param {string} appRoot Packaged app resources directory.
 */
function checkStaging(appRoot) {
  console.log('\n[1/3] staging');
  const acpRoot = path.join(appRoot, '.package', 'acp', 'node_modules');
  if (!existsSync(acpRoot)) return fail(`staged adapters missing: ${acpRoot}`);

  for (const pkg of ADAPTERS) {
    const pkgJson = path.join(acpRoot, ...pkg.split('/'), 'package.json');
    if (existsSync(pkgJson)) ok(`${pkg} staged`);
    else fail(`${pkg} NOT staged (${pkgJson})`);
  }

  let engineHits = 0;
  let oversize = 0;
  let totalBytes = 0;
  const walk = (dir) => {
    for (const entry of readdirSync(dir, { withFileTypes: true })) {
      const p = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        if (ENGINE_DIR_RE.test(p)) {
          engineHits++;
          fail(`native engine leaked into package: ${p}`);
        } else {
          walk(p);
        }
        continue;
      }
      // A symlink whose target is gone (e.g. a link into a stripped engine)
      // must not abort the whole smoke run with ENOENT.
      const info = statSync(p, { throwIfNoEntry: false });
      if (!info) {
        console.log(`  (skipping dangling symlink: ${p})`);
        continue;
      }
      totalBytes += info.size;
      if (info.size > MAX_FILE_BYTES) {
        oversize++;
        fail(`oversized file (${(info.size / 1e6).toFixed(0)}MB): ${p}`);
      }
    }
  };
  walk(acpRoot);
  if (engineHits === 0) ok('native engines stripped');
  if (oversize === 0) ok(`no file over ${Math.round(MAX_FILE_BYTES / 1e6)}MB (acp total: ${(totalBytes / 1e6).toFixed(0)}MB)`);
}
// ---------------------------------------------------------------------------
// 2. Packaged-adapter ACP initialize round-trip
// ---------------------------------------------------------------------------
/**
 * Resolve the executable entry script of a staged adapter package.
 *
 * @param {string} appRoot Packaged app resources directory.
 * @param {string} pkg Package name, e.g. `@scope/name`.
 * @returns {string} Absolute path of the package's (first) `bin` script.
 * @throws {Error} When package.json is unreadable or declares no `bin`.
 */
function adapterEntry(appRoot, pkg) {
  const pkgDir = path.join(appRoot, '.package', 'acp', 'node_modules', ...pkg.split('/'));
  const manifestPath = path.join(pkgDir, 'package.json');
  const pj = JSON.parse(readFileSync(manifestPath, 'utf8'));
  const rel = typeof pj.bin === 'string' ? pj.bin : Object.values(pj.bin ?? {})[0];
  if (typeof rel !== 'string') throw new Error(`${pkg}: no "bin" entry in ${manifestPath}`);
  return path.join(pkgDir, rel);
}

/**
 * Boot one staged adapter with the packaged Electron binary (run as Node) and
 * check that it answers an ACP `initialize` request. The outcome is reported
 * through `ok` / `fail`; the returned promise always resolves.
 *
 * allowEngineError: codex-acp spawns its engine DURING initialize (claude-acp
 * only at session creation). With the fake engine it answers a structured
 * engine error — which still proves what this check is for: the adapter is
 * staged, its dependency closure loads, it boots and speaks JSON-RPC. Only a
 * non-response (crash, missing module, early exit) is a staging failure.
 *
 * @param {string} electronBin Packaged executable used to run the adapter.
 * @param {string} entry Adapter entry script.
 * @param {string} label Name used in PASS/FAIL output.
 * @param {string} fakeEngine Path handed to the adapter as its engine binary.
 * @param {boolean} allowEngineError Treat a JSON-RPC error reply as a pass.
 * @returns {Promise<void>}
 */
function initializeRoundTrip(electronBin, entry, label, fakeEngine, allowEngineError) {
  return new Promise((resolve) => {
    const child = spawn(electronBin, [entry], {
      stdio: ['pipe', 'pipe', 'pipe'],
      env: {
        ...process.env,
        ELECTRON_RUN_AS_NODE: '1',
        // Adapters must boot without a real engine installed (CI has none),
        // so both engine overrides point at the fake one. Whether the engine
        // is actually spawned during initialize depends on the adapter — see
        // allowEngineError above.
        CLAUDE_CODE_EXECUTABLE: fakeEngine,
        CODEX_PATH: fakeEngine,
      },
    });

    let pending = '';
    let stderr = '';
    let done = false;
    let deadline;
    const finish = (err) => {
      if (done) return;
      done = true;
      clearTimeout(deadline);
      child.kill();
      if (err) fail(`${label}: ${err}${stderr.trim() ? `\n    stderr: ${stderr.trim().slice(-600)}` : ''}`);
      else ok(`${label}: ACP initialize answered`);
      resolve();
    };
    deadline = setTimeout(() => finish('no initialize response within 30s'), 30_000);

    child.on('error', (e) => finish(`spawn failed: ${e.message}`));
    child.on('exit', (code, signal) => finish(`adapter exited early (code ${code}${signal ? `, signal ${signal}` : ''})`));
    // An adapter that dies before reading the request makes the stdin write
    // fail. Without a listener that stream error is an uncaught exception that
    // kills the smoke run instead of producing a FAIL line.
    child.stdin.on('error', (e) => finish(`could not write initialize request: ${e.message}`));
    child.stderr.on('data', (d) => { stderr = (stderr + d).slice(-4000); });
    child.stdout.on('data', (chunk) => {
      pending += chunk;
      const candidates = pending.split('\n');
      // Keep only the unterminated tail; completed lines are never re-parsed.
      pending = candidates[candidates.length - 1];
      for (const line of candidates) {
        try {
          const msg = JSON.parse(line);
          if (msg.id === 1 && ('result' in msg || 'error' in msg)) {
            if (msg.error && allowEngineError) {
              console.log(`    (engine error expected with fake engine: ${msg.error.message})`);
              return finish(undefined);
            }
            return finish(msg.error ? `initialize error: ${JSON.stringify(msg.error)}` : undefined);
          }
        } catch { /* partial line or non-JSON log output */ }
      }
    });

    child.stdin.write(`${JSON.stringify({
      jsonrpc: '2.0',
      id: 1,
      method: 'initialize',
      params: { protocolVersion: 1, clientCapabilities: { fs: { readTextFile: true, writeTextFile: true } } },
    })}\n`);
  });
}
// ---------------------------------------------------------------------------
// 3. Silent-hang → startup-timeout test, end to end.
//    Real workspace adapter + a fake engine that launches and then never speaks.
//    AcpClient.newSession() must reject with the startup-timeout error.
// ---------------------------------------------------------------------------
/**
 * Drive the built AcpClient against an engine that never answers and check
 * that session creation is rejected with a "timed out" error.
 *
 * @param {string} fakeEngine Path of the never-responding engine script.
 * @returns {Promise<void>} Outcome is reported through `ok` / `fail`.
 */
async function checkStartupTimeout(fakeEngine) {
  console.log('\n[3/3] startup timeout (fake hanging engine)');
  const acpDist = path.join(mainDir, '..', '..', 'packages', 'core', 'dist', 'code-mode', 'acp');
  // Both must be set before client.js is imported / the engine is spawned.
  process.env.CLAUDE_CODE_EXECUTABLE = fakeEngine;
  process.env.ROWBOAT_ACP_STARTUP_TIMEOUT_MS = '10000';

  const loadFromDist = (file) => import(pathToFileURL(path.join(acpDist, file)).href);
  const { AcpClient } = await loadFromDist('client.js');
  const { PermissionBroker } = await loadFromDist('permission-broker.js');

  const broker = new PermissionBroker({ policy: 'yolo', ask: async () => 'allow_once' });
  const cwd = mkdtempSync(path.join(tmpdir(), 'acp-smoke-'));
  const client = new AcpClient({ agent: 'claude', cwd, broker, onEvent: () => {} });
  try {
    await client.start(); // real adapter boots fine — the ENGINE is what hangs
    const started = Date.now();
    let rejected = false;
    let reason;
    try {
      await client.newSession();
    } catch (e) {
      rejected = true;
      reason = e;
    }

    if (!rejected) {
      fail('newSession resolved against a hanging engine — timeout never fired');
    } else {
      const secs = ((Date.now() - started) / 1000).toFixed(1);
      const msg = reason instanceof Error ? reason.message : String(reason);
      if (msg.includes('timed out')) {
        ok(`newSession failed fast (${secs}s): ${msg.split('\n')[0].slice(0, 160)}`);
      } else {
        fail(`newSession rejected but not with the startup timeout: ${msg.slice(0, 300)}`);
      }
    }
  } finally {
    client.dispose();
  }
}

// ---------------------------------------------------------------------------
/**
 * Run the three smoke stages in order and exit with 1 if any check failed,
 * 0 otherwise.
 */
async function main() {
  // A fake engine: launches, swallows stdin, never answers — models an
  // outdated/incompatible local CLI, the silent-hang failure mode.
  const fakeEngineDir = mkdtempSync(path.join(tmpdir(), 'fake-engine-'));
  const fakeEngine = path.join(fakeEngineDir, 'fake-claude.js');
  writeFileSync(fakeEngine, 'process.stdin.resume(); /* never respond */\n');

  const { appRoot, electronBin } = findPackagedApp();
  console.log(`packaged app: ${appRoot}`);

  checkStaging(appRoot);

  console.log('\n[2/3] packaged adapter handshake');
  for (const pkg of ADAPTERS) {
    const entry = adapterEntry(appRoot, pkg);
    await initializeRoundTrip(electronBin, entry, pkg, fakeEngine, pkg.includes('codex'));
  }

  await checkStartupTimeout(fakeEngine);

  if (failures) {
    console.log(`\n${failures} check(s) FAILED`);
    process.exit(1);
  }
  console.log('\nall checks passed');
  process.exit(0);
}
// Startup deadline for the adapter/engine handshake. Prompts are intentionally
// NOT time-limited; only startup is.
// Overridable via ROWBOAT_ACP_STARTUP_TIMEOUT_MS — used by the CI smoke test to
// avoid waiting the full minute, and an escape hatch for genuinely slow setups
// (e.g. many MCP servers configured in the engine's user settings).
const DEFAULT_STARTUP_TIMEOUT_MS = 60_000;

// Largest delay Node's timers accept. setTimeout() substitutes 1ms for anything
// above it, so an over-generous override would otherwise time out immediately.
// NOTE(review): assumes the deadline is armed with setTimeout — confirm at the
// use site.
const MAX_TIMER_DELAY_MS = 2_147_483_647;

/**
 * Turn the raw environment override into a usable timeout.
 *
 * @param raw Raw override value; defaults to ROWBOAT_ACP_STARTUP_TIMEOUT_MS.
 * @returns The override in milliseconds, capped at the largest delay Node's
 *   timers accept; the 60s default when the override is missing, non-numeric,
 *   zero or negative.
 */
function resolveStartupTimeoutMs(raw = process.env.ROWBOAT_ACP_STARTUP_TIMEOUT_MS) {
  const requested = Number(raw);
  if (Number.isNaN(requested) || requested <= 0) return DEFAULT_STARTUP_TIMEOUT_MS;
  return Math.min(requested, MAX_TIMER_DELAY_MS);
}

const STARTUP_TIMEOUT_MS = resolveStartupTimeoutMs();
#!/usr/bin/env bash
# One-shot code-mode diagnostics (macOS / Linux).
#
# When code mode misbehaves on your machine (stuck runs, "CLI not found",
# startup timeouts), run this and send the FULL output back — it collects
# everything needed to diagnose in one round trip:
#
#   bash apps/x/scripts/diagnose-code-mode.sh
#
# Read-only except for one tiny `claude -p` probe (a single short API call).

# Print a section banner. $1 = title.
section() { printf '\n=== %s ===\n' "$1"; }

# Portable timeout: mac has no `timeout` binary by default.
# Usage: run_with_timeout SECONDS COMMAND [ARGS...]
# Returns the command's exit status (143 when it was killed at the deadline).
run_with_timeout() {
  local secs="$1"; shift
  "$@" &
  local pid=$!
  # Watchdog. Its stdio goes to /dev/null and its `sleep` is killed when the
  # command finishes first: otherwise the orphaned sleep keeps the caller's
  # output pipe open, so `diagnose-code-mode.sh | tee out.txt` would not
  # return until the full timeout has elapsed.
  (
    trap 'kill "$sleeper" 2>/dev/null; exit 0' TERM
    sleep "$secs" &
    sleeper=$!
    wait "$sleeper" && kill "$pid" 2>/dev/null
  ) >/dev/null 2>&1 &
  local killer=$!
  wait "$pid" 2>/dev/null
  local rc=$?
  kill "$killer" 2>/dev/null
  wait "$killer" 2>/dev/null
  return "$rc"
}

# Describe one CLI: where the login shell finds it, whether it is a symlink,
# whether it is a script or a native binary, and its version. $1 = name.
describe_binary() {
  local p
  p="$(/bin/sh -lc "command -v $1" 2>/dev/null)"
  if [ -z "$p" ]; then
    echo "$1: NOT on login-shell PATH"
    return
  fi
  echo "$1: $p"
  [ -L "$p" ] && echo "  symlink -> $(readlink "$p")"
  # Node-shebang script vs native binary — the distinction that matters for
  # GUI launches (shebang scripts need `node` on the SPAWNING process's PATH).
  local head1
  head1="$(head -c 64 "$p" 2>/dev/null | head -n 1 | tr -d '\0')"
  case "$head1" in
    '#!'*) echo "  type: script ($head1)" ;;
    *)     echo "  type: native binary" ;;
  esac
  # Ask the binary that was just described for its version. Running the bare
  # name would consult THIS shell's PATH, which can resolve to a different
  # install or to nothing at all.
  local runner="$1"
  [ -x "$p" ] && runner="$p"
  echo "  version: $(run_with_timeout 15 "$runner" --version 2>&1 | head -n 1)"
}

section "system"
echo "os: $(uname -sr) ($(uname -m))"
echo "shell: ${SHELL:-unset}"
echo "date: $(date)"

section "engines"
describe_binary claude
describe_binary codex
describe_binary node

section "PATH: login shell vs GUI"
echo "login-shell PATH:"
/bin/sh -lc 'echo "  $PATH"'
if [ "$(uname -s)" = "Darwin" ]; then
  echo "launchd (GUI) PATH:"
  echo "  $(launchctl getenv PATH 2>/dev/null || echo '(unset — GUI apps get the system default)')"
fi

section "auth presence (no secrets printed)"
if [ "$(uname -s)" = "Darwin" ]; then
  if security find-generic-password -s "Claude Code-credentials" >/dev/null 2>&1; then
    echo "claude: keychain credential present"
  else
    echo "claude: NO keychain credential (signed in?)"
  fi
fi
[ -f "$HOME/.claude/.credentials.json" ] && echo "claude: ~/.claude/.credentials.json present"
[ -f "$HOME/.codex/auth.json" ] && echo "codex: ~/.codex/auth.json present" || echo "codex: NO ~/.codex/auth.json"

section "claude stream-json probe (what the app does under the hood)"
# A healthy claude prints a `system`/`init` JSON line within seconds. A hang or
# error here reproduces the in-app failure WITHOUT the app.
run_with_timeout 45 claude -p "reply with exactly: ok" --output-format stream-json --verbose 2>&1 | head -n 3
# `$?` after a pipeline is head's status; the probe's own status is element 0
# of PIPESTATUS and must be read before any other command runs.
probe_rc=${PIPESTATUS[0]}
echo "(probe exit: $probe_rc — 143 means it hung and was killed after 45s)"

section "newest SDK debug log (~/.claude/debug)"
latest="$(ls -t "$HOME/.claude/debug"/sdk-*.txt 2>/dev/null | head -n 1)"
if [ -n "$latest" ]; then
  echo "$latest:"
  tail -n 40 "$latest"
else
  echo "(none found)"
fi

printf '\ndone — send everything above.\n'