mirror of
https://github.com/rowboatlabs/rowboat.git
synced 2026-06-09 19:45:17 +02:00
fix(code-mode): harden ACP engine — turn-scoped connections, chip-authoritative agent, reliable stop
Three robustness fixes that co-modify manager.runPrompt and the code_agent_run tool, so they land together: - Lifecycle: scope each ACP adapter connection to the agent turn. Dispose it a short grace (60s) after the turn ends instead of holding it for the app's life; the next turn resumes via session/load (both agents support it). Wire disposeAll() on app quit (was dead code). Fixes the unbounded per-chat leak of booted agent processes. - Agent selection: make the composer chip the source of truth. Thread codeMode into ToolContext; code_agent_run uses it instead of the model's guessed `agent` arg, which anchored on the thread's earlier agent and ignored a chip change. Prompts updated to match; the run is labelled by the agent that actually ran. - Stop/abort: guarantee a stopped turn unwinds. On abort the manager sends ACP session/cancel, then force-kills the adapter after a 2s grace and resolves the turn as cancelled — a wedged adapter can no longer hang the run and lock the chat. code_agent_run returns a clean cancelled result.
This commit is contained in:
parent
2fef77416f
commit
dda73d0daa
8 changed files with 150 additions and 34 deletions
|
|
@ -40,7 +40,8 @@ import started from "electron-squirrel-startup";
|
|||
import { execSync, exec, execFileSync } from "node:child_process";
|
||||
import { promisify } from "node:util";
|
||||
import { init as initChromeSync } from "@x/core/dist/knowledge/chrome-extension/server/server.js";
|
||||
import { registerBrowserControlService, registerNotificationService } from "@x/core/dist/di/container.js";
|
||||
import container, { registerBrowserControlService, registerNotificationService } from "@x/core/dist/di/container.js";
|
||||
import type { CodeModeManager } from "@x/core/dist/code-mode/acp/manager.js";
|
||||
import { browserViewManager, BROWSER_PARTITION } from "./browser/view.js";
|
||||
import { setupBrowserEventForwarding } from "./browser/ipc.js";
|
||||
import { ElectronBrowserControlService } from "./browser/control-service.js";
|
||||
|
|
@ -416,6 +417,12 @@ app.on("before-quit", () => {
|
|||
stopWorkspaceWatcher();
|
||||
stopRunsWatcher();
|
||||
stopServicesWatcher();
|
||||
// Tear down any live ACP coding-agent adapter processes so they don't outlive the app.
|
||||
try {
|
||||
container.resolve<CodeModeManager>('codeModeManager').disposeAll();
|
||||
} catch {
|
||||
// nothing live to dispose
|
||||
}
|
||||
shutdownLocalSites().catch((error) => {
|
||||
console.error('[LocalSites] Failed to shut down cleanly:', error);
|
||||
});
|
||||
|
|
|
|||
|
|
@ -230,7 +230,12 @@ export function CodingRunBlock({
|
|||
onOpenChange: (open: boolean) => void
|
||||
onPermissionDecision: (decision: PermissionDecision) => void
|
||||
}) {
|
||||
const agent = (item.input as { agent?: string } | undefined)?.agent
|
||||
// Prefer the agent the backend actually ran (the chip) once the run returns; fall
|
||||
// back to the requested input agent while it's still in flight. Never trust only the
|
||||
// model's input — it can pass a stale agent the backend overrode with the chip.
|
||||
const agent =
|
||||
(item.result as { agent?: string } | undefined)?.agent ??
|
||||
(item.input as { agent?: string } | undefined)?.agent
|
||||
const title = AGENT_LABEL[agent ?? ''] ?? 'Coding agent'
|
||||
return (
|
||||
<>
|
||||
|
|
|
|||
|
|
@ -1255,6 +1255,7 @@ export async function* streamAgent({
|
|||
signal,
|
||||
abortRegistry,
|
||||
publish: (event) => bus.publish(event),
|
||||
codeMode,
|
||||
});
|
||||
}
|
||||
} catch (error) {
|
||||
|
|
@ -1402,17 +1403,15 @@ Do not announce the work directory unless it's relevant. Just use it.`;
|
|||
if (codeMode) {
|
||||
loopLogger.log('code mode enabled, injecting coding-agent context', codeMode);
|
||||
const agentDisplay = codeMode === 'claude' ? 'Claude Code' : 'Codex';
|
||||
const otherAgent = codeMode === 'claude' ? 'codex' : 'claude';
|
||||
const otherDisplay = codeMode === 'claude' ? 'Codex' : 'Claude Code';
|
||||
instructionsWithDateTime += `\n\n# Code Mode (Active) — Default agent: ${agentDisplay}
|
||||
The user has turned on **code mode** and the composer chip is set to **${agentDisplay}** (\`${codeMode}\`). Use this as the **default** agent for coding tasks in this turn.
|
||||
instructionsWithDateTime += `\n\n# Code Mode (Active) — Agent: ${agentDisplay}
|
||||
The user has turned on **code mode** and the composer chip is set to **${agentDisplay}** (\`${codeMode}\`). For EVERY coding task this turn, use **${agentDisplay}**, and narrate that agent ("Using ${agentDisplay} to …").
|
||||
|
||||
**The user can override the agent at any time, two ways:**
|
||||
1. By toggling the chip in the composer (preferred).
|
||||
2. By asking you directly in chat ("use codex", "switch to claude", "do this with ${otherDisplay}", etc.). When the user explicitly asks to use a different agent in the current message, honor that — use \`${otherAgent}\` instead of \`${codeMode}\` for this turn, and briefly mention they can also toggle it via the chip for stickiness.
|
||||
The chip is the single source of truth for which agent runs:
|
||||
- Do NOT carry over a different agent from earlier in this thread — even if a previous run used the other agent, use **${agentDisplay}** now.
|
||||
- Do NOT switch agents based on an in-chat text request ("use codex", "switch to claude"). The agent only changes when the user toggles the chip; if they ask in chat, tell them to toggle the chip.
|
||||
|
||||
**How to run coding work — call the \`code_agent_run\` tool** with:
|
||||
- \`agent\`: \`${codeMode}\` by default (or the in-chat override above).
|
||||
- \`agent\`: \`${codeMode}\` (always — match the chip).
|
||||
- \`cwd\`: the absolute project/working directory (resolve it per the code-with-agents skill — a path the user named, the "# User Work Directory" block, or ask once).
|
||||
- \`prompt\`: a clear, self-contained coding instruction.
|
||||
|
||||
|
|
|
|||
|
|
@ -48,9 +48,7 @@ This is non-negotiable. The user gets clickable buttons. Free-text "which agent?
|
|||
2. The path from a "# User Work Directory" block in your context.
|
||||
3. Ask once in plain text: "Which folder should I work in?"
|
||||
|
||||
**Pick the agent** (\`claude\` or \`codex\`), in priority order:
|
||||
- An explicit in-chat override from the user this turn ("use codex", "switch to claude") — honor it.
|
||||
- The agent from the "# Code Mode (Active)" block / Step 1 choice.
|
||||
**Pick the agent** (\`claude\` or \`codex\`): use the agent from the "# Code Mode (Active)" block (the composer chip) / the Step 1 choice. The chip is authoritative — do NOT carry over a different agent from earlier in this thread, and do NOT switch on an in-chat text request ("use codex"); tell the user to toggle the chip instead.
|
||||
|
||||
**State your intent in one line, then call the tool immediately — do NOT wait for a "yes".** The tool's own permission cards are the user's confirmation, so an extra in-chat "reply yes to proceed" is redundant friction. Say something like:
|
||||
|
||||
|
|
@ -71,7 +69,7 @@ code_agent_run({
|
|||
- Mention constraints (language, framework, style).
|
||||
- Expand short user requests into clear, actionable instructions.
|
||||
|
||||
**Follow-ups:** for every later coding request in this chat, just call \`code_agent_run\` again with the same \`cwd\` (and agent, unless overridden). The session resumes automatically — do NOT start over or re-explain prior context.
|
||||
**Follow-ups:** for every later coding request in this chat, just call \`code_agent_run\` again with the same \`cwd\` and the chip's current agent. The session resumes automatically — do NOT start over or re-explain prior context.
|
||||
|
||||
---
|
||||
|
||||
|
|
|
|||
|
|
@ -811,7 +811,7 @@ export const BuiltinTools: z.infer<typeof BuiltinToolsSchema> = {
|
|||
code_agent_run: {
|
||||
description: 'Run a coding/software task with the selected on-device coding agent (Claude Code or Codex) inside a project folder. Streams the agent\'s tool calls, file diffs, and plan into the chat and surfaces permission requests inline. Use this for ALL code-mode work (writing/editing/reading code, running tests, debugging, exploring a repo). Reuses one persistent session per chat, so follow-up requests keep context.',
|
||||
inputSchema: z.object({
|
||||
agent: z.enum(['claude', 'codex']).describe('Which coding agent to use: "claude" (Claude Code) or "codex". Pick per the active code-mode selection / any in-chat override.'),
|
||||
agent: z.enum(['claude', 'codex']).describe('Which coding agent to use: "claude" (Claude Code) or "codex". Set this to the active code-mode chip agent. Note: when the chip is set, the backend uses the chip agent regardless of this value — this only takes effect in the ask-human flow where no chip is set.'),
|
||||
cwd: z.string().describe('Absolute path to the working directory / project folder the agent should operate in.'),
|
||||
prompt: z.string().describe('The full, self-contained coding instruction for the agent (file names, expected behavior, constraints).'),
|
||||
}),
|
||||
|
|
@ -819,6 +819,11 @@ export const BuiltinTools: z.infer<typeof BuiltinToolsSchema> = {
|
|||
if (!ctx) {
|
||||
return { success: false, message: 'code_agent_run requires run context (runId / streaming).' };
|
||||
}
|
||||
// The composer chip is the source of truth for the agent. The model's `agent`
|
||||
// argument is only a fallback for the ask-human flow (code mode not active, no
|
||||
// chip set) — otherwise it can anchor on the thread's earlier agent and ignore a
|
||||
// chip change. Honor the chip so switching it deterministically switches agents.
|
||||
const effectiveAgent = ctx.codeMode ?? agent;
|
||||
const manager = container.resolve<CodeModeManager>('codeModeManager');
|
||||
const registry = container.resolve<CodePermissionRegistry>('codePermissionRegistry');
|
||||
|
||||
|
|
@ -831,11 +836,11 @@ export const BuiltinTools: z.infer<typeof BuiltinToolsSchema> = {
|
|||
// fall back to 'ask'
|
||||
}
|
||||
|
||||
// Cancel the coding turn (and unblock any pending approval) if the run is stopped.
|
||||
const onAbort = () => {
|
||||
manager.cancel(ctx.runId).catch(() => {});
|
||||
registry.cancelRun(ctx.runId);
|
||||
};
|
||||
// On stop, unblock any pending approval card so the broker stops waiting for
|
||||
// an answer that will never come. The ACP cancel + force-kill backstop that
|
||||
// actually ends the turn is handled inside manager.runPrompt via the signal
|
||||
// we pass below.
|
||||
const onAbort = () => registry.cancelRun(ctx.runId);
|
||||
if (ctx.signal.aborted) onAbort();
|
||||
else ctx.signal.addEventListener('abort', onAbort, { once: true });
|
||||
|
||||
|
|
@ -844,10 +849,11 @@ export const BuiltinTools: z.infer<typeof BuiltinToolsSchema> = {
|
|||
try {
|
||||
const result = await manager.runPrompt({
|
||||
runId: ctx.runId,
|
||||
agent,
|
||||
agent: effectiveAgent,
|
||||
cwd,
|
||||
prompt,
|
||||
policy,
|
||||
signal: ctx.signal,
|
||||
onEvent: (event) => {
|
||||
if (event.type === 'message' && event.role === 'agent') finalText += event.text;
|
||||
if (event.type === 'tool_call_update') for (const f of event.diffs) changedFiles.add(f);
|
||||
|
|
@ -873,10 +879,23 @@ export const BuiltinTools: z.infer<typeof BuiltinToolsSchema> = {
|
|||
return {
|
||||
success: result.stopReason === 'end_turn',
|
||||
stopReason: result.stopReason,
|
||||
// The agent that actually ran (the chip), so the UI can label the run
|
||||
// authoritatively rather than trusting the model's `agent` argument.
|
||||
agent: effectiveAgent,
|
||||
summary: finalText.trim(),
|
||||
changedFiles: [...changedFiles],
|
||||
};
|
||||
} catch (error) {
|
||||
// A stop mid-run isn't a failure — report it as a clean cancellation.
|
||||
if (ctx.signal.aborted) {
|
||||
return {
|
||||
success: false,
|
||||
stopReason: 'cancelled',
|
||||
agent: effectiveAgent,
|
||||
summary: finalText.trim(),
|
||||
changedFiles: [...changedFiles],
|
||||
};
|
||||
}
|
||||
return {
|
||||
success: false,
|
||||
message: `Coding agent failed: ${error instanceof Error ? error.message : String(error)}`,
|
||||
|
|
|
|||
|
|
@ -14,6 +14,10 @@ export interface ToolContext {
|
|||
signal: AbortSignal;
|
||||
abortRegistry: IAbortRegistry;
|
||||
publish: (event: z.infer<typeof RunEvent>) => Promise<void>;
|
||||
// The composer code-mode chip for the message that triggered this turn. When set,
|
||||
// it is the authoritative coding agent — code_agent_run uses it rather than the
|
||||
// agent the model guessed, so switching the chip deterministically switches agents.
|
||||
codeMode?: 'claude' | 'codex' | null;
|
||||
}
|
||||
|
||||
async function execMcpTool(agentTool: z.infer<typeof ToolAttachment> & { type: "mcp" }, input: Record<string, unknown>): Promise<unknown> {
|
||||
|
|
|
|||
|
|
@ -13,6 +13,8 @@ export interface RunPromptArgs {
|
|||
ask: (ask: PermissionAsk) => Promise<PermissionDecision>;
|
||||
/** Stream sink for this prompt's run. */
|
||||
onEvent: (event: CodeRunEvent) => void;
|
||||
/** Aborts the turn on stop; the manager cancels then force-kills the adapter. */
|
||||
signal?: AbortSignal;
|
||||
}
|
||||
|
||||
interface ActiveRun {
|
||||
|
|
@ -20,16 +22,36 @@ interface ActiveRun {
|
|||
sessionId: string;
|
||||
agent: CodingAgent;
|
||||
cwd: string;
|
||||
// Prompts currently streaming on this connection. Disposal is deferred while
|
||||
// this is > 0 so we never tear down a connection mid-turn.
|
||||
inflight: number;
|
||||
// Pending grace-window teardown, cleared if the run is reused before it fires.
|
||||
disposeTimer?: ReturnType<typeof setTimeout>;
|
||||
}
|
||||
|
||||
// Drives ACP coding sessions, one live connection per chat run. Reuses a warm
|
||||
// connection for follow-up prompts in the same chat; resumes a persisted session
|
||||
// (via session/load) on the first prompt after an app restart.
|
||||
// How long a connection stays warm after its last turn ends before we tear it down.
|
||||
// A coding "turn" is one code_agent_run tool call; we keep the adapter briefly so
|
||||
// back-to-back calls within one copilot turn (edit -> test -> fix) and quick user
|
||||
// follow-ups reuse the warm connection instead of cold-starting. Set to 0 for strict
|
||||
// per-turn teardown. Context is never lost either way: the next turn resumes the
|
||||
// persisted session via session/load.
|
||||
const DISPOSE_GRACE_MS = 60_000;
|
||||
|
||||
// On stop, how long to let the adapter cancel gracefully (ACP session/cancel) before
|
||||
// we force-kill it. The kill guarantees the turn unwinds even if the adapter ignores
|
||||
// cancel or is blocked — otherwise a hung prompt would lock the chat indefinitely.
|
||||
const CANCEL_GRACE_MS = 2_000;
|
||||
|
||||
// Drives ACP coding sessions. A connection's lifetime is scoped to the agent turn
|
||||
// (one code_agent_run): it is torn down a short grace window after the turn ends, so
|
||||
// idle chats hold no adapter processes. Turns that land within the grace window reuse
|
||||
// the warm connection; anything colder (grace elapsed, or after an app restart)
|
||||
// resumes the persisted session via session/load.
|
||||
export class CodeModeManager {
|
||||
private readonly runs = new Map<string, ActiveRun>();
|
||||
|
||||
async runPrompt(args: RunPromptArgs): Promise<RunPromptResult> {
|
||||
const { runId, agent, cwd, prompt, policy, ask, onEvent } = args;
|
||||
const { runId, agent, cwd, prompt, policy, ask, onEvent, signal } = args;
|
||||
|
||||
const broker = new PermissionBroker({
|
||||
policy,
|
||||
|
|
@ -38,22 +60,82 @@ export class CodeModeManager {
|
|||
});
|
||||
|
||||
const run = await this.ensureRun(runId, agent, cwd, broker, onEvent);
|
||||
const res = await run.client.prompt(run.sessionId, prompt);
|
||||
return { stopReason: res.stopReason, sessionId: run.sessionId };
|
||||
}
|
||||
run.inflight++;
|
||||
|
||||
async cancel(runId: string): Promise<void> {
|
||||
const run = this.runs.get(runId);
|
||||
if (run) await run.client.cancel(run.sessionId);
|
||||
let graceTimer: ReturnType<typeof setTimeout> | undefined;
|
||||
let onAbort: (() => void) | undefined;
|
||||
try {
|
||||
const promptP = run.client.prompt(run.sessionId, prompt);
|
||||
// We may stop awaiting this prompt below (force-kill on stop rejects it);
|
||||
// attach a no-op catch so the orphaned rejection isn't flagged.
|
||||
promptP.catch(() => {});
|
||||
|
||||
// Stop handling: on abort, ask the adapter to cancel; if it hasn't unwound
|
||||
// within the grace, force-kill it and resolve as cancelled. This guarantees
|
||||
// the turn ends even if the adapter ignores cancel or is wedged — a hung
|
||||
// prompt would otherwise lock the chat (no run-stopped, composer disabled).
|
||||
const cancelledP = new Promise<{ stopReason: string }>((resolve) => {
|
||||
if (!signal) return;
|
||||
onAbort = () => {
|
||||
run.client.cancel(run.sessionId).catch(() => {});
|
||||
graceTimer = setTimeout(() => {
|
||||
this.dispose(runId);
|
||||
resolve({ stopReason: 'cancelled' });
|
||||
}, CANCEL_GRACE_MS);
|
||||
graceTimer.unref?.();
|
||||
};
|
||||
if (signal.aborted) onAbort();
|
||||
else signal.addEventListener('abort', onAbort, { once: true });
|
||||
});
|
||||
|
||||
const res = await Promise.race([promptP, cancelledP]);
|
||||
return { stopReason: res.stopReason, sessionId: run.sessionId };
|
||||
} catch (e) {
|
||||
// A kill-induced "connection closed" during a stop is an expected cancel.
|
||||
if (signal?.aborted) return { stopReason: 'cancelled', sessionId: run.sessionId };
|
||||
throw e;
|
||||
} finally {
|
||||
if (signal && onAbort) signal.removeEventListener('abort', onAbort);
|
||||
if (graceTimer) clearTimeout(graceTimer);
|
||||
run.inflight--;
|
||||
this.scheduleDispose(runId);
|
||||
}
|
||||
}
|
||||
|
||||
dispose(runId: string): void {
|
||||
const run = this.runs.get(runId);
|
||||
if (!run) return;
|
||||
this.cancelDispose(run);
|
||||
run.client.dispose();
|
||||
this.runs.delete(runId);
|
||||
}
|
||||
|
||||
// Tear down the connection a grace window after its last turn ends. Skipped while a
|
||||
// prompt is still streaming, and re-armed when each turn ends so the window measures
|
||||
// idle-since-last-activity. With grace 0 we dispose immediately (strict per-turn).
|
||||
private scheduleDispose(runId: string): void {
|
||||
const run = this.runs.get(runId);
|
||||
if (!run || run.inflight > 0) return;
|
||||
this.cancelDispose(run);
|
||||
if (DISPOSE_GRACE_MS <= 0) {
|
||||
this.dispose(runId);
|
||||
return;
|
||||
}
|
||||
run.disposeTimer = setTimeout(() => {
|
||||
const r = this.runs.get(runId);
|
||||
if (r && r.inflight === 0) this.dispose(runId);
|
||||
}, DISPOSE_GRACE_MS);
|
||||
// A pending teardown timer must not keep the process alive at quit.
|
||||
run.disposeTimer.unref?.();
|
||||
}
|
||||
|
||||
private cancelDispose(run: ActiveRun): void {
|
||||
if (run.disposeTimer) {
|
||||
clearTimeout(run.disposeTimer);
|
||||
run.disposeTimer = undefined;
|
||||
}
|
||||
}
|
||||
|
||||
disposeAll(): void {
|
||||
for (const runId of [...this.runs.keys()]) this.dispose(runId);
|
||||
}
|
||||
|
|
@ -69,6 +151,7 @@ export class CodeModeManager {
|
|||
): Promise<ActiveRun> {
|
||||
const existing = this.runs.get(runId);
|
||||
if (existing && existing.agent === agent && existing.cwd === cwd) {
|
||||
this.cancelDispose(existing); // reused before its grace window elapsed
|
||||
existing.client.setHandlers(broker, onEvent);
|
||||
return existing;
|
||||
}
|
||||
|
|
@ -78,7 +161,7 @@ export class CodeModeManager {
|
|||
await client.start();
|
||||
|
||||
const sessionId = await this.openSession(runId, agent, cwd, client);
|
||||
const run: ActiveRun = { client, sessionId, agent, cwd };
|
||||
const run: ActiveRun = { client, sessionId, agent, cwd, inflight: 0 };
|
||||
this.runs.set(runId, run);
|
||||
return run;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -46,8 +46,9 @@ container.register({
|
|||
agentScheduleStateRepo: asClass<IAgentScheduleStateRepo>(FSAgentScheduleStateRepo).singleton(),
|
||||
slackConfigRepo: asClass<ISlackConfigRepo>(FSSlackConfigRepo).singleton(),
|
||||
|
||||
// ACP code-mode engine: the manager holds live agent connections for the app
|
||||
// lifetime (warm sessions across messages); the registry brokers mid-run approvals.
|
||||
// ACP code-mode engine: the manager holds a live agent connection per chat only
|
||||
// around an active turn (torn down after a short idle grace; resumed via
|
||||
// session/load); the registry brokers mid-run approvals.
|
||||
codeModeManager: asClass(CodeModeManager).singleton(),
|
||||
codePermissionRegistry: asClass(CodePermissionRegistry).singleton(),
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue