fix(code-mode): harden ACP engine — turn-scoped connections, chip-authoritative agent, reliable stop

Three robustness fixes that co-modify manager.runPrompt and the code_agent_run
tool, so they land together:

- Lifecycle: scope each ACP adapter connection to the agent turn. Dispose it a
  short grace (60s) after the turn ends instead of holding it for the app's life;
  the next turn resumes via session/load (both agents support it). Wire
  disposeAll() on app quit (was dead code). Fixes the unbounded per-chat leak of
  booted agent processes.

- Agent selection: make the composer chip the source of truth. Thread codeMode
  into ToolContext; code_agent_run uses it instead of the model's guessed `agent`
  arg, which anchored on the thread's earlier agent and ignored a chip change.
  Prompts updated to match; the run is labelled by the agent that actually ran.

- Stop/abort: guarantee a stopped turn unwinds. On abort the manager sends ACP
  session/cancel, then force-kills the adapter after a 2s grace and resolves the
  turn as cancelled — a wedged adapter can no longer hang the run and lock the
  chat. code_agent_run returns a clean cancelled result.
This commit is contained in:
Gagancreates 2026-06-04 01:46:28 +05:30
parent 2fef77416f
commit dda73d0daa
8 changed files with 150 additions and 34 deletions

View file

@ -40,7 +40,8 @@ import started from "electron-squirrel-startup";
import { execSync, exec, execFileSync } from "node:child_process";
import { promisify } from "node:util";
import { init as initChromeSync } from "@x/core/dist/knowledge/chrome-extension/server/server.js";
import { registerBrowserControlService, registerNotificationService } from "@x/core/dist/di/container.js";
import container, { registerBrowserControlService, registerNotificationService } from "@x/core/dist/di/container.js";
import type { CodeModeManager } from "@x/core/dist/code-mode/acp/manager.js";
import { browserViewManager, BROWSER_PARTITION } from "./browser/view.js";
import { setupBrowserEventForwarding } from "./browser/ipc.js";
import { ElectronBrowserControlService } from "./browser/control-service.js";
@ -416,6 +417,12 @@ app.on("before-quit", () => {
stopWorkspaceWatcher();
stopRunsWatcher();
stopServicesWatcher();
// Tear down any live ACP coding-agent adapter processes so they don't outlive the app.
try {
container.resolve<CodeModeManager>('codeModeManager').disposeAll();
} catch {
// nothing live to dispose
}
shutdownLocalSites().catch((error) => {
console.error('[LocalSites] Failed to shut down cleanly:', error);
});

View file

@ -230,7 +230,12 @@ export function CodingRunBlock({
onOpenChange: (open: boolean) => void
onPermissionDecision: (decision: PermissionDecision) => void
}) {
const agent = (item.input as { agent?: string } | undefined)?.agent
// Prefer the agent the backend actually ran (the chip) once the run returns; fall
// back to the requested input agent while it's still in flight. Never trust only the
// model's input — it can pass a stale agent the backend overrode with the chip.
const agent =
(item.result as { agent?: string } | undefined)?.agent ??
(item.input as { agent?: string } | undefined)?.agent
const title = AGENT_LABEL[agent ?? ''] ?? 'Coding agent'
return (
<>

View file

@ -1255,6 +1255,7 @@ export async function* streamAgent({
signal,
abortRegistry,
publish: (event) => bus.publish(event),
codeMode,
});
}
} catch (error) {
@ -1402,17 +1403,15 @@ Do not announce the work directory unless it's relevant. Just use it.`;
if (codeMode) {
loopLogger.log('code mode enabled, injecting coding-agent context', codeMode);
const agentDisplay = codeMode === 'claude' ? 'Claude Code' : 'Codex';
const otherAgent = codeMode === 'claude' ? 'codex' : 'claude';
const otherDisplay = codeMode === 'claude' ? 'Codex' : 'Claude Code';
instructionsWithDateTime += `\n\n# Code Mode (Active) — Default agent: ${agentDisplay}
The user has turned on **code mode** and the composer chip is set to **${agentDisplay}** (\`${codeMode}\`). Use this as the **default** agent for coding tasks in this turn.
instructionsWithDateTime += `\n\n# Code Mode (Active) — Agent: ${agentDisplay}
The user has turned on **code mode** and the composer chip is set to **${agentDisplay}** (\`${codeMode}\`). For EVERY coding task this turn, use **${agentDisplay}**, and narrate that agent ("Using ${agentDisplay} to …").
**The user can override the agent at any time, two ways:**
1. By toggling the chip in the composer (preferred).
2. By asking you directly in chat ("use codex", "switch to claude", "do this with ${otherDisplay}", etc.). When the user explicitly asks to use a different agent in the current message, honor that use \`${otherAgent}\` instead of \`${codeMode}\` for this turn, and briefly mention they can also toggle it via the chip for stickiness.
The chip is the single source of truth for which agent runs:
- Do NOT carry over a different agent from earlier in this thread even if a previous run used the other agent, use **${agentDisplay}** now.
- Do NOT switch agents based on an in-chat text request ("use codex", "switch to claude"). The agent only changes when the user toggles the chip; if they ask in chat, tell them to toggle the chip.
**How to run coding work call the \`code_agent_run\` tool** with:
- \`agent\`: \`${codeMode}\` by default (or the in-chat override above).
- \`agent\`: \`${codeMode}\` (always — match the chip).
- \`cwd\`: the absolute project/working directory (resolve it per the code-with-agents skill — a path the user named, the "# User Work Directory" block, or ask once).
- \`prompt\`: a clear, self-contained coding instruction.

View file

@ -48,9 +48,7 @@ This is non-negotiable. The user gets clickable buttons. Free-text "which agent?
2. The path from a "# User Work Directory" block in your context.
3. Ask once in plain text: "Which folder should I work in?"
**Pick the agent** (\`claude\` or \`codex\`), in priority order:
- An explicit in-chat override from the user this turn ("use codex", "switch to claude") honor it.
- The agent from the "# Code Mode (Active)" block / Step 1 choice.
**Pick the agent** (\`claude\` or \`codex\`): use the agent from the "# Code Mode (Active)" block (the composer chip) / the Step 1 choice. The chip is authoritative — do NOT carry over a different agent from earlier in this thread, and do NOT switch on an in-chat text request ("use codex"); tell the user to toggle the chip instead.
**State your intent in one line, then call the tool immediately do NOT wait for a "yes".** The tool's own permission cards are the user's confirmation, so an extra in-chat "reply yes to proceed" is redundant friction. Say something like:
@ -71,7 +69,7 @@ code_agent_run({
- Mention constraints (language, framework, style).
- Expand short user requests into clear, actionable instructions.
**Follow-ups:** for every later coding request in this chat, just call \`code_agent_run\` again with the same \`cwd\` (and agent, unless overridden). The session resumes automatically — do NOT start over or re-explain prior context.
**Follow-ups:** for every later coding request in this chat, just call \`code_agent_run\` again with the same \`cwd\` and the chip's current agent. The session resumes automatically — do NOT start over or re-explain prior context.
---

View file

@ -811,7 +811,7 @@ export const BuiltinTools: z.infer<typeof BuiltinToolsSchema> = {
code_agent_run: {
description: 'Run a coding/software task with the selected on-device coding agent (Claude Code or Codex) inside a project folder. Streams the agent\'s tool calls, file diffs, and plan into the chat and surfaces permission requests inline. Use this for ALL code-mode work (writing/editing/reading code, running tests, debugging, exploring a repo). Reuses one persistent session per chat, so follow-up requests keep context.',
inputSchema: z.object({
agent: z.enum(['claude', 'codex']).describe('Which coding agent to use: "claude" (Claude Code) or "codex". Pick per the active code-mode selection / any in-chat override.'),
agent: z.enum(['claude', 'codex']).describe('Which coding agent to use: "claude" (Claude Code) or "codex". Set this to the active code-mode chip agent. Note: when the chip is set, the backend uses the chip agent regardless of this value — this only takes effect in the ask-human flow where no chip is set.'),
cwd: z.string().describe('Absolute path to the working directory / project folder the agent should operate in.'),
prompt: z.string().describe('The full, self-contained coding instruction for the agent (file names, expected behavior, constraints).'),
}),
@ -819,6 +819,11 @@ export const BuiltinTools: z.infer<typeof BuiltinToolsSchema> = {
if (!ctx) {
return { success: false, message: 'code_agent_run requires run context (runId / streaming).' };
}
// The composer chip is the source of truth for the agent. The model's `agent`
// argument is only a fallback for the ask-human flow (code mode not active, no
// chip set) — otherwise it can anchor on the thread's earlier agent and ignore a
// chip change. Honor the chip so switching it deterministically switches agents.
const effectiveAgent = ctx.codeMode ?? agent;
const manager = container.resolve<CodeModeManager>('codeModeManager');
const registry = container.resolve<CodePermissionRegistry>('codePermissionRegistry');
@ -831,11 +836,11 @@ export const BuiltinTools: z.infer<typeof BuiltinToolsSchema> = {
// fall back to 'ask'
}
// Cancel the coding turn (and unblock any pending approval) if the run is stopped.
const onAbort = () => {
manager.cancel(ctx.runId).catch(() => {});
registry.cancelRun(ctx.runId);
};
// On stop, unblock any pending approval card so the broker stops waiting for
// an answer that will never come. The ACP cancel + force-kill backstop that
// actually ends the turn is handled inside manager.runPrompt via the signal
// we pass below.
const onAbort = () => registry.cancelRun(ctx.runId);
if (ctx.signal.aborted) onAbort();
else ctx.signal.addEventListener('abort', onAbort, { once: true });
@ -844,10 +849,11 @@ export const BuiltinTools: z.infer<typeof BuiltinToolsSchema> = {
try {
const result = await manager.runPrompt({
runId: ctx.runId,
agent,
agent: effectiveAgent,
cwd,
prompt,
policy,
signal: ctx.signal,
onEvent: (event) => {
if (event.type === 'message' && event.role === 'agent') finalText += event.text;
if (event.type === 'tool_call_update') for (const f of event.diffs) changedFiles.add(f);
@ -873,10 +879,23 @@ export const BuiltinTools: z.infer<typeof BuiltinToolsSchema> = {
return {
success: result.stopReason === 'end_turn',
stopReason: result.stopReason,
// The agent that actually ran (the chip), so the UI can label the run
// authoritatively rather than trusting the model's `agent` argument.
agent: effectiveAgent,
summary: finalText.trim(),
changedFiles: [...changedFiles],
};
} catch (error) {
// A stop mid-run isn't a failure — report it as a clean cancellation.
if (ctx.signal.aborted) {
return {
success: false,
stopReason: 'cancelled',
agent: effectiveAgent,
summary: finalText.trim(),
changedFiles: [...changedFiles],
};
}
return {
success: false,
message: `Coding agent failed: ${error instanceof Error ? error.message : String(error)}`,

View file

@ -14,6 +14,10 @@ export interface ToolContext {
signal: AbortSignal;
abortRegistry: IAbortRegistry;
publish: (event: z.infer<typeof RunEvent>) => Promise<void>;
// The composer code-mode chip for the message that triggered this turn. When set,
// it is the authoritative coding agent — code_agent_run uses it rather than the
// agent the model guessed, so switching the chip deterministically switches agents.
codeMode?: 'claude' | 'codex' | null;
}
async function execMcpTool(agentTool: z.infer<typeof ToolAttachment> & { type: "mcp" }, input: Record<string, unknown>): Promise<unknown> {

View file

@ -13,6 +13,8 @@ export interface RunPromptArgs {
ask: (ask: PermissionAsk) => Promise<PermissionDecision>;
/** Stream sink for this prompt's run. */
onEvent: (event: CodeRunEvent) => void;
/** Aborts the turn on stop; the manager cancels then force-kills the adapter. */
signal?: AbortSignal;
}
interface ActiveRun {
@ -20,16 +22,36 @@ interface ActiveRun {
sessionId: string;
agent: CodingAgent;
cwd: string;
// Prompts currently streaming on this connection. Disposal is deferred while
// this is > 0 so we never tear down a connection mid-turn.
inflight: number;
// Pending grace-window teardown, cleared if the run is reused before it fires.
disposeTimer?: ReturnType<typeof setTimeout>;
}
// Drives ACP coding sessions, one live connection per chat run. Reuses a warm
// connection for follow-up prompts in the same chat; resumes a persisted session
// (via session/load) on the first prompt after an app restart.
// How long a connection stays warm after its last turn ends before we tear it down.
// A coding "turn" is one code_agent_run tool call; we keep the adapter briefly so
// back-to-back calls within one copilot turn (edit -> test -> fix) and quick user
// follow-ups reuse the warm connection instead of cold-starting. Set to 0 for strict
// per-turn teardown. Context is never lost either way: the next turn resumes the
// persisted session via session/load.
const DISPOSE_GRACE_MS = 60_000;
// On stop, how long to let the adapter cancel gracefully (ACP session/cancel) before
// we force-kill it. The kill guarantees the turn unwinds even if the adapter ignores
// cancel or is blocked — otherwise a hung prompt would lock the chat indefinitely.
const CANCEL_GRACE_MS = 2_000;
// Drives ACP coding sessions. A connection's lifetime is scoped to the agent turn
// (one code_agent_run): it is torn down a short grace window after the turn ends, so
// idle chats hold no adapter processes. Turns that land within the grace window reuse
// the warm connection; anything colder (grace elapsed, or after an app restart)
// resumes the persisted session via session/load.
export class CodeModeManager {
private readonly runs = new Map<string, ActiveRun>();
async runPrompt(args: RunPromptArgs): Promise<RunPromptResult> {
const { runId, agent, cwd, prompt, policy, ask, onEvent } = args;
const { runId, agent, cwd, prompt, policy, ask, onEvent, signal } = args;
const broker = new PermissionBroker({
policy,
@ -38,22 +60,82 @@ export class CodeModeManager {
});
const run = await this.ensureRun(runId, agent, cwd, broker, onEvent);
const res = await run.client.prompt(run.sessionId, prompt);
return { stopReason: res.stopReason, sessionId: run.sessionId };
}
run.inflight++;
async cancel(runId: string): Promise<void> {
const run = this.runs.get(runId);
if (run) await run.client.cancel(run.sessionId);
let graceTimer: ReturnType<typeof setTimeout> | undefined;
let onAbort: (() => void) | undefined;
try {
const promptP = run.client.prompt(run.sessionId, prompt);
// We may stop awaiting this prompt below (force-kill on stop rejects it);
// attach a no-op catch so the orphaned rejection isn't flagged.
promptP.catch(() => {});
// Stop handling: on abort, ask the adapter to cancel; if it hasn't unwound
// within the grace, force-kill it and resolve as cancelled. This guarantees
// the turn ends even if the adapter ignores cancel or is wedged — a hung
// prompt would otherwise lock the chat (no run-stopped, composer disabled).
const cancelledP = new Promise<{ stopReason: string }>((resolve) => {
if (!signal) return;
onAbort = () => {
run.client.cancel(run.sessionId).catch(() => {});
graceTimer = setTimeout(() => {
this.dispose(runId);
resolve({ stopReason: 'cancelled' });
}, CANCEL_GRACE_MS);
graceTimer.unref?.();
};
if (signal.aborted) onAbort();
else signal.addEventListener('abort', onAbort, { once: true });
});
const res = await Promise.race([promptP, cancelledP]);
return { stopReason: res.stopReason, sessionId: run.sessionId };
} catch (e) {
// A kill-induced "connection closed" during a stop is an expected cancel.
if (signal?.aborted) return { stopReason: 'cancelled', sessionId: run.sessionId };
throw e;
} finally {
if (signal && onAbort) signal.removeEventListener('abort', onAbort);
if (graceTimer) clearTimeout(graceTimer);
run.inflight--;
this.scheduleDispose(runId);
}
}
dispose(runId: string): void {
const run = this.runs.get(runId);
if (!run) return;
this.cancelDispose(run);
run.client.dispose();
this.runs.delete(runId);
}
// Tear down the connection a grace window after its last turn ends. Skipped while a
// prompt is still streaming, and re-armed when each turn ends so the window measures
// idle-since-last-activity. With grace 0 we dispose immediately (strict per-turn).
private scheduleDispose(runId: string): void {
const run = this.runs.get(runId);
if (!run || run.inflight > 0) return;
this.cancelDispose(run);
if (DISPOSE_GRACE_MS <= 0) {
this.dispose(runId);
return;
}
run.disposeTimer = setTimeout(() => {
const r = this.runs.get(runId);
if (r && r.inflight === 0) this.dispose(runId);
}, DISPOSE_GRACE_MS);
// A pending teardown timer must not keep the process alive at quit.
run.disposeTimer.unref?.();
}
private cancelDispose(run: ActiveRun): void {
if (run.disposeTimer) {
clearTimeout(run.disposeTimer);
run.disposeTimer = undefined;
}
}
disposeAll(): void {
for (const runId of [...this.runs.keys()]) this.dispose(runId);
}
@ -69,6 +151,7 @@ export class CodeModeManager {
): Promise<ActiveRun> {
const existing = this.runs.get(runId);
if (existing && existing.agent === agent && existing.cwd === cwd) {
this.cancelDispose(existing); // reused before its grace window elapsed
existing.client.setHandlers(broker, onEvent);
return existing;
}
@ -78,7 +161,7 @@ export class CodeModeManager {
await client.start();
const sessionId = await this.openSession(runId, agent, cwd, client);
const run: ActiveRun = { client, sessionId, agent, cwd };
const run: ActiveRun = { client, sessionId, agent, cwd, inflight: 0 };
this.runs.set(runId, run);
return run;
}

View file

@ -46,8 +46,9 @@ container.register({
agentScheduleStateRepo: asClass<IAgentScheduleStateRepo>(FSAgentScheduleStateRepo).singleton(),
slackConfigRepo: asClass<ISlackConfigRepo>(FSSlackConfigRepo).singleton(),
// ACP code-mode engine: the manager holds live agent connections for the app
// lifetime (warm sessions across messages); the registry brokers mid-run approvals.
// ACP code-mode engine: the manager holds a live agent connection per chat only
// around an active turn (torn down after a short idle grace; resumed via
// session/load); the registry brokers mid-run approvals.
codeModeManager: asClass(CodeModeManager).singleton(),
codePermissionRegistry: asClass(CodePermissionRegistry).singleton(),
});