fix(code-mode): harden ACP engine — turn-scoped connections, chip-authoritative agent, reliable stop

Three robustness fixes that co-modify manager.runPrompt and the code_agent_run tool, so they land together: - Lifecycle: scope each ACP adapter connection to the agent turn. Dispose it a short grace (60s) after the turn ends instead of holding it for the app's life; the next turn resumes via session/load (both agents support it). Wire disposeAll() on app quit (was dead code). Fixes the unbounded per-chat leak of booted agent processes. - Agent selection: make the composer chip the source of truth. Thread codeMode into ToolContext; code_agent_run uses it instead of the model's guessed `agent` arg, which anchored on the thread's earlier agent and ignored a chip change. Prompts updated to match; the run is labelled by the agent that actually ran. - Stop/abort: guarantee a stopped turn unwinds. On abort the manager sends ACP session/cancel, then force-kills the adapter after a 2s grace and resolves the turn as cancelled — a wedged adapter can no longer hang the run and lock the chat. code_agent_run returns a clean cancelled result.
2026-06-09 19:45:17 +02:00 · 2026-06-04 01:46:28 +05:30 · 2026-06-04 01:46:28 +05:30 · dda73d0daa
commit dda73d0daa
parent 2fef77416f
8 changed files with 150 additions and 34 deletions
--- a/apps/x/apps/main/src/main.ts
+++ b/apps/x/apps/main/src/main.ts
@ -40,7 +40,8 @@ import started from "electron-squirrel-startup";
 import { execSync, exec, execFileSync } from "node:child_process";
 import { promisify } from "node:util";
 import { init as initChromeSync } from "@x/core/dist/knowledge/chrome-extension/server/server.js";
-import { registerBrowserControlService, registerNotificationService } from "@x/core/dist/di/container.js";
+import container, { registerBrowserControlService, registerNotificationService } from "@x/core/dist/di/container.js";
+import type { CodeModeManager } from "@x/core/dist/code-mode/acp/manager.js";
 import { browserViewManager, BROWSER_PARTITION } from "./browser/view.js";
 import { setupBrowserEventForwarding } from "./browser/ipc.js";
 import { ElectronBrowserControlService } from "./browser/control-service.js";
@ -416,6 +417,12 @@ app.on("before-quit", () => {
  stopWorkspaceWatcher();
  stopRunsWatcher();
  stopServicesWatcher();
+  // Tear down any live ACP coding-agent adapter processes so they don't outlive the app.
+  try {
+    container.resolve<CodeModeManager>('codeModeManager').disposeAll();
+  } catch {
+    // nothing live to dispose
+  }
  shutdownLocalSites().catch((error) => {
    console.error('[LocalSites] Failed to shut down cleanly:', error);
  });
--- a/apps/x/apps/renderer/src/components/coding-run.tsx
+++ b/apps/x/apps/renderer/src/components/coding-run.tsx
@ -230,7 +230,12 @@ export function CodingRunBlock({
  onOpenChange: (open: boolean) => void
  onPermissionDecision: (decision: PermissionDecision) => void
 }) {
-  const agent = (item.input as { agent?: string } | undefined)?.agent
+  // Prefer the agent the backend actually ran (the chip) once the run returns; fall
+  // back to the requested input agent while it's still in flight. Never trust only the
+  // model's input — it can pass a stale agent the backend overrode with the chip.
+  const agent =
+    (item.result as { agent?: string } | undefined)?.agent ??
+    (item.input as { agent?: string } | undefined)?.agent
  const title = AGENT_LABEL[agent ?? ''] ?? 'Coding agent'
  return (
    <>
--- a/apps/x/packages/core/src/agents/runtime.ts
+++ b/apps/x/packages/core/src/agents/runtime.ts
@ -1255,6 +1255,7 @@ export async function* streamAgent({
                    signal,
                    abortRegistry,
                    publish: (event) => bus.publish(event),
+                    codeMode,
                });
            }
            } catch (error) {
@ -1402,17 +1403,15 @@ Do not announce the work directory unless it's relevant. Just use it.`;
        if (codeMode) {
            loopLogger.log('code mode enabled, injecting coding-agent context', codeMode);
            const agentDisplay = codeMode === 'claude' ? 'Claude Code' : 'Codex';
-            const otherAgent = codeMode === 'claude' ? 'codex' : 'claude';
-            const otherDisplay = codeMode === 'claude' ? 'Codex' : 'Claude Code';
-            instructionsWithDateTime += `\n\n# Code Mode (Active) — Default agent: ${agentDisplay}
-The user has turned on **code mode** and the composer chip is set to **${agentDisplay}** (\`${codeMode}\`). Use this as the **default** agent for coding tasks in this turn.
+            instructionsWithDateTime += `\n\n# Code Mode (Active) — Agent: ${agentDisplay}
+The user has turned on **code mode** and the composer chip is set to **${agentDisplay}** (\`${codeMode}\`). For EVERY coding task this turn, use **${agentDisplay}**, and narrate that agent ("Using ${agentDisplay} to …").

-**The user can override the agent at any time, two ways:**
-1. By toggling the chip in the composer (preferred).
-2. By asking you directly in chat ("use codex", "switch to claude", "do this with ${otherDisplay}", etc.). When the user explicitly asks to use a different agent in the current message, honor that — use \`${otherAgent}\` instead of \`${codeMode}\` for this turn, and briefly mention they can also toggle it via the chip for stickiness.
+The chip is the single source of truth for which agent runs:
+- Do NOT carry over a different agent from earlier in this thread — even if a previous run used the other agent, use **${agentDisplay}** now.
+- Do NOT switch agents based on an in-chat text request ("use codex", "switch to claude"). The agent only changes when the user toggles the chip; if they ask in chat, tell them to toggle the chip.

 **How to run coding work — call the \`code_agent_run\` tool** with:
- \`agent\`: \`${codeMode}\` by default (or the in-chat override above).
+- \`agent\`: \`${codeMode}\` (always — match the chip).
 - \`cwd\`: the absolute project/working directory (resolve it per the code-with-agents skill — a path the user named, the "# User Work Directory" block, or ask once).
 - \`prompt\`: a clear, self-contained coding instruction.

--- a/apps/x/packages/core/src/application/assistant/skills/code-with-agents/skill.ts
+++ b/apps/x/packages/core/src/application/assistant/skills/code-with-agents/skill.ts
@ -48,9 +48,7 @@ This is non-negotiable. The user gets clickable buttons. Free-text "which agent?
 2. The path from a "# User Work Directory" block in your context.
 3. Ask once in plain text: "Which folder should I work in?"

-**Pick the agent** (\`claude\` or \`codex\`), in priority order:
- An explicit in-chat override from the user this turn ("use codex", "switch to claude") — honor it.
- The agent from the "# Code Mode (Active)" block / Step 1 choice.
+**Pick the agent** (\`claude\` or \`codex\`): use the agent from the "# Code Mode (Active)" block (the composer chip) / the Step 1 choice. The chip is authoritative — do NOT carry over a different agent from earlier in this thread, and do NOT switch on an in-chat text request ("use codex"); tell the user to toggle the chip instead.

 **State your intent in one line, then call the tool immediately — do NOT wait for a "yes".** The tool's own permission cards are the user's confirmation, so an extra in-chat "reply yes to proceed" is redundant friction. Say something like:

@ -71,7 +69,7 @@ code_agent_run({
 - Mention constraints (language, framework, style).
 - Expand short user requests into clear, actionable instructions.

-**Follow-ups:** for every later coding request in this chat, just call \`code_agent_run\` again with the same \`cwd\` (and agent, unless overridden). The session resumes automatically — do NOT start over or re-explain prior context.
+**Follow-ups:** for every later coding request in this chat, just call \`code_agent_run\` again with the same \`cwd\` and the chip's current agent. The session resumes automatically — do NOT start over or re-explain prior context.

 ---

--- a/apps/x/packages/core/src/application/lib/builtin-tools.ts
+++ b/apps/x/packages/core/src/application/lib/builtin-tools.ts
@ -811,7 +811,7 @@ export const BuiltinTools: z.infer<typeof BuiltinToolsSchema> = {
    code_agent_run: {
        description: 'Run a coding/software task with the selected on-device coding agent (Claude Code or Codex) inside a project folder. Streams the agent\'s tool calls, file diffs, and plan into the chat and surfaces permission requests inline. Use this for ALL code-mode work (writing/editing/reading code, running tests, debugging, exploring a repo). Reuses one persistent session per chat, so follow-up requests keep context.',
        inputSchema: z.object({
-            agent: z.enum(['claude', 'codex']).describe('Which coding agent to use: "claude" (Claude Code) or "codex". Pick per the active code-mode selection / any in-chat override.'),
+            agent: z.enum(['claude', 'codex']).describe('Which coding agent to use: "claude" (Claude Code) or "codex". Set this to the active code-mode chip agent. Note: when the chip is set, the backend uses the chip agent regardless of this value — this only takes effect in the ask-human flow where no chip is set.'),
            cwd: z.string().describe('Absolute path to the working directory / project folder the agent should operate in.'),
            prompt: z.string().describe('The full, self-contained coding instruction for the agent (file names, expected behavior, constraints).'),
        }),
@ -819,6 +819,11 @@ export const BuiltinTools: z.infer<typeof BuiltinToolsSchema> = {
            if (!ctx) {
                return { success: false, message: 'code_agent_run requires run context (runId / streaming).' };
            }
+            // The composer chip is the source of truth for the agent. The model's `agent`
+            // argument is only a fallback for the ask-human flow (code mode not active, no
+            // chip set) — otherwise it can anchor on the thread's earlier agent and ignore a
+            // chip change. Honor the chip so switching it deterministically switches agents.
+            const effectiveAgent = ctx.codeMode ?? agent;
            const manager = container.resolve<CodeModeManager>('codeModeManager');
            const registry = container.resolve<CodePermissionRegistry>('codePermissionRegistry');

@ -831,11 +836,11 @@ export const BuiltinTools: z.infer<typeof BuiltinToolsSchema> = {
                // fall back to 'ask'
            }

-            // Cancel the coding turn (and unblock any pending approval) if the run is stopped.
-            const onAbort = () => {
-                manager.cancel(ctx.runId).catch(() => {});
-                registry.cancelRun(ctx.runId);
-            };
+            // On stop, unblock any pending approval card so the broker stops waiting for
+            // an answer that will never come. The ACP cancel + force-kill backstop that
+            // actually ends the turn is handled inside manager.runPrompt via the signal
+            // we pass below.
+            const onAbort = () => registry.cancelRun(ctx.runId);
            if (ctx.signal.aborted) onAbort();
            else ctx.signal.addEventListener('abort', onAbort, { once: true });

@ -844,10 +849,11 @@ export const BuiltinTools: z.infer<typeof BuiltinToolsSchema> = {
            try {
                const result = await manager.runPrompt({
                    runId: ctx.runId,
-                    agent,
+                    agent: effectiveAgent,
                    cwd,
                    prompt,
                    policy,
+                    signal: ctx.signal,
                    onEvent: (event) => {
                        if (event.type === 'message' && event.role === 'agent') finalText += event.text;
                        if (event.type === 'tool_call_update') for (const f of event.diffs) changedFiles.add(f);
@ -873,10 +879,23 @@ export const BuiltinTools: z.infer<typeof BuiltinToolsSchema> = {
                return {
                    success: result.stopReason === 'end_turn',
                    stopReason: result.stopReason,
+                    // The agent that actually ran (the chip), so the UI can label the run
+                    // authoritatively rather than trusting the model's `agent` argument.
+                    agent: effectiveAgent,
                    summary: finalText.trim(),
                    changedFiles: [...changedFiles],
                };
            } catch (error) {
+                // A stop mid-run isn't a failure — report it as a clean cancellation.
+                if (ctx.signal.aborted) {
+                    return {
+                        success: false,
+                        stopReason: 'cancelled',
+                        agent: effectiveAgent,
+                        summary: finalText.trim(),
+                        changedFiles: [...changedFiles],
+                    };
+                }
                return {
                    success: false,
                    message: `Coding agent failed: ${error instanceof Error ? error.message : String(error)}`,
--- a/apps/x/packages/core/src/application/lib/exec-tool.ts
+++ b/apps/x/packages/core/src/application/lib/exec-tool.ts
@ -14,6 +14,10 @@ export interface ToolContext {
    signal: AbortSignal;
    abortRegistry: IAbortRegistry;
    publish: (event: z.infer<typeof RunEvent>) => Promise<void>;
+    // The composer code-mode chip for the message that triggered this turn. When set,
+    // it is the authoritative coding agent — code_agent_run uses it rather than the
+    // agent the model guessed, so switching the chip deterministically switches agents.
+    codeMode?: 'claude' | 'codex' | null;
 }

 async function execMcpTool(agentTool: z.infer<typeof ToolAttachment> & { type: "mcp" }, input: Record<string, unknown>): Promise<unknown> {
--- a/apps/x/packages/core/src/code-mode/acp/manager.ts
+++ b/apps/x/packages/core/src/code-mode/acp/manager.ts
@ -13,6 +13,8 @@ export interface RunPromptArgs {
    ask: (ask: PermissionAsk) => Promise<PermissionDecision>;
    /** Stream sink for this prompt's run. */
    onEvent: (event: CodeRunEvent) => void;
+    /** Aborts the turn on stop; the manager cancels then force-kills the adapter. */
+    signal?: AbortSignal;
 }

 interface ActiveRun {
@ -20,16 +22,36 @@ interface ActiveRun {
    sessionId: string;
    agent: CodingAgent;
    cwd: string;
+    // Prompts currently streaming on this connection. Disposal is deferred while
+    // this is > 0 so we never tear down a connection mid-turn.
+    inflight: number;
+    // Pending grace-window teardown, cleared if the run is reused before it fires.
+    disposeTimer?: ReturnType<typeof setTimeout>;
 }

-// Drives ACP coding sessions, one live connection per chat run. Reuses a warm
-// connection for follow-up prompts in the same chat; resumes a persisted session
-// (via session/load) on the first prompt after an app restart.
+// How long a connection stays warm after its last turn ends before we tear it down.
+// A coding "turn" is one code_agent_run tool call; we keep the adapter briefly so
+// back-to-back calls within one copilot turn (edit -> test -> fix) and quick user
+// follow-ups reuse the warm connection instead of cold-starting. Set to 0 for strict
+// per-turn teardown. Context is never lost either way: the next turn resumes the
+// persisted session via session/load.
+const DISPOSE_GRACE_MS = 60_000;
+
+// On stop, how long to let the adapter cancel gracefully (ACP session/cancel) before
+// we force-kill it. The kill guarantees the turn unwinds even if the adapter ignores
+// cancel or is blocked — otherwise a hung prompt would lock the chat indefinitely.
+const CANCEL_GRACE_MS = 2_000;
+
+// Drives ACP coding sessions. A connection's lifetime is scoped to the agent turn
+// (one code_agent_run): it is torn down a short grace window after the turn ends, so
+// idle chats hold no adapter processes. Turns that land within the grace window reuse
+// the warm connection; anything colder (grace elapsed, or after an app restart)
+// resumes the persisted session via session/load.
 export class CodeModeManager {
    private readonly runs = new Map<string, ActiveRun>();

    async runPrompt(args: RunPromptArgs): Promise<RunPromptResult> {
-        const { runId, agent, cwd, prompt, policy, ask, onEvent } = args;
+        const { runId, agent, cwd, prompt, policy, ask, onEvent, signal } = args;

        const broker = new PermissionBroker({
            policy,
@ -38,22 +60,82 @@ export class CodeModeManager {
        });

        const run = await this.ensureRun(runId, agent, cwd, broker, onEvent);
-        const res = await run.client.prompt(run.sessionId, prompt);
-        return { stopReason: res.stopReason, sessionId: run.sessionId };
-    }
+        run.inflight++;

-    async cancel(runId: string): Promise<void> {
-        const run = this.runs.get(runId);
-        if (run) await run.client.cancel(run.sessionId);
+        let graceTimer: ReturnType<typeof setTimeout> | undefined;
+        let onAbort: (() => void) | undefined;
+        try {
+            const promptP = run.client.prompt(run.sessionId, prompt);
+            // We may stop awaiting this prompt below (force-kill on stop rejects it);
+            // attach a no-op catch so the orphaned rejection isn't flagged.
+            promptP.catch(() => {});
+
+            // Stop handling: on abort, ask the adapter to cancel; if it hasn't unwound
+            // within the grace, force-kill it and resolve as cancelled. This guarantees
+            // the turn ends even if the adapter ignores cancel or is wedged — a hung
+            // prompt would otherwise lock the chat (no run-stopped, composer disabled).
+            const cancelledP = new Promise<{ stopReason: string }>((resolve) => {
+                if (!signal) return;
+                onAbort = () => {
+                    run.client.cancel(run.sessionId).catch(() => {});
+                    graceTimer = setTimeout(() => {
+                        this.dispose(runId);
+                        resolve({ stopReason: 'cancelled' });
+                    }, CANCEL_GRACE_MS);
+                    graceTimer.unref?.();
+                };
+                if (signal.aborted) onAbort();
+                else signal.addEventListener('abort', onAbort, { once: true });
+            });
+
+            const res = await Promise.race([promptP, cancelledP]);
+            return { stopReason: res.stopReason, sessionId: run.sessionId };
+        } catch (e) {
+            // A kill-induced "connection closed" during a stop is an expected cancel.
+            if (signal?.aborted) return { stopReason: 'cancelled', sessionId: run.sessionId };
+            throw e;
+        } finally {
+            if (signal && onAbort) signal.removeEventListener('abort', onAbort);
+            if (graceTimer) clearTimeout(graceTimer);
+            run.inflight--;
+            this.scheduleDispose(runId);
+        }
    }

    dispose(runId: string): void {
        const run = this.runs.get(runId);
        if (!run) return;
+        this.cancelDispose(run);
        run.client.dispose();
        this.runs.delete(runId);
    }

+    // Tear down the connection a grace window after its last turn ends. Skipped while a
+    // prompt is still streaming, and re-armed when each turn ends so the window measures
+    // idle-since-last-activity. With grace 0 we dispose immediately (strict per-turn).
+    private scheduleDispose(runId: string): void {
+        const run = this.runs.get(runId);
+        if (!run || run.inflight > 0) return;
+        this.cancelDispose(run);
+        if (DISPOSE_GRACE_MS <= 0) {
+            this.dispose(runId);
+            return;
+        }
+        run.disposeTimer = setTimeout(() => {
+            const r = this.runs.get(runId);
+            if (r && r.inflight === 0) this.dispose(runId);
+        }, DISPOSE_GRACE_MS);
+        // A pending teardown timer must not keep the process alive at quit.
+        run.disposeTimer.unref?.();
+    }
+
+    private cancelDispose(run: ActiveRun): void {
+        if (run.disposeTimer) {
+            clearTimeout(run.disposeTimer);
+            run.disposeTimer = undefined;
+        }
+    }
+
    disposeAll(): void {
        for (const runId of [...this.runs.keys()]) this.dispose(runId);
    }
@ -69,6 +151,7 @@ export class CodeModeManager {
    ): Promise<ActiveRun> {
        const existing = this.runs.get(runId);
        if (existing && existing.agent === agent && existing.cwd === cwd) {
+            this.cancelDispose(existing); // reused before its grace window elapsed
            existing.client.setHandlers(broker, onEvent);
            return existing;
        }
@ -78,7 +161,7 @@ export class CodeModeManager {
        await client.start();

        const sessionId = await this.openSession(runId, agent, cwd, client);
-        const run: ActiveRun = { client, sessionId, agent, cwd };
+        const run: ActiveRun = { client, sessionId, agent, cwd, inflight: 0 };
        this.runs.set(runId, run);
        return run;
    }
--- a/apps/x/packages/core/src/di/container.ts
+++ b/apps/x/packages/core/src/di/container.ts
@ -46,8 +46,9 @@ container.register({
    agentScheduleStateRepo: asClass<IAgentScheduleStateRepo>(FSAgentScheduleStateRepo).singleton(),
    slackConfigRepo: asClass<ISlackConfigRepo>(FSSlackConfigRepo).singleton(),

-    // ACP code-mode engine: the manager holds live agent connections for the app
-    // lifetime (warm sessions across messages); the registry brokers mid-run approvals.
+    // ACP code-mode engine: the manager holds a live agent connection per chat only
+    // around an active turn (torn down after a short idle grace; resumed via
+    // session/load); the registry brokers mid-run approvals.
    codeModeManager: asClass(CodeModeManager).singleton(),
    codePermissionRegistry: asClass(CodePermissionRegistry).singleton(),
 });