From dda73d0daae76c3d5147546e9c8f87248239ca9d Mon Sep 17 00:00:00 2001
From: Gagancreates <gaganp000999@gmail.com>
Date: Thu, 4 Jun 2026 01:46:28 +0530
Subject: [PATCH] =?UTF-8?q?fix(code-mode):=20harden=20ACP=20engine=20?=
 =?UTF-8?q?=E2=80=94=20turn-scoped=20connections,=20chip-authoritative=20a?=
 =?UTF-8?q?gent,=20reliable=20stop?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three robustness fixes that co-modify manager.runPrompt and the code_agent_run
tool, so they land together:

- Lifecycle: scope each ACP adapter connection to the agent turn. Dispose it a
  short grace (60s) after the turn ends instead of holding it for the app's life;
  the next turn resumes via session/load (both agents support it). Wire
  disposeAll() on app quit (was dead code). Fixes the unbounded per-chat leak of
  booted agent processes.

- Agent selection: make the composer chip the source of truth. Thread codeMode
  into ToolContext; code_agent_run uses it instead of the model's guessed `agent`
  arg, which anchored on the thread's earlier agent and ignored a chip change.
  Prompts updated to match; the run is labelled by the agent that actually ran.

- Stop/abort: guarantee a stopped turn unwinds. On abort the manager sends ACP
  session/cancel, then force-kills the adapter after a 2s grace and resolves the
  turn as cancelled — a wedged adapter can no longer hang the run and lock the
  chat. code_agent_run returns a clean cancelled result.
---
 apps/x/apps/main/src/main.ts                  |   9 +-
 .../renderer/src/components/coding-run.tsx    |   7 +-
 apps/x/packages/core/src/agents/runtime.ts    |  15 ++-
 .../skills/code-with-agents/skill.ts          |   6 +-
 .../core/src/application/lib/builtin-tools.ts |  33 ++++--
 .../core/src/application/lib/exec-tool.ts     |   4 +
 .../core/src/code-mode/acp/manager.ts         | 105 ++++++++++++++++--
 apps/x/packages/core/src/di/container.ts      |   5 +-
 8 files changed, 150 insertions(+), 34 deletions(-)
diff --git a/apps/x/apps/main/src/main.ts b/apps/x/apps/main/src/main.ts
index 81d43553..bd407f5b 100644
--- a/apps/x/apps/main/src/main.ts
+++ b/apps/x/apps/main/src/main.ts
@@ -40,7 +40,8 @@ import started from "electron-squirrel-startup";
 import { execSync, exec, execFileSync } from "node:child_process";
 import { promisify } from "node:util";
 import { init as initChromeSync } from "@x/core/dist/knowledge/chrome-extension/server/server.js";
-import { registerBrowserControlService, registerNotificationService } from "@x/core/dist/di/container.js";
+import container, { registerBrowserControlService, registerNotificationService } from "@x/core/dist/di/container.js";
+import type { CodeModeManager } from "@x/core/dist/code-mode/acp/manager.js";
 import { browserViewManager, BROWSER_PARTITION } from "./browser/view.js";
 import { setupBrowserEventForwarding } from "./browser/ipc.js";
 import { ElectronBrowserControlService } from "./browser/control-service.js";
@@ -416,6 +417,12 @@ app.on("before-quit", () => {
   stopWorkspaceWatcher();
   stopRunsWatcher();
   stopServicesWatcher();
+  // Tear down any live ACP coding-agent adapter processes so they don't outlive the app.
+  try {
+    container.resolve<CodeModeManager>('codeModeManager').disposeAll();
+  } catch {
+    // nothing live to dispose
+  }
   shutdownLocalSites().catch((error) => {
     console.error('[LocalSites] Failed to shut down cleanly:', error);
   });
diff --git a/apps/x/apps/renderer/src/components/coding-run.tsx b/apps/x/apps/renderer/src/components/coding-run.tsx
index ae6fe055..4d5dd33b 100644
--- a/apps/x/apps/renderer/src/components/coding-run.tsx
+++ b/apps/x/apps/renderer/src/components/coding-run.tsx
@@ -230,7 +230,12 @@ export function CodingRunBlock({
   onOpenChange: (open: boolean) => void
   onPermissionDecision: (decision: PermissionDecision) => void
 }) {
-  const agent = (item.input as { agent?: string } | undefined)?.agent
+  // Prefer the agent the backend actually ran (the chip) once the run returns; fall
+  // back to the requested input agent while it's still in flight. Never trust only the
+  // model's input — it can pass a stale agent the backend overrode with the chip.
+  const agent =
+    (item.result as { agent?: string } | undefined)?.agent ??
+    (item.input as { agent?: string } | undefined)?.agent
   const title = AGENT_LABEL[agent ?? ''] ?? 'Coding agent'
   return (
     <>
diff --git a/apps/x/packages/core/src/agents/runtime.ts b/apps/x/packages/core/src/agents/runtime.ts
index 9fbef908..5265e230 100644
--- a/apps/x/packages/core/src/agents/runtime.ts
+++ b/apps/x/packages/core/src/agents/runtime.ts
@@ -1255,6 +1255,7 @@ export async function* streamAgent({
                     signal,
                     abortRegistry,
                     publish: (event) => bus.publish(event),
+                    codeMode,
                 });
             }
             } catch (error) {
@@ -1402,17 +1403,15 @@ Do not announce the work directory unless it's relevant. Just use it.`;
         if (codeMode) {
             loopLogger.log('code mode enabled, injecting coding-agent context', codeMode);
             const agentDisplay = codeMode === 'claude' ? 'Claude Code' : 'Codex';
-            const otherAgent = codeMode === 'claude' ? 'codex' : 'claude';
-            const otherDisplay = codeMode === 'claude' ? 'Codex' : 'Claude Code';
-            instructionsWithDateTime += `\n\n# Code Mode (Active) — Default agent: ${agentDisplay}
-The user has turned on **code mode** and the composer chip is set to **${agentDisplay}** (\`${codeMode}\`). Use this as the **default** agent for coding tasks in this turn.
+            instructionsWithDateTime += `\n\n# Code Mode (Active) — Agent: ${agentDisplay}
+The user has turned on **code mode** and the composer chip is set to **${agentDisplay}** (\`${codeMode}\`). For EVERY coding task this turn, use **${agentDisplay}**, and narrate that agent ("Using ${agentDisplay} to …").
 
-**The user can override the agent at any time, two ways:**
-1. By toggling the chip in the composer (preferred).
-2. By asking you directly in chat ("use codex", "switch to claude", "do this with ${otherDisplay}", etc.). When the user explicitly asks to use a different agent in the current message, honor that — use \`${otherAgent}\` instead of \`${codeMode}\` for this turn, and briefly mention they can also toggle it via the chip for stickiness.
+The chip is the single source of truth for which agent runs:
+- Do NOT carry over a different agent from earlier in this thread — even if a previous run used the other agent, use **${agentDisplay}** now.
+- Do NOT switch agents based on an in-chat text request ("use codex", "switch to claude"). The agent only changes when the user toggles the chip; if they ask in chat, tell them to toggle the chip.
 
 **How to run coding work — call the \`code_agent_run\` tool** with:
-- \`agent\`: \`${codeMode}\` by default (or the in-chat override above).
+- \`agent\`: \`${codeMode}\` (always — match the chip).
 - \`cwd\`: the absolute project/working directory (resolve it per the code-with-agents skill — a path the user named, the "# User Work Directory" block, or ask once).
 - \`prompt\`: a clear, self-contained coding instruction.
 
diff --git a/apps/x/packages/core/src/application/assistant/skills/code-with-agents/skill.ts b/apps/x/packages/core/src/application/assistant/skills/code-with-agents/skill.ts
index 8180a762..d9f15dd8 100644
--- a/apps/x/packages/core/src/application/assistant/skills/code-with-agents/skill.ts
+++ b/apps/x/packages/core/src/application/assistant/skills/code-with-agents/skill.ts
@@ -48,9 +48,7 @@ This is non-negotiable. The user gets clickable buttons. Free-text "which agent?
 2. The path from a "# User Work Directory" block in your context.
 3. Ask once in plain text: "Which folder should I work in?"
 
-**Pick the agent** (\`claude\` or \`codex\`), in priority order:
-- An explicit in-chat override from the user this turn ("use codex", "switch to claude") — honor it.
-- The agent from the "# Code Mode (Active)" block / Step 1 choice.
+**Pick the agent** (\`claude\` or \`codex\`): use the agent from the "# Code Mode (Active)" block (the composer chip) / the Step 1 choice. The chip is authoritative — do NOT carry over a different agent from earlier in this thread, and do NOT switch on an in-chat text request ("use codex"); tell the user to toggle the chip instead.
 
 **State your intent in one line, then call the tool immediately — do NOT wait for a "yes".** The tool's own permission cards are the user's confirmation, so an extra in-chat "reply yes to proceed" is redundant friction. Say something like:
 
@@ -71,7 +69,7 @@ code_agent_run({
 - Mention constraints (language, framework, style).
 - Expand short user requests into clear, actionable instructions.
 
-**Follow-ups:** for every later coding request in this chat, just call \`code_agent_run\` again with the same \`cwd\` (and agent, unless overridden). The session resumes automatically — do NOT start over or re-explain prior context.
+**Follow-ups:** for every later coding request in this chat, just call \`code_agent_run\` again with the same \`cwd\` and the chip's current agent. The session resumes automatically — do NOT start over or re-explain prior context.
 
 ---
 
diff --git a/apps/x/packages/core/src/application/lib/builtin-tools.ts b/apps/x/packages/core/src/application/lib/builtin-tools.ts
index 8796cb65..08e8334f 100644
--- a/apps/x/packages/core/src/application/lib/builtin-tools.ts
+++ b/apps/x/packages/core/src/application/lib/builtin-tools.ts
@@ -811,7 +811,7 @@ export const BuiltinTools: z.infer<typeof BuiltinToolsSchema> = {
     code_agent_run: {
         description: 'Run a coding/software task with the selected on-device coding agent (Claude Code or Codex) inside a project folder. Streams the agent\'s tool calls, file diffs, and plan into the chat and surfaces permission requests inline. Use this for ALL code-mode work (writing/editing/reading code, running tests, debugging, exploring a repo). Reuses one persistent session per chat, so follow-up requests keep context.',
         inputSchema: z.object({
-            agent: z.enum(['claude', 'codex']).describe('Which coding agent to use: "claude" (Claude Code) or "codex". Pick per the active code-mode selection / any in-chat override.'),
+            agent: z.enum(['claude', 'codex']).describe('Which coding agent to use: "claude" (Claude Code) or "codex". Set this to the active code-mode chip agent. Note: when the chip is set, the backend uses the chip agent regardless of this value — this only takes effect in the ask-human flow where no chip is set.'),
             cwd: z.string().describe('Absolute path to the working directory / project folder the agent should operate in.'),
             prompt: z.string().describe('The full, self-contained coding instruction for the agent (file names, expected behavior, constraints).'),
         }),
@@ -819,6 +819,11 @@ export const BuiltinTools: z.infer<typeof BuiltinToolsSchema> = {
             if (!ctx) {
                 return { success: false, message: 'code_agent_run requires run context (runId / streaming).' };
             }
+            // The composer chip is the source of truth for the agent. The model's `agent`
+            // argument is only a fallback for the ask-human flow (code mode not active, no
+            // chip set) — otherwise it can anchor on the thread's earlier agent and ignore a
+            // chip change. Honor the chip so switching it deterministically switches agents.
+            const effectiveAgent = ctx.codeMode ?? agent;
             const manager = container.resolve<CodeModeManager>('codeModeManager');
             const registry = container.resolve<CodePermissionRegistry>('codePermissionRegistry');
 
@@ -831,11 +836,11 @@ export const BuiltinTools: z.infer<typeof BuiltinToolsSchema> = {
                 // fall back to 'ask'
             }
 
-            // Cancel the coding turn (and unblock any pending approval) if the run is stopped.
-            const onAbort = () => {
-                manager.cancel(ctx.runId).catch(() => {});
-                registry.cancelRun(ctx.runId);
-            };
+            // On stop, unblock any pending approval card so the broker stops waiting for
+            // an answer that will never come. The ACP cancel + force-kill backstop that
+            // actually ends the turn is handled inside manager.runPrompt via the signal
+            // we pass below.
+            const onAbort = () => registry.cancelRun(ctx.runId);
             if (ctx.signal.aborted) onAbort();
             else ctx.signal.addEventListener('abort', onAbort, { once: true });
 
@@ -844,10 +849,11 @@ export const BuiltinTools: z.infer<typeof BuiltinToolsSchema> = {
             try {
                 const result = await manager.runPrompt({
                     runId: ctx.runId,
-                    agent,
+                    agent: effectiveAgent,
                     cwd,
                     prompt,
                     policy,
+                    signal: ctx.signal,
                     onEvent: (event) => {
                         if (event.type === 'message' && event.role === 'agent') finalText += event.text;
                         if (event.type === 'tool_call_update') for (const f of event.diffs) changedFiles.add(f);
@@ -873,10 +879,23 @@ export const BuiltinTools: z.infer<typeof BuiltinToolsSchema> = {
                 return {
                     success: result.stopReason === 'end_turn',
                     stopReason: result.stopReason,
+                    // The agent that actually ran (the chip), so the UI can label the run
+                    // authoritatively rather than trusting the model's `agent` argument.
+                    agent: effectiveAgent,
                     summary: finalText.trim(),
                     changedFiles: [...changedFiles],
                 };
             } catch (error) {
+                // A stop mid-run isn't a failure — report it as a clean cancellation.
+                if (ctx.signal.aborted) {
+                    return {
+                        success: false,
+                        stopReason: 'cancelled',
+                        agent: effectiveAgent,
+                        summary: finalText.trim(),
+                        changedFiles: [...changedFiles],
+                    };
+                }
                 return {
                     success: false,
                     message: `Coding agent failed: ${error instanceof Error ? error.message : String(error)}`,
diff --git a/apps/x/packages/core/src/application/lib/exec-tool.ts b/apps/x/packages/core/src/application/lib/exec-tool.ts
index 92e87fa6..b34f6ed4 100644
--- a/apps/x/packages/core/src/application/lib/exec-tool.ts
+++ b/apps/x/packages/core/src/application/lib/exec-tool.ts
@@ -14,6 +14,10 @@ export interface ToolContext {
     signal: AbortSignal;
     abortRegistry: IAbortRegistry;
     publish: (event: z.infer<typeof RunEvent>) => Promise<void>;
+    // The composer code-mode chip for the message that triggered this turn. When set,
+    // it is the authoritative coding agent — code_agent_run uses it rather than the
+    // agent the model guessed, so switching the chip deterministically switches agents.
+    codeMode?: 'claude' | 'codex' | null;
 }
 
 async function execMcpTool(agentTool: z.infer<typeof ToolAttachment> & { type: "mcp" }, input: Record<string, unknown>): Promise<unknown> {
diff --git a/apps/x/packages/core/src/code-mode/acp/manager.ts b/apps/x/packages/core/src/code-mode/acp/manager.ts
index 7e813ae4..04ccdebd 100644
--- a/apps/x/packages/core/src/code-mode/acp/manager.ts
+++ b/apps/x/packages/core/src/code-mode/acp/manager.ts
@@ -13,6 +13,8 @@ export interface RunPromptArgs {
     ask: (ask: PermissionAsk) => Promise<PermissionDecision>;
     /** Stream sink for this prompt's run. */
     onEvent: (event: CodeRunEvent) => void;
+    /** Aborts the turn on stop; the manager cancels then force-kills the adapter. */
+    signal?: AbortSignal;
 }
 
 interface ActiveRun {
@@ -20,16 +22,36 @@ interface ActiveRun {
     sessionId: string;
     agent: CodingAgent;
     cwd: string;
+    // Prompts currently streaming on this connection. Disposal is deferred while
+    // this is > 0 so we never tear down a connection mid-turn.
+    inflight: number;
+    // Pending grace-window teardown, cleared if the run is reused before it fires.
+    disposeTimer?: ReturnType<typeof setTimeout>;
 }
 
-// Drives ACP coding sessions, one live connection per chat run. Reuses a warm
-// connection for follow-up prompts in the same chat; resumes a persisted session
-// (via session/load) on the first prompt after an app restart.
+// How long a connection stays warm after its last turn ends before we tear it down.
+// A coding "turn" is one code_agent_run tool call; we keep the adapter briefly so
+// back-to-back calls within one copilot turn (edit -> test -> fix) and quick user
+// follow-ups reuse the warm connection instead of cold-starting. Set to 0 for strict
+// per-turn teardown. Context is never lost either way: the next turn resumes the
+// persisted session via session/load.
+const DISPOSE_GRACE_MS = 60_000;
+
+// On stop, how long to let the adapter cancel gracefully (ACP session/cancel) before
+// we force-kill it. The kill guarantees the turn unwinds even if the adapter ignores
+// cancel or is blocked — otherwise a hung prompt would lock the chat indefinitely.
+const CANCEL_GRACE_MS = 2_000;
+
+// Drives ACP coding sessions. A connection's lifetime is scoped to the agent turn
+// (one code_agent_run): it is torn down a short grace window after the turn ends, so
+// idle chats hold no adapter processes. Turns that land within the grace window reuse
+// the warm connection; anything colder (grace elapsed, or after an app restart)
+// resumes the persisted session via session/load.
 export class CodeModeManager {
     private readonly runs = new Map<string, ActiveRun>();
 
     async runPrompt(args: RunPromptArgs): Promise<RunPromptResult> {
-        const { runId, agent, cwd, prompt, policy, ask, onEvent } = args;
+        const { runId, agent, cwd, prompt, policy, ask, onEvent, signal } = args;
 
         const broker = new PermissionBroker({
             policy,
@@ -38,22 +60,82 @@ export class CodeModeManager {
         });
 
         const run = await this.ensureRun(runId, agent, cwd, broker, onEvent);
-        const res = await run.client.prompt(run.sessionId, prompt);
-        return { stopReason: res.stopReason, sessionId: run.sessionId };
-    }
+        run.inflight++;
 
-    async cancel(runId: string): Promise<void> {
-        const run = this.runs.get(runId);
-        if (run) await run.client.cancel(run.sessionId);
+        let graceTimer: ReturnType<typeof setTimeout> | undefined;
+        let onAbort: (() => void) | undefined;
+        try {
+            const promptP = run.client.prompt(run.sessionId, prompt);
+            // We may stop awaiting this prompt below (force-kill on stop rejects it);
+            // attach a no-op catch so the orphaned rejection isn't flagged.
+            promptP.catch(() => {});
+
+            // Stop handling: on abort, ask the adapter to cancel; if it hasn't unwound
+            // within the grace, force-kill it and resolve as cancelled. This guarantees
+            // the turn ends even if the adapter ignores cancel or is wedged — a hung
+            // prompt would otherwise lock the chat (no run-stopped, composer disabled).
+            const cancelledP = new Promise<{ stopReason: string }>((resolve) => {
+                if (!signal) return;
+                onAbort = () => {
+                    run.client.cancel(run.sessionId).catch(() => {});
+                    graceTimer = setTimeout(() => {
+                        this.dispose(runId);
+                        resolve({ stopReason: 'cancelled' });
+                    }, CANCEL_GRACE_MS);
+                    graceTimer.unref?.();
+                };
+                if (signal.aborted) onAbort();
+                else signal.addEventListener('abort', onAbort, { once: true });
+            });
+
+            const res = await Promise.race([promptP, cancelledP]);
+            return { stopReason: res.stopReason, sessionId: run.sessionId };
+        } catch (e) {
+            // A kill-induced "connection closed" during a stop is an expected cancel.
+            if (signal?.aborted) return { stopReason: 'cancelled', sessionId: run.sessionId };
+            throw e;
+        } finally {
+            if (signal && onAbort) signal.removeEventListener('abort', onAbort);
+            if (graceTimer) clearTimeout(graceTimer);
+            run.inflight--;
+            this.scheduleDispose(runId);
+        }
     }
 
     dispose(runId: string): void {
         const run = this.runs.get(runId);
         if (!run) return;
+        this.cancelDispose(run);
         run.client.dispose();
         this.runs.delete(runId);
     }
 
+    // Tear down the connection a grace window after its last turn ends. Skipped while a
+    // prompt is still streaming, and re-armed when each turn ends so the window measures
+    // idle-since-last-activity. With grace 0 we dispose immediately (strict per-turn).
+    private scheduleDispose(runId: string): void {
+        const run = this.runs.get(runId);
+        if (!run || run.inflight > 0) return;
+        this.cancelDispose(run);
+        if (DISPOSE_GRACE_MS <= 0) {
+            this.dispose(runId);
+            return;
+        }
+        run.disposeTimer = setTimeout(() => {
+            const r = this.runs.get(runId);
+            if (r && r.inflight === 0) this.dispose(runId);
+        }, DISPOSE_GRACE_MS);
+        // A pending teardown timer must not keep the process alive at quit.
+        run.disposeTimer.unref?.();
+    }
+
+    private cancelDispose(run: ActiveRun): void {
+        if (run.disposeTimer) {
+            clearTimeout(run.disposeTimer);
+            run.disposeTimer = undefined;
+        }
+    }
+
     disposeAll(): void {
         for (const runId of [...this.runs.keys()]) this.dispose(runId);
     }
@@ -69,6 +151,7 @@ export class CodeModeManager {
     ): Promise<ActiveRun> {
         const existing = this.runs.get(runId);
         if (existing && existing.agent === agent && existing.cwd === cwd) {
+            this.cancelDispose(existing); // reused before its grace window elapsed
             existing.client.setHandlers(broker, onEvent);
             return existing;
         }
@@ -78,7 +161,7 @@ export class CodeModeManager {
         await client.start();
 
         const sessionId = await this.openSession(runId, agent, cwd, client);
-        const run: ActiveRun = { client, sessionId, agent, cwd };
+        const run: ActiveRun = { client, sessionId, agent, cwd, inflight: 0 };
         this.runs.set(runId, run);
         return run;
     }
diff --git a/apps/x/packages/core/src/di/container.ts b/apps/x/packages/core/src/di/container.ts
index 18622870..d7b17ce7 100644
--- a/apps/x/packages/core/src/di/container.ts
+++ b/apps/x/packages/core/src/di/container.ts
@@ -46,8 +46,9 @@ container.register({
     agentScheduleStateRepo: asClass<IAgentScheduleStateRepo>(FSAgentScheduleStateRepo).singleton(),
     slackConfigRepo: asClass<ISlackConfigRepo>(FSSlackConfigRepo).singleton(),
 
-    // ACP code-mode engine: the manager holds live agent connections for the app
-    // lifetime (warm sessions across messages); the registry brokers mid-run approvals.
+    // ACP code-mode engine: the manager holds a live agent connection per chat only
+    // around an active turn (torn down after a short idle grace; resumed via
+    // session/load); the registry brokers mid-run approvals.
     codeModeManager: asClass(CodeModeManager).singleton(),
     codePermissionRegistry: asClass(CodePermissionRegistry).singleton(),
 });