From dda73d0daae76c3d5147546e9c8f87248239ca9d Mon Sep 17 00:00:00 2001 From: Gagancreates Date: Thu, 4 Jun 2026 01:46:28 +0530 Subject: [PATCH] =?UTF-8?q?fix(code-mode):=20harden=20ACP=20engine=20?= =?UTF-8?q?=E2=80=94=20turn-scoped=20connections,=20chip-authoritative=20a?= =?UTF-8?q?gent,=20reliable=20stop?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three robustness fixes that co-modify manager.runPrompt and the code_agent_run tool, so they land together: - Lifecycle: scope each ACP adapter connection to the agent turn. Dispose it a short grace (60s) after the turn ends instead of holding it for the app's life; the next turn resumes via session/load (both agents support it). Wire disposeAll() on app quit (was dead code). Fixes the unbounded per-chat leak of booted agent processes. - Agent selection: make the composer chip the source of truth. Thread codeMode into ToolContext; code_agent_run uses it instead of the model's guessed `agent` arg, which anchored on the thread's earlier agent and ignored a chip change. Prompts updated to match; the run is labelled by the agent that actually ran. - Stop/abort: guarantee a stopped turn unwinds. On abort the manager sends ACP session/cancel, then force-kills the adapter after a 2s grace and resolves the turn as cancelled — a wedged adapter can no longer hang the run and lock the chat. code_agent_run returns a clean cancelled result. --- apps/x/apps/main/src/main.ts | 9 +- .../renderer/src/components/coding-run.tsx | 7 +- apps/x/packages/core/src/agents/runtime.ts | 15 ++- .../skills/code-with-agents/skill.ts | 6 +- .../core/src/application/lib/builtin-tools.ts | 33 ++++-- .../core/src/application/lib/exec-tool.ts | 4 + .../core/src/code-mode/acp/manager.ts | 105 ++++++++++++++++-- apps/x/packages/core/src/di/container.ts | 5 +- 8 files changed, 150 insertions(+), 34 deletions(-) diff --git a/apps/x/apps/main/src/main.ts b/apps/x/apps/main/src/main.ts index 81d43553..bd407f5b 100644 --- a/apps/x/apps/main/src/main.ts +++ b/apps/x/apps/main/src/main.ts @@ -40,7 +40,8 @@ import started from "electron-squirrel-startup"; import { execSync, exec, execFileSync } from "node:child_process"; import { promisify } from "node:util"; import { init as initChromeSync } from "@x/core/dist/knowledge/chrome-extension/server/server.js"; -import { registerBrowserControlService, registerNotificationService } from "@x/core/dist/di/container.js"; +import container, { registerBrowserControlService, registerNotificationService } from "@x/core/dist/di/container.js"; +import type { CodeModeManager } from "@x/core/dist/code-mode/acp/manager.js"; import { browserViewManager, BROWSER_PARTITION } from "./browser/view.js"; import { setupBrowserEventForwarding } from "./browser/ipc.js"; import { ElectronBrowserControlService } from "./browser/control-service.js"; @@ -416,6 +417,12 @@ app.on("before-quit", () => { stopWorkspaceWatcher(); stopRunsWatcher(); stopServicesWatcher(); + // Tear down any live ACP coding-agent adapter processes so they don't outlive the app. + try { + container.resolve('codeModeManager').disposeAll(); + } catch { + // nothing live to dispose + } shutdownLocalSites().catch((error) => { console.error('[LocalSites] Failed to shut down cleanly:', error); }); diff --git a/apps/x/apps/renderer/src/components/coding-run.tsx b/apps/x/apps/renderer/src/components/coding-run.tsx index ae6fe055..4d5dd33b 100644 --- a/apps/x/apps/renderer/src/components/coding-run.tsx +++ b/apps/x/apps/renderer/src/components/coding-run.tsx @@ -230,7 +230,12 @@ export function CodingRunBlock({ onOpenChange: (open: boolean) => void onPermissionDecision: (decision: PermissionDecision) => void }) { - const agent = (item.input as { agent?: string } | undefined)?.agent + // Prefer the agent the backend actually ran (the chip) once the run returns; fall + // back to the requested input agent while it's still in flight. Never trust only the + // model's input — it can pass a stale agent the backend overrode with the chip. + const agent = + (item.result as { agent?: string } | undefined)?.agent ?? + (item.input as { agent?: string } | undefined)?.agent const title = AGENT_LABEL[agent ?? ''] ?? 'Coding agent' return ( <> diff --git a/apps/x/packages/core/src/agents/runtime.ts b/apps/x/packages/core/src/agents/runtime.ts index 9fbef908..5265e230 100644 --- a/apps/x/packages/core/src/agents/runtime.ts +++ b/apps/x/packages/core/src/agents/runtime.ts @@ -1255,6 +1255,7 @@ export async function* streamAgent({ signal, abortRegistry, publish: (event) => bus.publish(event), + codeMode, }); } } catch (error) { @@ -1402,17 +1403,15 @@ Do not announce the work directory unless it's relevant. Just use it.`; if (codeMode) { loopLogger.log('code mode enabled, injecting coding-agent context', codeMode); const agentDisplay = codeMode === 'claude' ? 'Claude Code' : 'Codex'; - const otherAgent = codeMode === 'claude' ? 'codex' : 'claude'; - const otherDisplay = codeMode === 'claude' ? 'Codex' : 'Claude Code'; - instructionsWithDateTime += `\n\n# Code Mode (Active) — Default agent: ${agentDisplay} -The user has turned on **code mode** and the composer chip is set to **${agentDisplay}** (\`${codeMode}\`). Use this as the **default** agent for coding tasks in this turn. + instructionsWithDateTime += `\n\n# Code Mode (Active) — Agent: ${agentDisplay} +The user has turned on **code mode** and the composer chip is set to **${agentDisplay}** (\`${codeMode}\`). For EVERY coding task this turn, use **${agentDisplay}**, and narrate that agent ("Using ${agentDisplay} to …"). -**The user can override the agent at any time, two ways:** -1. By toggling the chip in the composer (preferred). -2. By asking you directly in chat ("use codex", "switch to claude", "do this with ${otherDisplay}", etc.). When the user explicitly asks to use a different agent in the current message, honor that — use \`${otherAgent}\` instead of \`${codeMode}\` for this turn, and briefly mention they can also toggle it via the chip for stickiness. +The chip is the single source of truth for which agent runs: +- Do NOT carry over a different agent from earlier in this thread — even if a previous run used the other agent, use **${agentDisplay}** now. +- Do NOT switch agents based on an in-chat text request ("use codex", "switch to claude"). The agent only changes when the user toggles the chip; if they ask in chat, tell them to toggle the chip. **How to run coding work — call the \`code_agent_run\` tool** with: -- \`agent\`: \`${codeMode}\` by default (or the in-chat override above). +- \`agent\`: \`${codeMode}\` (always — match the chip). - \`cwd\`: the absolute project/working directory (resolve it per the code-with-agents skill — a path the user named, the "# User Work Directory" block, or ask once). - \`prompt\`: a clear, self-contained coding instruction. diff --git a/apps/x/packages/core/src/application/assistant/skills/code-with-agents/skill.ts b/apps/x/packages/core/src/application/assistant/skills/code-with-agents/skill.ts index 8180a762..d9f15dd8 100644 --- a/apps/x/packages/core/src/application/assistant/skills/code-with-agents/skill.ts +++ b/apps/x/packages/core/src/application/assistant/skills/code-with-agents/skill.ts @@ -48,9 +48,7 @@ This is non-negotiable. The user gets clickable buttons. Free-text "which agent? 2. The path from a "# User Work Directory" block in your context. 3. Ask once in plain text: "Which folder should I work in?" -**Pick the agent** (\`claude\` or \`codex\`), in priority order: -- An explicit in-chat override from the user this turn ("use codex", "switch to claude") — honor it. -- The agent from the "# Code Mode (Active)" block / Step 1 choice. +**Pick the agent** (\`claude\` or \`codex\`): use the agent from the "# Code Mode (Active)" block (the composer chip) / the Step 1 choice. The chip is authoritative — do NOT carry over a different agent from earlier in this thread, and do NOT switch on an in-chat text request ("use codex"); tell the user to toggle the chip instead. **State your intent in one line, then call the tool immediately — do NOT wait for a "yes".** The tool's own permission cards are the user's confirmation, so an extra in-chat "reply yes to proceed" is redundant friction. Say something like: @@ -71,7 +69,7 @@ code_agent_run({ - Mention constraints (language, framework, style). - Expand short user requests into clear, actionable instructions. -**Follow-ups:** for every later coding request in this chat, just call \`code_agent_run\` again with the same \`cwd\` (and agent, unless overridden). The session resumes automatically — do NOT start over or re-explain prior context. +**Follow-ups:** for every later coding request in this chat, just call \`code_agent_run\` again with the same \`cwd\` and the chip's current agent. The session resumes automatically — do NOT start over or re-explain prior context. --- diff --git a/apps/x/packages/core/src/application/lib/builtin-tools.ts b/apps/x/packages/core/src/application/lib/builtin-tools.ts index 8796cb65..08e8334f 100644 --- a/apps/x/packages/core/src/application/lib/builtin-tools.ts +++ b/apps/x/packages/core/src/application/lib/builtin-tools.ts @@ -811,7 +811,7 @@ export const BuiltinTools: z.infer = { code_agent_run: { description: 'Run a coding/software task with the selected on-device coding agent (Claude Code or Codex) inside a project folder. Streams the agent\'s tool calls, file diffs, and plan into the chat and surfaces permission requests inline. Use this for ALL code-mode work (writing/editing/reading code, running tests, debugging, exploring a repo). Reuses one persistent session per chat, so follow-up requests keep context.', inputSchema: z.object({ - agent: z.enum(['claude', 'codex']).describe('Which coding agent to use: "claude" (Claude Code) or "codex". Pick per the active code-mode selection / any in-chat override.'), + agent: z.enum(['claude', 'codex']).describe('Which coding agent to use: "claude" (Claude Code) or "codex". Set this to the active code-mode chip agent. Note: when the chip is set, the backend uses the chip agent regardless of this value — this only takes effect in the ask-human flow where no chip is set.'), cwd: z.string().describe('Absolute path to the working directory / project folder the agent should operate in.'), prompt: z.string().describe('The full, self-contained coding instruction for the agent (file names, expected behavior, constraints).'), }), @@ -819,6 +819,11 @@ export const BuiltinTools: z.infer = { if (!ctx) { return { success: false, message: 'code_agent_run requires run context (runId / streaming).' }; } + // The composer chip is the source of truth for the agent. The model's `agent` + // argument is only a fallback for the ask-human flow (code mode not active, no + // chip set) — otherwise it can anchor on the thread's earlier agent and ignore a + // chip change. Honor the chip so switching it deterministically switches agents. + const effectiveAgent = ctx.codeMode ?? agent; const manager = container.resolve('codeModeManager'); const registry = container.resolve('codePermissionRegistry'); @@ -831,11 +836,11 @@ export const BuiltinTools: z.infer = { // fall back to 'ask' } - // Cancel the coding turn (and unblock any pending approval) if the run is stopped. - const onAbort = () => { - manager.cancel(ctx.runId).catch(() => {}); - registry.cancelRun(ctx.runId); - }; + // On stop, unblock any pending approval card so the broker stops waiting for + // an answer that will never come. The ACP cancel + force-kill backstop that + // actually ends the turn is handled inside manager.runPrompt via the signal + // we pass below. + const onAbort = () => registry.cancelRun(ctx.runId); if (ctx.signal.aborted) onAbort(); else ctx.signal.addEventListener('abort', onAbort, { once: true }); @@ -844,10 +849,11 @@ export const BuiltinTools: z.infer = { try { const result = await manager.runPrompt({ runId: ctx.runId, - agent, + agent: effectiveAgent, cwd, prompt, policy, + signal: ctx.signal, onEvent: (event) => { if (event.type === 'message' && event.role === 'agent') finalText += event.text; if (event.type === 'tool_call_update') for (const f of event.diffs) changedFiles.add(f); @@ -873,10 +879,23 @@ export const BuiltinTools: z.infer = { return { success: result.stopReason === 'end_turn', stopReason: result.stopReason, + // The agent that actually ran (the chip), so the UI can label the run + // authoritatively rather than trusting the model's `agent` argument. + agent: effectiveAgent, summary: finalText.trim(), changedFiles: [...changedFiles], }; } catch (error) { + // A stop mid-run isn't a failure — report it as a clean cancellation. + if (ctx.signal.aborted) { + return { + success: false, + stopReason: 'cancelled', + agent: effectiveAgent, + summary: finalText.trim(), + changedFiles: [...changedFiles], + }; + } return { success: false, message: `Coding agent failed: ${error instanceof Error ? error.message : String(error)}`, diff --git a/apps/x/packages/core/src/application/lib/exec-tool.ts b/apps/x/packages/core/src/application/lib/exec-tool.ts index 92e87fa6..b34f6ed4 100644 --- a/apps/x/packages/core/src/application/lib/exec-tool.ts +++ b/apps/x/packages/core/src/application/lib/exec-tool.ts @@ -14,6 +14,10 @@ export interface ToolContext { signal: AbortSignal; abortRegistry: IAbortRegistry; publish: (event: z.infer) => Promise; + // The composer code-mode chip for the message that triggered this turn. When set, + // it is the authoritative coding agent — code_agent_run uses it rather than the + // agent the model guessed, so switching the chip deterministically switches agents. + codeMode?: 'claude' | 'codex' | null; } async function execMcpTool(agentTool: z.infer & { type: "mcp" }, input: Record): Promise { diff --git a/apps/x/packages/core/src/code-mode/acp/manager.ts b/apps/x/packages/core/src/code-mode/acp/manager.ts index 7e813ae4..04ccdebd 100644 --- a/apps/x/packages/core/src/code-mode/acp/manager.ts +++ b/apps/x/packages/core/src/code-mode/acp/manager.ts @@ -13,6 +13,8 @@ export interface RunPromptArgs { ask: (ask: PermissionAsk) => Promise; /** Stream sink for this prompt's run. */ onEvent: (event: CodeRunEvent) => void; + /** Aborts the turn on stop; the manager cancels then force-kills the adapter. */ + signal?: AbortSignal; } interface ActiveRun { @@ -20,16 +22,36 @@ interface ActiveRun { sessionId: string; agent: CodingAgent; cwd: string; + // Prompts currently streaming on this connection. Disposal is deferred while + // this is > 0 so we never tear down a connection mid-turn. + inflight: number; + // Pending grace-window teardown, cleared if the run is reused before it fires. + disposeTimer?: ReturnType; } -// Drives ACP coding sessions, one live connection per chat run. Reuses a warm -// connection for follow-up prompts in the same chat; resumes a persisted session -// (via session/load) on the first prompt after an app restart. +// How long a connection stays warm after its last turn ends before we tear it down. +// A coding "turn" is one code_agent_run tool call; we keep the adapter briefly so +// back-to-back calls within one copilot turn (edit -> test -> fix) and quick user +// follow-ups reuse the warm connection instead of cold-starting. Set to 0 for strict +// per-turn teardown. Context is never lost either way: the next turn resumes the +// persisted session via session/load. +const DISPOSE_GRACE_MS = 60_000; + +// On stop, how long to let the adapter cancel gracefully (ACP session/cancel) before +// we force-kill it. The kill guarantees the turn unwinds even if the adapter ignores +// cancel or is blocked — otherwise a hung prompt would lock the chat indefinitely. +const CANCEL_GRACE_MS = 2_000; + +// Drives ACP coding sessions. A connection's lifetime is scoped to the agent turn +// (one code_agent_run): it is torn down a short grace window after the turn ends, so +// idle chats hold no adapter processes. Turns that land within the grace window reuse +// the warm connection; anything colder (grace elapsed, or after an app restart) +// resumes the persisted session via session/load. export class CodeModeManager { private readonly runs = new Map(); async runPrompt(args: RunPromptArgs): Promise { - const { runId, agent, cwd, prompt, policy, ask, onEvent } = args; + const { runId, agent, cwd, prompt, policy, ask, onEvent, signal } = args; const broker = new PermissionBroker({ policy, @@ -38,22 +60,82 @@ export class CodeModeManager { }); const run = await this.ensureRun(runId, agent, cwd, broker, onEvent); - const res = await run.client.prompt(run.sessionId, prompt); - return { stopReason: res.stopReason, sessionId: run.sessionId }; - } + run.inflight++; - async cancel(runId: string): Promise { - const run = this.runs.get(runId); - if (run) await run.client.cancel(run.sessionId); + let graceTimer: ReturnType | undefined; + let onAbort: (() => void) | undefined; + try { + const promptP = run.client.prompt(run.sessionId, prompt); + // We may stop awaiting this prompt below (force-kill on stop rejects it); + // attach a no-op catch so the orphaned rejection isn't flagged. + promptP.catch(() => {}); + + // Stop handling: on abort, ask the adapter to cancel; if it hasn't unwound + // within the grace, force-kill it and resolve as cancelled. This guarantees + // the turn ends even if the adapter ignores cancel or is wedged — a hung + // prompt would otherwise lock the chat (no run-stopped, composer disabled). + const cancelledP = new Promise<{ stopReason: string }>((resolve) => { + if (!signal) return; + onAbort = () => { + run.client.cancel(run.sessionId).catch(() => {}); + graceTimer = setTimeout(() => { + this.dispose(runId); + resolve({ stopReason: 'cancelled' }); + }, CANCEL_GRACE_MS); + graceTimer.unref?.(); + }; + if (signal.aborted) onAbort(); + else signal.addEventListener('abort', onAbort, { once: true }); + }); + + const res = await Promise.race([promptP, cancelledP]); + return { stopReason: res.stopReason, sessionId: run.sessionId }; + } catch (e) { + // A kill-induced "connection closed" during a stop is an expected cancel. + if (signal?.aborted) return { stopReason: 'cancelled', sessionId: run.sessionId }; + throw e; + } finally { + if (signal && onAbort) signal.removeEventListener('abort', onAbort); + if (graceTimer) clearTimeout(graceTimer); + run.inflight--; + this.scheduleDispose(runId); + } } dispose(runId: string): void { const run = this.runs.get(runId); if (!run) return; + this.cancelDispose(run); run.client.dispose(); this.runs.delete(runId); } + // Tear down the connection a grace window after its last turn ends. Skipped while a + // prompt is still streaming, and re-armed when each turn ends so the window measures + // idle-since-last-activity. With grace 0 we dispose immediately (strict per-turn). + private scheduleDispose(runId: string): void { + const run = this.runs.get(runId); + if (!run || run.inflight > 0) return; + this.cancelDispose(run); + if (DISPOSE_GRACE_MS <= 0) { + this.dispose(runId); + return; + } + run.disposeTimer = setTimeout(() => { + const r = this.runs.get(runId); + if (r && r.inflight === 0) this.dispose(runId); + }, DISPOSE_GRACE_MS); + // A pending teardown timer must not keep the process alive at quit. + run.disposeTimer.unref?.(); + } + + private cancelDispose(run: ActiveRun): void { + if (run.disposeTimer) { + clearTimeout(run.disposeTimer); + run.disposeTimer = undefined; + } + } + disposeAll(): void { for (const runId of [...this.runs.keys()]) this.dispose(runId); } @@ -69,6 +151,7 @@ export class CodeModeManager { ): Promise { const existing = this.runs.get(runId); if (existing && existing.agent === agent && existing.cwd === cwd) { + this.cancelDispose(existing); // reused before its grace window elapsed existing.client.setHandlers(broker, onEvent); return existing; } @@ -78,7 +161,7 @@ export class CodeModeManager { await client.start(); const sessionId = await this.openSession(runId, agent, cwd, client); - const run: ActiveRun = { client, sessionId, agent, cwd }; + const run: ActiveRun = { client, sessionId, agent, cwd, inflight: 0 }; this.runs.set(runId, run); return run; } diff --git a/apps/x/packages/core/src/di/container.ts b/apps/x/packages/core/src/di/container.ts index 18622870..d7b17ce7 100644 --- a/apps/x/packages/core/src/di/container.ts +++ b/apps/x/packages/core/src/di/container.ts @@ -46,8 +46,9 @@ container.register({ agentScheduleStateRepo: asClass(FSAgentScheduleStateRepo).singleton(), slackConfigRepo: asClass(FSSlackConfigRepo).singleton(), - // ACP code-mode engine: the manager holds live agent connections for the app - // lifetime (warm sessions across messages); the registry brokers mid-run approvals. + // ACP code-mode engine: the manager holds a live agent connection per chat only + // around an active turn (torn down after a short idle grace; resumed via + // session/load); the registry brokers mid-run approvals. codeModeManager: asClass(CodeModeManager).singleton(), codePermissionRegistry: asClass(CodePermissionRegistry).singleton(), });