// Relays the renderer's answer for a code_agent_run permission card to the
// shared permission registry. The registry looks the pending ask up by
// requestId and settles it with the user's decision.
'codeRun:resolvePermission': async (_event, { requestId, decision }) => {
    container
        .resolve<CodePermissionRegistry>('codePermissionRegistry')
        .resolve(requestId, decision);
    return { success: true };
},
Reusing the - // same -s resumes the session; the first call creates it. - const sessionName = `rowboat-${runId}`; instructionsWithDateTime += `\n\n# Code Mode (Active) — Default agent: ${agentDisplay} The user has turned on **code mode** and the composer chip is set to **${agentDisplay}** (\`${codeMode}\`). Use this as the **default** agent for coding tasks in this turn. @@ -1415,31 +1411,12 @@ The user has turned on **code mode** and the composer chip is set to **${agentDi 1. By toggling the chip in the composer (preferred). 2. By asking you directly in chat ("use codex", "switch to claude", "do this with ${otherDisplay}", etc.). When the user explicitly asks to use a different agent in the current message, honor that — use \`${otherAgent}\` instead of \`${codeMode}\` for this turn, and briefly mention they can also toggle it via the chip for stickiness. -**Persistent session for this chat — session name: \`${sessionName}\`.** This chat uses one named agent session so the agent keeps context across your requests. The session must exist before it can be prompted (\`-s\` only resumes; it does not create). +**How to run coding work — call the \`code_agent_run\` tool** with: +- \`agent\`: \`${codeMode}\` by default (or the in-chat override above). +- \`cwd\`: the absolute project/working directory (resolve it per the code-with-agents skill — a path the user named, the "# User Work Directory" block, or ask once). +- \`prompt\`: a clear, self-contained coding instruction. -**1. First coding action in this chat — ensure the session exists:** - -\`\`\` -npx acpx@latest --approve-all --cwd sessions ensure --name ${sessionName} -\`\`\` - -(\`ensure\` creates the session if missing and reuses it if it already exists — safe to call when reopening this chat later.) - -**2. Then run the prompt:** - -\`\`\` -npx acpx@latest --approve-all --timeout 600 --cwd -s ${sessionName} "" -\`\`\` - -**3. 
Every follow-up coding request in this chat — reuse the same session (do NOT create again):** - -\`\`\` -npx acpx@latest --approve-all --timeout 600 --cwd -s ${sessionName} "" -\`\`\` - -Run these as **separate, sequential** \`executeCommand\` calls — issue the \`sessions ensure\` call first and WAIT for it to finish, then issue the prompt call. Do NOT fire both in the same turn / batch. - -Where \`\` is either \`claude\` or \`codex\` — pick based on (in priority order): an explicit in-chat override → the chip setting (\`${codeMode}\`). Use \`${sessionName}\` exactly — do NOT invent a different name, and do NOT use \`exec\` (it is one-shot and forgets). +The tool runs the agent on-device and streams its tool calls, file diffs, and plan into the chat; any action needing approval surfaces as an inline permission card, so you do NOT pre-confirm with an in-chat "reply yes". This chat keeps ONE persistent agent session, so follow-up coding requests automatically resume with full context — just call \`code_agent_run\` again. Do NOT shell out to \`acpx\` or \`executeCommand\` for coding, and do NOT fall back to your own file tools. If the user's message is clearly NOT a coding request (small talk, an unrelated question), answer directly without invoking the coding agent. Code mode signals readiness, not that every message must route through the agent.`; } diff --git a/apps/x/packages/core/src/application/assistant/skills/code-with-agents/skill.ts b/apps/x/packages/core/src/application/assistant/skills/code-with-agents/skill.ts index d8e81a58..8180a762 100644 --- a/apps/x/packages/core/src/application/assistant/skills/code-with-agents/skill.ts +++ b/apps/x/packages/core/src/application/assistant/skills/code-with-agents/skill.ts @@ -5,6 +5,8 @@ Use this skill whenever the user asks you to write code, build a project, create Coding agents operate on **arbitrary file paths** (including paths outside the Rowboat workspace root, like \`G:/4th sem/CN\` or \`~/projects/foo\`). 
Do NOT raise "outside workspace" concerns, and do NOT fall back to your own \`executeCommand\` (PowerShell / bash) or workspace file tools to do code work yourself. +All coding work runs through the **\`code_agent_run\`** tool. It launches the selected on-device coding agent (Claude Code / Codex), streams its tool calls, file diffs, and plan into the chat, and surfaces any action needing approval as an inline permission card. One persistent session is kept per chat, so follow-up requests resume with full context automatically. + --- ## STEP 1 — MANDATORY FIRST ACTION @@ -39,96 +41,54 @@ This is non-negotiable. The user gets clickable buttons. Free-text "which agent? --- -## STEP 2 — Resolve workdir, confirm, execute +## STEP 2 — Resolve workdir, then run **Resolve the workdir** (in this priority order): 1. A path the user named in their original message (e.g. \`G:/4th sem/CN\`). 2. The path from a "# User Work Directory" block in your context. 3. Ask once in plain text: "Which folder should I work in?" -**State your intent in one line, then execute immediately — do NOT wait for a "yes".** The \`executeCommand\` call surfaces a permission card that is itself the user's confirmation, so an extra in-chat "reply yes to proceed" is redundant friction. Say something like: +**Pick the agent** (\`claude\` or \`codex\`), in priority order: +- An explicit in-chat override from the user this turn ("use codex", "switch to claude") — honor it. +- The agent from the "# Code Mode (Active)" block / Step 1 choice. + +**State your intent in one line, then call the tool immediately — do NOT wait for a "yes".** The tool's own permission cards are the user's confirmation, so an extra in-chat "reply yes to proceed" is redundant friction. Say something like: > Using [Claude Code / Codex] to [task description] in \`[folder]\`. -…and then immediately make the \`executeCommand\` call in the same turn. 
- -**Execute** with the chosen agent using a **persistent named session** so follow-up coding requests in this chat resume the same agent and keep context. - -Pick \`\` (\`claude\` or \`codex\`) by, in priority order: -- An explicit in-chat override from the user this turn ("use codex", "switch to claude") — honor it. -- The agent chosen in Step 1 / the "# Code Mode (Active)" block. - -Pick \`\` — **stable for this whole chat**: -- If the "# Code Mode (Active)" block gives a session name (e.g. \`rowboat-\`), use that exact name. -- Otherwise pick one short, kebab-case name and **reuse it for every coding call this turn and in follow-ups** — never a new name each time. - -**\`-s\` resumes an existing session; it does NOT create one.** So ensure the session exists once at the start, then prompt: - -**1. First coding action in this chat — ensure the session exists:** +…and then immediately call: \`\`\` -npx acpx@latest --approve-all --cwd sessions ensure --name +code_agent_run({ + agent: "", + cwd: "", + prompt: "" +}) \`\`\` -(\`ensure\` creates the session if missing and reuses it if it already exists — so reopening this chat later just resumes the same session instead of erroring.) - -**2. Then run the prompt:** - -\`\`\` -npx acpx@latest --approve-all --timeout 600 --cwd -s "" -\`\`\` - -**3. Every follow-up coding request in this chat — reuse the same session (do NOT create again):** - -\`\`\` -npx acpx@latest --approve-all --timeout 600 --cwd -s "" -\`\`\` - -**Run steps 1 and 2 as separate, sequential \`executeCommand\` calls.** Issue the \`sessions ensure\` call FIRST, wait for it to finish, and only THEN issue the prompt call. Do NOT fire both in the same turn / batch — each must surface and complete its own permission + command block before the next begins. - -Do NOT use \`exec\` — it is one-shot and forgets everything. 
- -Concrete example: - -\`\`\` -# First coding message in the chat — ensure the session, then prompt: -npx acpx@latest --approve-all --cwd "G:\\Blogging\\myblog" claude sessions ensure --name diskspace-check -npx acpx@latest --approve-all --timeout 600 --cwd "G:\\Blogging\\myblog" claude -s diskspace-check "Check the system disk space and report total, used, and free space." - -# Follow-up in the same chat — reuse the session, no create: -npx acpx@latest --approve-all --timeout 600 --cwd "G:\\Blogging\\myblog" claude -s diskspace-check "Summarize what we did and the final findings." -\`\`\` - -### Critical: flag order - -\`--approve-all\`, \`--timeout\`, and \`--cwd\` are GLOBAL flags and MUST appear BEFORE the agent name. \`sessions ensure --name \` and \`-s \` come AFTER the agent name: - -- ✓ Correct: \`npx acpx@latest --approve-all --timeout 600 --cwd -s ""\` -- ✗ Wrong: \`npx acpx@latest --approve-all -s "..."\` (will fail) - -### Writing good prompts for the agent - +**Writing good prompts for the agent:** - Be specific: file names, function signatures, expected behavior. - Mention constraints (language, framework, style). -- Expand short user requests into clear, actionable prompts. +- Expand short user requests into clear, actionable instructions. + +**Follow-ups:** for every later coding request in this chat, just call \`code_agent_run\` again with the same \`cwd\` (and agent, unless overridden). The session resumes automatically — do NOT start over or re-explain prior context. --- ## STEP 3 — Report results -After the command finishes: -- Pass through the coding agent's summary as-is. Do not rewrite. +After \`code_agent_run\` returns: +- Pass through the agent's \`summary\` as-is. Do not rewrite it. - Refer to file paths as plain text. Do NOT use \`\`\`file:path\`\`\` reference blocks. (This overrides the global "always wrap paths in filepath blocks" rule — for code-mode output, plain text.) 
-- Only add your own explanation if the command failed (non-zero exit): - - Exit code 5 — permissions were denied (shouldn't happen with \`--approve-all\`; flag it). - - Exit code 4 / "No acpx session found" — the \`-s \` session doesn't exist yet. Create it once with \`npx acpx@latest --approve-all --cwd sessions ensure --name \`, then retry the prompt. (\`-s\` only resumes; it never creates.) - - "command not found" / agent not installed, or an auth/sign-in error — the agent isn't set up. Tell the user to install or sign in to the agent via **Settings → Code Mode**, where Rowboat shows the install and sign-in status. +- Only add your own explanation if it failed: + - \`success: false\` with a message — surface the message. If it mentions the agent isn't installed or signed in, tell the user to install or sign in via **Settings → Code Mode**. + - \`stopReason: "cancelled"\` — the run was stopped; acknowledge briefly and ask if they want to continue. --- ## Once delegating: delegate fully -After Step 2 fires, delegate ALL related coding tasks for this turn to the coding agent — writing, editing, reading, debugging, exploring structure, running tests. You are the coordinator; the agent does the work. +After Step 2 fires, delegate ALL related coding tasks for this turn to \`code_agent_run\` — writing, editing, reading, debugging, exploring structure, running tests. You are the coordinator; the agent does the work. 
// Builtin tool that drives the on-device coding agent for one coding turn.
// Agent events are re-published on the run's event stream and mid-turn
// permission asks are brokered through CodePermissionRegistry.
code_agent_run: {
    description: 'Run a coding/software task with the selected on-device coding agent (Claude Code or Codex) inside a project folder. Streams the agent\'s tool calls, file diffs, and plan into the chat and surfaces permission requests inline. Use this for ALL code-mode work (writing/editing/reading code, running tests, debugging, exploring a repo). Reuses one persistent session per chat, so follow-up requests keep context.',
    inputSchema: z.object({
        agent: z.enum(['claude', 'codex']).describe('Which coding agent to use: "claude" (Claude Code) or "codex". Pick per the active code-mode selection / any in-chat override.'),
        cwd: z.string().describe('Absolute path to the working directory / project folder the agent should operate in.'),
        prompt: z.string().describe('The full, self-contained coding instruction for the agent (file names, expected behavior, constraints).'),
    }),
    /**
     * Runs one coding turn and resolves with a plain result object.
     *
     * Returns `{ success, stopReason, summary, changedFiles }` when the turn
     * completes (success is true only for stopReason "end_turn"), or
     * `{ success: false, message }` when there is no run context or the
     * engine throws. Never rejects for engine errors.
     */
    execute: async ({ agent, cwd, prompt }: { agent: 'claude' | 'codex', cwd: string, prompt: string }, ctx?: ToolContext) => {
        if (!ctx) {
            return { success: false, message: 'code_agent_run requires run context (runId / streaming).' };
        }
        const manager = container.resolve<CodeModeManager>('codeModeManager');
        const registry = container.resolve<CodePermissionRegistry>('codePermissionRegistry');

        // Approval policy from settings; default to asking the user.
        let policy: ApprovalPolicy = 'ask';
        try {
            const cfg = await container.resolve<ICodeModeConfigRepo>('codeModeConfigRepo').getConfig();
            if (cfg.approvalPolicy) policy = cfg.approvalPolicy;
        } catch {
            // Deliberate best-effort: unreadable settings fall back to 'ask',
            // the most conservative policy.
        }

        // Cancel the coding turn (and unblock any pending approval) if the run is stopped.
        const onAbort = () => {
            manager.cancel(ctx.runId).catch(() => {});
            registry.cancelRun(ctx.runId);
        };

        // The run was stopped before the turn could start (possibly while the
        // config was loading). Run the same cleanup, but do NOT launch a new
        // coding turn for a run the user has already stopped.
        if (ctx.signal.aborted) {
            onAbort();
            return {
                success: false,
                stopReason: 'cancelled',
                summary: '',
                changedFiles: [],
            };
        }
        ctx.signal.addEventListener('abort', onAbort, { once: true });

        let finalText = '';
        const changedFiles = new Set<string>();
        try {
            const result = await manager.runPrompt({
                runId: ctx.runId,
                agent,
                cwd,
                prompt,
                policy,
                onEvent: (event) => {
                    // Only the agent's own text feeds the summary; replayed
                    // user turns are streamed but not accumulated.
                    if (event.type === 'message' && event.role === 'agent') finalText += event.text;
                    if (event.type === 'tool_call_update') for (const f of event.diffs) changedFiles.add(f);
                    void ctx.publish({
                        runId: ctx.runId,
                        type: 'code-run-event',
                        toolCallId: ctx.toolCallId,
                        event,
                        subflow: [],
                    });
                },
                ask: async (permAsk) => {
                    // The abort listener sweeps the registry exactly once. An
                    // ask arriving after that would register an entry nothing
                    // ever resolves, so answer it immediately instead.
                    if (ctx.signal.aborted) return 'reject';
                    return registry.request(ctx.runId, (requestId) => {
                        void ctx.publish({
                            runId: ctx.runId,
                            type: 'code-run-permission-request',
                            toolCallId: ctx.toolCallId,
                            requestId,
                            ask: permAsk,
                            subflow: [],
                        });
                    });
                },
            });
            return {
                success: result.stopReason === 'end_turn',
                stopReason: result.stopReason,
                summary: finalText.trim(),
                changedFiles: [...changedFiles],
            };
        } catch (error) {
            return {
                success: false,
                message: `Coding agent failed: ${error instanceof Error ? error.message : String(error)}`,
            };
        } finally {
            ctx.signal.removeEventListener('abort', onAbort);
        }
    },
},
// One in-flight permission ask: the run it belongs to and the resolver of the
// promise handed back by request().
interface Pending {
    runId: string;
    resolve: (decision: PermissionDecision) => void;
}

// Holds in-flight mid-run permission asks. The agent (via the broker) calls
// request(), which returns a promise that stays pending until the user answers;
// the renderer's answer arrives over IPC and calls resolve(). This is separate
// from the LLM tool-loop's pre-call permission gate, which can't model a
// mid-execution wait.
export class CodePermissionRegistry {
    private readonly pending = new Map<string, Pending>();
    private counter = 0;

    /**
     * Registers a pending ask and hands the generated requestId to `emit` so
     * the caller can publish the UI event.
     *
     * @param runId - run the ask belongs to (used by cancelRun).
     * @param emit - invoked synchronously with the new requestId.
     * @returns a promise that resolves with the user's decision, or with
     *   'reject' if the run is cancelled first. If `emit` throws, the promise
     *   rejects with that error and the ask is not left registered.
     */
    request(runId: string, emit: (requestId: string) => void): Promise<PermissionDecision> {
        const requestId = `cpr-${runId}-${++this.counter}`;
        return new Promise<PermissionDecision>((resolve, reject) => {
            this.pending.set(requestId, { runId, resolve });
            try {
                emit(requestId);
            } catch (error) {
                // The card never reached the UI, so nobody can answer it:
                // drop the entry instead of leaking it for the app lifetime.
                this.pending.delete(requestId);
                reject(error);
            }
        });
    }

    /**
     * Called from the IPC handler when the user answers a card. Unknown or
     * already-settled requestIds are ignored.
     */
    resolve(requestId: string, decision: PermissionDecision): void {
        const entry = this.pending.get(requestId);
        if (!entry) return;
        this.pending.delete(requestId);
        entry.resolve(decision);
    }

    /**
     * On run stop/cancel: settles every ask still waiting for `runId` with the
     * 'reject' decision (the promises resolve, they do not reject) so the
     * turn can unwind.
     */
    cancelRun(runId: string): void {
        // Iterate over a snapshot because entries are deleted while looping.
        for (const [id, entry] of [...this.pending]) {
            if (entry.runId === runId) {
                this.pending.delete(id);
                entry.resolve('reject');
            }
        }
    }
}
/**
 * Persisted code-mode settings.
 *
 * - `enabled`: whether code mode is switched on.
 * - `approvalPolicy`: how the ACP engine answers the coding agent's permission
 *   requests. Left optional so configs written before this field existed still
 *   parse; the code_agent_run tool falls back to "ask" when it is unset.
 */
export const CodeModeConfig = z.object({
    enabled: z.boolean(),
    approvalPolicy: z.optional(ApprovalPolicy),
});

/** Static type inferred from the CodeModeConfig schema. */
export type CodeModeConfig = z.infer<typeof CodeModeConfig>;
+ codeModeManager: asClass(CodeModeManager).singleton(), + codePermissionRegistry: asClass(CodePermissionRegistry).singleton(), }); export default container; diff --git a/apps/x/packages/core/src/knowledge/inline_task_agent.ts b/apps/x/packages/core/src/knowledge/inline_task_agent.ts index 1a5c2582..db81198d 100644 --- a/apps/x/packages/core/src/knowledge/inline_task_agent.ts +++ b/apps/x/packages/core/src/knowledge/inline_task_agent.ts @@ -1,7 +1,10 @@ import { BuiltinTools } from '../application/lib/builtin-tools.js'; export function getRaw(): string { + // code_agent_run needs an interactive UI to answer its permission asks; exclude it + // from this headless agent so it can't hang waiting on an approval no one can give. const toolEntries = Object.keys(BuiltinTools) + .filter(name => name !== 'code_agent_run') .map(name => ` ${name}:\n type: builtin\n name: ${name}`) .join('\n'); diff --git a/apps/x/packages/core/src/knowledge/live-note/agent.ts b/apps/x/packages/core/src/knowledge/live-note/agent.ts index 8bba90bc..7638384e 100644 --- a/apps/x/packages/core/src/knowledge/live-note/agent.ts +++ b/apps/x/packages/core/src/knowledge/live-note/agent.ts @@ -152,7 +152,9 @@ Avoid: "I updated the note.", "Done!", "Here is the update:". The summary is a d export function buildLiveNoteAgent(): z.infer { const tools: Record> = {}; for (const name of Object.keys(BuiltinTools)) { - if (name === 'executeCommand') continue; + // code_agent_run requires an interactive UI for permission approvals — skip it + // here (headless) so it can't hang on an approval no one can answer. 
// Shared zod schemas for the ACP code-mode engine. Single source of truth: the
// core engine re-exports the inferred TS types, and runs.ts builds the RunEvent
// variants that carry these to the renderer.

/** Coding agents that code mode can drive. */
export const CodingAgent = z.enum(["claude", "codex"]);
export type CodingAgent = z.infer<typeof CodingAgent>;

/**
 * How the permission broker answers the agent's requests before any per-tool
 * "always allow" memory is applied. `yolo` is the safe, scoped equivalent of
 * `claude --dangerously-skip-permissions` (our toggle, not a CLI flag).
 */
export const ApprovalPolicy = z.enum(["ask", "auto-approve-reads", "yolo"]);
export type ApprovalPolicy = z.infer<typeof ApprovalPolicy>;

/** A user's answer to a single permission request. */
export const PermissionDecision = z.enum(["allow_once", "allow_always", "reject"]);
export type PermissionDecision = z.infer<typeof PermissionDecision>;

/** What the UI needs to render a permission card. */
export const PermissionAsk = z.object({
    toolCallId: z.string().optional(),
    title: z.string(),
    kind: z.string().optional(), // tool kind, e.g. "edit" | "execute" | "read"
    isRead: z.boolean(),
});
export type PermissionAsk = z.infer<typeof PermissionAsk>;

// --- Variants of the per-run stream union, one schema per `type` tag. ---

// `role` distinguishes the agent's own output from replayed user turns
// (loadSession streams the whole prior conversation back on resume).
const messageItem = z.object({
    type: z.literal("message"),
    role: z.enum(["agent", "user"]),
    text: z.string(),
});

const thoughtItem = z.object({ type: z.literal("thought") });

const toolCallItem = z.object({
    type: z.literal("tool_call"),
    id: z.string().optional(),
    title: z.string().optional(),
    kind: z.string().optional(),
    status: z.string().optional(),
});

const toolCallUpdateItem = z.object({
    type: z.literal("tool_call_update"),
    id: z.string().optional(),
    status: z.string().optional(),
    diffs: z.array(z.string()),
});

const planEntry = z.object({
    content: z.string(),
    status: z.string().optional(),
    priority: z.string().optional(),
});

const planItem = z.object({
    type: z.literal("plan"),
    entries: z.array(planEntry),
});

const permissionItem = z.object({
    type: z.literal("permission"),
    ask: PermissionAsk,
    decision: z.union([PermissionDecision, z.literal("cancelled")]),
    auto: z.boolean(),
});

const otherItem = z.object({ type: z.literal("other"), sessionUpdate: z.string() });

/**
 * Normalized per-run stream items. The engine maps raw ACP session/update
 * notifications onto this union; the renderer renders them.
 */
export const CodeRunEvent = z.discriminatedUnion("type", [
    messageItem,
    thoughtItem,
    toolCallItem,
    toolCallUpdateItem,
    planItem,
    permissionItem,
    otherItem,
]);
export type CodeRunEvent = z.infer<typeof CodeRunEvent>;

/** Outcome of one prompt turn: why it stopped and which session it ran in. */
export const RunPromptResult = z.object({
    stopReason: z.string(),
    sessionId: z.string(),
});
export type RunPromptResult = z.infer<typeof RunPromptResult>;
// Field shared by both code-run events: the id of the code_agent_run tool call
// that produced the event.
const codeRunToolCallRef = { toolCallId: z.string() };

/**
 * A structured item from a code_agent_run coding turn (tool call, diff, plan,
 * message chunk, resolved permission). Fire-and-forget — rendered live.
 */
export const CodeRunStreamEvent = BaseRunEvent.extend({
    type: z.literal("code-run-event"),
    ...codeRunToolCallRef,
    event: CodeRunEventSchema,
});

/**
 * The coding agent is asking for permission mid-turn and the run is BLOCKED
 * until the user answers via `codeRun:resolvePermission` (keyed by requestId).
 */
export const CodeRunPermissionRequestEvent = BaseRunEvent.extend({
    type: z.literal("code-run-permission-request"),
    ...codeRunToolCallRef,
    requestId: z.string(),
    ask: PermissionAsk,
});