mirror of
https://github.com/rowboatlabs/rowboat.git
synced 2026-06-09 19:45:17 +02:00
feat(code-mode): route code mode through code_agent_run tool + live approvals
Replace the acpx shell-out with a structured code_agent_run tool that drives the ACP engine directly, streaming the agent's tool calls / diffs / plan into the chat and surfacing permission requests inline. - shared: code-mode.ts zod schemas; add code-run-event + code-run-permission-request RunEvent variants (stream to the renderer over the existing runs:events channel); codeRun:resolvePermission IPC channel. - core: CodePermissionRegistry (promise-based mid-run approvals — the LLM tool-loop's pre-call gate can't model a mid-execution wait); register codeModeManager + codePermissionRegistry in awilix. - core: code_agent_run builtin tool (streams via ctx.publish, asks via the registry, cancels on ctx.signal, returns the agent summary). CodeModeConfig.approvalPolicy (ask | auto-approve-reads | yolo; default ask). Exclude the tool from the headless background-task / live-note / inline-task agents so they can't block on an approval. - main: codeRun:resolvePermission handler -> registry.resolve. - rewrite the code-with-agents skill and the runtime "Code Mode (Active)" block to call code_agent_run instead of emitting npx acpx commands.
This commit is contained in:
parent
99ef643c8e
commit
80ed635300
14 changed files with 293 additions and 137 deletions
|
|
@ -32,6 +32,7 @@ import type { IModelConfigRepo } from '@x/core/dist/models/repo.js';
|
|||
import type { IOAuthRepo } from '@x/core/dist/auth/repo.js';
|
||||
import { IGranolaConfigRepo } from '@x/core/dist/knowledge/granola/repo.js';
|
||||
import { ICodeModeConfigRepo } from '@x/core/dist/code-mode/repo.js';
|
||||
import { CodePermissionRegistry } from '@x/core/dist/code-mode/acp/permission-registry.js';
|
||||
import { checkCodeModeAgentStatus } from '@x/core/dist/code-mode/status.js';
|
||||
import { invalidateCopilotInstructionsCache } from '@x/core/dist/application/assistant/instructions.js';
|
||||
import { triggerSync as triggerGranolaSync } from '@x/core/dist/knowledge/granola/sync.js';
|
||||
|
|
@ -536,6 +537,11 @@ export function setupIpcHandlers() {
|
|||
await runsCore.authorizePermission(args.runId, args.authorization);
|
||||
return { success: true };
|
||||
},
|
||||
// Renderer -> main: hands the user's answer for a mid-run permission card to the
// shared CodePermissionRegistry, which unblocks the waiting coding turn.
'codeRun:resolvePermission': async (_event, args) => {
  const { requestId, decision } = args;
  container
    .resolve<CodePermissionRegistry>('codePermissionRegistry')
    .resolve(requestId, decision);
  return { success: true };
},
|
||||
'runs:provideHumanInput': async (_event, args) => {
|
||||
await runsCore.replyToHumanInputRequest(args.runId, args.reply);
|
||||
return { success: true };
|
||||
|
|
|
|||
|
|
@ -1404,10 +1404,6 @@ Do not announce the work directory unless it's relevant. Just use it.`;
|
|||
const agentDisplay = codeMode === 'claude' ? 'Claude Code' : 'Codex';
|
||||
const otherAgent = codeMode === 'claude' ? 'codex' : 'claude';
|
||||
const otherDisplay = codeMode === 'claude' ? 'Codex' : 'Claude Code';
|
||||
// Deterministic, per-chat session name so the coding agent keeps
|
||||
// context across the user's requests within this chat. Reusing the
|
||||
// same -s <name> resumes the session; the first call creates it.
|
||||
const sessionName = `rowboat-${runId}`;
|
||||
instructionsWithDateTime += `\n\n# Code Mode (Active) — Default agent: ${agentDisplay}
|
||||
The user has turned on **code mode** and the composer chip is set to **${agentDisplay}** (\`${codeMode}\`). Use this as the **default** agent for coding tasks in this turn.
|
||||
|
||||
|
|
@ -1415,31 +1411,12 @@ The user has turned on **code mode** and the composer chip is set to **${agentDi
|
|||
1. By toggling the chip in the composer (preferred).
|
||||
2. By asking you directly in chat ("use codex", "switch to claude", "do this with ${otherDisplay}", etc.). When the user explicitly asks to use a different agent in the current message, honor that — use \`${otherAgent}\` instead of \`${codeMode}\` for this turn, and briefly mention they can also toggle it via the chip for stickiness.
|
||||
|
||||
**Persistent session for this chat — session name: \`${sessionName}\`.** This chat uses one named agent session so the agent keeps context across your requests. The session must exist before it can be prompted (\`-s\` only resumes; it does not create).
|
||||
**How to run coding work — call the \`code_agent_run\` tool** with:
|
||||
- \`agent\`: \`${codeMode}\` by default (or the in-chat override above).
|
||||
- \`cwd\`: the absolute project/working directory (resolve it per the code-with-agents skill — a path the user named, the "# User Work Directory" block, or ask once).
|
||||
- \`prompt\`: a clear, self-contained coding instruction.
|
||||
|
||||
**1. First coding action in this chat — ensure the session exists:**
|
||||
|
||||
\`\`\`
|
||||
npx acpx@latest --approve-all --cwd <workdir> <agent> sessions ensure --name ${sessionName}
|
||||
\`\`\`
|
||||
|
||||
(\`ensure\` creates the session if missing and reuses it if it already exists — safe to call when reopening this chat later.)
|
||||
|
||||
**2. Then run the prompt:**
|
||||
|
||||
\`\`\`
|
||||
npx acpx@latest --approve-all --timeout 600 --cwd <workdir> <agent> -s ${sessionName} "<prompt>"
|
||||
\`\`\`
|
||||
|
||||
**3. Every follow-up coding request in this chat — reuse the same session (do NOT create again):**
|
||||
|
||||
\`\`\`
|
||||
npx acpx@latest --approve-all --timeout 600 --cwd <workdir> <agent> -s ${sessionName} "<prompt>"
|
||||
\`\`\`
|
||||
|
||||
Run these as **separate, sequential** \`executeCommand\` calls — issue the \`sessions ensure\` call first and WAIT for it to finish, then issue the prompt call. Do NOT fire both in the same turn / batch.
|
||||
|
||||
Where \`<agent>\` is either \`claude\` or \`codex\` — pick based on (in priority order): an explicit in-chat override → the chip setting (\`${codeMode}\`). Use \`${sessionName}\` exactly — do NOT invent a different name, and do NOT use \`exec\` (it is one-shot and forgets).
|
||||
The tool runs the agent on-device and streams its tool calls, file diffs, and plan into the chat; any action needing approval surfaces as an inline permission card, so you do NOT pre-confirm with an in-chat "reply yes". This chat keeps ONE persistent agent session, so follow-up coding requests automatically resume with full context — just call \`code_agent_run\` again. Do NOT shell out to \`acpx\` or \`executeCommand\` for coding, and do NOT fall back to your own file tools.
|
||||
|
||||
If the user's message is clearly NOT a coding request (small talk, an unrelated question), answer directly without invoking the coding agent. Code mode signals readiness, not that every message must route through the agent.`;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,6 +5,8 @@ Use this skill whenever the user asks you to write code, build a project, create
|
|||
|
||||
Coding agents operate on **arbitrary file paths** (including paths outside the Rowboat workspace root, like \`G:/4th sem/CN\` or \`~/projects/foo\`). Do NOT raise "outside workspace" concerns, and do NOT fall back to your own \`executeCommand\` (PowerShell / bash) or workspace file tools to do code work yourself.
|
||||
|
||||
All coding work runs through the **\`code_agent_run\`** tool. It launches the selected on-device coding agent (Claude Code / Codex), streams its tool calls, file diffs, and plan into the chat, and surfaces any action needing approval as an inline permission card. One persistent session is kept per chat, so follow-up requests resume with full context automatically.
|
||||
|
||||
---
|
||||
|
||||
## STEP 1 — MANDATORY FIRST ACTION
|
||||
|
|
@ -39,96 +41,54 @@ This is non-negotiable. The user gets clickable buttons. Free-text "which agent?
|
|||
|
||||
---
|
||||
|
||||
## STEP 2 — Resolve workdir, confirm, execute
|
||||
## STEP 2 — Resolve workdir, then run
|
||||
|
||||
**Resolve the workdir** (in this priority order):
|
||||
1. A path the user named in their original message (e.g. \`G:/4th sem/CN\`).
|
||||
2. The path from a "# User Work Directory" block in your context.
|
||||
3. Ask once in plain text: "Which folder should I work in?"
|
||||
|
||||
**State your intent in one line, then execute immediately — do NOT wait for a "yes".** The \`executeCommand\` call surfaces a permission card that is itself the user's confirmation, so an extra in-chat "reply yes to proceed" is redundant friction. Say something like:
|
||||
**Pick the agent** (\`claude\` or \`codex\`), in priority order:
|
||||
- An explicit in-chat override from the user this turn ("use codex", "switch to claude") — honor it.
|
||||
- The agent from the "# Code Mode (Active)" block / Step 1 choice.
|
||||
|
||||
**State your intent in one line, then call the tool immediately — do NOT wait for a "yes".** The tool's own permission cards are the user's confirmation, so an extra in-chat "reply yes to proceed" is redundant friction. Say something like:
|
||||
|
||||
> Using [Claude Code / Codex] to [task description] in \`[folder]\`.
|
||||
|
||||
…and then immediately make the \`executeCommand\` call in the same turn.
|
||||
|
||||
**Execute** with the chosen agent using a **persistent named session** so follow-up coding requests in this chat resume the same agent and keep context.
|
||||
|
||||
Pick \`<agent>\` (\`claude\` or \`codex\`) by, in priority order:
|
||||
- An explicit in-chat override from the user this turn ("use codex", "switch to claude") — honor it.
|
||||
- The agent chosen in Step 1 / the "# Code Mode (Active)" block.
|
||||
|
||||
Pick \`<session-name>\` — **stable for this whole chat**:
|
||||
- If the "# Code Mode (Active)" block gives a session name (e.g. \`rowboat-<runId>\`), use that exact name.
|
||||
- Otherwise pick one short, kebab-case name and **reuse it for every coding call this turn and in follow-ups** — never a new name each time.
|
||||
|
||||
**\`-s\` resumes an existing session; it does NOT create one.** So ensure the session exists once at the start, then prompt:
|
||||
|
||||
**1. First coding action in this chat — ensure the session exists:**
|
||||
…and then immediately call:
|
||||
|
||||
\`\`\`
|
||||
npx acpx@latest --approve-all --cwd <folder> <agent> sessions ensure --name <session-name>
|
||||
code_agent_run({
|
||||
agent: "<claude|codex>",
|
||||
cwd: "<resolved absolute folder>",
|
||||
prompt: "<clear, self-contained coding instruction>"
|
||||
})
|
||||
\`\`\`
|
||||
|
||||
(\`ensure\` creates the session if missing and reuses it if it already exists — so reopening this chat later just resumes the same session instead of erroring.)
|
||||
|
||||
**2. Then run the prompt:**
|
||||
|
||||
\`\`\`
|
||||
npx acpx@latest --approve-all --timeout 600 --cwd <folder> <agent> -s <session-name> "<prompt>"
|
||||
\`\`\`
|
||||
|
||||
**3. Every follow-up coding request in this chat — reuse the same session (do NOT create again):**
|
||||
|
||||
\`\`\`
|
||||
npx acpx@latest --approve-all --timeout 600 --cwd <folder> <agent> -s <session-name> "<prompt>"
|
||||
\`\`\`
|
||||
|
||||
**Run steps 1 and 2 as separate, sequential \`executeCommand\` calls.** Issue the \`sessions ensure\` call FIRST, wait for it to finish, and only THEN issue the prompt call. Do NOT fire both in the same turn / batch — each must surface and complete its own permission + command block before the next begins.
|
||||
|
||||
Do NOT use \`exec\` — it is one-shot and forgets everything.
|
||||
|
||||
Concrete example:
|
||||
|
||||
\`\`\`
|
||||
# First coding message in the chat — ensure the session, then prompt:
|
||||
npx acpx@latest --approve-all --cwd "G:\\Blogging\\myblog" claude sessions ensure --name diskspace-check
|
||||
npx acpx@latest --approve-all --timeout 600 --cwd "G:\\Blogging\\myblog" claude -s diskspace-check "Check the system disk space and report total, used, and free space."
|
||||
|
||||
# Follow-up in the same chat — reuse the session, no create:
|
||||
npx acpx@latest --approve-all --timeout 600 --cwd "G:\\Blogging\\myblog" claude -s diskspace-check "Summarize what we did and the final findings."
|
||||
\`\`\`
|
||||
|
||||
### Critical: flag order
|
||||
|
||||
\`--approve-all\`, \`--timeout\`, and \`--cwd\` are GLOBAL flags and MUST appear BEFORE the agent name. \`sessions ensure --name <name>\` and \`-s <session-name>\` come AFTER the agent name:
|
||||
|
||||
- ✓ Correct: \`npx acpx@latest --approve-all --timeout 600 --cwd <folder> <agent> -s <session-name> "<prompt>"\`
|
||||
- ✗ Wrong: \`npx acpx@latest <agent> --approve-all -s <name> "..."\` (will fail)
|
||||
|
||||
### Writing good prompts for the agent
|
||||
|
||||
**Writing good prompts for the agent:**
|
||||
- Be specific: file names, function signatures, expected behavior.
|
||||
- Mention constraints (language, framework, style).
|
||||
- Expand short user requests into clear, actionable prompts.
|
||||
- Expand short user requests into clear, actionable instructions.
|
||||
|
||||
**Follow-ups:** for every later coding request in this chat, just call \`code_agent_run\` again with the same \`cwd\` (and agent, unless overridden). The session resumes automatically — do NOT start over or re-explain prior context.
|
||||
|
||||
---
|
||||
|
||||
## STEP 3 — Report results
|
||||
|
||||
After the command finishes:
|
||||
- Pass through the coding agent's summary as-is. Do not rewrite.
|
||||
After \`code_agent_run\` returns:
|
||||
- Pass through the agent's \`summary\` as-is. Do not rewrite it.
|
||||
- Refer to file paths as plain text. Do NOT use \`\`\`file:path\`\`\` reference blocks. (This overrides the global "always wrap paths in filepath blocks" rule — for code-mode output, plain text.)
|
||||
- Only add your own explanation if the command failed (non-zero exit):
|
||||
- Exit code 5 — permissions were denied (shouldn't happen with \`--approve-all\`; flag it).
|
||||
- Exit code 4 / "No acpx session found" — the \`-s <session-name>\` session doesn't exist yet. Create it once with \`npx acpx@latest --approve-all --cwd <folder> <agent> sessions ensure --name <session-name>\`, then retry the prompt. (\`-s\` only resumes; it never creates.)
|
||||
- "command not found" / agent not installed, or an auth/sign-in error — the agent isn't set up. Tell the user to install or sign in to the agent via **Settings → Code Mode**, where Rowboat shows the install and sign-in status.
|
||||
- Only add your own explanation if it failed:
|
||||
- \`success: false\` with a message — surface the message. If it mentions the agent isn't installed or signed in, tell the user to install or sign in via **Settings → Code Mode**.
|
||||
- \`stopReason: "cancelled"\` — the run was stopped; acknowledge briefly and ask if they want to continue.
|
||||
|
||||
---
|
||||
|
||||
## Once delegating: delegate fully
|
||||
|
||||
After Step 2 fires, delegate ALL related coding tasks for this turn to the coding agent — writing, editing, reading, debugging, exploring structure, running tests. You are the coordinator; the agent does the work.
|
||||
After Step 2 fires, delegate ALL related coding tasks for this turn to \`code_agent_run\` — writing, editing, reading, debugging, exploring structure, running tests. You are the coordinator; the agent does the work.
|
||||
|
||||
## Prerequisites (informational)
|
||||
|
||||
|
|
|
|||
|
|
@ -16,6 +16,10 @@ import { CURATED_TOOLKITS, CURATED_TOOLKIT_SLUGS } from "@x/shared/dist/composio
|
|||
import { BrowserControlInputSchema, type BrowserControlInput } from "@x/shared/dist/browser-control.js";
|
||||
import { BackgroundTaskSchema, TriggersSchema } from "@x/shared/dist/background-task.js";
|
||||
import { resolveClaudeExeOnWindows } from "../../code-mode/acp/claude-exec.js";
|
||||
import type { CodeModeManager } from "../../code-mode/acp/manager.js";
|
||||
import type { CodePermissionRegistry } from "../../code-mode/acp/permission-registry.js";
|
||||
import { ICodeModeConfigRepo } from "../../code-mode/repo.js";
|
||||
import type { ApprovalPolicy } from "@x/shared/dist/code-mode.js";
|
||||
|
||||
// Inputs for the bg-task builtin tools. Reuse the canonical schema field
|
||||
// descriptions; only `triggers` gets a tighter contextual override (the
|
||||
|
|
@ -816,6 +820,85 @@ export const BuiltinTools: z.infer<typeof BuiltinToolsSchema> = {
|
|||
},
|
||||
},
|
||||
|
||||
// Builtin tool that delegates a coding task to the on-device coding agent through
// the CodeModeManager. Agent events are streamed to the chat via ctx.publish and
// mid-run permission asks are brokered through the CodePermissionRegistry.
code_agent_run: {
    description: 'Run a coding/software task with the selected on-device coding agent (Claude Code or Codex) inside a project folder. Streams the agent\'s tool calls, file diffs, and plan into the chat and surfaces permission requests inline. Use this for ALL code-mode work (writing/editing/reading code, running tests, debugging, exploring a repo). Reuses one persistent session per chat, so follow-up requests keep context.',
    inputSchema: z.object({
        agent: z.enum(['claude', 'codex']).describe('Which coding agent to use: "claude" (Claude Code) or "codex". Pick per the active code-mode selection / any in-chat override.'),
        cwd: z.string().describe('Absolute path to the working directory / project folder the agent should operate in.'),
        prompt: z.string().describe('The full, self-contained coding instruction for the agent (file names, expected behavior, constraints).'),
    }),
    /**
     * Runs one coding turn and resolves with a plain result object.
     *
     * @returns `{ success, stopReason, summary, changedFiles }` when the turn ran
     *   (or was cancelled before starting), or `{ success: false, message }` when
     *   there is no run context or the manager threw.
     */
    execute: async ({ agent, cwd, prompt }: { agent: 'claude' | 'codex', cwd: string, prompt: string }, ctx?: ToolContext) => {
        // Without a run context there is no runId to key the session on and no
        // channel to stream events through.
        if (!ctx) {
            return { success: false, message: 'code_agent_run requires run context (runId / streaming).' };
        }
        const manager = container.resolve<CodeModeManager>('codeModeManager');
        const registry = container.resolve<CodePermissionRegistry>('codePermissionRegistry');

        // Approval policy from settings; default to asking the user.
        let policy: ApprovalPolicy = 'ask';
        try {
            const cfg = await container.resolve<ICodeModeConfigRepo>('codeModeConfigRepo').getConfig();
            if (cfg.approvalPolicy) policy = cfg.approvalPolicy;
        } catch {
            // Config unreadable: deliberately fall back to the most conservative policy ('ask').
        }

        // Cancel the coding turn (and unblock any pending approval) if the run is stopped.
        const onAbort = () => {
            manager.cancel(ctx.runId).catch(() => {});
            registry.cancelRun(ctx.runId);
        };

        // The run was stopped before the turn could start. An already-aborted signal
        // never fires 'abort' again, so starting the prompt here would launch a turn
        // that nothing could cancel. Clean up and report the cancellation instead.
        if (ctx.signal.aborted) {
            onAbort();
            return {
                success: false,
                stopReason: 'cancelled',
                summary: '',
                changedFiles: [],
            };
        }
        ctx.signal.addEventListener('abort', onAbort, { once: true });

        // Accumulated while streaming: the agent's own message text and every entry
        // reported in tool_call_update diffs.
        // NOTE(review): if the manager forwards replayed agent messages on session
        // resume, they would also be appended to the summary — confirm in the manager.
        let finalText = '';
        const changedFiles = new Set<string>();
        try {
            const result = await manager.runPrompt({
                runId: ctx.runId,
                agent,
                cwd,
                prompt,
                policy,
                onEvent: (event) => {
                    if (event.type === 'message' && event.role === 'agent') finalText += event.text;
                    if (event.type === 'tool_call_update') for (const f of event.diffs) changedFiles.add(f);
                    // Fire-and-forget: streaming to the UI must not block the agent.
                    void ctx.publish({
                        runId: ctx.runId,
                        type: 'code-run-event',
                        toolCallId: ctx.toolCallId,
                        event,
                        subflow: [],
                    });
                },
                // Blocks the coding turn until the user answers the published card
                // (or the run is cancelled, which resolves the ask as 'reject').
                ask: (permAsk) => registry.request(ctx.runId, (requestId) => {
                    void ctx.publish({
                        runId: ctx.runId,
                        type: 'code-run-permission-request',
                        toolCallId: ctx.toolCallId,
                        requestId,
                        ask: permAsk,
                        subflow: [],
                    });
                }),
            });
            return {
                // Only a normally completed turn counts as success.
                success: result.stopReason === 'end_turn',
                stopReason: result.stopReason,
                summary: finalText.trim(),
                changedFiles: [...changedFiles],
            };
        } catch (error) {
            return {
                success: false,
                message: `Coding agent failed: ${error instanceof Error ? error.message : String(error)}`,
            };
        } finally {
            ctx.signal.removeEventListener('abort', onAbort);
        }
    },
},
|
||||
|
||||
// ============================================================================
|
||||
// Browser Skills (browser-use/browser-harness domain-skills cache)
|
||||
// ============================================================================
|
||||
|
|
|
|||
|
|
@ -71,7 +71,9 @@ The workspace lives at \`${WorkDir}\`.
|
|||
export function buildBackgroundTaskAgent(): z.infer<typeof Agent> {
|
||||
const tools: Record<string, z.infer<typeof ToolAttachment>> = {};
|
||||
for (const name of Object.keys(BuiltinTools)) {
|
||||
if (name === 'executeCommand') continue;
|
||||
// code_agent_run requires an interactive UI for permission approvals — skip it
|
||||
// here (headless) so it can't hang on an approval no one can answer.
|
||||
if (name === 'executeCommand' || name === 'code_agent_run') continue;
|
||||
tools[name] = { type: 'builtin', name };
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,43 @@
|
|||
import type { PermissionDecision } from './types.js';
|
||||
|
||||
/** One in-flight permission ask: the run it belongs to and the resolver of its promise. */
interface Pending {
    runId: string;
    resolve: (decision: PermissionDecision) => void;
}

/**
 * Holds in-flight mid-run permission asks.
 *
 * The agent (via the broker) calls `request()`, whose promise stays pending — and so
 * blocks the coding turn — until the user answers; the renderer's answer arrives over
 * IPC and calls `resolve()`. This is separate from the LLM tool-loop's pre-call
 * permission gate, which can't model a mid-execution wait.
 */
export class CodePermissionRegistry {
    private readonly pending = new Map<string, Pending>();
    // Monotonic suffix so request ids stay unique across runs within one registry.
    private counter = 0;

    /**
     * Registers a pending ask and hands its generated id to `emit` so the caller can
     * publish the UI event.
     *
     * @param runId - Run the ask belongs to; used by `cancelRun` to find it.
     * @param emit - Called synchronously with the new request id, after registration.
     * @returns A promise settled with the user's decision. If `emit` throws, the
     *   promise rejects with that error and the ask is not left registered.
     */
    request(runId: string, emit: (requestId: string) => void): Promise<PermissionDecision> {
        const requestId = `cpr-${runId}-${++this.counter}`;
        return new Promise<PermissionDecision>((resolve, reject) => {
            // Register before emitting so an answer delivered during emit still finds the entry.
            this.pending.set(requestId, { runId, resolve });
            try {
                emit(requestId);
            } catch (error) {
                // The card was never published, so nobody can answer it: drop the
                // entry instead of leaking it until the run is cancelled.
                this.pending.delete(requestId);
                reject(error);
            }
        });
    }

    /**
     * Called from the IPC handler when the user answers a card.
     * Unknown or already-answered request ids are ignored.
     */
    resolve(requestId: string, decision: PermissionDecision): void {
        const entry = this.pending.get(requestId);
        if (!entry) return;
        this.pending.delete(requestId);
        entry.resolve(decision);
    }

    /**
     * On run stop/cancel: settle every ask still waiting for `runId` with the
     * 'reject' decision (the promise resolves, it does not throw) so the turn can unwind.
     */
    cancelRun(runId: string): void {
        // Iterate over a snapshot because entries are removed while walking.
        for (const [id, entry] of [...this.pending]) {
            if (entry.runId === runId) {
                this.pending.delete(id);
                entry.resolve('reject');
            }
        }
    }
}
|
||||
|
|
@ -1,43 +1,11 @@
|
|||
// Rowboat-facing types for the ACP code-mode engine. These are intentionally
|
||||
// decoupled from the raw @agentclientprotocol/sdk schema so the IPC layer (Phase 2)
|
||||
// and renderer (Phase 3) consume a small, stable surface instead of the full protocol.
|
||||
|
||||
export type CodingAgent = 'claude' | 'codex';
|
||||
|
||||
// How the permission broker answers an agent's requestPermission, before any
|
||||
// per-tool "allow for this session" memory is applied.
|
||||
// ask -> surface every gated action to the user
|
||||
// auto-approve-reads -> silently allow read-only tool calls, ask for the rest
|
||||
// yolo -> auto-approve everything (the safe, scoped equivalent of
|
||||
// `claude --dangerously-skip-permissions` — our toggle, not a flag)
|
||||
export type ApprovalPolicy = 'ask' | 'auto-approve-reads' | 'yolo';
|
||||
|
||||
// A user's decision for a single permission request.
|
||||
export type PermissionDecision = 'allow_once' | 'allow_always' | 'reject';
|
||||
|
||||
// What we hand to the UI (Phase 3) when the agent asks for permission.
|
||||
export interface PermissionAsk {
|
||||
toolCallId?: string;
|
||||
title: string;
|
||||
kind?: string; // tool kind, e.g. "edit" | "execute" | "read"
|
||||
/** Whether this looks like a read-only action (used by auto-approve-reads). */
|
||||
isRead: boolean;
|
||||
}
|
||||
|
||||
// Normalized stream events emitted for a coding run. The renderer renders these;
|
||||
// the engine maps raw ACP session/update notifications onto this union.
|
||||
export type CodeRunEvent =
|
||||
// role distinguishes the agent's own output from replayed user turns
|
||||
// (loadSession streams the whole prior conversation back on resume).
|
||||
| { type: 'message'; role: 'agent' | 'user'; text: string }
|
||||
| { type: 'thought' }
|
||||
| { type: 'tool_call'; id?: string; title?: string; kind?: string; status?: string }
|
||||
| { type: 'tool_call_update'; id?: string; status?: string; diffs: string[] }
|
||||
| { type: 'plan'; entries: { content: string; status?: string; priority?: string }[] }
|
||||
| { type: 'permission'; ask: PermissionAsk; decision: PermissionDecision | 'cancelled'; auto: boolean }
|
||||
| { type: 'other'; sessionUpdate: string };
|
||||
|
||||
export interface RunPromptResult {
|
||||
stopReason: string;
|
||||
sessionId: string;
|
||||
}
|
||||
// Rowboat-facing types for the ACP code-mode engine. The schemas live in
|
||||
// @x/shared (so the IPC/renderer layers share them); we re-export the inferred
|
||||
// types here so the engine modules import from one local barrel.
|
||||
export type {
|
||||
CodingAgent,
|
||||
ApprovalPolicy,
|
||||
PermissionDecision,
|
||||
PermissionAsk,
|
||||
CodeRunEvent,
|
||||
RunPromptResult,
|
||||
} from '@x/shared/dist/code-mode.js';
|
||||
|
|
|
|||
|
|
@ -1,7 +1,11 @@
|
|||
import z from "zod";
|
||||
import { ApprovalPolicy } from "@x/shared/dist/code-mode.js";
|
||||
|
||||
/** Persisted code-mode settings. */
export const CodeModeConfig = z.object({
    enabled: z.boolean(),
    // How the ACP engine answers the coding agent's permission requests.
    // Left optional for back-compat with configs written before the field
    // existed; the tool defaults to "ask" when it is unset.
    approvalPolicy: z.optional(ApprovalPolicy),
});
export type CodeModeConfig = z.infer<typeof CodeModeConfig>;
|
||||
|
||||
|
|
|
|||
|
|
@ -16,6 +16,8 @@ import { IAbortRegistry, InMemoryAbortRegistry } from "../runs/abort-registry.js
|
|||
import { FSAgentScheduleRepo, IAgentScheduleRepo } from "../agent-schedule/repo.js";
|
||||
import { FSAgentScheduleStateRepo, IAgentScheduleStateRepo } from "../agent-schedule/state-repo.js";
|
||||
import { FSSlackConfigRepo, ISlackConfigRepo } from "../slack/repo.js";
|
||||
import { CodeModeManager } from "../code-mode/acp/manager.js";
|
||||
import { CodePermissionRegistry } from "../code-mode/acp/permission-registry.js";
|
||||
import type { IBrowserControlService } from "../application/browser-control/service.js";
|
||||
import type { INotificationService } from "../application/notification/service.js";
|
||||
|
||||
|
|
@ -43,6 +45,11 @@ container.register({
|
|||
agentScheduleRepo: asClass<IAgentScheduleRepo>(FSAgentScheduleRepo).singleton(),
|
||||
agentScheduleStateRepo: asClass<IAgentScheduleStateRepo>(FSAgentScheduleStateRepo).singleton(),
|
||||
slackConfigRepo: asClass<ISlackConfigRepo>(FSSlackConfigRepo).singleton(),
|
||||
|
||||
// ACP code-mode engine: the manager holds live agent connections for the app
|
||||
// lifetime (warm sessions across messages); the registry brokers mid-run approvals.
|
||||
codeModeManager: asClass(CodeModeManager).singleton(),
|
||||
codePermissionRegistry: asClass(CodePermissionRegistry).singleton(),
|
||||
});
|
||||
|
||||
export default container;
|
||||
|
|
|
|||
|
|
@ -1,7 +1,10 @@
|
|||
import { BuiltinTools } from '../application/lib/builtin-tools.js';
|
||||
|
||||
export function getRaw(): string {
|
||||
// code_agent_run needs an interactive UI to answer its permission asks; exclude it
|
||||
// from this headless agent so it can't hang waiting on an approval no one can give.
|
||||
const toolEntries = Object.keys(BuiltinTools)
|
||||
.filter(name => name !== 'code_agent_run')
|
||||
.map(name => ` ${name}:\n type: builtin\n name: ${name}`)
|
||||
.join('\n');
|
||||
|
||||
|
|
|
|||
|
|
@ -152,7 +152,9 @@ Avoid: "I updated the note.", "Done!", "Here is the update:". The summary is a d
|
|||
export function buildLiveNoteAgent(): z.infer<typeof Agent> {
|
||||
const tools: Record<string, z.infer<typeof ToolAttachment>> = {};
|
||||
for (const name of Object.keys(BuiltinTools)) {
|
||||
if (name === 'executeCommand') continue;
|
||||
// code_agent_run requires an interactive UI for permission approvals — skip it
|
||||
// here (headless) so it can't hang on an approval no one can answer.
|
||||
if (name === 'executeCommand' || name === 'code_agent_run') continue;
|
||||
tools[name] = { type: 'builtin', name };
|
||||
}
|
||||
|
||||
|
|
|
|||
70
apps/x/packages/shared/src/code-mode.ts
Normal file
70
apps/x/packages/shared/src/code-mode.ts
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
import z from "zod";
|
||||
|
||||
// Shared zod schemas for the ACP code-mode engine. Single source of truth: the
|
||||
// core engine re-exports the inferred TS types, and runs.ts builds the RunEvent
|
||||
// variants that carry these to the renderer.
|
||||
|
||||
/** The coding agents code mode can drive. */
const codingAgentValues = ["claude", "codex"] as const;
export const CodingAgent = z.enum(codingAgentValues);
export type CodingAgent = z.infer<typeof CodingAgent>;
|
||||
|
||||
/**
 * How the permission broker answers the agent's requests before any per-tool
 * "always allow" memory is applied. `yolo` is the safe, scoped equivalent of
 * `claude --dangerously-skip-permissions` (our toggle, not a CLI flag).
 */
const approvalPolicyValues = ["ask", "auto-approve-reads", "yolo"] as const;
export const ApprovalPolicy = z.enum(approvalPolicyValues);
export type ApprovalPolicy = z.infer<typeof ApprovalPolicy>;
|
||||
|
||||
/** A user's answer to a single permission request. */
const permissionDecisionValues = ["allow_once", "allow_always", "reject"] as const;
export const PermissionDecision = z.enum(permissionDecisionValues);
export type PermissionDecision = z.infer<typeof PermissionDecision>;
|
||||
|
||||
/** What the UI needs to render a permission card. */
export const PermissionAsk = z.object({
    toolCallId: z.optional(z.string()),
    title: z.string(),
    // Tool kind, e.g. "edit" | "execute" | "read".
    kind: z.optional(z.string()),
    isRead: z.boolean(),
});
export type PermissionAsk = z.infer<typeof PermissionAsk>;
|
||||
|
||||
// Variant schemas for the normalized per-run stream. Each is one member of the
// `CodeRunEvent` union below, discriminated on `type`.

// `role` distinguishes the agent's own output from replayed user turns
// (loadSession streams the whole prior conversation back on resume).
const messageItem = z.object({
    type: z.literal("message"),
    role: z.enum(["agent", "user"]),
    text: z.string(),
});

const thoughtItem = z.object({ type: z.literal("thought") });

const toolCallItem = z.object({
    type: z.literal("tool_call"),
    id: z.string().optional(),
    title: z.string().optional(),
    kind: z.string().optional(),
    status: z.string().optional(),
});

const toolCallUpdateItem = z.object({
    type: z.literal("tool_call_update"),
    id: z.string().optional(),
    status: z.string().optional(),
    diffs: z.array(z.string()),
});

const planEntry = z.object({
    content: z.string(),
    status: z.string().optional(),
    priority: z.string().optional(),
});

const planItem = z.object({
    type: z.literal("plan"),
    entries: z.array(planEntry),
});

const permissionItem = z.object({
    type: z.literal("permission"),
    ask: PermissionAsk,
    decision: PermissionDecision.or(z.literal("cancelled")),
    auto: z.boolean(),
});

const otherItem = z.object({ type: z.literal("other"), sessionUpdate: z.string() });

/**
 * Normalized per-run stream items. The engine maps raw ACP session/update
 * notifications onto this union; the renderer renders them.
 */
export const CodeRunEvent = z.discriminatedUnion("type", [
    messageItem,
    thoughtItem,
    toolCallItem,
    toolCallUpdateItem,
    planItem,
    permissionItem,
    otherItem,
]);
export type CodeRunEvent = z.infer<typeof CodeRunEvent>;
|
||||
|
||||
/** Outcome of one coding turn: why it stopped and which session it ran in. */
const runPromptResultShape = {
    stopReason: z.string(),
    sessionId: z.string(),
};
export const RunPromptResult = z.object(runPromptResultShape);
export type RunPromptResult = z.infer<typeof RunPromptResult>;
|
||||
|
|
@ -19,6 +19,7 @@ import { ZListToolkitsResponse } from './composio.js';
|
|||
import { BrowserStateSchema } from './browser-control.js';
|
||||
import { BillingInfoSchema } from './billing.js';
|
||||
import { EmailBlockSchema, GmailThreadSchema } from './blocks.js';
|
||||
import { PermissionDecision } from './code-mode.js';
|
||||
|
||||
// ============================================================================
|
||||
// Runtime Validation Schemas (Single Source of Truth)
|
||||
|
|
@ -440,6 +441,16 @@ const ipcSchemas = {
|
|||
success: z.literal(true),
|
||||
}),
|
||||
},
|
||||
// Answers a mid-run permission request raised by a code_agent_run coding turn.
// `requestId` identifies the pending ask; `decision` is the user's choice.
'codeRun:resolvePermission': {
  req: z.object({ requestId: z.string(), decision: PermissionDecision }),
  res: z.object({ success: z.literal(true) }),
},
|
||||
'codeMode:checkAgentStatus': {
|
||||
req: z.null(),
|
||||
res: z.object({
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
import { LlmStepStreamEvent } from "./llm-step-events.js";
|
||||
import { Message, ToolCallPart } from "./message.js";
|
||||
import { CodeRunEvent as CodeRunEventSchema, PermissionAsk } from "./code-mode.js";
|
||||
import z from "zod";
|
||||
|
||||
const BaseRunEvent = z.object({
|
||||
|
|
@ -110,6 +111,23 @@ export const ToolPermissionResponseEvent = BaseRunEvent.extend({
|
|||
scope: z.enum(["once", "session", "always"]).optional(),
|
||||
});
|
||||
|
||||
/**
 * One structured item from a code_agent_run coding turn (tool call, diff, plan,
 * message chunk, resolved permission). Fire-and-forget — rendered live.
 */
const codeRunStreamFields = {
    type: z.literal("code-run-event"),
    toolCallId: z.string(),
    event: CodeRunEventSchema,
};
export const CodeRunStreamEvent = BaseRunEvent.extend(codeRunStreamFields);
|
||||
|
||||
/**
 * The coding agent is asking for permission mid-turn; the run is BLOCKED until the
 * user answers via `codeRun:resolvePermission` (keyed by `requestId`).
 */
const codeRunPermissionRequestFields = {
    type: z.literal("code-run-permission-request"),
    toolCallId: z.string(),
    requestId: z.string(),
    ask: PermissionAsk,
};
export const CodeRunPermissionRequestEvent = BaseRunEvent.extend(codeRunPermissionRequestFields);
|
||||
|
||||
export const RunErrorEvent = BaseRunEvent.extend({
|
||||
type: z.literal("error"),
|
||||
error: z.string(),
|
||||
|
|
@ -134,6 +152,8 @@ export const RunEvent = z.union([
|
|||
AskHumanResponseEvent,
|
||||
ToolPermissionRequestEvent,
|
||||
ToolPermissionResponseEvent,
|
||||
CodeRunStreamEvent,
|
||||
CodeRunPermissionRequestEvent,
|
||||
RunErrorEvent,
|
||||
RunStoppedEvent,
|
||||
]);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue