structured ask human and permissions refactor

2026-04-26 17:06:23 +02:00 · 2025-11-18 19:27:11 +05:30 · 2025-11-18 19:27:11 +05:30 · 7d4484e7c0
commit 7d4484e7c0
parent 28488d5fd1
5 changed files with 447 additions and 307 deletions
--- a/apps/cli/src/application/lib/agent.ts
+++ b/apps/cli/src/application/lib/agent.ts
@ -3,7 +3,6 @@ import fs from "fs";
 import path from "path";
 import { ModelConfig, WorkDir } from "../config/config.js";
 import { Agent, ToolAttachment } from "../entities/agent.js";
-import { createInterface, Interface } from "node:readline/promises";
 import { AssistantContentPart, AssistantMessage, Message, MessageList, ToolCallPart, ToolMessage, UserMessage } from "../entities/message.js";
 import { runIdGenerator } from "./run-id-gen.js";
 import { LanguageModel, stepCountIs, streamText, tool, Tool, ToolSet } from "ai";
@ -11,8 +10,9 @@ import { z } from "zod";
 import { getProvider } from "./models.js";
 import { LlmStepStreamEvent } from "../entities/llm-step-events.js";
 import { execTool } from "./exec-tool.js";
-import { RunEvent } from "../entities/run-events.js";
+import { AskHumanRequestEvent, RunEvent, ToolPermissionRequestEvent, ToolPermissionResponseEvent } from "../entities/run-events.js";
 import { BuiltinTools } from "./builtin-tools.js";
+import { CopilotAgent } from "../assistant/agent.js";

 export async function mapAgentTool(t: z.infer<typeof ToolAttachment>): Promise<Tool> {
    switch (t.type) {
@ -75,7 +75,7 @@ export class RunLogger {
    }

    log(event: z.infer<typeof RunEvent>) {
-        if (event.type !== "stream-event") {
-            this.fileHandle.write(JSON.stringify(event) + "\n");
-        }
+        // LLM stream events are the one event type that is never written to the log file
+        if (event.type === "llm-stream-event") {
+            return;
+        }
+        // every other event becomes one JSON document per line
+        const line = JSON.stringify(event) + "\n";
+        this.fileHandle.write(line);
    }
@ -161,6 +161,9 @@ function normaliseAskHumanToolCall(message: z.infer<typeof AssistantMessage>) {
 }

 export async function loadAgent(id: string): Promise<z.infer<typeof Agent>> {
+    if (id === "copilot") {
+        return CopilotAgent;
+    }
    const agentPath = path.join(WorkDir, "agents", `${id}.json`);
    const agent = fs.readFileSync(agentPath, "utf8");
    return Agent.parse(JSON.parse(agent));
@ -230,14 +233,7 @@ export function convertFromMessages(messages: z.infer<typeof Message>[]): ModelM
    return result;
 }

-
-export async function* streamAgentTurn(opts: {
-    agent: z.infer<typeof Agent>;
-    messages: z.infer<typeof MessageList>;
-}): AsyncGenerator<z.infer<typeof RunEvent>, void, unknown> {
-    const { agent, messages } = opts;
-
-    // set up tools
+async function buildTools(agent: z.infer<typeof Agent>): Promise<ToolSet> {
    const tools: ToolSet = {};
    for (const [name, tool] of Object.entries(agent.tools ?? {})) {
        try {
@ -247,105 +243,340 @@ export async function* streamAgentTurn(opts: {
            continue;
        }
    }
+    return tools;
+}

-    // set up
+/**
+ * In-memory state of a single agent run, built up by feeding it run events.
+ *
+ * Every event passed to `ingest` updates the message history and the
+ * bookkeeping for outstanding tool calls, tool-permission requests and
+ * ask-human requests. Sub-agent runs are tracked as nested `AgentState`
+ * instances keyed by the id of the tool call that spawned them.
+ */
+export class AgentState {
+    logger: RunLogger | null = null;
+    runId: string | null = null;
+    agent: z.infer<typeof Agent> | null = null;
+    agentName: string;
+    /** Messages of this agent only; subflow events are routed to the nested state instead. */
+    messages: z.infer<typeof MessageList> = [];
+    lastAssistantMsg: z.infer<typeof AssistantMessage> | null = null;
+    /** Nested sub-agent states, keyed by the spawning tool call id. */
+    subflowStates: Record<string, AgentState> = {};
+    /** Every tool call part seen in a message, keyed by tool call id. */
+    toolCallIdMap: Record<string, z.infer<typeof ToolCallPart>> = {};
+    /** Tool calls that have been requested but not yet answered by a tool message. */
+    pendingToolCalls: Record<string, true> = {};
+    pendingToolPermissionRequests: Record<string, z.infer<typeof ToolPermissionRequestEvent>> = {};
+    pendingAskHumanRequests: Record<string, z.infer<typeof AskHumanRequestEvent>> = {};
+    allowedToolCallIds: Record<string, true> = {};
+    deniedToolCallIds: Record<string, true> = {};
+
+    /**
+     * @param agentName name of the agent this state belongs to
+     * @param runId id of an existing run; when omitted a new id is generated
+     *   and a `start` event is written to the run log
+     */
+    constructor(agentName: string, runId?: string) {
+        this.agentName = agentName;
+        this.runId = runId || runIdGenerator.next();
+        this.logger = new RunLogger(this.runId);
+        if (!runId) {
+            this.logger.log({
+                type: "start",
+                runId: this.runId,
+                agentName: this.agentName,
+                subflow: [],
+            });
+        }
+    }
+
+    /**
+     * Collects the unanswered tool-permission requests of this agent and of
+     * all nested subflows.
+     *
+     * @returns the requests, each with `subflow` set to its path relative to
+     *   this state (empty for this agent's own requests)
+     */
+    getPendingPermissions(): z.infer<typeof ToolPermissionRequestEvent>[] {
+        const pending: z.infer<typeof ToolPermissionRequestEvent>[] = [];
+        for (const [subflowId, subflowState] of Object.entries(this.subflowStates)) {
+            for (const request of subflowState.getPendingPermissions()) {
+                pending.push({
+                    ...request,
+                    subflow: [subflowId, ...request.subflow],
+                });
+            }
+        }
+        for (const request of Object.values(this.pendingToolPermissionRequests)) {
+            pending.push({
+                ...request,
+                subflow: [],
+            });
+        }
+        return pending;
+    }
+
+    /**
+     * Collects the unanswered ask-human requests of this agent and of all
+     * nested subflows.
+     *
+     * @returns the requests, each with `subflow` set to its path relative to
+     *   this state (empty for this agent's own requests)
+     */
+    getPendingAskHumans(): z.infer<typeof AskHumanRequestEvent>[] {
+        const pending: z.infer<typeof AskHumanRequestEvent>[] = [];
+        for (const [subflowId, subflowState] of Object.entries(this.subflowStates)) {
+            for (const request of subflowState.getPendingAskHumans()) {
+                pending.push({
+                    ...request,
+                    subflow: [subflowId, ...request.subflow],
+                });
+            }
+        }
+        for (const request of Object.values(this.pendingAskHumanRequests)) {
+            pending.push({
+                ...request,
+                subflow: [],
+            });
+        }
+        return pending;
+    }
+
+    /**
+     * @returns the text of the most recent assistant message: the content
+     *   itself when it is a string, otherwise the concatenation of its text
+     *   parts; an empty string when no assistant message has been ingested
+     */
+    finalResponse(): string {
+        if (!this.lastAssistantMsg) {
+            return '';
+        }
+        if (typeof this.lastAssistantMsg.content === "string") {
+            return this.lastAssistantMsg.content;
+        }
+        return this.lastAssistantMsg.content.reduce((acc, part) => {
+            if (part.type === "text") {
+                return acc + part.text;
+            }
+            return acc;
+        }, "");
+    }
+
+    /**
+     * Applies a run event to this state.
+     *
+     * Events with a non-empty `subflow` path are forwarded to the nested
+     * state named by the first path element, with that element stripped.
+     *
+     * @throws Error when the event addresses a subflow that was never
+     *   spawned, or answers an ask-human request that is not pending
+     */
+    ingest(event: z.infer<typeof RunEvent>) {
+        if (event.subflow.length > 0) {
+            const [subflowId, ...remainingPath] = event.subflow;
+            const subflowState = this.subflowStates[subflowId];
+            if (!subflowState) {
+                throw new Error(`Cannot ingest "${event.type}" event: unknown subflow ${subflowId}`);
+            }
+            subflowState.ingest({
+                ...event,
+                subflow: remainingPath,
+            });
+            return;
+        }
+        switch (event.type) {
+            case "message":
+                this.messages.push(event.message);
+                if (Array.isArray(event.message.content)) {
+                    for (const part of event.message.content) {
+                        if (part.type === "tool-call") {
+                            this.toolCallIdMap[part.toolCallId] = part;
+                            this.pendingToolCalls[part.toolCallId] = true;
+                        }
+                    }
+                }
+                if (event.message.role === "tool") {
+                    // a tool message answers its tool call
+                    const message = event.message as z.infer<typeof ToolMessage>;
+                    delete this.pendingToolCalls[message.toolCallId];
+                }
+                if (event.message.role === "assistant") {
+                    this.lastAssistantMsg = event.message;
+                }
+                break;
+            case "spawn-subflow":
+                // constructed without a runId, so the child generates its own
+                // run id and logs its own "start" event
+                this.subflowStates[event.toolCallId] = new AgentState(event.agentName);
+                break;
+            case "tool-permission-request":
+                this.pendingToolPermissionRequests[event.toolCall.toolCallId] = event;
+                break;
+            case "tool-permission-response":
+                switch (event.response) {
+                    case "approve":
+                        this.allowedToolCallIds[event.toolCallId] = true;
+                        break;
+                    case "deny":
+                        this.deniedToolCallIds[event.toolCallId] = true;
+                        break;
+                }
+                delete this.pendingToolPermissionRequests[event.toolCallId];
+                break;
+            case "ask-human-request":
+                this.pendingAskHumanRequests[event.toolCallId] = event;
+                break;
+            case "ask-human-response": {
+                const request = this.pendingAskHumanRequests[event.toolCallId];
+                const toolCall = this.toolCallIdMap[event.toolCallId];
+                if (!request || !toolCall) {
+                    throw new Error(`Cannot ingest ask-human response: no pending request for tool call ${event.toolCallId}`);
+                }
+                // the human's answer becomes the tool result of the ask-human call
+                this.messages.push({
+                    role: "tool",
+                    content: JSON.stringify({
+                        userResponse: event.response,
+                    }),
+                    toolCallId: request.toolCallId,
+                    toolName: toolCall.toolName,
+                });
+                delete this.pendingAskHumanRequests[request.toolCallId];
+                // the call is answered now, same as when a tool message is ingested
+                delete this.pendingToolCalls[request.toolCallId];
+                break;
+            }
+        }
+    }
+
+    /** Applies the event to this state, then writes it to the run log. */
+    ingestAndLog(event: z.infer<typeof RunEvent>) {
+        this.ingest(event);
+        this.logger!.log(event);
+    }
+
+    /** Applies and logs the event, then yields it so generators can `yield*` it onward. */
+    *ingestAndLogAndYield(event: z.infer<typeof RunEvent>): Generator<z.infer<typeof RunEvent>, void, unknown> {
+        this.ingestAndLog(event);
+        yield event;
+    }
+}
+
+/**
+ * Drives an agent run forward from `state` until it finishes or has to wait
+ * for a human, yielding every run event it produces.
+ *
+ * Each loop iteration:
+ *  1. returns if the last message is an assistant reply without tool calls;
+ *  2. works through the pending tool calls (ask-human calls and calls still
+ *     awaiting permission are left alone, denied calls get a denial message,
+ *     agent tools are run recursively as subflows);
+ *  3. returns if any ask-human or permission request is outstanding, or if
+ *     there is nothing for the model to respond to;
+ *  4. runs one LLM step and emits the follow-up requests implied by the tool
+ *     calls in its reply (ask-human, permission, spawn-subflow).
+ *
+ * @param state run state to advance; it is mutated as events are ingested
+ * @throws Error when the model calls a tool the agent does not define, or an
+ *   agent tool call has no subflow state
+ */
+export async function* streamAgent(state: AgentState): AsyncGenerator<z.infer<typeof RunEvent>, void, unknown> {
+    // set up agent
+    const agent = await loadAgent(state.agentName);
+
+    // set up tools
+    const tools = await buildTools(agent);
+
+    // set up provider + model
    const provider = getProvider(agent.provider);
    const model = provider(agent.model || ModelConfig.defaults.model);

-    // run one turn
    while (true) {
+        // if the last message is an assistant reply without tool calls, the run is done
+        const lastMessage = state.messages[state.messages.length - 1];
+        if (lastMessage
+            && lastMessage.role === "assistant"
+            && (typeof lastMessage.content === "string"
+                || !lastMessage.content.some(part => part.type === "tool-call")
+            )
+        ) {
+            return;
+        }
+
+        // execute any pending tool calls
+        for (const toolCallId of Object.keys(state.pendingToolCalls)) {
+            const toolCall = state.toolCallIdMap[toolCallId];
+
+            // if ask-human, skip
+            if (toolCall.toolName === "ask-human") {
+                continue;
+            }
+
+            // if tool has been denied, report the denial as the tool result
+            if (state.deniedToolCallIds[toolCallId]) {
+                yield* state.ingestAndLogAndYield({
+                    type: "message",
+                    message: {
+                        role: "tool",
+                        content: "Unable to execute this tool: Permission was denied.",
+                        toolCallId: toolCallId,
+                        toolName: toolCall.toolName,
+                    },
+                    subflow: [],
+                });
+                continue;
+            }
+
+            // if permission is still pending on this tool call, do not execute it yet
+            if (state.pendingToolPermissionRequests[toolCallId]) {
+                continue;
+            }
+
+            const agentTool = agent.tools?.[toolCall.toolName];
+            if (!agentTool) {
+                throw new Error(`Tool ${toolCall.toolName} not found`);
+            }
+
+            // execute approved tool
+            yield* state.ingestAndLogAndYield({
+                type: "tool-invocation",
+                toolName: toolCall.toolName,
+                input: JSON.stringify(toolCall.arguments),
+                subflow: [],
+            });
+            let result: any = null;
+            // stays true unless a sub-agent stopped to wait for a human; tracked
+            // separately so that falsy results ("", 0, false, null) are still reported
+            let finished = true;
+            if (agentTool.type === "agent") {
+                const subflowState = state.subflowStates[toolCallId];
+                if (!subflowState) {
+                    throw new Error(`No subflow state found for tool call ${toolCallId}`);
+                }
+                for await (const event of streamAgent(subflowState)) {
+                    // the nested run has already ingested this event into subflowState;
+                    // ingesting it here would route it to that same state a second time,
+                    // so only log and forward it with the subflow path extended
+                    const forwarded: z.infer<typeof RunEvent> = {
+                        ...event,
+                        subflow: [toolCallId, ...event.subflow],
+                    };
+                    state.logger?.log(forwarded);
+                    yield forwarded;
+                }
+                if (subflowState.getPendingAskHumans().length || subflowState.getPendingPermissions().length) {
+                    finished = false;
+                } else {
+                    result = subflowState.finalResponse();
+                }
+            } else {
+                result = await execTool(agentTool, toolCall.arguments);
+            }
+            if (finished) {
+                const resultMsg: z.infer<typeof ToolMessage> = {
+                    role: "tool",
+                    // JSON.stringify(undefined) is not a string, so fall back to null
+                    content: JSON.stringify(result ?? null),
+                    toolCallId: toolCall.toolCallId,
+                    toolName: toolCall.toolName,
+                };
+                yield* state.ingestAndLogAndYield({
+                    type: "tool-result",
+                    toolName: toolCall.toolName,
+                    result: result,
+                    subflow: [],
+                });
+                yield* state.ingestAndLogAndYield({
+                    type: "message",
+                    message: resultMsg,
+                    subflow: [],
+                });
+            }
+        }
+
+        // if pending state, exit
+        if (state.getPendingAskHumans().length || state.getPendingPermissions().length) {
+            return;
+        }
+
+        // if current message state isn't runnable, exit
+        if (state.messages.length === 0 || state.messages[state.messages.length - 1].role === "assistant") {
+            return;
+        }
+
+        // run one LLM turn.
        // stream agent response and build message
        const messageBuilder = new StreamStepMessageBuilder();
        for await (const event of streamLlm(
            model,
-            messages,
+            state.messages,
            agent.instructions,
            tools,
        )) {
            messageBuilder.ingest(event);
-            yield {
-                type: "stream-event",
+            yield* state.ingestAndLogAndYield({
+                type: "llm-stream-event",
                event: event,
-            };
-        }
-
-        // build and emit final message from agent response
-        const msg = messageBuilder.get();
-        normaliseAskHumanToolCall(msg);
-        messages.push(msg);
-        yield {
-            type: "message",
-            message: msg,
-        };
-
-        // handle tool calls
-        const mappedToolCalls: z.infer<typeof MappedToolCall>[] = [];
-        let msgToolCallParts: z.infer<typeof ToolCallPart>[] = [];
-        if (msg.content instanceof Array) {
-            msgToolCallParts = msg.content.filter(part => part.type === "tool-call");
-        }
-        const hasToolCalls = msgToolCallParts.length > 0;
-
-        // validate and map tool calls
-        for (const part of msgToolCallParts) {
-            const agentTool = tools[part.toolName];
-            if (!agentTool) {
-                throw new Error(`Tool ${part.toolName} not found`);
-            }
-            mappedToolCalls.push({
-                toolCall: part,
-                agentTool: agent.tools![part.toolName],
+                subflow: [],
            });
        }

-        // first, handle tool calls other than ask-human
-        for (const call of mappedToolCalls) {
-            if (call.toolCall.toolName === "ask-human") {
-                continue;
+        // build and emit final message from agent response
+        const message = messageBuilder.get();
+        yield* state.ingestAndLogAndYield({
+            type: "message",
+            message,
+            subflow: [],
+        });
+
+        // emit the follow-up events implied by the tool calls in the reply:
+        // ask-human requests, permission requests and subflow spawns
+        if (Array.isArray(message.content)) {
+            for (const part of message.content) {
+                if (part.type === "tool-call") {
+                    const underlyingTool = agent.tools?.[part.toolName];
+                    if (!underlyingTool) {
+                        throw new Error(`Tool ${part.toolName} not found`);
+                    }
+                    if (underlyingTool.type === "builtin" && underlyingTool.name === "ask-human") {
+                        yield* state.ingestAndLogAndYield({
+                            type: "ask-human-request",
+                            toolCallId: part.toolCallId,
+                            query: part.arguments.question,
+                            subflow: [],
+                        });
+                    }
+                    // executeCommand is the one builtin that needs explicit permission
+                    if (underlyingTool.type === "builtin" && underlyingTool.name === "executeCommand") {
+                        yield* state.ingestAndLogAndYield({
+                            type: "tool-permission-request",
+                            toolCall: part,
+                            subflow: [],
+                        });
+                    }
+                    if (underlyingTool.type === "agent" && underlyingTool.name) {
+                        yield* state.ingestAndLogAndYield({
+                            type: "spawn-subflow",
+                            agentName: underlyingTool.name,
+                            toolCallId: part.toolCallId,
+                            subflow: [],
+                        });
+                        // seed the new subflow with the message the caller passed to the tool
+                        yield* state.ingestAndLogAndYield({
+                            type: "message",
+                            message: {
+                                role: "user",
+                                content: part.arguments.message,
+                            },
+                            subflow: [part.toolCallId],
+                        });
+                    }
+                }
            }
-            const { agentTool, toolCall } = call;
-            yield {
-                type: "tool-invocation",
-                toolName: toolCall.toolName,
-                input: JSON.stringify(toolCall.arguments),
-            };
-            const result = await execTool(agentTool, toolCall.arguments);
-            const resultMsg: z.infer<typeof ToolMessage> = {
-                role: "tool",
-                content: JSON.stringify(result),
-                toolCallId: toolCall.toolCallId,
-                toolName: toolCall.toolName,
-            };
-            messages.push(resultMsg);
-            yield {
-                type: "tool-result",
-                toolName: toolCall.toolName,
-                result: result,
-            };
-            yield {
-                type: "message",
-                message: resultMsg,
-            };
        }
-
-        // then, handle ask-human (only first one)
-        const askHumanCall = mappedToolCalls.filter(call => call.toolCall.toolName === "ask-human")[0];
-        if (askHumanCall) {
-            yield {
-                type: "pause-for-human-input",
-                toolCallId: askHumanCall.toolCall.toolCallId,
-                question: askHumanCall.toolCall.arguments.question as string,
-            };
-            return;
-        }
-
-        // if the agent response had tool calls, replay this agent
-        if (hasToolCalls) {
-            continue;
-        }
-
-        // otherwise, break
-        return;
    }
 }