structured ask human and permissions refactor

2026-04-26 08:56:22 +02:00 · 2025-11-18 19:27:11 +05:30 · 2025-11-18 19:27:11 +05:30 · 7d4484e7c0
commit 7d4484e7c0
parent 28488d5fd1
5 changed files with 447 additions and 307 deletions
--- a/apps/cli/src/app.ts
+++ b/apps/cli/src/app.ts
@ -1,16 +1,13 @@
-import { loadAgent, RunLogger, streamAgentTurn } from "./application/lib/agent.js";
+import { AgentState, streamAgent } from "./application/lib/agent.js";
 import { StreamRenderer } from "./application/lib/stream-renderer.js";
 import { stdin as input, stdout as output } from "node:process";
 import fs from "fs";
 import path from "path";
 import { WorkDir } from "./application/config/config.js";
-import { RunEvent, RunStartEvent } from "./application/entities/run-events.js";
+import { RunEvent } from "./application/entities/run-events.js";
 import { createInterface, Interface } from "node:readline/promises";
-import { runIdGenerator } from "./application/lib/run-id-gen.js";
-import { Agent } from "./application/entities/agent.js";
-import { Message, MessageList, ToolMessage, UserMessage } from "./application/entities/message.js";
+import { ToolCallPart } from "./application/entities/message.js";
 import { z } from "zod";
-import { CopilotAgent } from "./application/assistant/agent.js";

 export async function app(opts: {
    agent: string;
@ -18,9 +15,8 @@ export async function app(opts: {
    input?: string;
    noInteractive?: boolean;
 }) {
-    let askHumanEventMarker: z.infer<typeof RunEvent> & { type: "pause-for-human-input" } | null = null;
-    const messages: z.infer<typeof MessageList> = [];
    const renderer = new StreamRenderer();
+    const state = new AgentState(opts.agent);

    // load existing and assemble state if required
    let runId = opts.runId;
@ -38,143 +34,107 @@ export async function app(opts: {
                }
                const parsed = JSON.parse(line);
                const event = RunEvent.parse(parsed);
-                switch (event.type) {
-                    case "message":
-                        messages.push(event.message);
-                        if (askHumanEventMarker
-                            && event.message.role === "tool"
-                            && event.message.toolCallId === askHumanEventMarker.toolCallId
-                        ) {
-                            askHumanEventMarker = null;
-                        }
-                        break;
-                    case "pause-for-human-input": {
-                        askHumanEventMarker = event;
-                        break;
-                    }
-                }
+                state.ingest(event);
            }
        } finally {
            stream?.close();
        }
    }

-    // create runId if not present
-    if (!runId) {
-        runId = runIdGenerator.next();
-    }
-    const logger = new RunLogger(runId);
-
-    // load agent data
-    let agent: z.infer<typeof Agent> | null = null;
-    if (opts.agent === "copilot") {
-        agent = CopilotAgent;
-    } else {
-        agent = await loadAgent(opts.agent);
-    }
-    if (!agent) {
-        throw new Error("unable to load agent");
-    }
-
-    // emit start event if first time run
-    if (!opts.runId) {
-        const ev = {
-            type: "start",
-            runId,
-            agent: agent.name,
-        } as z.infer<typeof RunStartEvent>;
-        logger.log(ev);
-        renderer.render(ev);
-    }
-
-    // loop between user and agent
-    // add user input from cli, if present
-    if (opts.input) {
-        handleUserInput(opts.input, messages, askHumanEventMarker, renderer, logger);
-    }
    let rl: Interface | null = null;
    if (!opts.noInteractive) {
        rl = createInterface({ input, output });
    }
-    let firstPass = true;
+
    try {
        while (true) {
-            let askInput = false;
-            if (firstPass) {
-                if (!opts.input) {
-                    askInput = true;
-                }
-                firstPass = false;
-            } else {
-                askInput = true;
+            // ask for pending tool permissions
+            for (const perm of Object.values(state.getPendingPermissions())) {
+                const response = await getToolCallPermission(perm.toolCall, rl!);
+                state.ingestAndLog({
+                    type: "tool-permission-response",
+                    response,
+                    toolCallId: perm.toolCall.toolCallId,
+                    subflow: perm.subflow,
+                });
            }
-            if (rl && askInput) {
-                const userInput = await rl.question("You: ");
-                if (["quit", "exit", "q"].includes(userInput.trim().toLowerCase())) {
-                    console.error("Bye!");
-                    return;
-                }
-                handleUserInput(userInput, messages, askHumanEventMarker, renderer, logger);
+
+            // ask for pending human input
+            for (const ask of Object.values(state.getPendingAskHumans())) {
+                const response = await getAskHumanResponse(ask.query, rl!);
+                state.ingestAndLog({
+                    type: "ask-human-response",
+                    response,
+                    toolCallId: ask.toolCallId,
+                    subflow: ask.subflow,
+                });
            }
-            for await (const event of streamAgentTurn({
-                agent,
-                messages,
-            })) {
-                logger.log(event);
+
+            // run one turn
+            for await (const event of streamAgent(state)) {
                renderer.render(event);
-                if (event.type === "pause-for-human-input") {
-                    askHumanEventMarker = event;
-                }
                if (event?.type === "error") {
                    process.exitCode = 1;
                }
            }

-            if (opts.noInteractive) {
-                break;
+            // if nothing pending, get user input
+            if (state.getPendingPermissions().length === 0 && state.getPendingAskHumans().length === 0) {
+                const response = await getUserInput(rl!);
+                state.ingestAndLog({
+                    type: "message",
+                    message: {
+                        role: "user",
+                        content: response,
+                    },
+                    subflow: [],
+                });
            }
        }
    } finally {
-        logger.close();
        rl?.close();
    }
 }

-function handleUserInput(
-    input: string,
-    messages: z.infer<typeof MessageList>,
-    askHumanEventMarker: z.infer<typeof RunEvent> & { type: "pause-for-human-input" } | null,
-    renderer: StreamRenderer,
-    logger: RunLogger,
-) {
-    // if waiting on human input, send as response
-    if (askHumanEventMarker) {
-        const message = {
-            role: "tool",
-            content: JSON.stringify({
-                userResponse: input,
-            }),
-            toolCallId: askHumanEventMarker.toolCallId,
-            toolName: "ask-human",
-        } as z.infer<typeof ToolMessage>;
-        messages.push(message);
-        const ev = {
-            type: "message",
-            message,
-        } as z.infer<typeof RunEvent>;
-        logger.log(ev);
-        renderer.render(ev);
-        askHumanEventMarker = null;
-    } else {
-        const message = {
-            role: "user",
-            content: input,
-        } as z.infer<typeof UserMessage>;
-        messages.push(message);
-        const ev = {
-            type: "message",
-            message,
-        } as z.infer<typeof RunEvent>;
-        logger.log(ev);
+async function getToolCallPermission(
+    call: z.infer<typeof ToolCallPart>,
+    rl: Interface,
+): Promise<"approve" | "deny"> {
+    const question = `Do you want to allow running the following tool: ${call.toolName}?:
+    
+    Tool name: ${call.toolName}
+    Tool arguments: ${JSON.stringify(call.arguments)}
+
+    Choices: y/n/a/d:
+    - y: approve
+    - n: deny
+    `;
+    const input = await rl.question(question);
+    if (input.toLowerCase() === "y") return "approve";
+    if (input.toLowerCase() === "n") return "deny";
+    return "deny";
+}
+
+async function getAskHumanResponse(
+    query: string,
+    rl: Interface,
+): Promise<string> {
+    const input = await rl.question(`The agent is asking for your help with the following query:
+    
+    Question: ${query}
+
+    Please respond to the question.
+    `);
+    return input;
+}
+
+async function getUserInput(
+    rl: Interface,
+): Promise<string> {
+    const input = await rl.question("You: ");
+    if (["quit", "exit", "q"].includes(input.toLowerCase().trim())) {
+        console.error("Bye!");
+        process.exit(0);
    }
+    return input;
 }