diff --git a/apps/cli/src/app.ts b/apps/cli/src/app.ts index eb00e35d..7b1ba6fb 100644 --- a/apps/cli/src/app.ts +++ b/apps/cli/src/app.ts @@ -8,7 +8,7 @@ import { RunEvent, RunStartEvent } from "./application/entities/run-events.js"; import { createInterface, Interface } from "node:readline/promises"; import { runIdGenerator } from "./application/lib/run-id-gen.js"; import { Agent } from "./application/entities/agent.js"; -import { MessageList } from "./application/entities/message.js"; +import { Message, MessageList, ToolMessage, UserMessage } from "./application/entities/message.js"; import { z } from "zod"; import { CopilotAgent } from "./application/assistant/agent.js"; @@ -18,7 +18,7 @@ export async function app(opts: { input?: string; noInteractive?: boolean; }) { - let inputCount = 0; + let askHumanEventMarker: z.infer & { type: "pause-for-human-input" } | null = null; const messages: z.infer = []; const renderer = new StreamRenderer(); @@ -41,7 +41,17 @@ export async function app(opts: { switch (event.type) { case "message": messages.push(event.message); + if (askHumanEventMarker + && event.message.role === "tool" + && event.message.toolCallId === askHumanEventMarker.toolCallId + ) { + askHumanEventMarker = null; + } break; + case "pause-for-human-input": { + askHumanEventMarker = event; + break; + } } } } finally { @@ -49,15 +59,6 @@ export async function app(opts: { } } - // add user input - if (opts.input) { - messages.push({ - role: "user", - content: opts.input, - }); - inputCount++; - } - // create runId if not present if (!runId) { runId = runIdGenerator.next(); @@ -87,6 +88,10 @@ export async function app(opts: { } // loop between user and agent + // add user input from cli, if present + if (opts.input) { + handleUserInput(opts.input, messages, askHumanEventMarker, renderer, logger); + } let rl: Interface | null = null; if (!opts.noInteractive) { rl = createInterface({ input, output }); @@ -109,11 +114,7 @@ export async function app(opts: { console.error("Bye!"); return; } - inputCount++; - messages.push({ - role: "user", - content: userInput, - }); + handleUserInput(userInput, messages, askHumanEventMarker, renderer, logger); } for await (const event of streamAgentTurn({ agent, @@ -121,6 +122,9 @@ export async function app(opts: { })) { logger.log(event); renderer.render(event); + if (event.type === "pause-for-human-input") { + askHumanEventMarker = event; + } if (event?.type === "error") { process.exitCode = 1; } @@ -134,4 +138,43 @@ export async function app(opts: { logger.close(); rl?.close(); } +} + +function handleUserInput( + input: string, + messages: z.infer, + askHumanEventMarker: z.infer & { type: "pause-for-human-input" } | null, + renderer: StreamRenderer, + logger: RunLogger, +) { + // if waiting on human input, send as response + if (askHumanEventMarker) { + const message = { + role: "tool", + content: JSON.stringify({ + userResponse: input, + }), + toolCallId: askHumanEventMarker.toolCallId, + toolName: "ask-human", + } as z.infer; + messages.push(message); + const ev = { + type: "message", + message, + } as z.infer; + logger.log(ev); + renderer.render(ev); + askHumanEventMarker = null; + } else { + const message = { + role: "user", + content: input, + } as z.infer; + messages.push(message); + const ev = { + type: "message", + message, + } as z.infer; + logger.log(ev); + } } \ No newline at end of file diff --git a/apps/cli/src/application/entities/run-events.ts b/apps/cli/src/application/entities/run-events.ts index 1ce6a7a6..6784c845 100644 --- a/apps/cli/src/application/entities/run-events.ts +++ b/apps/cli/src/application/entities/run-events.ts @@ -50,6 +50,7 @@ export const RunEndEvent = BaseRunEvent.extend({ export const RunPauseEvent = BaseRunEvent.extend({ type: z.literal("pause-for-human-input"), toolCallId: z.string(), + question: z.string(), }); export const RunResumeEvent = BaseRunEvent.extend({ diff --git a/apps/cli/src/application/lib/agent.ts b/apps/cli/src/application/lib/agent.ts index 3661acaf..3ee99728 100644 --- a/apps/cli/src/application/lib/agent.ts +++ b/apps/cli/src/application/lib/agent.ts @@ -12,7 +12,6 @@ import { getProvider } from "./models.js"; import { LlmStepStreamEvent } from "../entities/llm-step-events.js"; import { execTool } from "./exec-tool.js"; import { RunEvent } from "../entities/run-events.js"; -import { CopilotAgent } from "../assistant/agent.js"; import { BuiltinTools } from "./builtin-tools.js"; export async function mapAgentTool(t: z.infer): Promise { @@ -36,6 +35,14 @@ export async function mapAgentTool(t: z.infer): Promise) { + if (typeof message.content === "string") { + return; + } + let askHumanToolCall: z.infer | null = null; + const newParts = []; + for (const part of message.content as z.infer[]) { + if (part.type === "tool-call" && part.toolName === "ask-human") { + if (!askHumanToolCall) { + askHumanToolCall = part; + } else { + (askHumanToolCall as z.infer).arguments += "\n" + part.arguments; + } + break; + } else { + newParts.push(part); + } + } + if (askHumanToolCall) { + newParts.push(askHumanToolCall); + } + message.content = newParts; +} + export async function loadAgent(id: string): Promise> { const agentPath = path.join(WorkDir, "agents", `${id}.json`); const agent = fs.readFileSync(agentPath, "utf8"); @@ -240,6 +271,7 @@ export async function* streamAgentTurn(opts: { // build and emit final message from agent response const msg = messageBuilder.get(); + normaliseAskHumanToolCall(msg); messages.push(msg); yield { type: "message", @@ -266,7 +298,11 @@ export async function* streamAgentTurn(opts: { }); } + // first, handle tool calls other than ask-human for (const call of mappedToolCalls) { + if (call.toolCall.toolName === "ask-human") { + continue; + } const { agentTool, toolCall } = call; yield { type: "tool-invocation", @@ -292,13 +328,24 @@ export async function* streamAgentTurn(opts: { }; } + // then, handle ask-human (only first one) + const askHumanCall = mappedToolCalls.filter(call => call.toolCall.toolName === "ask-human")[0]; + if (askHumanCall) { + yield { + type: "pause-for-human-input", + toolCallId: askHumanCall.toolCall.toolCallId, + question: askHumanCall.toolCall.arguments.question as string, + }; + return; + } + // if the agent response had tool calls, replay this agent if (hasToolCalls) { continue; } // otherwise, break - break; + return; } } @@ -314,12 +361,12 @@ async function* streamLlm( system: instructions, tools, stopWhen: stepCountIs(1), - providerOptions: { - openai: { - reasoningEffort: "low", - reasoningSummary: "auto", - }, - } + // providerOptions: { + // openai: { + // reasoningEffort: "low", + // reasoningSummary: "auto", + // }, + // } }); for await (const event of fullStream) { // console.log("\n\n\t>>>>\t\tstream event", JSON.stringify(event)); diff --git a/apps/cli/src/application/lib/exec-tool.ts b/apps/cli/src/application/lib/exec-tool.ts index 3c78e619..5ed146ce 100644 --- a/apps/cli/src/application/lib/exec-tool.ts +++ b/apps/cli/src/application/lib/exec-tool.ts @@ -67,6 +67,11 @@ async function execAgentTool(agentTool: z.infer & { type: if (event.type === "message" && event.message.role === "assistant") { lastMsg = event.message; } + if (event.type === "pause-for-human-input") { + return `I need more information from a human in order to continue. I should use the ask-human tool to ask the user for a response on the question below. Once the user comes back with an answer, call this tool again with the answer embedded in the original input that you used to call this tool the first time. + + Question: ${event.question}`; + } if (event.type === "error") { throw new Error(event.error); } diff --git a/apps/cli/src/application/lib/stream-renderer.ts b/apps/cli/src/application/lib/stream-renderer.ts index 341be492..136b0357 100644 --- a/apps/cli/src/application/lib/stream-renderer.ts +++ b/apps/cli/src/application/lib/stream-renderer.ts @@ -62,6 +62,10 @@ export class StreamRenderer { this.onError(event.error); break; } + case "pause-for-human-input": { + this.onPauseForHumanInput(event.toolCallId, event.question); + break; + } } } @@ -194,6 +198,13 @@ export class StreamRenderer { this.write("\n"); } + private onPauseForHumanInput(toolCallId: string, question: string) { + this.write(this.cyan(`\n→ Pause for human input (${toolCallId})`)); + this.write("\n"); + this.write(this.bold("Question: ") + question); + this.write("\n"); + } + private onUsage(usage: { inputTokens?: number; outputTokens?: number;