mirror of
https://github.com/rowboatlabs/rowboat.git
synced 2026-04-26 17:06:23 +02:00
structured ask human and permissions refactor
This commit is contained in:
parent
28488d5fd1
commit
7d4484e7c0
5 changed files with 447 additions and 307 deletions
|
|
@ -3,7 +3,6 @@ import fs from "fs";
|
|||
import path from "path";
|
||||
import { ModelConfig, WorkDir } from "../config/config.js";
|
||||
import { Agent, ToolAttachment } from "../entities/agent.js";
|
||||
import { createInterface, Interface } from "node:readline/promises";
|
||||
import { AssistantContentPart, AssistantMessage, Message, MessageList, ToolCallPart, ToolMessage, UserMessage } from "../entities/message.js";
|
||||
import { runIdGenerator } from "./run-id-gen.js";
|
||||
import { LanguageModel, stepCountIs, streamText, tool, Tool, ToolSet } from "ai";
|
||||
|
|
@ -11,8 +10,9 @@ import { z } from "zod";
|
|||
import { getProvider } from "./models.js";
|
||||
import { LlmStepStreamEvent } from "../entities/llm-step-events.js";
|
||||
import { execTool } from "./exec-tool.js";
|
||||
import { RunEvent } from "../entities/run-events.js";
|
||||
import { AskHumanRequestEvent, RunEvent, ToolPermissionRequestEvent, ToolPermissionResponseEvent } from "../entities/run-events.js";
|
||||
import { BuiltinTools } from "./builtin-tools.js";
|
||||
import { CopilotAgent } from "../assistant/agent.js";
|
||||
|
||||
export async function mapAgentTool(t: z.infer<typeof ToolAttachment>): Promise<Tool> {
|
||||
switch (t.type) {
|
||||
|
|
@ -75,7 +75,7 @@ export class RunLogger {
|
|||
}
|
||||
|
||||
log(event: z.infer<typeof RunEvent>) {
|
||||
if (event.type !== "stream-event") {
|
||||
if (event.type !== "llm-stream-event") {
|
||||
this.fileHandle.write(JSON.stringify(event) + "\n");
|
||||
}
|
||||
}
|
||||
|
|
@ -161,6 +161,9 @@ function normaliseAskHumanToolCall(message: z.infer<typeof AssistantMessage>) {
|
|||
}
|
||||
|
||||
export async function loadAgent(id: string): Promise<z.infer<typeof Agent>> {
|
||||
if (id === "copilot") {
|
||||
return CopilotAgent;
|
||||
}
|
||||
const agentPath = path.join(WorkDir, "agents", `${id}.json`);
|
||||
const agent = fs.readFileSync(agentPath, "utf8");
|
||||
return Agent.parse(JSON.parse(agent));
|
||||
|
|
@ -230,14 +233,7 @@ export function convertFromMessages(messages: z.infer<typeof Message>[]): ModelM
|
|||
return result;
|
||||
}
|
||||
|
||||
|
||||
export async function* streamAgentTurn(opts: {
|
||||
agent: z.infer<typeof Agent>;
|
||||
messages: z.infer<typeof MessageList>;
|
||||
}): AsyncGenerator<z.infer<typeof RunEvent>, void, unknown> {
|
||||
const { agent, messages } = opts;
|
||||
|
||||
// set up tools
|
||||
async function buildTools(agent: z.infer<typeof Agent>): Promise<ToolSet> {
|
||||
const tools: ToolSet = {};
|
||||
for (const [name, tool] of Object.entries(agent.tools ?? {})) {
|
||||
try {
|
||||
|
|
@ -247,105 +243,340 @@ export async function* streamAgentTurn(opts: {
|
|||
continue;
|
||||
}
|
||||
}
|
||||
return tools;
|
||||
}
|
||||
|
||||
// set up
|
||||
/**
 * In-memory state of a single agent run, built up by feeding it `RunEvent`s
 * through {@link AgentState.ingest}.
 *
 * The state tracks the message history, the tool calls seen in those
 * messages, which of them are still pending, outstanding permission and
 * ask-human requests, and one nested `AgentState` per spawned subflow
 * (keyed by the tool call id that spawned it).
 */
export class AgentState {
    logger: RunLogger | null = null;
    runId: string | null = null;
    agent: z.infer<typeof Agent> | null = null;
    agentName: string;
    messages: z.infer<typeof MessageList> = [];
    lastAssistantMsg: z.infer<typeof AssistantMessage> | null = null;
    // Nested states, keyed by the tool call id of the "spawn-subflow" event.
    subflowStates: Record<string, AgentState> = {};
    // Every tool-call part seen in an ingested message, keyed by tool call id.
    toolCallIdMap: Record<string, z.infer<typeof ToolCallPart>> = {};
    // Tool calls that have been requested but not yet answered by a tool message.
    pendingToolCalls: Record<string, true> = {};
    pendingToolPermissionRequests: Record<string, z.infer<typeof ToolPermissionRequestEvent>> = {};
    pendingAskHumanRequests: Record<string, z.infer<typeof AskHumanRequestEvent>> = {};
    allowedToolCallIds: Record<string, true> = {};
    deniedToolCallIds: Record<string, true> = {};

    /**
     * @param agentName Name of the agent this state belongs to.
     * @param runId Optional existing run id. When omitted (or empty), a new
     *   id is generated and a "start" event is written to the run log; when
     *   provided, no "start" event is logged.
     */
    constructor(agentName: string, runId?: string) {
        this.agentName = agentName;
        this.runId = runId || runIdGenerator.next();
        this.logger = new RunLogger(this.runId);
        if (!runId) {
            this.logger.log({
                type: "start",
                runId: this.runId,
                agentName: this.agentName,
                subflow: [],
            });
        }
    }

    /**
     * Collects every pending tool-permission request of this state and of all
     * nested subflow states.
     *
     * @returns Requests from subflows first (with the subflow's key prepended
     *   to their `subflow` path), followed by this state's own requests with
     *   an empty `subflow` path.
     */
    getPendingPermissions(): z.infer<typeof ToolPermissionRequestEvent>[] {
        const response: z.infer<typeof ToolPermissionRequestEvent>[] = [];
        for (const [id, subflowState] of Object.entries(this.subflowStates)) {
            for (const perm of subflowState.getPendingPermissions()) {
                response.push({
                    ...perm,
                    subflow: [id, ...perm.subflow],
                });
            }
        }
        for (const perm of Object.values(this.pendingToolPermissionRequests)) {
            response.push({
                ...perm,
                subflow: [],
            });
        }
        return response;
    }

    /**
     * Collects every pending ask-human request of this state and of all
     * nested subflow states.
     *
     * @returns Requests from subflows first (with the subflow's key prepended
     *   to their `subflow` path), followed by this state's own requests with
     *   an empty `subflow` path.
     */
    getPendingAskHumans(): z.infer<typeof AskHumanRequestEvent>[] {
        const response: z.infer<typeof AskHumanRequestEvent>[] = [];
        for (const [id, subflowState] of Object.entries(this.subflowStates)) {
            for (const ask of subflowState.getPendingAskHumans()) {
                response.push({
                    ...ask,
                    subflow: [id, ...ask.subflow],
                });
            }
        }
        for (const ask of Object.values(this.pendingAskHumanRequests)) {
            response.push({
                ...ask,
                subflow: [],
            });
        }
        return response;
    }

    /**
     * Returns the text of the most recent assistant message.
     *
     * @returns The message content if it is a plain string, otherwise the
     *   concatenation of all of its "text" parts; `''` when no assistant
     *   message has been ingested yet.
     */
    finalResponse(): string {
        if (!this.lastAssistantMsg) {
            return '';
        }
        if (typeof this.lastAssistantMsg.content === "string") {
            return this.lastAssistantMsg.content;
        }
        return this.lastAssistantMsg.content.reduce((acc, part) => {
            if (part.type === "text") {
                return acc + part.text;
            }
            return acc;
        }, "");
    }

    /**
     * Applies one event to this state.
     *
     * Events with a non-empty `subflow` path are forwarded to the nested
     * state named by the first path element, with that element stripped.
     * Event types without a case below leave the state unchanged.
     *
     * @param event The run event to apply.
     * @throws Error if the event targets a subflow that has not been spawned,
     *   or if an "ask-human-response" arrives for a tool call that has no
     *   pending ask-human request or no recorded tool call.
     */
    ingest(event: z.infer<typeof RunEvent>) {
        if (event.subflow.length > 0) {
            const { subflow, ...rest } = event;
            const [head, ...tail] = subflow;
            const target = this.subflowStates[head];
            if (!target) {
                // Fail with a descriptive error rather than an opaque TypeError.
                throw new Error(`Received event of type "${event.type}" for unknown subflow "${head}"`);
            }
            target.ingest({
                ...rest,
                subflow: tail,
            });
            return;
        }
        switch (event.type) {
            case "message":
                this.messages.push(event.message);
                if (Array.isArray(event.message.content)) {
                    for (const part of event.message.content) {
                        if (part.type === "tool-call") {
                            this.toolCallIdMap[part.toolCallId] = part;
                            this.pendingToolCalls[part.toolCallId] = true;
                        }
                    }
                }
                if (event.message.role === "tool") {
                    const message = event.message as z.infer<typeof ToolMessage>;
                    // A tool message answers its tool call, so it is no longer pending.
                    delete this.pendingToolCalls[message.toolCallId];
                }
                if (event.message.role === "assistant") {
                    this.lastAssistantMsg = event.message;
                }
                break;
            case "spawn-subflow":
                this.subflowStates[event.toolCallId] = new AgentState(event.agentName);
                break;
            case "tool-permission-request":
                this.pendingToolPermissionRequests[event.toolCall.toolCallId] = event;
                break;
            case "tool-permission-response":
                switch (event.response) {
                    case "approve":
                        this.allowedToolCallIds[event.toolCallId] = true;
                        break;
                    case "deny":
                        this.deniedToolCallIds[event.toolCallId] = true;
                        break;
                }
                delete this.pendingToolPermissionRequests[event.toolCallId];
                break;
            case "ask-human-request":
                this.pendingAskHumanRequests[event.toolCallId] = event;
                break;
            case "ask-human-response": {
                // Braces give the declarations below their own scope instead of
                // leaking them into the other case clauses.
                const request = this.pendingAskHumanRequests[event.toolCallId];
                if (!request) {
                    throw new Error(`No pending ask-human request for tool call "${event.toolCallId}"`);
                }
                const toolCall = this.toolCallIdMap[request.toolCallId];
                if (!toolCall) {
                    throw new Error(`No recorded tool call with id "${request.toolCallId}"`);
                }
                // The human's reply becomes the tool result for the ask-human call.
                this.messages.push({
                    role: "tool",
                    content: JSON.stringify({
                        userResponse: event.response,
                    }),
                    toolCallId: request.toolCallId,
                    toolName: toolCall.toolName,
                });
                delete this.pendingAskHumanRequests[request.toolCallId];
                // The call has now been answered by a tool message, so clear it
                // from the pending set, mirroring the "message" case above.
                delete this.pendingToolCalls[request.toolCallId];
                break;
            }
        }
    }

    /**
     * Applies the event to this state and then writes it to the run log.
     *
     * @param event The run event to apply and log.
     */
    ingestAndLog(event: z.infer<typeof RunEvent>) {
        this.ingest(event);
        // logger is assigned in the constructor; the field is only nullable by type.
        this.logger!.log(event);
    }

    /**
     * Applies and logs the event, then yields it unchanged so that a caller
     * can `yield*` this from its own generator.
     *
     * @param event The run event to apply, log and yield.
     */
    *ingestAndLogAndYield(event: z.infer<typeof RunEvent>): Generator<z.infer<typeof RunEvent>, void, unknown> {
        this.ingestAndLog(event);
        yield event;
    }
}
|
||||
|
||||
export async function* streamAgent(state: AgentState): AsyncGenerator<z.infer<typeof RunEvent>, void, unknown> {
|
||||
// set up agent
|
||||
const agent = await loadAgent(state.agentName);
|
||||
|
||||
// set up tools
|
||||
const tools = await buildTools(agent);
|
||||
|
||||
// set up provider + model
|
||||
const provider = getProvider(agent.provider);
|
||||
const model = provider(agent.model || ModelConfig.defaults.model);
|
||||
let loopCounter = 0;
|
||||
|
||||
// run one turn
|
||||
while (true) {
|
||||
// console.error(`loop counter: ${loopCounter++}`)
|
||||
// if the last message is an assistant response with no tool calls, there is nothing left to do, so exit
|
||||
const lastMessage = state.messages[state.messages.length - 1];
|
||||
if (lastMessage
|
||||
&& lastMessage.role === "assistant"
|
||||
&& (typeof lastMessage.content === "string"
|
||||
|| !lastMessage.content.some(part => part.type === "tool-call")
|
||||
)
|
||||
) {
|
||||
// console.error("Nothing to do, exiting (a.)")
|
||||
return;
|
||||
}
|
||||
|
||||
// execute any pending tool calls
|
||||
for (const toolCallId of Object.keys(state.pendingToolCalls)) {
|
||||
const toolCall = state.toolCallIdMap[toolCallId];
|
||||
|
||||
// if ask-human, skip
|
||||
if (toolCall.toolName === "ask-human") {
|
||||
continue;
|
||||
}
|
||||
|
||||
// if tool has been denied, deny
|
||||
if (state.deniedToolCallIds[toolCallId]) {
|
||||
yield* state.ingestAndLogAndYield({
|
||||
type: "message",
|
||||
message: {
|
||||
role: "tool",
|
||||
content: "Unable to execute this tool: Permission was denied.",
|
||||
toolCallId: toolCallId,
|
||||
toolName: toolCall.toolName,
|
||||
},
|
||||
subflow: [],
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
// if permission is still pending on this tool call, skip it until a response arrives
|
||||
if (state.pendingToolPermissionRequests[toolCallId]) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// execute approved tool
|
||||
yield* state.ingestAndLogAndYield({
|
||||
type: "tool-invocation",
|
||||
toolName: toolCall.toolName,
|
||||
input: JSON.stringify(toolCall.arguments),
|
||||
subflow: [],
|
||||
});
|
||||
let result: any = null;
|
||||
if (agent.tools![toolCall.toolName].type === "agent") {
|
||||
let subflowState = state.subflowStates[toolCallId];
|
||||
for await (const event of streamAgent(subflowState)) {
|
||||
yield* state.ingestAndLogAndYield({
|
||||
...event,
|
||||
subflow: [toolCallId, ...event.subflow],
|
||||
});
|
||||
}
|
||||
if (!subflowState.getPendingAskHumans().length && !subflowState.getPendingPermissions().length) {
|
||||
result = subflowState.finalResponse();
|
||||
}
|
||||
} else {
|
||||
result = await execTool(agent.tools![toolCall.toolName], toolCall.arguments);
|
||||
}
|
||||
if (result) {
|
||||
const resultMsg: z.infer<typeof ToolMessage> = {
|
||||
role: "tool",
|
||||
content: JSON.stringify(result),
|
||||
toolCallId: toolCall.toolCallId,
|
||||
toolName: toolCall.toolName,
|
||||
};
|
||||
yield* state.ingestAndLogAndYield({
|
||||
type: "tool-result",
|
||||
toolName: toolCall.toolName,
|
||||
result: result,
|
||||
subflow: [],
|
||||
});
|
||||
yield* state.ingestAndLogAndYield({
|
||||
type: "message",
|
||||
message: resultMsg,
|
||||
subflow: [],
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// if pending state, exit
|
||||
if (state.getPendingAskHumans().length || state.getPendingPermissions().length) {
|
||||
// console.error("pending asks or permissions, exiting (b.)")
|
||||
return;
|
||||
}
|
||||
|
||||
// if current message state isn't runnable, exit
|
||||
if (state.messages.length === 0 || state.messages[state.messages.length - 1].role === "assistant") {
|
||||
// console.error("current message state isn't runnable, exiting (c.)")
|
||||
return;
|
||||
}
|
||||
|
||||
// run one LLM turn.
|
||||
// stream agent response and build message
|
||||
const messageBuilder = new StreamStepMessageBuilder();
|
||||
for await (const event of streamLlm(
|
||||
model,
|
||||
messages,
|
||||
state.messages,
|
||||
agent.instructions,
|
||||
tools,
|
||||
)) {
|
||||
messageBuilder.ingest(event);
|
||||
yield {
|
||||
type: "stream-event",
|
||||
yield* state.ingestAndLogAndYield({
|
||||
type: "llm-stream-event",
|
||||
event: event,
|
||||
};
|
||||
}
|
||||
|
||||
// build and emit final message from agent response
|
||||
const msg = messageBuilder.get();
|
||||
normaliseAskHumanToolCall(msg);
|
||||
messages.push(msg);
|
||||
yield {
|
||||
type: "message",
|
||||
message: msg,
|
||||
};
|
||||
|
||||
// handle tool calls
|
||||
const mappedToolCalls: z.infer<typeof MappedToolCall>[] = [];
|
||||
let msgToolCallParts: z.infer<typeof ToolCallPart>[] = [];
|
||||
if (msg.content instanceof Array) {
|
||||
msgToolCallParts = msg.content.filter(part => part.type === "tool-call");
|
||||
}
|
||||
const hasToolCalls = msgToolCallParts.length > 0;
|
||||
|
||||
// validate and map tool calls
|
||||
for (const part of msgToolCallParts) {
|
||||
const agentTool = tools[part.toolName];
|
||||
if (!agentTool) {
|
||||
throw new Error(`Tool ${part.toolName} not found`);
|
||||
}
|
||||
mappedToolCalls.push({
|
||||
toolCall: part,
|
||||
agentTool: agent.tools![part.toolName],
|
||||
subflow: [],
|
||||
});
|
||||
}
|
||||
|
||||
// first, handle tool calls other than ask-human
|
||||
for (const call of mappedToolCalls) {
|
||||
if (call.toolCall.toolName === "ask-human") {
|
||||
continue;
|
||||
// build and emit final message from agent response
|
||||
const message = messageBuilder.get();
|
||||
yield* state.ingestAndLogAndYield({
|
||||
type: "message",
|
||||
message,
|
||||
subflow: [],
|
||||
});
|
||||
|
||||
// if there were any ask-human calls, emit those events
|
||||
if (message.content instanceof Array) {
|
||||
for (const part of message.content) {
|
||||
if (part.type === "tool-call") {
|
||||
const underlyingTool = agent.tools![part.toolName];
|
||||
if (underlyingTool.type === "builtin" && underlyingTool.name === "ask-human") {
|
||||
yield* state.ingestAndLogAndYield({
|
||||
type: "ask-human-request",
|
||||
toolCallId: part.toolCallId,
|
||||
query: part.arguments.question,
|
||||
subflow: [],
|
||||
});
|
||||
}
|
||||
if (underlyingTool.type === "builtin" && underlyingTool.name === "executeCommand") {
|
||||
yield *state.ingestAndLogAndYield({
|
||||
type: "tool-permission-request",
|
||||
toolCall: part,
|
||||
subflow: [],
|
||||
});
|
||||
}
|
||||
if (underlyingTool.type === "agent" && underlyingTool.name) {
|
||||
yield* state.ingestAndLogAndYield({
|
||||
type: "spawn-subflow",
|
||||
agentName: underlyingTool.name,
|
||||
toolCallId: part.toolCallId,
|
||||
subflow: [],
|
||||
});
|
||||
yield* state.ingestAndLogAndYield({
|
||||
type: "message",
|
||||
message: {
|
||||
role: "user",
|
||||
content: part.arguments.message,
|
||||
},
|
||||
subflow: [part.toolCallId],
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
const { agentTool, toolCall } = call;
|
||||
yield {
|
||||
type: "tool-invocation",
|
||||
toolName: toolCall.toolName,
|
||||
input: JSON.stringify(toolCall.arguments),
|
||||
};
|
||||
const result = await execTool(agentTool, toolCall.arguments);
|
||||
const resultMsg: z.infer<typeof ToolMessage> = {
|
||||
role: "tool",
|
||||
content: JSON.stringify(result),
|
||||
toolCallId: toolCall.toolCallId,
|
||||
toolName: toolCall.toolName,
|
||||
};
|
||||
messages.push(resultMsg);
|
||||
yield {
|
||||
type: "tool-result",
|
||||
toolName: toolCall.toolName,
|
||||
result: result,
|
||||
};
|
||||
yield {
|
||||
type: "message",
|
||||
message: resultMsg,
|
||||
};
|
||||
}
|
||||
|
||||
// then, handle ask-human (only first one)
|
||||
const askHumanCall = mappedToolCalls.filter(call => call.toolCall.toolName === "ask-human")[0];
|
||||
if (askHumanCall) {
|
||||
yield {
|
||||
type: "pause-for-human-input",
|
||||
toolCallId: askHumanCall.toolCall.toolCallId,
|
||||
question: askHumanCall.toolCall.arguments.question as string,
|
||||
};
|
||||
return;
|
||||
}
|
||||
|
||||
// if the agent response had tool calls, replay this agent
|
||||
if (hasToolCalls) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// otherwise, break
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue