Merge branch 'cli' of github.com:rowboatlabs/rowboat into cli

This commit is contained in:
tusharmagar 2025-11-18 15:51:31 +05:30
commit cfaf160e89
6 changed files with 158 additions and 47 deletions

View file

@ -1,6 +1,6 @@
![ui](/assets/banner.png)
<h2 align="center">CLI Tool for Background Agents</h2>
<h2 align="center">RowboatX - CLI Tool for Background Agents</h2>
<h5 align="center">
<p align="center" style="display: flex; justify-content: center; gap: 20px; align-items: center;">
@ -40,50 +40,54 @@
- E.g. "Generate a NotebookLM-style podcast from my saved articles every morning"
- 🔧 **Connect any MCP server to add capabilities**
- Add MCP servers and RowboatX handles the integration
- 🎯 **Control agents with standard Unix commands**
- RowboatX uses bash to spawn, monitor, and manage all background agents
- 🎯 **Let RowboatX control and monitor your background agents**
- Easily inspect state on the filesystem
With full terminal access and filesystem-as-state architecture, RowboatX gives you complete control over powerful AI automations!
Inspired by Claude Code, RowboatX brings the same shell-native power to background automations.
## Quick start
1. Set your OpenAI key
1. Set your LLM API key. Supports OpenAI, Anthropic, Gemini, OpenRouter, LiteLLM, Ollama, and more.
```bash
export OPENAI_API_KEY=your-openai-api-key
```
2. Clone the repository and start Rowboat (requires Docker)
2. Install RowboatX
```bash
git clone git@github.com:rowboatlabs/rowboat.git
cd rowboat
./start.sh
npx @rowboatlabs/rowboatx
```
3. Access the app at [http://localhost:3000](http://localhost:3000).
To add tools, RAG, more LLMs, and triggers, check out the [Advanced](#advanced) section below.
## Demos
#### Meeting-prep assistant
Chat with the copilot to build a meeting-prep workflow, then add a calendar invite as a trigger. Watch the full demo [here](https://youtu.be/KZTP4xZM2DY).
[![meeting-prep](https://github.com/user-attachments/assets/27755ef5-6549-476f-b9c0-50bef8770384)](https://youtu.be/KZTP4xZM2DY)
#### Customer support assistant
Chat with the copilot to build a customer support assistant, then connect your MCP server, and data for RAG. Watch the full demo [here](https://youtu.be/Xfo-OfgOl8w).
[![output](https://github.com/user-attachments/assets/97485fd7-64c3-4d60-a627-f756a89dee64)](https://youtu.be/Xfo-OfgOl8w)
## Examples
### Add and Manage MCP servers
`$ rowboatx`
- Add MCP: 'Add this MCP server config: \<config\> '
- Explore tools: 'What tools are there in \<server-name\> '
#### Personal assistant
Chat with the copilot to build a personal assistant. Watch the full demo [here](https://youtu.be/6r7P4Vlcn2g).
[![personal-assistant](https://github.com/user-attachments/assets/0f1c0ffd-23ba-4b49-8bfb-ec7a846f1332)](https://youtu.be/6r7P4Vlcn2g)
### Create background agents
`$ rowboatx`
- 'Create agent to do X.'
- '... Attach the correct tools from \<mcp-server-name\> to the agent'
- '... Allow the agent to run shell commands including ffmpeg'
## Advanced
1. Native RAG Support: Enable file uploads and URL scraping with Rowboat's built-in RAG capabilities — see [RAG Guide](https://docs.rowboatlabs.com/docs/using-rowboat/rag).
### Schedule and monitor agents
`$ rowboatx`
- 'Make agent \<background-agent-name\> run every day at 10 AM'
- 'What agents do I have scheduled to run and at what times'
- 'When was \<background-agent-name\> last run'
- 'Are any agents waiting for my input or confirmation'
2. Custom LLM Providers: Use any LLM provider, including aggregators like OpenRouter and LiteLLM - see [Using more LLM providers](https://docs.rowboatlabs.com/docs/using-rowboat/customise/custom-llms).
### Run background agents manually
``` bash
rowboatx --agent=<agent-name> --input="xyz" --no-interactive=true
```
```bash
rowboatx --agent=<agent-name> <run_id> # resume from a previous run
```
## Rowboat Classic UI
3. Tools & Triggers: Add tools and event triggers (e.g., Gmail, Slack) for automation — see [Tools](https://docs.rowboatlabs.com/docs/using-rowboat/tools) & [Triggers](https://docs.rowboatlabs.com/docs/using-rowboat/triggers).
4. API & SDK: Integrate Rowboat agents directly into your app — see [API](https://docs.rowboatlabs.com/docs/api-sdk/using_the_api) & [SDK](https://docs.rowboatlabs.com/docs/api-sdk/using_the_sdk) docs.
##
Refer to [Docs](https://docs.rowboatlabs.com/) to learn how to start building agents with Rowboat.
To use Rowboat Classic UI (not RowboatX), refer to [Classic](https://docs.rowboatlabs.com/).

View file

@ -8,7 +8,7 @@ import { RunEvent, RunStartEvent } from "./application/entities/run-events.js";
import { createInterface, Interface } from "node:readline/promises";
import { runIdGenerator } from "./application/lib/run-id-gen.js";
import { Agent } from "./application/entities/agent.js";
import { MessageList } from "./application/entities/message.js";
import { Message, MessageList, ToolMessage, UserMessage } from "./application/entities/message.js";
import { z } from "zod";
import { CopilotAgent } from "./application/assistant/agent.js";
@ -18,7 +18,7 @@ export async function app(opts: {
input?: string;
noInteractive?: boolean;
}) {
let inputCount = 0;
let askHumanEventMarker: z.infer<typeof RunEvent> & { type: "pause-for-human-input" } | null = null;
const messages: z.infer<typeof MessageList> = [];
const renderer = new StreamRenderer();
@ -41,7 +41,17 @@ export async function app(opts: {
switch (event.type) {
case "message":
messages.push(event.message);
if (askHumanEventMarker
&& event.message.role === "tool"
&& event.message.toolCallId === askHumanEventMarker.toolCallId
) {
askHumanEventMarker = null;
}
break;
case "pause-for-human-input": {
askHumanEventMarker = event;
break;
}
}
}
} finally {
@ -49,15 +59,6 @@ export async function app(opts: {
}
}
// add user input
if (opts.input) {
messages.push({
role: "user",
content: opts.input,
});
inputCount++;
}
// create runId if not present
if (!runId) {
runId = runIdGenerator.next();
@ -87,6 +88,10 @@ export async function app(opts: {
}
// loop between user and agent
// add user input from cli, if present
if (opts.input) {
handleUserInput(opts.input, messages, askHumanEventMarker, renderer, logger);
}
let rl: Interface | null = null;
if (!opts.noInteractive) {
rl = createInterface({ input, output });
@ -109,11 +114,7 @@ export async function app(opts: {
console.error("Bye!");
return;
}
inputCount++;
messages.push({
role: "user",
content: userInput,
});
handleUserInput(userInput, messages, askHumanEventMarker, renderer, logger);
}
for await (const event of streamAgentTurn({
agent,
@ -121,6 +122,9 @@ export async function app(opts: {
})) {
logger.log(event);
renderer.render(event);
if (event.type === "pause-for-human-input") {
askHumanEventMarker = event;
}
if (event?.type === "error") {
process.exitCode = 1;
}
@ -134,4 +138,43 @@ export async function app(opts: {
logger.close();
rl?.close();
}
}
/**
 * Routes one line of user input into the conversation.
 *
 * If the run is paused on an `ask-human` tool call (a pause marker is
 * present), the input is wrapped as the tool response for that call and
 * both logged and rendered. Otherwise it is appended as a plain user
 * message and only logged (the terminal already echoes typed input).
 *
 * Fix: the original assigned `askHumanEventMarker = null` after
 * consuming the pause, but that only rebinds the local parameter — the
 * caller's variable was never cleared, so a follow-up input would be
 * mis-sent as another tool response with the same toolCallId. The
 * function now RETURNS the updated marker (null once consumed,
 * unchanged otherwise) so callers can write
 * `askHumanEventMarker = handleUserInput(...)`. Returning a value is
 * backward-compatible with callers that ignore it.
 *
 * @param input               raw text entered by the user
 * @param messages            conversation history, appended to in place
 * @param askHumanEventMarker pending pause-for-human-input event, or null
 * @param renderer            stream renderer used to echo tool responses
 * @param logger              run logger; every synthesized event is logged
 * @returns the new value the caller should store as its pause marker
 */
function handleUserInput(
    input: string,
    messages: z.infer<typeof MessageList>,
    askHumanEventMarker: z.infer<typeof RunEvent> & { type: "pause-for-human-input" } | null,
    renderer: StreamRenderer,
    logger: RunLogger,
): (z.infer<typeof RunEvent> & { type: "pause-for-human-input" }) | null {
    // if waiting on human input, send the text as the ask-human tool response
    if (askHumanEventMarker) {
        const message = {
            role: "tool",
            content: JSON.stringify({
                userResponse: input,
            }),
            toolCallId: askHumanEventMarker.toolCallId,
            toolName: "ask-human",
        } as z.infer<typeof ToolMessage>;
        messages.push(message);
        const ev = {
            type: "message",
            message,
        } as z.infer<typeof RunEvent>;
        logger.log(ev);
        renderer.render(ev);
        return null; // pause consumed
    }
    // otherwise append an ordinary user message (not rendered: terminal echoes it)
    const message = {
        role: "user",
        content: input,
    } as z.infer<typeof UserMessage>;
    messages.push(message);
    const ev = {
        type: "message",
        message,
    } as z.infer<typeof RunEvent>;
    logger.log(ev);
    return askHumanEventMarker; // still null on this path
}

View file

@ -50,6 +50,7 @@ export const RunEndEvent = BaseRunEvent.extend({
// Event emitted when a run pauses on an `ask-human` tool call and needs a
// human answer before it can resume. `toolCallId` ties the eventual tool
// response message back to the originating call; `question` is the text to
// show the human (rendered by StreamRenderer and surfaced by exec-tool).
export const RunPauseEvent = BaseRunEvent.extend({
type: z.literal("pause-for-human-input"),
toolCallId: z.string(),
question: z.string(),
});
export const RunResumeEvent = BaseRunEvent.extend({

View file

@ -12,7 +12,6 @@ import { getProvider } from "./models.js";
import { LlmStepStreamEvent } from "../entities/llm-step-events.js";
import { execTool } from "./exec-tool.js";
import { RunEvent } from "../entities/run-events.js";
import { CopilotAgent } from "../assistant/agent.js";
import { BuiltinTools } from "./builtin-tools.js";
export async function mapAgentTool(t: z.infer<typeof ToolAttachment>): Promise<Tool> {
@ -36,6 +35,14 @@ export async function mapAgentTool(t: z.infer<typeof ToolAttachment>): Promise<T
}),
});
case "builtin":
if (t.name === "ask-human") {
return tool({
description: "Ask a human before proceeding",
inputSchema: z.object({
question: z.string().describe("The question to ask the human"),
}),
});
}
const match = BuiltinTools[t.name];
if (!match) {
throw new Error(`Unknown builtin tool: ${t.name}`);
@ -129,6 +136,30 @@ export class StreamStepMessageBuilder {
}
}
/**
 * Normalises `ask-human` tool calls on an assistant message: all
 * `ask-human` tool-call parts are merged into a single part (arguments
 * joined with newlines) which is moved to the END of the content list,
 * so the pause is processed after every other part. All other parts keep
 * their relative order. No-op when the content is a plain string.
 *
 * Fix: the original loop `break`-ed as soon as the first `ask-human`
 * part was seen, which (a) made the merge branch for duplicate calls
 * unreachable and (b) silently DROPPED every content part that followed
 * the first ask-human call — including other tool calls the agent loop
 * would otherwise execute. The loop now scans the whole array.
 */
function normaliseAskHumanToolCall(message: z.infer<typeof AssistantMessage>) {
    if (typeof message.content === "string") {
        return;
    }
    let askHumanToolCall: z.infer<typeof ToolCallPart> | null = null;
    const newParts = [];
    for (const part of message.content as z.infer<typeof AssistantContentPart>[]) {
        if (part.type === "tool-call" && part.toolName === "ask-human") {
            if (!askHumanToolCall) {
                askHumanToolCall = part;
            } else {
                // merge duplicate ask-human calls into one question blob
                (askHumanToolCall as z.infer<typeof ToolCallPart>).arguments += "\n" + part.arguments;
            }
        } else {
            newParts.push(part);
        }
    }
    if (askHumanToolCall) {
        newParts.push(askHumanToolCall);
    }
    message.content = newParts;
}
export async function loadAgent(id: string): Promise<z.infer<typeof Agent>> {
const agentPath = path.join(WorkDir, "agents", `${id}.json`);
const agent = fs.readFileSync(agentPath, "utf8");
@ -240,6 +271,7 @@ export async function* streamAgentTurn(opts: {
// build and emit final message from agent response
const msg = messageBuilder.get();
normaliseAskHumanToolCall(msg);
messages.push(msg);
yield {
type: "message",
@ -266,7 +298,11 @@ export async function* streamAgentTurn(opts: {
});
}
// first, handle tool calls other than ask-human
for (const call of mappedToolCalls) {
if (call.toolCall.toolName === "ask-human") {
continue;
}
const { agentTool, toolCall } = call;
yield {
type: "tool-invocation",
@ -292,13 +328,24 @@ export async function* streamAgentTurn(opts: {
};
}
// then, handle ask-human (only first one)
const askHumanCall = mappedToolCalls.filter(call => call.toolCall.toolName === "ask-human")[0];
if (askHumanCall) {
yield {
type: "pause-for-human-input",
toolCallId: askHumanCall.toolCall.toolCallId,
question: askHumanCall.toolCall.arguments.question as string,
};
return;
}
// if the agent response had tool calls, replay this agent
if (hasToolCalls) {
continue;
}
// otherwise, break
break;
return;
}
}

View file

@ -67,6 +67,11 @@ async function execAgentTool(agentTool: z.infer<typeof ToolAttachment> & { type:
if (event.type === "message" && event.message.role === "assistant") {
lastMsg = event.message;
}
if (event.type === "pause-for-human-input") {
return `I need more information from a human in order to continue. I should use the ask-human tool to ask the user for a response on the question below. Once the user comes back with an answer, call this tool again with the answer embedded in the original input that you used to call this tool the first time.
Question: ${event.question}`;
}
if (event.type === "error") {
throw new Error(event.error);
}

View file

@ -63,6 +63,10 @@ export class StreamRenderer {
this.onError(event.error);
break;
}
case "pause-for-human-input": {
this.onPauseForHumanInput(event.toolCallId, event.question);
break;
}
}
}
@ -225,6 +229,13 @@ export class StreamRenderer {
this.write(this.dim("└─────────────\n"));
}
// Renders a pause-for-human-input event: a cyan header tagged with the
// tool-call id, then the question on its own line with a bold label.
private onPauseForHumanInput(toolCallId: string, question: string) {
    const header = this.cyan("\n→ Pause for human input (" + toolCallId + ")");
    const prompt = this.bold("Question: ") + question;
    for (const chunk of [header, "\n", prompt, "\n"]) {
        this.write(chunk);
    }
}
private onUsage(usage: {
inputTokens?: number;
outputTokens?: number;