From 6b60d324a321729aeb7e96228eb3599a4d314c46 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 12 May 2026 20:39:14 +0200 Subject: [PATCH] multi_agent_chat/main_agent: one specialist per task; advertise write_todos for multi-turn plans --- .../system_prompt/prompts/routing.md | 41 +++++++++++++++---- .../prompts/tools/task/description.md | 8 +--- .../middleware/shared/todos.py | 7 +++- .../multi_agent_chat/middleware/stack.py | 2 +- 4 files changed, 42 insertions(+), 16 deletions(-) diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/routing.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/routing.md index 8624e032b..1308c112c 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/routing.md +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/routing.md @@ -9,27 +9,36 @@ simulate one with the other. the workspace KB). - `scrape_webpage` — fetch the body of a specific public URL. - `update_memory` — curate persistent memory (see ``). +- `write_todos` — maintain a structured plan when the request spans + multiple specialists or steps across several turns. Mark each item + `in_progress` **before** the `task` call that handles it, `completed` + once the call returns. Skip for single-step requests. **You have NO filesystem tools.** Any read, write, edit, move, rename, or search inside the user's workspace goes through `task(knowledge_base, …)` — never via `write_file`, `ls`, or any direct file operation. ### 2. `task(, …)` — specialist subagents -Use `task` for anything beyond the four direct tools above. See +Use `task` for anything beyond the direct tools above. See `` for the live roster. Rules for `task`: -- **One `task` call per turn.** Bundle related work for the same specialist - into a single invocation — the parent graph can't coordinate human - approvals across parallel subagents. 
+- **One specialist per `task` call.** A single `task` invocation must + describe work that one specialist can do end-to-end. Never bundle work + for two specialists into one task prompt — the specialist you route to + will silently drop the other half. +- **One `task` call per turn** — the parent graph can't coordinate human approvals across parallel subagents. If the user's request spans multiple + specialists, handle them one at a time across consecutive turns: invoke + the first this turn, return, then invoke the next on your next turn (no + user input is required in between). Use `write_todos` to keep the plan alive + across those turns. +- Within a single specialist, bundle every related step into the same task + prompt (read + write + summary go together). - Put the **full instructions inside the task prompt** — the specialist cannot see this thread. - Don't claim to already know what a specialist's source contains; invoke the specialist and use what it returns. -Parallelism applies to **direct tool calls** (e.g. two `web_search` calls -for independent queries can go in parallel). It does **not** apply to `task`. - user: "Save these meeting notes to my KB: …" → task(knowledge_base, "Save the meeting notes below to a new document @@ -54,4 +63,22 @@ user: "Find my Q2 roadmap and summarise the milestones." and summarise its milestones. Use glob or grep if the path isn't obvious from the workspace tree.") + + +user: "Create a ClickUp ticket and a Linear ticket for the new feature flag." +→ This turn: + write_todos([ + {content: "Create ClickUp ticket for feature flag rollout", status: "in_progress"}, + {content: "Create Linear ticket for feature flag rollout", status: "pending"}, + ]) + task(clickup, "Create a ClickUp ticket titled 'Feature flag rollout' + in the default list. Description: <…>. 
Tell me the ticket URL.") +→ Next turn: + write_todos([ + {content: "Create ClickUp ticket for feature flag rollout", status: "completed"}, + {content: "Create Linear ticket for feature flag rollout", status: "in_progress"}, + ]) + task(linear, "Create a Linear ticket titled 'Feature flag rollout' + in the default team. Description: <…>. Tell me the ticket URL.") + diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/task/description.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/task/description.md index f559b1828..2f47d4df1 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/task/description.md +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/task/description.md @@ -11,9 +11,5 @@ thread, so include all context and constraints, plus what you need back. The specialist will respond in its own format — don't dictate one. - - Rules: - - One `task` call per turn. Bundle related work for the same specialist - into one invocation; the parent graph cannot coordinate human - approvals across parallel subagents. - - Don't claim to already know what a specialist's source contains; - invoke it and use what it returns. + - Routing rules (when to call, how often, how to scope) live in + ``. 
diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/todos.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/todos.py index ea9173a1d..dac149627 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/todos.py +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/todos.py @@ -5,5 +5,8 @@ from __future__ import annotations from langchain.agents.middleware import TodoListMiddleware -def build_todos_mw() -> TodoListMiddleware: - return TodoListMiddleware() +def build_todos_mw(*, system_prompt: str | None = None) -> TodoListMiddleware: + """Pass ``system_prompt=""`` to suppress the upstream prompt append. We use a custom system prompt in the main agent.""" + if system_prompt is None: + return TodoListMiddleware() + return TodoListMiddleware(system_prompt=system_prompt) diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/stack.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/stack.py index c597cceb9..cc52633fa 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/middleware/stack.py +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/stack.py @@ -132,7 +132,7 @@ def build_main_agent_deepagent_middleware( stack: list[Any] = [ build_busy_mutex_mw(flags), build_otel_mw(flags), - build_todos_mw(), + build_todos_mw(system_prompt=""), memory_mw, build_anonymous_doc_mw( filesystem_mode=filesystem_mode, anon_session_id=anon_session_id