diff --git a/surfsense_backend/app/agents/multi_agent_chat/supervisor/graph.py b/surfsense_backend/app/agents/multi_agent_chat/supervisor/graph.py
index 41a314e90..d03e9560a 100644
--- a/surfsense_backend/app/agents/multi_agent_chat/supervisor/graph.py
+++ b/surfsense_backend/app/agents/multi_agent_chat/supervisor/graph.py
@@ -14,17 +14,85 @@ from langgraph.types import Checkpointer
from app.agents.multi_agent_chat.core.prompts import read_prompt_md
+# Specialists that are always rendered into the supervisor prompt, whether or not
+# a routing tool with the same name was passed in.
+_BUILTIN_SPECIALISTS: frozenset[str] = frozenset({"research", "memory", "deliverables"})
+# One-line capability blurb per specialist name. The "memory" blurb here is only a
+# placeholder: it is overridden per call with a visibility-specific wording when
+# the specialist list is rendered.
+_SPECIALIST_CAPABILITIES: dict[str, str] = {
+ "research": "external research: web lookup, source gathering, and SurfSense documentation help.",
+ "memory": "save durable long-lived memory items.",
+ "deliverables": "final artifact generation: report, podcast, video presentation, resume, or image.",
+ "gmail": "email inbox actions: search/read emails, draft updates, send messages, and trash emails.",
+ "calendar": "scheduling actions: check availability, inspect events, create events, and update events.",
+ "google_drive": "Drive file/document actions: locate files, inspect content, and manage files/folders.",
+ "notion": "Notion page actions: create pages, update content, and delete pages.",
+ "confluence": "Confluence page actions: find/read pages and create/update pages.",
+ "dropbox": "Dropbox file storage actions: browse folders, read files, and manage file content.",
+ "onedrive": "OneDrive file storage actions: browse folders, read files, and manage file content.",
+ "discord": "Discord communication actions: read channels/threads and post replies.",
+ "teams": "Microsoft Teams communication actions: read channels/threads and post replies.",
+ "luma": "Luma event actions: list events, inspect event details, and create events.",
+ "linear": "Linear workflow actions: search/update issues and inspect projects/cycles.",
+ "jira": "Jira workflow actions: search/update issues and manage workflow transitions.",
+ "clickup": "ClickUp workflow actions: find/update tasks and lists.",
+ "airtable": "Airtable data actions: locate bases/tables and create/read/update records.",
+ "slack": "Slack communication actions: read channel/thread history and post replies.",
+ # generic_mcp specialist intentionally disabled for now.
+ # "generic_mcp": "handle tasks through user-defined custom app integration tools not covered above.",
+}
+# Order in which specialists appear in the prompt: the insertion order of the
+# capability mapping (built-ins first, then connectors).
+_SPECIALIST_ORDER: tuple[str, ...] = tuple(_SPECIALIST_CAPABILITIES.keys())
+
+
+def _memory_capability_for_visibility(thread_visibility: Any | None) -> str:
+ """Return the capability blurb to show for the ``memory`` specialist.
+
+ ``thread_visibility`` may be an enum-like object (its ``value`` attribute is
+ used when present) or a plain value. It is stringified and upper-cased before
+ comparison, so the match is case-insensitive. ``"SEARCH_SPACE"`` selects the
+ team-memory wording; anything else, including ``None``, selects the
+ user-memory wording.
+ """
+ # getattr falls back to the object itself when there is no ``.value``.
+ vis = str(getattr(thread_visibility, "value", thread_visibility)).upper()
+ if vis == "SEARCH_SPACE":
+ return "team memory actions: save shared team preferences, conventions, and long-lived team facts."
+ return "user memory actions: save personal preferences, instructions, and long-lived user facts."
+
+
+def _render_available_specialists_list(
+ tools: Sequence[BaseTool],
+ *,
+ thread_visibility: Any | None,
+) -> str:
+ """Render the bullet list of specialists for the supervisor prompt.
+
+ A specialist is listed when it is a built-in or when one of ``tools`` has a
+ ``name`` equal to its key. Output lines follow ``_SPECIALIST_ORDER``, have the
+ form ``- <name>: <capability>``, and are joined with newlines. Tools whose
+ name has no entry in ``_SPECIALIST_CAPABILITIES`` are not listed.
+
+ Args:
+ tools: Routing tools handed to the supervisor; only their ``name`` is read.
+ thread_visibility: Chooses the wording of the ``memory`` entry.
+
+ Returns:
+ The newline-joined bullet list.
+ """
+ # Objects without a string ``name`` attribute are ignored rather than raising.
+ available_names = {
+ tool.name for tool in tools if isinstance(getattr(tool, "name", None), str)
+ }
+ # Work on a copy so the per-call "memory" override never mutates the
+ # module-level mapping.
+ capabilities = dict(_SPECIALIST_CAPABILITIES)
+ capabilities["memory"] = _memory_capability_for_visibility(thread_visibility)
+ lines: list[str] = []
+ for name in _SPECIALIST_ORDER:
+ # NOTE(review): built-ins are listed even when no tool with that name is in
+ # ``tools`` — confirm the caller always supplies those routing tools.
+ if name in _BUILTIN_SPECIALISTS or name in available_names:
+ capability = capabilities[name]
+ lines.append(f"- {name}: {capability}")
+ return "\n".join(lines)
+
+
+def _render_supervisor_prompt(
+ template: str,
+ tools: Sequence[BaseTool],
+ *,
+ thread_visibility: Any | None,
+) -> str:
+ """Fill the ``{{AVAILABLE_SPECIALISTS_LIST}}`` placeholder in ``template``.
+
+ Every occurrence of the placeholder is replaced with the specialist list
+ rendered for ``tools`` and ``thread_visibility``. A template that does not
+ contain the placeholder is returned unchanged.
+ """
+ specialist_list = _render_available_specialists_list(
+ tools, thread_visibility=thread_visibility
+ )
+ # Plain text substitution (not str.format), so other braces in the template
+ # are left untouched.
+ return template.replace("{{AVAILABLE_SPECIALISTS_LIST}}", specialist_list)
+
def build_supervisor_agent(
llm: BaseChatModel,
*,
tools: Sequence[BaseTool],
checkpointer: Checkpointer | None = None,
+ thread_visibility: Any | None = None,
middleware: Sequence[Any] | None = None,
context_schema: Any | None = None,
):
"""Compile the supervisor **agent** (graph). ``tools`` = output of ``build_supervisor_routing_tools``."""
- system_prompt = read_prompt_md(supervisor_pkg.__name__, "supervisor_prompt")
+ template = read_prompt_md(supervisor_pkg.__name__, "supervisor_prompt")
+ system_prompt = _render_supervisor_prompt(
+ template,
+ tools,
+ thread_visibility=thread_visibility,
+ )
kwargs: dict[str, Any] = {
"system_prompt": system_prompt,
"tools": list(tools),
diff --git a/surfsense_backend/app/agents/multi_agent_chat/supervisor/supervisor_prompt.md b/surfsense_backend/app/agents/multi_agent_chat/supervisor/supervisor_prompt.md
index db08dd945..684c03333 100644
--- a/surfsense_backend/app/agents/multi_agent_chat/supervisor/supervisor_prompt.md
+++ b/surfsense_backend/app/agents/multi_agent_chat/supervisor/supervisor_prompt.md
@@ -1,26 +1,61 @@
-You are the supervisor agent. Route work to the right sub-agent using **one** routing tool per request when delegation is needed.
+You are SurfSense's multi-agent supervisor.
-**Built-in capabilities**
+
+Your job is to decide whether to answer directly or delegate to one or more specialists.
+You optimize for correctness, low confusion, and minimal unnecessary delegation.
+
-- **research** — web search, page scraping, SurfSense documentation help.
-- **memory** — save long-term facts and preferences (personal or team memory).
-- **deliverables** — reports, podcasts, video presentations, resumes, images (thread-scoped outputs; only when available).
+
+Use only the specialists listed below.
+{{AVAILABLE_SPECIALISTS_LIST}}
+
-**Connectors** (same pattern for each product)
+
+1) Delegate when the request clearly belongs to a specialist's capabilities.
+2) Answer directly when no specialist is needed.
+3) For multi-domain work, decompose the request into sequential specialist calls; run calls in parallel only when they are independent of each other.
+4) Do not call a specialist "just in case". Every delegation must have a clear purpose.
+
-- **calendar** — Google Calendar events.
-- **confluence** — Confluence pages.
-- **discord** — Discord server channels and messages.
-- **dropbox** — Dropbox files.
-- **gmail** — email (search, read, drafts, send, trash).
-- **google_drive** — Google Drive files (Docs/Sheets).
-- **luma** — Luma calendar events (list, read, create).
-- **notion** — Notion pages.
-- **onedrive** — Microsoft OneDrive files.
-- **teams** — Microsoft Teams channels and messages.
+
+When delegating to a specialist, pass a compact but complete task that includes:
+- user goal,
+- concrete constraints (time range, recipients, format, etc.),
+- success criteria,
+- required output details (IDs/links/timestamps when applicable).
-**OAuth MCP** (extra routing tools only when those integrations are connected)
+Never pass implementation chatter. Pass only actionable instructions.
+
-- **linear**, **slack**, **jira**, **clickup**, **airtable**, **generic_mcp** — use only for that product’s MCP-backed work.
+
+Every specialist call returns one JSON object. Parse and reason over these fields:
+- `status`: `success` | `partial` | `blocked` | `error`
+- `action_summary`: concise execution summary
+- `evidence`: task-specific proof/results
+- `next_step`: required follow-up when not fully successful
+- `missing_fields`: required user inputs (when blocked by missing info)
+- `assumptions`: inferred values used by the specialist
-Pass each tool a **clear natural-language task** describing what the sub-agent should do. Answer directly when no sub-agent is needed. When sub-agents return results, combine them into one coherent reply for the user.
+Field-handling rules:
+1) `status=success`: trust the result only when supported by `evidence`.
+2) `status=partial`: use completed `evidence`, then continue with `next_step`.
+3) `status=blocked`: do not retry blindly; ask the user only for items in `missing_fields` (or clear disambiguation choices from `evidence`).
+4) `status=error`: do not claim completion; either retry with a better task if the fix is obvious, or explain the failure and propose the specialist's `next_step`.
+5) If a specialist's output appears invalid or contradictory, treat it as `error`, avoid fabricating details, and recover with a safer re-delegation or user clarification.
+
+
+
+Ask a concise clarifying question only when a missing detail blocks execution.
+If one reasonable default is safe and obvious, use it and state the assumption.
+
+
+
+After specialist calls, produce one coherent final answer:
+- state what was done,
+- give key results/artifacts,
+- list unresolved items and the next best step,
+- include assumptions only when they affected outcomes,
+- when multiple specialists are used, merge outputs into one user-facing narrative (no raw JSON dump).
+
+Never claim an action succeeded unless a specialist returned success evidence.
+