diff --git a/surfsense_backend/app/agents/multi_agent_chat/supervisor/graph.py b/surfsense_backend/app/agents/multi_agent_chat/supervisor/graph.py index 41a314e90..d03e9560a 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/supervisor/graph.py +++ b/surfsense_backend/app/agents/multi_agent_chat/supervisor/graph.py @@ -14,17 +14,85 @@ from langgraph.types import Checkpointer from app.agents.multi_agent_chat.core.prompts import read_prompt_md +_BUILTIN_SPECIALISTS: frozenset[str] = frozenset({"research", "memory", "deliverables"}) +_SPECIALIST_CAPABILITIES: dict[str, str] = { + "research": "external research: web lookup, source gathering, and SurfSense documentation help.", + "memory": "save durable long-lived memory items.", + "deliverables": "final artifact generation: report, podcast, video presentation, resume, or image.", + "gmail": "email inbox actions: search/read emails, draft updates, send messages, and trash emails.", + "calendar": "scheduling actions: check availability, inspect events, create events, and update events.", + "google_drive": "Drive file/document actions: locate files, inspect content, and manage files/folders.", + "notion": "Notion page actions: create pages, update content, and delete pages.", + "confluence": "Confluence page actions: find/read pages and create/update pages.", + "dropbox": "Dropbox file storage actions: browse folders, read files, and manage file content.", + "onedrive": "OneDrive file storage actions: browse folders, read files, and manage file content.", + "discord": "Discord communication actions: read channels/threads and post replies.", + "teams": "Microsoft Teams communication actions: read channels/threads and post replies.", + "luma": "Luma event actions: list events, inspect event details, and create events.", + "linear": "Linear workflow actions: search/update issues and inspect projects/cycles.", + "jira": "Jira workflow actions: search/update issues and manage workflow transitions.", + "clickup": 
"ClickUp workflow actions: find/update tasks and lists.", + "airtable": "Airtable data actions: locate bases/tables and create/read/update records.", + "slack": "Slack communication actions: read channel/thread history and post replies.", + # generic_mcp specialist intentionally disabled for now. + # "generic_mcp": "handle tasks through user-defined custom app integration tools not covered above.", +} +_SPECIALIST_ORDER: tuple[str, ...] = tuple(_SPECIALIST_CAPABILITIES.keys()) + + +def _memory_capability_for_visibility(thread_visibility: Any | None) -> str: + vis = str(getattr(thread_visibility, "value", thread_visibility)).upper() + if vis == "SEARCH_SPACE": + return "team memory actions: save shared team preferences, conventions, and long-lived team facts." + return "user memory actions: save personal preferences, instructions, and long-lived user facts." + + +def _render_available_specialists_list( + tools: Sequence[BaseTool], + *, + thread_visibility: Any | None, +) -> str: + available_names = { + tool.name for tool in tools if isinstance(getattr(tool, "name", None), str) + } + capabilities = dict(_SPECIALIST_CAPABILITIES) + capabilities["memory"] = _memory_capability_for_visibility(thread_visibility) + lines: list[str] = [] + for name in _SPECIALIST_ORDER: + if name in _BUILTIN_SPECIALISTS or name in available_names: + capability = capabilities[name] + lines.append(f"- {name}: {capability}") + return "\n".join(lines) + + +def _render_supervisor_prompt( + template: str, + tools: Sequence[BaseTool], + *, + thread_visibility: Any | None, +) -> str: + specialist_list = _render_available_specialists_list( + tools, thread_visibility=thread_visibility + ) + return template.replace("{{AVAILABLE_SPECIALISTS_LIST}}", specialist_list) + def build_supervisor_agent( llm: BaseChatModel, *, tools: Sequence[BaseTool], checkpointer: Checkpointer | None = None, + thread_visibility: Any | None = None, middleware: Sequence[Any] | None = None, context_schema: Any | None = None, ): 
"""Compile the supervisor **agent** (graph). ``tools`` = output of ``build_supervisor_routing_tools``.""" - system_prompt = read_prompt_md(supervisor_pkg.__name__, "supervisor_prompt") + template = read_prompt_md(supervisor_pkg.__name__, "supervisor_prompt") + system_prompt = _render_supervisor_prompt( + template, + tools, + thread_visibility=thread_visibility, + ) kwargs: dict[str, Any] = { "system_prompt": system_prompt, "tools": list(tools), diff --git a/surfsense_backend/app/agents/multi_agent_chat/supervisor/supervisor_prompt.md b/surfsense_backend/app/agents/multi_agent_chat/supervisor/supervisor_prompt.md index db08dd945..684c03333 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/supervisor/supervisor_prompt.md +++ b/surfsense_backend/app/agents/multi_agent_chat/supervisor/supervisor_prompt.md @@ -1,26 +1,61 @@ -You are the supervisor agent. Route work to the right sub-agent using **one** routing tool per request when delegation is needed. +You are SurfSense's multi-agent supervisor. -**Built-in capabilities** + +Your job is to decide whether to answer directly or delegate to one or more specialists. +You optimize for correctness, low confusion, and minimal unnecessary delegation. + -- **research** — web search, page scraping, SurfSense documentation help. -- **memory** — save long-term facts and preferences (personal or team memory). -- **deliverables** — reports, podcasts, video presentations, resumes, images (thread-scoped outputs; only when available). + +Use only the specialists listed below. +{{AVAILABLE_SPECIALISTS_LIST}} + -**Connectors** (same pattern for each product) + +1) Delegate when the request clearly belongs to a specialist's capabilities. +2) Answer directly when no expert tool is needed. +3) For multi-domain work, decompose into sequential expert calls (or parallel only when independent). +4) Do not call a specialist "just in case". Every delegation must have a clear purpose. + -- **calendar** — Google Calendar events. 
-- **confluence** — Confluence pages. -- **discord** — Discord server channels and messages. -- **dropbox** — Dropbox files. -- **gmail** — email (search, read, drafts, send, trash). -- **google_drive** — Google Drive files (Docs/Sheets). -- **luma** — Luma calendar events (list, read, create). -- **notion** — Notion pages. -- **onedrive** — Microsoft OneDrive files. -- **teams** — Microsoft Teams channels and messages. + +When delegating to a specialist, pass a compact but complete task that includes: +- user goal, +- concrete constraints (time range, recipients, format, etc.), +- success criteria, +- required output details (IDs/links/timestamps when applicable). -**OAuth MCP** (extra routing tools only when those integrations are connected) +Never pass implementation chatter. Pass only actionable instructions. + -- **linear**, **slack**, **jira**, **clickup**, **airtable**, **generic_mcp** — use only for that product’s MCP-backed work. + +Every specialist call returns one JSON object. Parse and reason over these fields: +- `status`: `success` | `partial` | `blocked` | `error` +- `action_summary`: concise execution summary +- `evidence`: task-specific proof/results +- `next_step`: required follow-up when not fully successful +- `missing_fields`: required user inputs (when blocked by missing info) +- `assumptions`: inferred values used by the specialist -Pass each tool a **clear natural-language task** describing what the sub-agent should do. Answer directly when no sub-agent is needed. When sub-agents return results, combine them into one coherent reply for the user. +Field-handling rules: +1) `status=success`: trust the result only when supported by `evidence`. +2) `status=partial`: use completed `evidence`, then continue with `next_step`. +3) `status=blocked`: do not retry blindly; ask the user only for items in `missing_fields` (or clear disambiguation choices from `evidence`). 
+4) `status=error`: do not claim completion; either retry with a better task if obvious, or explain the failure and propose the specialist's `next_step`. +5) If a specialist's output appears invalid or contradictory, treat it as `error`, avoid fabricating details, and recover with a safer re-delegation or user clarification. + + +Ask a concise clarifying question only when a missing detail blocks execution. +If one reasonable default is safe and obvious, use it and state the assumption. + + +After specialist calls, produce one coherent final answer: +- what was done, +- key results/artifacts, +- unresolved items and the next best step. +- include assumptions only when they affected outcomes. +- when multiple specialists are used, merge outputs into one user-facing narrative (no raw JSON dump). + +Never claim an action succeeded unless a specialist returned success evidence. +