Merge remote-tracking branch 'upstream/dev' into fix/zero-cache-stale-replica-1355

This commit is contained in:
Anish Sarkar 2026-05-16 19:30:09 +05:30
commit af1d2fa430
601 changed files with 45027 additions and 4681 deletions

View file

@ -25,6 +25,7 @@ CONNECTOR_TYPE_TO_CONNECTOR_AGENT_MAPS: dict[str, str] = {
SUBAGENT_TO_REQUIRED_CONNECTOR_MAP: dict[str, frozenset[str]] = {
"deliverables": frozenset(),
"knowledge_base": frozenset(),
"airtable": frozenset({"AIRTABLE_CONNECTOR"}),
"calendar": frozenset({"GOOGLE_CALENDAR_CONNECTOR"}),
"clickup": frozenset({"CLICKUP_CONNECTOR"}),

View file

@ -11,12 +11,9 @@ from langchain_core.language_models import BaseChatModel
from langchain_core.tools import BaseTool
from langgraph.types import Checkpointer
from app.agents.multi_agent_chat.middleware import (
from app.agents.multi_agent_chat.middleware.stack import (
build_main_agent_deepagent_middleware,
)
from app.agents.multi_agent_chat.subagents.shared.permissions import (
ToolsPermissions,
)
from app.agents.new_chat.context import SurfSenseContextSchema
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.filesystem_selection import FilesystemMode
@ -42,7 +39,7 @@ def build_compiled_agent_graph_sync(
flags: AgentFeatureFlags,
checkpointer: Checkpointer,
subagent_dependencies: dict[str, Any],
mcp_tools_by_agent: dict[str, ToolsPermissions] | None = None,
mcp_tools_by_agent: dict[str, list[BaseTool]] | None = None,
disabled_tools: list[str] | None = None,
):
"""Sync compile: middleware + ``create_agent`` (run via ``asyncio.to_thread``)."""

View file

@ -10,7 +10,6 @@ from langchain_core.language_models import BaseChatModel
from langchain_core.tools import BaseTool
from langgraph.types import Checkpointer
from app.agents.multi_agent_chat.subagents.shared.permissions import ToolsPermissions
from app.agents.new_chat.agent_cache import (
flags_signature,
get_cache,
@ -25,14 +24,14 @@ from app.db import ChatVisibility
from ..graph.compile_graph_sync import build_compiled_agent_graph_sync
def mcp_signature(mcp_tools_by_agent: dict[str, ToolsPermissions]) -> str:
def mcp_signature(mcp_tools_by_agent: dict[str, list[BaseTool]]) -> str:
"""Hash the per-agent MCP tool surface so a change rotates the cache key."""
rows = []
for agent_name in sorted(mcp_tools_by_agent.keys()):
perms = mcp_tools_by_agent[agent_name]
allow_names = sorted(item.get("name", "") for item in perms.get("allow", []))
ask_names = sorted(item.get("name", "") for item in perms.get("ask", []))
rows.append((agent_name, allow_names, ask_names))
names = sorted(
getattr(t, "name", "") or "" for t in mcp_tools_by_agent[agent_name]
)
rows.append((agent_name, names))
return stable_hash(rows)
@ -55,7 +54,7 @@ async def build_agent_with_cache(
flags: AgentFeatureFlags,
checkpointer: Checkpointer,
subagent_dependencies: dict[str, Any],
mcp_tools_by_agent: dict[str, ToolsPermissions],
mcp_tools_by_agent: dict[str, list[BaseTool]],
disabled_tools: list[str] | None,
config_id: str | None,
) -> Any:

View file

@ -7,7 +7,6 @@ import time
from collections.abc import Sequence
from typing import Any
from deepagents.graph import BASE_AGENT_PROMPT
from langchain_core.language_models import BaseChatModel
from langchain_core.tools import BaseTool
from langgraph.types import Checkpointer
@ -30,6 +29,10 @@ from app.agents.new_chat.tools.invalid_tool import INVALID_TOOL_NAME, invalid_to
from app.agents.new_chat.tools.registry import build_tools_async
from app.db import ChatVisibility
from app.services.connector_service import ConnectorService
from app.services.user_tool_allowlist import (
fetch_user_allowlist_rulesets,
make_trusted_tool_saver,
)
from app.utils.perf import get_perf_logger
from ..system_prompt import build_main_agent_system_prompt
@ -142,11 +145,49 @@ async def create_multi_agent_chat_deep_agent(
)
mcp_tools_by_agent = {}
_perf_log.info(
"[create_agent] load_mcp_tools_by_connector in %.3fs (%d buckets)",
"[create_agent] load_mcp_tools_by_connector in %.3fs (%d agents)",
time.perf_counter() - _t0,
len(mcp_tools_by_agent),
)
# User-scoped allow-list ("Always Allow" persisted to
# ``SearchSourceConnector.config.trusted_tools``). Layered last in each
# subagent's PermissionMiddleware so user ``allow`` overrides coded
# ``ask`` via last-match-wins. Anonymous turns and read failures both
# degrade to "no user rules" rather than blocking the turn.
user_allowlist_by_subagent: dict[str, Any] = {}
trusted_tool_saver = None
if user_id:
try:
import uuid as _uuid
user_uuid = _uuid.UUID(user_id)
except (TypeError, ValueError):
user_uuid = None
if user_uuid is not None:
_t0 = time.perf_counter()
try:
user_allowlist_by_subagent = await fetch_user_allowlist_rulesets(
db_session,
user_id=user_uuid,
search_space_id=search_space_id,
)
except Exception as e:
logging.warning(
"User allow-list fetch failed; subagents will run without user trust rules this turn: %s",
e,
)
user_allowlist_by_subagent = {}
_perf_log.info(
"[create_agent] fetch_user_allowlist_rulesets in %.3fs (%d subagents have rules)",
time.perf_counter() - _t0,
len(user_allowlist_by_subagent),
)
trusted_tool_saver = make_trusted_tool_saver(user_uuid)
dependencies["user_allowlist_by_subagent"] = user_allowlist_by_subagent
dependencies["trusted_tool_saver"] = trusted_tool_saver
modified_disabled_tools = list(disabled_tools) if disabled_tools else []
if "search_knowledge_base" not in modified_disabled_tools:
@ -218,7 +259,7 @@ async def create_multi_agent_chat_deep_agent(
"[create_agent] System prompt built in %.3fs", time.perf_counter() - _t0
)
final_system_prompt = system_prompt + "\n\n" + BASE_AGENT_PROMPT
final_system_prompt = system_prompt
config_id = agent_config.config_id if agent_config is not None else None

View file

@ -1,4 +1,4 @@
"""Assemble the main-agent system prompt from ``markdown/*.md`` fragments."""
"""Assemble the main-agent system prompt from ``prompts/`` fragments."""
from __future__ import annotations

View file

@ -1,7 +1,27 @@
"""Assemble the **main-agent** deep-agent system string only.
"""Assemble the main-agent system prompt from ``prompts/``.
Sections (order matters): core instructions → provider → citations → dynamic
``<registry_subagents>`` → SurfSense ``<tools>``.
Section order (default flow)::
<agent_identity>
[user's custom_system_instructions, if any]
<core_behavior> # default body
<knowledge_base_first> # default body
<dynamic_context> # always
<routing> # default body
<specialists> # always (dynamic roster)
<tools> # always (vertical-slice)
<memory_protocol> # default body
<citations> # always
<output_format> # always
<refusal_and_limits> # always
<reminder> # always
``custom_system_instructions`` is **additive**, not a replacement: it slots
between identity and the default body so platform safety nets (KB-first,
routing, citations, output formatting, refusal rules) always apply.
``use_default_system_instructions=False`` skips the four "default body"
sections but keeps all the always-on platform sections.
"""
from __future__ import annotations
@ -10,15 +30,18 @@ from datetime import UTC, datetime
from app.db import ChatVisibility
from .load_md import read_prompt_md
from .sections.citations import build_citations_section
from .sections.provider import build_provider_section
from .sections.registry_subagents import build_registry_subagents_section
from .sections.system_instruction import build_default_system_instruction_xml
from .sections.dynamic_context import build_dynamic_context_section
from .sections.identity import build_identity_section
from .sections.memory_protocol import build_memory_protocol_section
from .sections.specialists import build_specialists_section
from .sections.tools import build_tools_section
def build_main_agent_system_prompt(
*,
registry_subagent_prompt_lines: list[tuple[str, str]],
today: datetime | None = None,
thread_visibility: ChatVisibility | None = None,
enabled_tool_names: set[str] | None = None,
@ -27,27 +50,51 @@ def build_main_agent_system_prompt(
use_default_system_instructions: bool = True,
citations_enabled: bool = True,
model_name: str | None = None,
registry_subagent_prompt_lines: list[tuple[str, str]] | None = None,
) -> str:
resolved_today = (today or datetime.now(UTC)).astimezone(UTC).date().isoformat()
visibility = thread_visibility or ChatVisibility.PRIVATE
if custom_system_instructions and custom_system_instructions.strip():
system_block = custom_system_instructions.format(resolved_today=resolved_today)
elif use_default_system_instructions:
system_block = build_default_system_instruction_xml(
visibility=visibility,
resolved_today=resolved_today,
)
else:
system_block = ""
parts: list[str] = []
system_block += build_provider_section(model_name=model_name)
system_block += build_citations_section(citations_enabled=citations_enabled)
system_block += build_registry_subagents_section(registry_subagent_prompt_lines)
system_block += build_tools_section(
visibility=visibility,
enabled_tool_names=enabled_tool_names,
disabled_tool_names=disabled_tool_names,
parts.append(
build_identity_section(visibility=visibility, resolved_today=resolved_today)
)
return system_block
if custom_system_instructions and custom_system_instructions.strip():
parts.append(
"\n"
+ custom_system_instructions.format(resolved_today=resolved_today)
+ "\n"
)
if use_default_system_instructions:
parts.append(_wrap(read_prompt_md("core_behavior.md")))
parts.append(_wrap(read_prompt_md("kb_first.md")))
parts.append(build_dynamic_context_section(visibility=visibility))
if use_default_system_instructions:
parts.append(_wrap(read_prompt_md("routing.md")))
parts.append(build_specialists_section(registry_subagent_prompt_lines))
parts.append(
build_tools_section(
visibility=visibility,
enabled_tool_names=enabled_tool_names,
disabled_tool_names=disabled_tool_names,
)
)
if use_default_system_instructions:
parts.append(build_memory_protocol_section(visibility=visibility))
parts.append(build_citations_section(citations_enabled=citations_enabled))
parts.append(_wrap(read_prompt_md("output_format.md")))
parts.append(_wrap(read_prompt_md("refusal_and_limits.md")))
parts.append(_wrap(read_prompt_md("reminder.md")))
return "".join(p for p in parts if p)
def _wrap(fragment: str) -> str:
return f"\n{fragment}\n" if fragment else ""

View file

@ -1,14 +1,14 @@
"""Load main-agent-only markdown from ``system_prompt/markdown/`` (``importlib.resources``)."""
"""Load main-agent prompt fragments from ``system_prompt/prompts/``."""
from __future__ import annotations
from importlib import resources
_PROMPTS_PACKAGE = "app.agents.multi_agent_chat.main_agent.system_prompt.markdown"
_PROMPTS_PACKAGE = "app.agents.multi_agent_chat.main_agent.system_prompt.prompts"
def read_prompt_md(filename: str) -> str:
"""Load ``markdown/{filename}`` (e.g. ``agent_private.md`` or ``tools/_preamble.md``)."""
"""Load ``prompts/{filename}`` (e.g. ``core_behavior.md`` or ``tools/web_search/description.md``)."""
ref = resources.files(_PROMPTS_PACKAGE).joinpath(filename)
if not ref.is_file():
return ""

View file

@ -1,4 +1,4 @@
"""Provider-specific style hints from ``markdown/providers/`` (main agent only)."""
"""Provider-specific style hints from ``prompts/providers/`` (main agent only)."""
from __future__ import annotations

View file

@ -1,4 +1,4 @@
"""Citation fragment for the main agent (chunk-tagged context only)."""
"""``<citations>`` section — on/off variant based on workspace configuration."""
from __future__ import annotations
@ -6,6 +6,6 @@ from ..load_md import read_prompt_md
def build_citations_section(*, citations_enabled: bool) -> str:
name = "citations_on.md" if citations_enabled else "citations_off.md"
fragment = read_prompt_md(name)
variant = "on" if citations_enabled else "off"
fragment = read_prompt_md(f"citations/{variant}.md")
return f"\n{fragment}\n" if fragment else ""

View file

@ -0,0 +1,13 @@
"""``<dynamic_context>`` section — visibility-aware (private vs team thread)."""
from __future__ import annotations
from app.db import ChatVisibility
from ..load_md import read_prompt_md
def build_dynamic_context_section(*, visibility: ChatVisibility) -> str:
    """Return the ``<dynamic_context>`` fragment matching the thread visibility.

    Reads ``dynamic_context/team.md`` when ``visibility`` equals
    ``ChatVisibility.SEARCH_SPACE`` and ``dynamic_context/private.md``
    otherwise. A non-empty fragment is padded with one newline on each
    side; an empty fragment yields ``""``.
    """
    if visibility == ChatVisibility.SEARCH_SPACE:
        flavor = "team"
    else:
        flavor = "private"
    text = read_prompt_md(f"dynamic_context/{flavor}.md")
    if not text:
        return ""
    return "\n" + text + "\n"

View file

@ -0,0 +1,19 @@
"""``<agent_identity>`` section — visibility-aware, with ``{resolved_today}`` injection."""
from __future__ import annotations
from app.db import ChatVisibility
from ..load_md import read_prompt_md
def build_identity_section(
    *,
    visibility: ChatVisibility,
    resolved_today: str,
) -> str:
    """Return the ``<agent_identity>`` fragment with the date filled in.

    Reads ``identity/team.md`` when ``visibility`` equals
    ``ChatVisibility.SEARCH_SPACE`` and ``identity/private.md`` otherwise.

    Args:
        visibility: Thread visibility used to pick the fragment variant.
        resolved_today: Value substituted for ``{resolved_today}``.

    Returns:
        The formatted fragment padded with one newline on each side, or
        ``""`` when the fragment is empty.
    """
    if visibility == ChatVisibility.SEARCH_SPACE:
        flavor = "team"
    else:
        flavor = "private"
    template = read_prompt_md(f"identity/{flavor}.md")
    if template:
        # ``str.format`` runs over the whole fragment, so every brace pair in
        # the markdown is treated as a replacement field.
        rendered = template.format(resolved_today=resolved_today)
        return f"\n{rendered}\n"
    return ""

View file

@ -0,0 +1,13 @@
"""``<memory_protocol>`` section — visibility-aware (user vs team memory)."""
from __future__ import annotations
from app.db import ChatVisibility
from ..load_md import read_prompt_md
def build_memory_protocol_section(*, visibility: ChatVisibility) -> str:
    """Return the ``<memory_protocol>`` fragment matching the thread visibility.

    Reads ``memory_protocol/team.md`` when ``visibility`` equals
    ``ChatVisibility.SEARCH_SPACE`` and ``memory_protocol/private.md``
    otherwise. A non-empty fragment is padded with one newline on each
    side; an empty fragment yields ``""``.
    """
    is_team_thread = visibility == ChatVisibility.SEARCH_SPACE
    flavor = "team" if is_team_thread else "private"
    text = read_prompt_md(f"memory_protocol/{flavor}.md")
    if not text:
        return ""
    return "\n" + text + "\n"

View file

@ -1,27 +0,0 @@
"""Dynamic ``<registry_subagents>`` block: **task** specialists actually built for this workspace."""
from __future__ import annotations
def build_registry_subagents_section(
    registry_subagent_lines: list[tuple[str, str]] | None,
) -> str:
    """Render the ``<registry_subagents>`` block for the system prompt.

    Args:
        registry_subagent_lines: ``(name, description)`` pairs, or ``None``.

    Returns:
        ``""`` when the argument is ``None``; a fixed "no specialists" block
        when it is empty; otherwise one markdown bullet per pair, wrapped in
        ``<registry_subagents>`` tags together with usage guidance.
    """
    if registry_subagent_lines is None:
        return ""

    opening = "\n<registry_subagents>\n"
    closing = "</registry_subagents>\n"

    if not registry_subagent_lines:
        return (
            opening
            + "No registry specialists are listed for **task** in this workspace.\n"
            + closing
        )

    roster = [f"- **{name}** — {desc}" for name, desc in registry_subagent_lines]
    pieces = [
        opening,
        "These specialists are registered for **task** (routes without a matching connector are omitted).\n",
        "\n".join(roster) + "\n",
        "The runtime may also offer a general-purpose **task** helper with your tools in a separate context.\n",
        "Pick the specialist by **name**. Put full instructions in the task prompt; they do not see this thread.\n",
        closing,
    ]
    return "".join(pieces)

View file

@ -0,0 +1,15 @@
"""``<specialists>`` section — live ``task`` roster for this workspace.
The roster is non-empty by contract: ``deliverables`` and ``knowledge_base``
both declare ``frozenset()`` in ``SUBAGENT_TO_REQUIRED_CONNECTOR_MAP``, so
they survive every connector-based exclusion pass.
"""
from __future__ import annotations
def build_specialists_section(
    specialist_lines: list[tuple[str, str]],
) -> str:
    """Render the ``<specialists>`` block as one markdown bullet per entry.

    Args:
        specialist_lines: ``(name, description)`` pairs, rendered in order.

    Returns:
        The bullets joined by newlines and wrapped in ``<specialists>`` tags,
        with a leading and trailing newline.
    """
    rendered = [f"- **{name}** — {desc}" for name, desc in specialist_lines]
    return "\n<specialists>\n" + "\n".join(rendered) + "\n</specialists>\n"

View file

@ -1,35 +0,0 @@
"""Default ``<system_instruction>`` block for the main agent only."""
from __future__ import annotations
from app.db import ChatVisibility
from ..load_md import read_prompt_md
_PRIVATE_ORDER = (
"agent_private.md",
"kb_only_policy_private.md",
"main_agent_tool_routing.md",
"parameter_resolution.md",
"memory_protocol_private.md",
)
_TEAM_ORDER = (
"agent_team.md",
"kb_only_policy_team.md",
"main_agent_tool_routing.md",
"parameter_resolution.md",
"memory_protocol_team.md",
)
def build_default_system_instruction_xml(
    *,
    visibility: ChatVisibility,
    resolved_today: str,
) -> str:
    """Assemble the default ``<system_instruction>`` block.

    Loads the fragments named in ``_TEAM_ORDER`` when ``visibility`` equals
    ``ChatVisibility.SEARCH_SPACE`` and those in ``_PRIVATE_ORDER`` otherwise,
    drops empty ones, and joins the rest with blank lines.

    Args:
        visibility: Thread visibility used to pick the fragment list.
        resolved_today: Value substituted for ``{resolved_today}``.

    Returns:
        The joined fragments wrapped in ``<system_instruction>`` tags.
    """
    if visibility == ChatVisibility.SEARCH_SPACE:
        filenames = _TEAM_ORDER
    else:
        filenames = _PRIVATE_ORDER

    loaded: list[str] = []
    for filename in filenames:
        text = read_prompt_md(filename)
        if text:
            loaded.append(text)
    body = "\n\n".join(loaded)

    # ``str.format`` runs over the fully assembled text, so every brace pair
    # in any loaded fragment is treated as a replacement field.
    template = f"\n<system_instruction>\n{body}\n\n</system_instruction>\n"
    return template.format(resolved_today=resolved_today)

View file

@ -1,4 +1,4 @@
"""Main-agent ``<tools>`` block (memory + research builtins only; see ``main_agent.tools``)."""
"""Main-agent ``<tools>`` block (memory + research builtins + ``task``)."""
from __future__ import annotations

View file

@ -1,6 +1,7 @@
"""``<tools>`` + ``<tool_call_examples>`` from ``system_prompt/markdown/{tools,examples}/``.
"""Compose the ``<tools>`` block from per-tool vertical-slice folders.
Only documents tools the main agent actually binds not full ``new_chat``.
Each tool lives in ``prompts/tools/<name>/`` with ``description.md`` and an
``example.md``. Visibility variants live in ``{private,team}/`` subfolders.
"""
from __future__ import annotations
@ -13,16 +14,10 @@ from .load_md import read_prompt_md
_MEMORY_VARIANT_TOOLS: frozenset[str] = frozenset({"update_memory"})
def _tool_fragment_path(tool_name: str, variant: str) -> str:
def _tool_fragment(tool_name: str, variant: str, leaf: str) -> str:
if tool_name in _MEMORY_VARIANT_TOOLS:
return f"tools/{tool_name}_{variant}.md"
return f"tools/{tool_name}.md"
def _example_fragment_path(tool_name: str, variant: str) -> str:
if tool_name in _MEMORY_VARIANT_TOOLS:
return f"examples/{tool_name}_{variant}.md"
return f"examples/{tool_name}.md"
return read_prompt_md(f"tools/{tool_name}/{variant}/{leaf}")
return read_prompt_md(f"tools/{tool_name}/{leaf}")
def _format_tool_label(tool_name: str) -> str:
@ -35,26 +30,35 @@ def build_tools_instruction_block(
enabled_tool_names: set[str] | None,
disabled_tool_names: set[str] | None,
) -> str:
"""Render ``<tools>``. ``task`` is always included: at least ``deliverables``
and ``knowledge_base`` are always in ``<specialists>`` (see constants)."""
variant = "team" if visibility == ChatVisibility.SEARCH_SPACE else "private"
parts: list[str] = []
preamble = read_prompt_md("tools/_preamble.md")
if preamble:
parts.append(preamble + "\n")
examples: list[str] = []
parts: list[str] = ["\n<tools>\n"]
for tool_name in MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED:
if enabled_tool_names is not None and tool_name not in enabled_tool_names:
continue
instruction = read_prompt_md(_tool_fragment_path(tool_name, variant))
if instruction:
parts.append(instruction + "\n")
description = _tool_fragment(tool_name, variant, "description.md")
example = _tool_fragment(tool_name, variant, "example.md")
example = read_prompt_md(_example_fragment_path(tool_name, variant))
if not description and not example:
continue
if description:
parts.append(description + "\n")
if example:
examples.append(example + "\n")
parts.append("\n" + example + "\n")
parts.append("\n")
task_description = read_prompt_md("tools/task/description.md")
task_example = read_prompt_md("tools/task/example.md")
if task_description:
parts.append(task_description + "\n")
if task_example:
parts.append("\n" + task_example + "\n")
parts.append("\n")
known_disabled = (
set(disabled_tool_names) & set(MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED)
@ -68,19 +72,13 @@ def build_tools_instruction_block(
if n in known_disabled
)
parts.append(
"\n"
"DISABLED TOOLS (by user, main-agent scope):\n"
f"These SurfSense tools were disabled on the main agent for this session: {disabled_list}.\n"
"You do NOT have access to them and MUST NOT claim you can use them.\n"
"If the user still needs that capability, delegate with **task** if a subagent covers it,\n"
"otherwise explain it is disabled on the main agent for this session.\n"
"<disabled_tools>\n"
f"Disabled for this session: {disabled_list}.\n"
"Don't claim you can use them. If the user needs that capability,\n"
"delegate with `task` when a specialist covers it; otherwise say\n"
"the tool is disabled.\n"
"</disabled_tools>\n"
)
parts.append("\n</tools>\n")
if examples:
parts.append("<tool_call_examples>")
parts.extend(examples)
parts.append("</tool_call_examples>\n")
parts.append("</tools>\n")
return "".join(parts)

View file

@ -1 +0,0 @@
"""Markdown fragments for the **main-agent** system prompt only (`importlib.resources`)."""

View file

@ -1,9 +0,0 @@
You are SurfSense's **main agent**: you answer using the user's knowledge context,
lightweight research tools, and memory — and you **delegate** integrations and
specialized work via **task** (see `<tool_routing>` in this prompt).
Today's date (UTC): {resolved_today}
When writing mathematical formulas or equations, ALWAYS use LaTeX notation. NEVER use backtick code spans or Unicode symbols for math.
NEVER expose internal tool parameter names, backend IDs, or implementation details to the user. Always use natural, user-friendly language instead.

View file

@ -1,11 +0,0 @@
You are SurfSense's **main agent** for this team space: you answer using shared
knowledge context, lightweight research tools, and memory — and you **delegate**
integrations and specialized work via **task** (see `<tool_routing>` in this prompt).
In this team thread, each message is prefixed with **[DisplayName of the author]**. Use this to attribute and reference the author of anything in the discussion (who asked a question, made a suggestion, or contributed an idea) and to cite who said what in your answers.
Today's date (UTC): {resolved_today}
When writing mathematical formulas or equations, ALWAYS use LaTeX notation. NEVER use backtick code spans or Unicode symbols for math.
NEVER expose internal tool parameter names, backend IDs, or implementation details to the user. Always use natural, user-friendly language instead.

View file

@ -1,15 +0,0 @@
<citation_instructions>
IMPORTANT: Citations are DISABLED for this configuration.
DO NOT include `[citation:…]` markers anywhere — even if tool descriptions or examples
mention them. Ignore citation-format reminders elsewhere in this prompt when they conflict
with this block.
Instead:
1. Answer in plain prose; optional markdown links to public URLs when sources are URLs.
2. Do NOT expose raw chunk IDs, document IDs, or internal IDs to the user.
3. Present indexed or doc-search facts naturally without attribution markers.
When answering from workspace or docs context: integrate facts cleanly without claiming
“this comes from chunk X”.
</citation_instructions>

View file

@ -1,15 +0,0 @@
<citation_instructions>
This block appears **before** `<tools>` so it wins over any tool-example wording below.
Apply chunk citations **only** when the runtime injects `<document>` / `<chunk id='…'>` blocks
(e.g. from SurfSense docs search or priority documents).
1. For each factual statement taken from those chunks, add `[citation:chunk_id]` using the **exact** `chunk_id` string from `<chunk id='…'>`.
2. Multiple chunks → `[citation:id1], [citation:id2]` (comma-separated).
3. Never invent or normalize ids; if unsure, omit the citation.
4. Plain brackets only — no markdown links, no `([citation:…](url))`, no footnote numbering.
Chunk ids may be numeric, prefixed (e.g. `doc-45`), or URLs when the source is web-shaped — copy verbatim.
If no chunk-tagged documents appear in context this turn, do not fabricate citations.
</citation_instructions>

View file

@ -1,13 +0,0 @@
- User: "Check out https://dev.to/some-article"
- Call: `scrape_webpage(url="https://dev.to/some-article")`
- Respond with a structured analysis — key points, takeaways.
- User: "Read this article and summarize it for me: https://example.com/blog/ai-trends"
- Call: `scrape_webpage(url="https://example.com/blog/ai-trends")`
- Respond with a thorough summary using headings and bullet points.
- User: (after discussing https://example.com/stats) "Can you get the live data from that page?"
- Call: `scrape_webpage(url="https://example.com/stats")`
- IMPORTANT: Always attempt scraping first. Never refuse before trying the tool.
- User: "https://example.com/blog/weekend-recipes"
- Call: `scrape_webpage(url="https://example.com/blog/weekend-recipes")`
- When a user sends just a URL with no instructions, scrape it and provide a concise summary of the content.

View file

@ -1,9 +0,0 @@
- User: "How do I install SurfSense?"
- Call: `search_surfsense_docs(query="installation setup")`
- User: "What connectors does SurfSense support?"
- Call: `search_surfsense_docs(query="available connectors integrations")`
- User: "How do I set up the Notion connector?"
- Call: `search_surfsense_docs(query="Notion connector setup configuration")` (how-to docs). Changing data inside Notion itself → **task**.
- User: "How do I use Docker to run SurfSense?"
- Call: `search_surfsense_docs(query="Docker installation setup")`

View file

@ -1,16 +0,0 @@
- <user_name>Alex</user_name>, <user_memory> is empty. User: "I'm a space enthusiast, explain astrophage to me"
- The user casually shared a durable fact. Use their first name in the entry, short neutral heading:
update_memory(updated_memory="## Interests & background\n- (2025-03-15) [fact] Alex is a space enthusiast\n")
- User: "Remember that I prefer concise answers over detailed explanations"
- Durable preference. Merge with existing memory, add a new heading:
update_memory(updated_memory="## Interests & background\n- (2025-03-15) [fact] Alex is a space enthusiast\n\n## Response style\n- (2025-03-15) [pref] Alex prefers concise answers over detailed explanations\n")
- User: "I actually moved to Tokyo last month"
- Updated fact, date prefix reflects when recorded:
update_memory(updated_memory="## Interests & background\n...\n\n## Personal context\n- (2025-03-15) [fact] Alex lives in Tokyo (previously London)\n...")
- User: "I'm a freelance photographer working on a nature documentary"
- Durable background info under a fitting heading:
update_memory(updated_memory="...\n\n## Current focus\n- (2025-03-15) [fact] Alex is a freelance photographer\n- (2025-03-15) [fact] Alex is working on a nature documentary\n")
- User: "Always respond in bullet points"
- Standing instruction:
update_memory(updated_memory="...\n\n## Response style\n- (2025-03-15) [instr] Always respond to Alex in bullet points\n")

View file

@ -1,7 +0,0 @@
- User: "Let's remember that we decided to do weekly standup meetings on Mondays"
- Durable team decision:
update_memory(updated_memory="- (2025-03-15) [fact] Weekly standup meetings on Mondays\n...")
- User: "Our office is in downtown Seattle, 5th floor"
- Durable team fact:
update_memory(updated_memory="- (2025-03-15) [fact] Office location: downtown Seattle, 5th floor\n...")

View file

@ -1,8 +0,0 @@
- User: "What's the current USD to INR exchange rate?"
- Call: `web_search(query="current USD to INR exchange rate")`
- Answer from returned snippets or scrape a top URL if needed; use markdown links to sources.
- User: "What's the latest news about AI?"
- Call: `web_search(query="latest AI news today")`
- User: "What's the weather in New York?"
- Call: `web_search(query="weather New York today")`

View file

@ -1,19 +0,0 @@
<knowledge_base_only_policy>
CRITICAL RULE — KNOWLEDGE BASE FIRST, NEVER DEFAULT TO GENERAL KNOWLEDGE:
- Ground factual answers in what you actually receive this turn: injected workspace
documents (when present), **search_surfsense_docs**, **web_search**, **scrape_webpage**,
or substantive results summarized from a **task** subagent you invoked.
- Do NOT answer factual or informational questions from general knowledge unless the user
explicitly grants permission after you say you did not find enough in those sources.
- If indexed/docs search returns nothing relevant AND **web_search** / **scrape_webpage**
(and **task**, if already tried appropriately) still do not supply an answer, you MUST:
1. Say you could not find enough in their workspace/docs/tools output.
2. Ask: "Would you like me to answer from my general knowledge instead?"
3. ONLY then answer from general knowledge after they clearly say yes.
- This policy does NOT apply to:
* Casual conversation, greetings, or meta-questions about SurfSense (e.g. "what can you do?")
* Formatting or analysis of content already in the chat
* Clear rewrite/edit instructions ("bullet-point this paragraph")
* Lightweight research with **web_search** / **scrape_webpage**
* Work that belongs on a specialist — use **task**; see `<tool_routing>`
</knowledge_base_only_policy>

View file

@ -1,19 +0,0 @@
<knowledge_base_only_policy>
CRITICAL RULE — KNOWLEDGE BASE FIRST, NEVER DEFAULT TO GENERAL KNOWLEDGE:
- Ground factual answers in what you actually receive this turn: injected shared
workspace documents (when present), **search_surfsense_docs**, **web_search**,
**scrape_webpage**, or substantive results summarized from a **task** subagent you invoked.
- Do NOT answer factual questions from general knowledge unless a team member explicitly
grants permission after you say you did not find enough in those sources.
- If indexed/docs search returns nothing relevant AND **web_search** / **scrape_webpage**
(and **task**, if already tried appropriately) still do not supply an answer, you MUST:
1. Say you could not find enough in shared docs/tools output.
2. Ask: "Would you like me to answer from my general knowledge instead?"
3. ONLY then answer from general knowledge after they clearly say yes.
- This policy does NOT apply to:
* Casual conversation, greetings, or meta-questions about SurfSense
* Formatting or analysis of content already in the chat
* Clear rewrite/edit instructions
* Lightweight research with **web_search** / **scrape_webpage**
* Work that belongs on a specialist — use **task**; see `<tool_routing>`
</knowledge_base_only_policy>

View file

@ -1,27 +0,0 @@
<tool_routing>
Use **task** for anything beyond your direct SurfSense tools: calendar, mail,
chat, tickets, documents in third-party systems, connector-specific discovery,
deliverables (reports, podcasts, images, etc.), and other specialized routes.
The live list of specialists you may target with **task** for this workspace is in
`<registry_subagents>` (later in this prompt).
Your **direct** SurfSense tools are only: **update_memory**, **web_search**,
**scrape_webpage**, and **search_surfsense_docs**. The runtime may also attach
deep-agent helpers (e.g. todos, filesystem, **task** itself). Use **task** whenever
the user needs capabilities **not** listed in the `<tools>` section (that section appears
later in this system prompt, after citation rules).
Do not treat live third-party state as if it were already in the indexed knowledge
base; reach it via **task**.
Never emit more than one **task** tool call in the same turn. Bundle related work
for the same specialist into a single **task** invocation (the subagent itself can
call its own tools in parallel inside that one run). Parallel **task** calls would
fan out into multiple concurrent subagent runs whose human-approval interrupts
cannot be coordinated; one **task** at a time is required.
</tool_routing>
<!-- TODO: lift the single-task constraint once the runtime supports parallel task
interrupts end-to-end (multi-interrupt SSE + interrupt-id-keyed Command(resume)
+ keyed surfsense_resume_value side-channel). Until then this nudge is the only
guard; the parent graph's resume cannot address multiple pending interrupts. -->

View file

@ -1,6 +0,0 @@
<memory_protocol>
IMPORTANT — After understanding each user message, ALWAYS check: does this message
reveal durable facts about the user (role, interests, preferences, projects,
background, or standing instructions)? If yes, you MUST call update_memory
alongside your normal response — do not defer this to a later turn.
</memory_protocol>

View file

@ -1,6 +0,0 @@
<memory_protocol>
IMPORTANT — After understanding each user message, ALWAYS check: does this message
reveal durable facts about the team (decisions, conventions, architecture, processes,
or key facts)? If yes, you MUST call update_memory alongside your normal response —
do not defer this to a later turn.
</memory_protocol>

View file

@ -1,15 +0,0 @@
<parameter_resolution>
You do **not** call connector-specific discovery tools yourself (accounts, channels,
Jira cloud IDs, Airtable bases, Slack channels, etc.). Those tools exist only on
**task** subagents.
When the user needs work inside a connected product, delegate with **task** and a
clear goal. If several Slack channels, Jira projects, calendar calendars, etc. could
match and only the integration can list them, **you must not** ask the human for
internal IDs (UUIDs, cloud IDs, opaque keys). The **task** subagent uses connector
tools to list candidates and either picks the only sensible match or asks the user
to choose using **normal labels** (e.g. channel display name, project title), not raw IDs.
If you already have plain-language choices from the user or from prior tool output,
you may pass them through to **task** without re-discovery.
</parameter_resolution>

View file

@ -1,9 +0,0 @@
<tools>
You have access to the following **SurfSense** tools (main-agent scope only):
IMPORTANT: You can ONLY use the tools listed below. Anything else — connectors,
deliverables, or multi-step integration work — goes through **task**, not as a
tool in this list.
Do NOT claim you can use a capability if it is not listed here.

View file

@ -1,10 +0,0 @@
- scrape_webpage: Fetch and extract readable content from a single HTTP(S) URL.
- Use when the user wants the *actual page body* (article, table, dashboard snapshot), not just search snippets.
- Try the tool when a URL is given or referenced; don't refuse without attempting unless the URL is clearly unsafe/invalid.
- Args:
- url: Page to fetch
- max_length: Cap on returned characters (default: 50000)
- Returns: Title, metadata, and markdown-ish body.
- Summarize clearly afterward; link back with `[label](url)`.
- If indexed workspace material is insufficient and the user points at a public URL, scraping is appropriate — still not a substitute for **task** on private connectors.

View file

@ -1,9 +0,0 @@
- search_surfsense_docs: Search official SurfSense documentation (product help).
- Use when the user asks how SurfSense works, setup, connectors at a high level, configuration, etc.
- Not a substitute for **task** when they need actions inside Gmail/Slack/Jira/etc.
- Args:
- query: What to look up in SurfSense docs
- top_k: Number of chunks to retrieve (default: 10)
- Returns: Doc excerpts; chunk ids may appear for attribution — follow the **citation**
instructions block above when citations are enabled; otherwise summarize without `[citation:…]`.

View file

@ -1,12 +0,0 @@
- update_memory: Curate the **personal** long-term memory document for this user.
- Current memory (if any) appears in `<user_memory>` with usage vs limit.
- Call when the user asks to remember/forget, or shares durable facts/preferences/instructions.
- Use the first name from `<user_name>` when writing entries — write “Alex prefers…” not “The user prefers…”.
Do not store the name alone as a memory entry.
- Skip ephemeral chat noise (one-off q/a, greetings, session logistics).
- Args:
- updated_memory: FULL replacement markdown (merge and curate — don't only append).
- Formatting rules:
- Bullets: `- (YYYY-MM-DD) [marker] text` with markers `[fact]`, `[pref]`, `[instr]` (priority when trimming: instr > pref > fact).
- Each bullet under a short `##` heading; keep total size under the limit shown in `<user_memory>`.

View file

@ -1,26 +0,0 @@
- update_memory: Update the team's shared memory document for this search space.
- Your current team memory is already in <team_memory> in your context. The `chars`
and `limit` attributes show current usage and the maximum allowed size.
- This is the team's curated long-term memory — decisions, conventions, key facts.
- NEVER store personal memory in team memory (e.g. personal bio, individual
preferences, or user-only standing instructions).
- Call update_memory when:
* A team member explicitly asks to remember or forget something
* The conversation surfaces durable team decisions, conventions, or facts
that will matter in future conversations
- Do not store short-lived or ephemeral info: one-off questions, greetings,
session logistics, or things that only matter for the current task.
- Args:
- updated_memory: The FULL updated markdown document (not a diff).
Merge new facts with existing ones, update contradictions, remove outdated entries.
Treat every update as a curation pass — consolidate, don't just append.
- Every bullet MUST use this format: - (YYYY-MM-DD) [fact] text
Team memory uses ONLY the [fact] marker. Never use [pref] or [instr] in team memory.
- Keep it concise and well under the character limit shown in <team_memory>.
- Every entry MUST be under a `##` heading. Keep heading names short (2-3 words) and
natural. Organize by context — e.g. what the team decided, current architecture,
active processes. Create, split, or merge headings freely as the memory grows.
- Each entry MUST be a single bullet point. Be descriptive but concise — include relevant
details and context rather than just a few words.
- During consolidation, prioritize keeping: decisions/conventions > key facts > current priorities.

View file

@ -1,10 +0,0 @@
- web_search: Live public-web search (whatever search backends the workspace configured).
- Use for current events, prices, weather, news, or anything needing fresh public web data.
- For those queries, call this tool rather than guessing from memory or claiming you lack network access.
- If results are thin, say so and offer to refine the query.
- Args:
- query: Specific search terms
- top_k: Max hits (default: 10, max: 50)
- If snippets are too shallow, follow up with **scrape_webpage** on the best URL.
- Present sources with readable markdown links `[label](url)` — never bare URLs.

View file

@ -0,0 +1 @@
"""Main-agent prompt fragments loaded by :mod:`...system_prompt.builder.load_md`."""

View file

@ -0,0 +1 @@
"""``<citations>`` block — ``on`` (cite chunk ids) and ``off`` (hard suppression)."""

View file

@ -0,0 +1,12 @@
<citations>
Citation markers are **disabled** in this configuration.
Do NOT include `[citation:…]` markers anywhere, even if tool descriptions or
examples reference them. Ignore citation-format reminders elsewhere in this
prompt when they conflict with this block.
1. Answer in plain prose. Optional markdown links to public URLs when
sources are URLs.
2. Do not expose raw chunk ids, document ids, or internal ids to the user.
3. Present KB or docs facts naturally without attribution markers.
</citations>

View file

@ -0,0 +1,11 @@
<citations>
Apply chunk citations only when the runtime injects `<document>` /
`<chunk id='…'>` blocks.
1. For each factual statement taken from those chunks, add
`[citation:chunk_id]` using the exact id from `<chunk id='…'>`.
2. Multiple chunks → `[citation:id1], [citation:id2]` (comma-separated).
3. Never invent or normalise ids; if unsure, omit.
4. Plain brackets only — no markdown links, no footnote numbering.
5. If no chunk-tagged documents appear this turn, do not fabricate citations.
</citations>

View file

@ -0,0 +1,13 @@
<core_behavior>
- Be concise and direct. No preamble ("Sure!", "Great question!", "I'll now…").
- Don't narrate intent — just act. State the outcome, not the plan.
- If the request is ambiguous, ask before acting. If asked *how* to do
something, explain first, then act.
- Prioritise accuracy over agreement. Disagree respectfully when the user is
wrong; avoid unnecessary superlatives or emotional validation.
- Persist until the task is done or you are genuinely blocked. Don't stop
partway and describe what you *would* do.
- For longer work, give brief progress updates only when they add new
information (a discovery, a tradeoff, a blocker, the start of a non-trivial
step). Don't narrate routine reads.
</core_behavior>

View file

@ -0,0 +1 @@
"""``<dynamic_context>`` block — private and team variants."""

View file

@ -0,0 +1,27 @@
<dynamic_context>
The runtime inserts these system messages each turn. They are authoritative
for *this* turn only.
`<user_memory>` carries the durable personal context the user has accumulated
across sessions — role, interests, preferences, projects, background,
standing instructions. It also reports current character usage versus the
hard limit so you can manage the budget. Treat it as background colour for
your answer, not as the task itself.
`<priority_documents>` lists the workspace documents most relevant to the
latest user message, ranked by relevance score, with `[USER-MENTIONED]`
flagged on anything the user explicitly referenced. When the task is about
workspace content, read these first; matched passages inside each document
are flagged via `<chunk_index>` so you can jump straight to them.
`<workspace_tree>` shows the full `/documents/` folder and file layout. Use
it to resolve paths the user describes in natural language ("my Q2 roadmap",
"last week's meeting notes") into concrete document references before
delegating to a specialist.
`<document>` and `<chunk id='…'>` blocks are chunked indexed content returned
by KB search (from `search_surfsense_docs`, or backing `<priority_documents>`).
Each chunk carries a stable `id` attribute.
If a block doesn't appear this turn, work from the conversation alone.
</dynamic_context>

View file

@ -0,0 +1,27 @@
<dynamic_context>
The runtime inserts these system messages each turn. They are authoritative
for *this* turn only.
`<team_memory>` carries the durable shared context this team has built up —
decisions, conventions, architecture notes, processes, key facts. It also
reports current character usage versus the hard limit so you can manage the
budget. Treat it as background colour for your answer, not as the task itself.
`<priority_documents>` lists the workspace documents most relevant to the
latest user message, ranked by relevance score, with `[USER-MENTIONED]`
flagged on anything someone in the thread explicitly referenced. When the
task is about workspace content, read these first; matched passages inside
each document are flagged via `<chunk_index>` so you can jump straight to
them.
`<workspace_tree>` shows the full `/documents/` folder and file layout. Use
it to resolve paths described in natural language ("the Q2 roadmap", "last
week's planning notes") into concrete document references before delegating
to a specialist.
`<document>` and `<chunk id='…'>` blocks are chunked indexed content returned
by KB search (from `search_surfsense_docs`, or backing `<priority_documents>`).
Each chunk carries a stable `id` attribute.
If a block doesn't appear this turn, work from the conversation alone.
</dynamic_context>

View file

@ -0,0 +1 @@
"""``<agent_identity>`` block — private and team variants."""

View file

@ -0,0 +1,8 @@
<agent_identity>
You are **SurfSense's main agent**. Your job is to answer the user using their
knowledge base, lightweight web research, persistent memory, and **specialist
subagents** invoked via the `task` tool. You are an orchestrator — most
non-trivial work belongs on a specialist.
Today (UTC): {resolved_today}
</agent_identity>

View file

@ -0,0 +1,11 @@
<agent_identity>
You are **SurfSense's main agent**. Your job is to answer the user using their
shared team knowledge base, lightweight web research, persistent memory, and
**specialist subagents** invoked via the `task` tool. You are an orchestrator
— most non-trivial work belongs on a specialist.
Today (UTC): {resolved_today}
You are in a **team thread**. Each message is prefixed with `[DisplayName]`.
Attribute quotes and decisions to the named author when relevant.
</agent_identity>

View file

@ -0,0 +1,19 @@
<knowledge_base_first>
CRITICAL — ground factual answers in what you actually receive this turn:
- injected workspace context (see `<dynamic_context>`),
- results from your own tool calls (`search_surfsense_docs`, `web_search`,
`scrape_webpage`),
- or substantive summaries returned by a `task` specialist you invoked.
Do **not** answer factual or informational questions from general knowledge
unless the user explicitly authorises it after you say you couldn't find
enough in those sources. The flow when nothing is found:
1. Say you couldn't find enough in their workspace, docs, or tool output.
2. Ask: *"Would you like me to answer from my general knowledge instead?"*
3. Only answer from general knowledge after a clear yes.
This rule does NOT apply to: casual conversation · meta-questions about
SurfSense ("what can you do?") · formatting or analysis of content already
in chat · clear rewrite/edit instructions · lightweight web research.
</knowledge_base_first>

View file

@ -0,0 +1 @@
"""``<memory_protocol>`` block — private and team variants."""

View file

@ -0,0 +1,9 @@
<memory_protocol>
After understanding each user message, check: does it reveal durable facts
about the user — role, interests, preferences, projects, background, or
standing instructions?
If yes, call `update_memory` **alongside** your normal response — don't
defer it to a later turn. Skip ephemeral chat noise (one-off Q/A, greetings,
session logistics). Stay within the budget shown in `<user_memory>`.
</memory_protocol>

View file

@ -0,0 +1,9 @@
<memory_protocol>
After understanding each user message, check: does it reveal durable facts
about the team — decisions, conventions, architecture notes, processes, or
key facts?
If yes, call `update_memory` **alongside** your normal response — don't
defer it to a later turn. Skip ephemeral chat noise (one-off Q/A, greetings,
session logistics). Stay within the budget shown in `<team_memory>`.
</memory_protocol>

View file

@ -0,0 +1,7 @@
<output_format>
- Mathematical formulas: **always** LaTeX. Never backtick code spans or
Unicode symbols for math.
- Never expose internal tool parameter names, backend IDs, or
implementation details. Use natural, user-friendly language.
- External sources: markdown links `[label](url)`, never bare URLs.
</output_format>

View file

@ -14,5 +14,5 @@ Workflow (Understand → Plan → Act → Verify):
Discipline:
- Do not imply access to connectors, MCP tools, or deliverable generators except via **task**.
- Path arguments for filesystem tools must be exact strings from tool results — never invent paths.
- Pass paths to **task(knowledge_base, …)** only when you saw them in `<workspace_tree>` or `<priority_documents>`. Otherwise describe the document in natural language and let the subagent resolve it.
</provider_hints>

View file

@ -0,0 +1,12 @@
<refusal_and_limits>
- If a capability is not in `<tools>` and no entry in `<specialists>` covers
it, say so plainly and ask whether the user wants to proceed differently.
Don't pretend you can do it.
- If a `task` call errors or the specialist is unavailable, surface that to
the user with a clear next step. Don't silently retry forever.
- Disabled tools announced by the runtime are off-limits even if documented
elsewhere — say so and offer a `task` alternative if one exists.
- Never claim filesystem access, connector access, or persistent storage you
don't have. The direct tools listed in `<routing>` and the `<specialists>`
list are your entire surface area.
</refusal_and_limits>

View file

@ -0,0 +1,4 @@
<reminder>
Concise · KB-grounded · delegation-first · one specialist per `task` call ·
no direct filesystem · persist memory when durable facts appear.
</reminder>

View file

@ -0,0 +1,96 @@
<routing>
You have two execution channels. Pick the one that owns the work — never
simulate one with the other.
### 1. Direct tools (you call them yourself)
- `search_surfsense_docs` — SurfSense product docs (setup, configuration,
connector docs, feature behavior).
- `web_search` — search the public web (anything outside SurfSense docs and
the workspace KB).
- `scrape_webpage` — fetch the body of a specific public URL.
- `update_memory` — curate persistent memory (see `<memory_protocol>`).
- `write_todos` — maintain a structured plan when the turn series spans
multiple specialists or steps. Mark each item
`in_progress` **before** the `task` call that handles it, `completed`
once the call returns. Skip for single-step requests.
**You have NO filesystem tools.** Any read, write, edit, move, rename, or
search inside the user's workspace goes through `task(knowledge_base, …)` —
never via `write_file`, `ls`, or any direct file operation.
### 2. `task(<specialist>, …)` — specialist subagents
Use `task` for anything beyond the direct tools above. See
`<specialists>` for the live roster.
Rules for `task`:
- **One specialist per `task` call.** A single `task` invocation targets
exactly one specialist; that specialist only has tools for its own
domain, so any work outside that domain in the same prompt won't run.
- **Parallelise independent specialist work.** When a turn needs multiple
`task` calls whose work doesn't depend on each other's results (e.g.
"create a ClickUp ticket AND a Linear ticket"), emit them as parallel
`task` calls. Two `task` calls are independent when:
- Neither's prompt references the other's output, and
- They target different specialists, OR the same specialist with
non-overlapping scopes (e.g. reading two unrelated paths).
- **Serialise dependent work across turns.** If one specialist's output
must inform another's input (e.g. "find the roadmap in my KB, then
email it to Maya"), invoke them on consecutive turns — first finishes,
then you call the second with the first's result baked into its prompt.
Use `write_todos` to keep the plan alive across those turns.
- Within a single specialist, bundle every related step into the same task
prompt (read + write + summary go together).
- Put the **full instructions inside the task prompt** — the specialist
cannot see this thread.
- Don't claim to already know what a specialist's source contains; invoke
the specialist and use what it returns.
<example>
user: "Save these meeting notes to my KB: …"
→ task(knowledge_base, "Save the meeting notes below to a new document
under /documents/notes/. Pick a sensible title and folder; tell me the
path you used.\n\n<notes>…</notes>")
</example>
<example>
user: "What did Maya say about the Q2 roadmap in Slack last week?"
→ task(slack, "Find messages from Maya about the Q2 roadmap from the past
week. Return the most relevant quotes with channel and timestamp.")
</example>
<example>
user: "What's the current USD/INR rate?"
→ web_search(query="current USD to INR exchange rate")
</example>
<example>
user: "Find my Q2 roadmap and summarise the milestones."
→ task(knowledge_base, "Locate the Q2 roadmap document under /documents
and summarise its milestones. Use glob or grep if the path isn't
obvious from the workspace tree.")
</example>
<example>
user: "Create a ClickUp ticket and a Linear ticket for the new feature flag."
→ Independent work — call both specialists in parallel:
write_todos([
{content: "Create ClickUp ticket for feature flag rollout", status: "in_progress"},
{content: "Create Linear ticket for feature flag rollout", status: "in_progress"},
])
task(clickup, "Create a ClickUp ticket titled 'Feature flag rollout'
in the default list. Description: <…>. Tell me the ticket URL.")
task(linear, "Create a Linear ticket titled 'Feature flag rollout'
in the default team. Description: <…>. Tell me the ticket URL.")
</example>
<example>
user: "Find my Q2 roadmap doc in the KB and email a summary to Maya."
→ The email body depends on the doc's contents — serialise across turns.
This turn:
task(knowledge_base, "Find the Q2 roadmap document under /documents
and return its full text plus a 3-bullet summary.")
Next turn (with the returned summary in hand):
task(gmail, "Send an email to Maya with subject 'Q2 roadmap summary'
and the following body: <summary returned by knowledge_base>.")
</example>
</routing>

View file

@ -0,0 +1 @@
"""``<tools>`` block — one vertical-slice subfolder per direct main-agent tool."""

View file

@ -0,0 +1 @@
"""``scrape_webpage`` — description + few-shot examples."""

View file

@ -0,0 +1,11 @@
- `scrape_webpage` — Fetch and extract readable content from a single URL.
- Use when the user wants the actual page body (article, table, dashboard
snapshot), not just search snippets.
- Try the tool when a URL is given or referenced; don't refuse without
attempting unless the URL is clearly unsafe or invalid.
- Public web only. For URLs behind a connector (Notion pages, Linear
issues, Confluence, anything that needs auth), use `task` with the
matching specialist instead.
- Args: `url`, `max_length` (default 50000).
- Returns title, metadata, and markdown-ish body. Summarise clearly and
link back with `[label](url)`.

View file

@ -0,0 +1,24 @@
<example>
user: "Check out https://dev.to/some-article"
→ scrape_webpage(url="https://dev.to/some-article")
(Respond with a structured analysis — key points, takeaways.)
</example>
<example>
user: "Read this article and summarize it for me: https://example.com/blog/ai-trends"
→ scrape_webpage(url="https://example.com/blog/ai-trends")
(Thorough summary using headings and bullets.)
</example>
<example>
user: (after discussing https://example.com/stats) "Can you get the live data from that page?"
→ scrape_webpage(url="https://example.com/stats")
(Always attempt scraping first. Never refuse before trying.)
</example>
<example>
user: "https://example.com/blog/weekend-recipes"
→ scrape_webpage(url="https://example.com/blog/weekend-recipes")
(When a user sends just a URL with no instructions, scrape it and provide
a concise summary.)
</example>

View file

@ -0,0 +1 @@
"""``search_surfsense_docs`` — description + few-shot examples."""

View file

@ -0,0 +1,10 @@
- `search_surfsense_docs` — Search official SurfSense documentation (product
help).
- Use when the user asks how SurfSense itself works — setup, configuration,
connector documentation, feature behavior, anything covered in the
product docs.
- Not a substitute for `task` when the user wants actions inside a
connected service (Gmail, Slack, Jira, Notion, etc.).
- Args: `query`, `top_k` (default 10).
- Returns doc excerpts; chunk ids may appear for attribution — see
`<citations>` for the contract.

View file

@ -0,0 +1,15 @@
<example>
user: "How do I install SurfSense?"
→ search_surfsense_docs(query="installation setup")
</example>
<example>
user: "What connectors does SurfSense support?"
→ search_surfsense_docs(query="available connectors integrations")
</example>
<example>
user: "How do I set up the Notion connector?"
→ search_surfsense_docs(query="Notion connector setup configuration")
(Changing data inside Notion itself → `task(notion, …)`, not this tool.)
</example>

View file

@ -0,0 +1 @@
"""``task`` — description + few-shot examples for the specialist-delegation tool."""

View file

@ -0,0 +1,15 @@
- `task` — Invoke a specialist subagent.
- Specialists own workspace knowledge-base operations and connected
third-party services (Slack, Notion, Jira, Gmail, etc.). See
`<specialists>` for the live roster.
- Each subagent runs in isolation with its own tool stack and context,
and returns a single synthesized result.
- Args:
- `subagent_type` — name of the specialist to invoke (must match an
entry in `<specialists>`).
- `description` — the FULL task prompt. The specialist cannot see this
thread, so include all context and constraints, plus what you need
back. The specialist will respond in its own format — don't dictate
one.
- Routing rules (when to call, how often, how to scope) live in
`<routing>`.

View file

@ -0,0 +1,20 @@
<example>
user: "Save these meeting notes to my KB: …"
→ task(subagent_type="knowledge_base", description="Save the notes below to
a new document under /documents/notes/. Pick a sensible title and folder;
tell me the path you used.\n\n<notes>…</notes>")
</example>
<example>
user: "What did Maya say about the Q2 roadmap in Slack last week?"
→ task(subagent_type="slack", description="Find messages from Maya about
the Q2 roadmap from the past week. Return the most relevant quotes with
channel and timestamp.")
</example>
<example>
user: "Find my Q2 roadmap and summarise the milestones."
→ task(subagent_type="knowledge_base", description="Locate the Q2 roadmap
document under /documents and summarise its milestones. Use glob or grep
if the path isn't obvious from the workspace tree.")
</example>

View file

@ -0,0 +1 @@
"""``update_memory`` — private and team visibility variants."""

View file

@ -0,0 +1 @@
"""``update_memory`` (private variant) — description + few-shot examples."""

View file

@ -0,0 +1,15 @@
- `update_memory` — Curate the **personal** long-term memory document for
this user.
- The current memory (if any) appears in `<user_memory>` with usage vs limit.
- Call when the user asks to remember or forget something, or shares
durable facts, preferences, or instructions.
- Use the first name from `<user_name>` when writing entries — write
"Alex prefers…" not "The user prefers…". Don't store the name alone as a
memory entry.
- Skip ephemeral chat noise (one-off Q/A, greetings, session logistics).
- Args: `updated_memory` — FULL replacement markdown (merge and curate,
don't only append).
- Formatting: bullets `- (YYYY-MM-DD) [marker] text` with markers `[fact]`,
`[pref]`, `[instr]` (priority when trimming: `instr > pref > fact`).
Group bullets under short `##` headings; stay under the limit shown in
`<user_memory>`.

View file

@ -0,0 +1,28 @@
<example>
<user_name>Alex</user_name>, <user_memory> is empty.
user: "I'm a space enthusiast, explain astrophage to me"
→ update_memory(updated_memory="## Interests & background\n- (2025-03-15) [fact] Alex is a space enthusiast\n")
(Casual durable fact; use first name, neutral heading.)
</example>
<example>
user: "Remember that I prefer concise answers over detailed explanations"
→ update_memory(updated_memory="## Interests & background\n- (2025-03-15) [fact] Alex is a space enthusiast\n\n## Response style\n- (2025-03-15) [pref] Alex prefers concise answers over detailed explanations\n")
(Durable preference; merge with existing memory.)
</example>
<example>
user: "I actually moved to Tokyo last month"
→ update_memory(updated_memory="...\n\n## Personal context\n- (2025-03-15) [fact] Alex lives in Tokyo (previously London)\n...")
(Updated fact; date reflects when recorded.)
</example>
<example>
user: "I'm a freelance photographer working on a nature documentary"
→ update_memory(updated_memory="...\n\n## Current focus\n- (2025-03-15) [fact] Alex is a freelance photographer\n- (2025-03-15) [fact] Alex is working on a nature documentary\n")
</example>
<example>
user: "Always respond in bullet points"
→ update_memory(updated_memory="...\n\n## Response style\n- (2025-03-15) [instr] Always respond to Alex in bullet points\n")
</example>

View file

@ -0,0 +1 @@
"""``update_memory`` (team variant) — description + few-shot examples."""

View file

@ -0,0 +1,16 @@
- `update_memory` — Curate the team's **shared** long-term memory document
for this search space.
- The current memory (if any) appears in `<team_memory>` with usage vs limit.
- Call when a team member asks to remember or forget something, or when
the conversation surfaces durable team decisions, conventions,
architecture notes, processes, or key facts.
- NEVER store personal memory in team memory (individual bios, personal
preferences, user-only standing instructions).
- Skip ephemeral chat noise (one-off Q/A, greetings, session logistics).
- Args: `updated_memory` — FULL replacement markdown (merge and curate,
don't only append).
- Formatting: bullets `- (YYYY-MM-DD) [fact] text`. Team memory uses ONLY
the `[fact]` marker (never `[pref]` or `[instr]`). Group bullets under
short `##` headings (2-3 words each); stay under the limit shown in
`<team_memory>`. When trimming, prioritise: decisions/conventions > key
facts > current priorities.

View file

@ -0,0 +1,9 @@
<example>
user: "Let's remember that we decided to do weekly standup meetings on Mondays"
→ update_memory(updated_memory="...\n\n## Team rituals\n- (2025-03-15) [fact] Weekly standup meetings on Mondays\n...")
</example>
<example>
user: "Our office is in downtown Seattle, 5th floor"
→ update_memory(updated_memory="...\n\n## Workspace\n- (2025-03-15) [fact] Office location: downtown Seattle, 5th floor\n...")
</example>

View file

@ -0,0 +1 @@
"""``web_search`` — description + few-shot examples."""

View file

@ -0,0 +1,10 @@
- `web_search` — Search the public web.
- Use whenever an answer benefits from external sources — current events,
prices, weather, news, technical references, definitions, background
facts, anything outside SurfSense docs and the workspace KB. Reach for
it whenever freshness matters or you'd otherwise guess from memory.
- Don't refuse with "I lack network access" — call the tool.
- If results are thin, say so and offer to refine the query.
- Args: `query`, `top_k` (default 10, max 50).
- Follow up with `scrape_webpage` on the best URL when snippets are too
shallow. Present sources with `[label](url)` markdown links.

View file

@ -0,0 +1,15 @@
<example>
user: "What's the current USD to INR exchange rate?"
→ web_search(query="current USD to INR exchange rate")
(Answer from snippets; scrape a top URL if needed.)
</example>
<example>
user: "What's the latest news about AI?"
→ web_search(query="latest AI news today")
</example>
<example>
user: "What's the weather in New York?"
→ web_search(query="weather New York today")
</example>

View file

@ -1,7 +0,0 @@
"""Multi-agent middleware stack assembly."""
from __future__ import annotations
from .stack import build_main_agent_deepagent_middleware
__all__ = ["build_main_agent_deepagent_middleware"]

View file

@ -4,21 +4,27 @@ Replaces upstream ``SubAgentMiddleware`` to:
- share the parent's checkpointer with each subagent,
- forward ``runtime.config`` (thread_id, recursion_limit, …) into nested invokes,
- isolate each parallel ``task`` call in its own checkpoint slot via
per-call ``thread_id`` namespacing,
- bridge ``Command(resume=...)`` from the parent into the subagent via the
``config["configurable"]["surfsense_resume_value"]`` side-channel,
``config["configurable"]["surfsense_resume_value"]`` side-channel, keyed by
``tool_call_id`` so parallel siblings never race on a shared scalar,
- target the resume at the captured interrupt id so a follow-up
``HumanInTheLoopMiddleware.after_model`` does not consume the same payload,
- re-raise any new subagent interrupt at the parent so the SSE stream surfaces it.
- stamp each subagent's pending interrupt with the parent's ``tool_call_id``
so ``stream_resume_chat`` can route a flat ``decisions`` list back to the
right paused subagent.
Module layout
-------------
- ``constants`` shared keys / limits.
- ``config`` RunnableConfig + side-channel resume read.
- ``resume`` pending-interrupt detection, fan-out, ``Command(resume=...)`` builder.
- ``propagation`` re-raise pending subagent interrupts at the parent.
- ``task_tool`` the ``task`` tool factory (sync + async).
- ``middleware`` :class:`SurfSenseCheckpointedSubAgentMiddleware` itself.
- ``constants`` shared keys / limits.
- ``config`` RunnableConfig + side-channel resume read + per-call ``thread_id``.
- ``resume`` pending-interrupt detection, fan-out, ``Command(resume=...)`` builder.
- ``propagation`` ``wrap_with_tool_call_id`` helper for stamping interrupt values.
- ``resume_routing`` slice a flat decisions list to per-``tool_call_id`` payloads.
- ``task_tool`` the ``task`` tool factory (sync + async), and the catch-and-stamp chokepoint.
- ``middleware`` :class:`SurfSenseCheckpointedSubAgentMiddleware` itself.
"""
from .middleware import SurfSenseCheckpointedSubAgentMiddleware

View file

@ -21,7 +21,17 @@ _LANGGRAPH_SCRATCHPAD_KEY = "__pregel_scratchpad"
def subagent_invoke_config(runtime: ToolRuntime) -> dict[str, Any]:
"""RunnableConfig for the nested invoke; raises ``recursion_limit`` to the parent's budget."""
"""RunnableConfig for the nested invoke; raises ``recursion_limit`` and isolates ``thread_id``.
Each parallel subagent invocation lands in its own checkpoint slot keyed
by an extended ``thread_id`` of the form ``{parent_thread}::task:{tool_call_id}``.
The same call across the resume cycle keeps reading from the same snapshot
(``tool_call_id`` is stable per LLM-emitted call).
We namespace via ``thread_id`` rather than ``checkpoint_ns`` because
langgraph's ``aget_state`` interprets a non-empty ``checkpoint_ns`` as a
subgraph path and raises ``ValueError("Subgraph X not found")``.
"""
merged: dict[str, Any] = dict(runtime.config) if runtime.config else {}
current_limit = merged.get("recursion_limit")
try:
@ -30,43 +40,68 @@ def subagent_invoke_config(runtime: ToolRuntime) -> dict[str, Any]:
current_int = 0
if current_int < DEFAULT_SUBAGENT_RECURSION_LIMIT:
merged["recursion_limit"] = DEFAULT_SUBAGENT_RECURSION_LIMIT
configurable: dict[str, Any] = dict(merged.get("configurable") or {})
parent_thread_id = configurable.get("thread_id")
per_call_suffix = f"task:{runtime.tool_call_id}"
configurable["thread_id"] = (
f"{parent_thread_id}::{per_call_suffix}"
if parent_thread_id
else per_call_suffix
)
merged["configurable"] = configurable
return merged
def consume_surfsense_resume(runtime: ToolRuntime) -> Any:
    """Pop the resume payload queued for *this* call's ``tool_call_id``.

    ``configurable["surfsense_resume_value"]`` is read as a mapping of
    ``tool_call_id -> payload``. Only the entry matching
    ``runtime.tool_call_id`` is removed, so parallel sibling calls that share
    the same ``configurable`` dict each read their own decision instead of
    racing on a single shared value.

    Args:
        runtime: Tool runtime exposing ``config`` and ``tool_call_id``.

    Returns:
        The payload stored for this ``tool_call_id``, or ``None`` when the
        config is not a dict, has no dict ``configurable``, has no dict under
        ``surfsense_resume_value``, or holds no entry for this call.
    """
    cfg = runtime.config or {}
    configurable = cfg.get("configurable") if isinstance(cfg, dict) else None
    if not isinstance(configurable, dict):
        return None
    by_tcid = configurable.get("surfsense_resume_value")
    if not isinstance(by_tcid, dict):
        return None
    payload = by_tcid.pop(runtime.tool_call_id, None)
    if not by_tcid:
        # Every queued payload has been consumed: remove the now-empty
        # mapping so no stale key is left behind on ``configurable``.
        configurable.pop("surfsense_resume_value", None)
    return payload
def has_surfsense_resume(runtime: ToolRuntime) -> bool:
    """Report whether a resume payload is queued for this call's ``tool_call_id``.

    Non-destructive: the config is only inspected, never modified.

    Args:
        runtime: Tool runtime exposing ``config`` and ``tool_call_id``.

    Returns:
        ``True`` only when ``configurable["surfsense_resume_value"]`` is a
        dict that contains ``runtime.tool_call_id``; ``False`` for every
        other shape (missing config, non-dict ``configurable``, non-dict
        mapping, or no entry for this call).
    """
    cfg = runtime.config or {}
    configurable = cfg.get("configurable") if isinstance(cfg, dict) else None
    if not isinstance(configurable, dict):
        return False
    by_tcid = configurable.get("surfsense_resume_value")
    if not isinstance(by_tcid, dict):
        return False
    # Membership is per call id, so a payload queued for a sibling call does
    # not make this call look resumable.
    return runtime.tool_call_id in by_tcid
def drain_parent_null_resume(runtime: ToolRuntime) -> None:
"""Consume the parent's lingering ``NULL_TASK_ID/RESUME`` write before delegating.
``stream_resume_chat`` wakes the main agent with
``Command(resume={"decisions": [...]})`` so the propagated
``_lg_interrupt(...)`` can return. langgraph stores that payload as the
parent task's ``null_resume`` pending write, which only gets consumed
*after* ``subagent.[a]invoke`` returns (when the post-call propagation
re-fires). While the subagent is mid-execution, any *new* ``interrupt()``
inside it (e.g. a follow-up tool call after a mixed approve/reject) walks
``subagent_scratchpad → parent_scratchpad.get_null_resume`` and picks up
the parent's still-live decisions — mismatching against a different number
of hanging tool calls and crashing ``HumanInTheLoopMiddleware``.
``Command(resume={tool_call_id: {"decisions": [...]}})`` so the previously
propagated parent-level interrupt can return. langgraph stores that
payload as the parent task's ``null_resume`` pending write. The ``task``
tool then forwards this turn's slice into the subagent via its own
``Command(resume=...)``. While the subagent is mid-execution, any *new*
``interrupt()`` inside it (e.g. a follow-up tool call after a mixed
approve/reject) walks ``subagent_scratchpad → parent_scratchpad.get_null_resume``
and picks up the parent's still-live decisions — mismatching against a
different number of hanging tool calls and crashing
``HumanInTheLoopMiddleware``.
Draining the write here closes that cross-graph leak so subagent
interrupts pause cleanly and re-propagate as a fresh approval card.
interrupts pause cleanly and bubble back up as a fresh approval card.
"""
cfg = runtime.config or {}
configurable = cfg.get("configurable") if isinstance(cfg, dict) else None

View file

@ -12,7 +12,6 @@ from deepagents.middleware.subagents import (
SubAgentMiddleware,
)
from langchain.agents import create_agent
from langchain.agents.middleware import HumanInTheLoopMiddleware
from langchain.chat_models import init_chat_model
from langgraph.types import Checkpointer
@ -81,10 +80,6 @@ class SurfSenseCheckpointedSubAgentMiddleware(SubAgentMiddleware):
middleware: list[Any] = list(spec.get("middleware", []))
interrupt_on = spec.get("interrupt_on")
if interrupt_on:
middleware.append(HumanInTheLoopMiddleware(interrupt_on=interrupt_on))
specs.append(
{
"name": spec["name"],

View file

@ -1,74 +1,38 @@
"""Re-raise still-pending subagent interrupts at the parent graph level.
"""Stamp the parent's ``tool_call_id`` onto a subagent's pending interrupt value.
After ``subagent.[a]invoke(Command(resume=...))`` returns, the subagent may
still hold a pending interrupt (e.g. the LLM produced a follow-up tool call
that fired a fresh ``interrupt()``). The parent's pregel cannot see that
interrupt because it lives in a separate compiled graph; we re-raise it here
so the parent's SSE stream surfaces it as the next approval card.
When a subagent (compiled as a langgraph subgraph and invoked from a parent
tool node) hits an ``interrupt(...)`` from its HITL middleware, langgraph
raises ``GraphInterrupt`` out of ``subagent.[a]invoke(...)``. The parent's
``task`` tool catches that exception, stamps ``tool_call_id`` onto each
``Interrupt.value`` using :func:`wrap_with_tool_call_id`, and re-raises a
fresh ``GraphInterrupt`` whose values carry that stamp.
``stream_resume_chat`` then reads ``parent.state.interrupts[*].value["tool_call_id"]``
to route a flat ``decisions`` list back to the right paused subagent — without
the stamp, parallel HITL across siblings would collapse into an ambiguous
bucket and resume would fail.
This module hosts only the stamping helper; the catch/re-raise lives in
``task_tool.py`` since that's the single chokepoint where the raw exception
is in our hands.
"""
from __future__ import annotations
import logging
from typing import Any
from langchain_core.runnables import Runnable
from langgraph.types import interrupt as _lg_interrupt
from .resume import get_first_pending_subagent_interrupt
def wrap_with_tool_call_id(value: Any, tool_call_id: str) -> dict[str, Any]:
"""Return a value dict that always carries the parent's ``tool_call_id``.
logger = logging.getLogger(__name__)
Dict values are shallow-copied with ``tool_call_id`` stamped on top, so
any value the subagent may already carry under that key (from a deeper
HITL level) is overwritten — the parent's call id is the only one
``stream_resume_chat`` correlates against.
def maybe_propagate_subagent_interrupt(
    subagent: Runnable,
    sub_config: dict[str, Any],
    subagent_type: str,
) -> None:
    """Surface a subagent's still-pending interrupt at the parent level.

    Reads the subagent's state for ``sub_config`` and, when it still reports
    a pending interrupt value, raises it again through langgraph's
    ``interrupt`` so the parent sees it.

    Args:
        subagent: The subagent runnable; ignored when it has no callable
            ``get_state``.
        sub_config: Config that was used for the nested invoke.
        subagent_type: Subagent name, used only in the log line.
    """
    state_reader = getattr(subagent, "get_state", None)
    if callable(state_reader):
        # Only the state read is guarded: the interrupt raised further down
        # must never be caught by this handler.
        try:
            snapshot = state_reader(sub_config)
        except Exception:  # pragma: no cover - defensive
            logger.debug(
                "Subagent get_state failed during re-interrupt check",
                exc_info=True,
            )
            return
        _pending_id, pending_value = get_first_pending_subagent_interrupt(snapshot)
        if pending_value is not None:
            logger.info(
                "Re-raising subagent %r interrupt to parent (multi-step HITL)",
                subagent_type,
            )
            _lg_interrupt(pending_value)
async def amaybe_propagate_subagent_interrupt(
    subagent: Runnable,
    sub_config: dict[str, Any],
    subagent_type: str,
) -> None:
    """Async variant: surface a subagent's still-pending interrupt at the parent.

    Awaits the subagent's ``aget_state`` for ``sub_config`` and, when a
    pending interrupt value is still reported, raises it again through
    langgraph's ``interrupt``.

    Args:
        subagent: The subagent runnable; ignored when it has no callable
            ``aget_state``.
        sub_config: Config that was used for the nested invoke.
        subagent_type: Subagent name, used only in the log line.
    """
    state_reader = getattr(subagent, "aget_state", None)
    if callable(state_reader):
        # Only the state read is guarded: the interrupt raised further down
        # must never be caught by this handler.
        try:
            snapshot = await state_reader(sub_config)
        except Exception:  # pragma: no cover - defensive
            logger.debug(
                "Subagent aget_state failed during re-interrupt check",
                exc_info=True,
            )
            return
        _pending_id, pending_value = get_first_pending_subagent_interrupt(snapshot)
        if pending_value is not None:
            logger.info(
                "Re-raising subagent %r interrupt to parent (multi-step HITL)",
                subagent_type,
            )
            _lg_interrupt(pending_value)
Non-dict values are wrapped as ``{"value": <original>, "tool_call_id": ...}``
so simple ``interrupt("approve?")`` patterns still propagate cleanly.
"""
if isinstance(value, dict):
return {**value, "tool_call_id": tool_call_id}
return {"value": value, "tool_call_id": tool_call_id}

View file

@ -0,0 +1,183 @@
"""Route a flat ``decisions`` list to per-``tool_call_id`` resume payloads.
The frontend submits decisions in the same order the SSE stream emitted
approval cards. When multiple parallel subagents are paused, the backend uses
this module to:
1. Read ``state.interrupts`` from the parent's paused snapshot, extracting
``[(tool_call_id, action_count), ...]`` from each interrupt's value.
The ``tool_call_id`` is stamped on by ``propagation.wrap_with_tool_call_id``
inside ``task_tool``'s catch-and-stamp block when a subagent's
``GraphInterrupt`` bubbles up through ``[a]task``.
2. Slice the flat ``decisions`` list against that ordered pending list to
produce the dict shape expected by ``consume_surfsense_resume``.
3. Re-key those slices by ``Interrupt.id`` (langgraph's primitive) for use as
the parent-level ``Command(resume={interrupt_id: payload})`` input — the
only shape langgraph accepts when multiple interrupts are pending.
All helpers are pure: callers own the state and the input decisions; we
return new structures and never mutate.
"""
from __future__ import annotations
import logging
from collections.abc import Iterable
from typing import Any
logger = logging.getLogger(__name__)
def slice_decisions_by_tool_call(
    decisions: list[dict[str, Any]],
    pending: Iterable[tuple[str, int]],
) -> dict[str, dict[str, Any]]:
    """Slice ``decisions`` into ``{tool_call_id: {"decisions": <slice>}}``.

    Args:
        decisions: Flat list of decisions, in the order the approval cards
            were rendered.
        pending: Ordered ``(tool_call_id, action_count)`` pairs in that same
            order. ``decisions`` is consumed left-to-right against them.

    Returns:
        Per-``tool_call_id`` payload dict. When the same ``tool_call_id``
        appears more than once in ``pending``, its slices are concatenated
        in order so that no decision is discarded. ``decisions`` itself is
        never mutated.

    Raises:
        ValueError: When the total expected action count differs from the
            number of decisions provided. Failing loudly makes a
            frontend/backend contract drift visible immediately instead of
            silently dropping or padding decisions.
    """
    pending_list = list(pending)
    expected = sum(count for _, count in pending_list)
    if expected != len(decisions):
        raise ValueError(
            f"Decision count mismatch: pending tool calls expect "
            f"{expected} actions but received {len(decisions)} decisions."
        )
    routed: dict[str, dict[str, Any]] = {}
    cursor = 0
    for tool_call_id, action_count in pending_list:
        # Slicing yields a fresh list, so extending it later never touches
        # the caller's ``decisions``.
        chunk = decisions[cursor : cursor + action_count]
        cursor += action_count
        existing = routed.get(tool_call_id)
        if existing is None:
            routed[tool_call_id] = {"decisions": chunk}
        else:
            # Repeated id: append rather than overwrite, otherwise the
            # earlier slice would be lost without any error.
            existing["decisions"].extend(chunk)
    return routed
def collect_pending_tool_calls(state: Any) -> list[tuple[str, int]]:
    """List ``(tool_call_id, action_count)`` for each routable pending interrupt.

    Walks ``state.interrupts`` in order and keeps every interrupt whose value
    is a dict carrying a string ``tool_call_id``. Order is preserved so a
    flat decisions list can later be consumed left-to-right against it.

    Interrupts whose value is not a dict, or which carry no string
    ``tool_call_id``, are logged at warning level and skipped — this function
    does not route them.

    Args:
        state: Any object with an ``interrupts`` attribute (for example a
            langgraph ``StateSnapshot``). A missing or falsy attribute is
            treated as "no interrupts".

    Returns:
        Ordered ``(tool_call_id, action_count)`` pairs. ``action_count`` is
        ``len(value["action_requests"])`` when that entry is a list, or ``1``
        when the value has a ``"value"`` key (a wrapped scalar).

    Raises:
        ValueError: When a value has a ``tool_call_id`` but neither an
            ``action_requests`` list nor a ``"value"`` key, so the action
            count cannot be determined.
    """
    collected: list[tuple[str, int]] = []
    interrupts = getattr(state, "interrupts", ()) or ()
    for position, item in enumerate(interrupts):
        payload = getattr(item, "value", None)
        if not isinstance(payload, dict):
            logger.warning(
                "[hitl_route] interrupt[%d] skipped: value not a dict (type=%s)",
                position,
                type(payload).__name__,
            )
            continue

        call_id = payload.get("tool_call_id")
        if not isinstance(call_id, str):
            # An unstamped value is unexpected here; log it so a regression
            # in the stamping step is noticed.
            logger.warning(
                "[hitl_route] interrupt[%d] skipped: no tool_call_id stamp (keys=%s)",
                position,
                sorted(payload.keys()),
            )
            continue

        requests = payload.get("action_requests")
        if isinstance(requests, list):
            count = len(requests)
        elif "value" in payload:
            # Wrapped scalar interrupt: exactly one decision is expected.
            count = 1
        else:
            raise ValueError(
                f"Interrupt for tool_call_id={call_id!r} has no "
                "``action_requests`` list and is not a wrapped scalar value; "
                "cannot determine action count for resume routing."
            )
        collected.append((call_id, count))
    return collected
def build_lg_resume_map(
    state: Any, by_tool_call_id: dict[str, dict[str, Any]]
) -> dict[str, dict[str, Any]]:
    """Re-key per-``tool_call_id`` payloads by each pending interrupt's ``id``.

    For every entry in ``state.interrupts`` whose value is a dict with a
    string ``tool_call_id``, whose ``id`` is a string, and for which
    ``by_tool_call_id`` holds a payload, the result maps that ``id`` to the
    payload. Entries that cannot be paired are left out, so a caller can
    detect drift by comparing counts. Neither argument is mutated.

    Args:
        state: Any object with an ``interrupts`` iterable (for example a
            langgraph ``StateSnapshot``). A missing or falsy attribute is
            treated as "no interrupts".
        by_tool_call_id: Payloads keyed by ``tool_call_id``, such as the
            output of :func:`slice_decisions_by_tool_call`.

    Returns:
        ``{interrupt_id: payload}``, intended for ``Command(resume=...)``.
    """
    resume_map: dict[str, dict[str, Any]] = {}
    for pending in getattr(state, "interrupts", ()) or ():
        stamped = getattr(pending, "value", None)
        call_id = stamped.get("tool_call_id") if isinstance(stamped, dict) else None
        lg_id = getattr(pending, "id", None)
        # Both keys must be real strings to pair the two key spaces.
        if not (isinstance(call_id, str) and isinstance(lg_id, str)):
            continue
        routed = by_tool_call_id.get(call_id)
        if routed is not None:
            resume_map[lg_id] = routed
    return resume_map

View file

@ -0,0 +1,15 @@
"""Schema-level description for the ``task`` tool.
Loaded from ``prompts/tools/task/description.md`` so the tool-schema text
and the ``<tools>`` block render from the same source.
"""
from __future__ import annotations
from app.agents.multi_agent_chat.main_agent.system_prompt.builder.load_md import (
read_prompt_md,
)
# Evaluated once, when this module is first imported; the relative path is
# the prompt file named in the module docstring.
TASK_TOOL_DESCRIPTION: str = read_prompt_md("tools/task/description.md")
# The description constant is the only name exported by ``import *``.
__all__ = ["TASK_TOOL_DESCRIPTION"]

View file

@ -9,14 +9,15 @@ re-raises any new pending interrupt back to the parent.
from __future__ import annotations
import logging
from typing import Annotated, Any
from typing import Annotated, Any, NoReturn
from deepagents.middleware.subagents import TASK_TOOL_DESCRIPTION
from langchain.tools import BaseTool, ToolRuntime
from langchain_core.messages import HumanMessage, ToolMessage
from langchain_core.runnables import Runnable
from langchain_core.tools import StructuredTool
from langgraph.types import Command
from langgraph.errors import GraphInterrupt
from langgraph.types import Command, Interrupt
from .config import (
consume_surfsense_resume,
@ -25,10 +26,7 @@ from .config import (
subagent_invoke_config,
)
from .constants import EXCLUDED_STATE_KEYS
from .propagation import (
amaybe_propagate_subagent_interrupt,
maybe_propagate_subagent_interrupt,
)
from .propagation import wrap_with_tool_call_id
from .resume import (
build_resume_command,
fan_out_decisions_to_match,
@ -39,6 +37,31 @@ from .resume import (
logger = logging.getLogger(__name__)
def _reraise_stamped_subagent_interrupt(
    gi: GraphInterrupt, tool_call_id: str
) -> NoReturn:
    """Re-raise ``gi`` with ``tool_call_id`` stamped onto every interrupt value.

    Each interrupt carried in ``gi.args[0]`` is rebuilt with the same ``id``
    and a value produced by ``wrap_with_tool_call_id``. The new
    ``GraphInterrupt`` is chained ``from gi`` so the original exception stays
    attached as its cause.

    Args:
        gi: The ``GraphInterrupt`` caught from the subagent invoke.
        tool_call_id: The parent's call id to stamp onto each value.

    Raises:
        GraphInterrupt: Always; this function never returns.
    """
    # An argument-less exception carries no interrupts to stamp.
    pending = gi.args[0] if gi.args else ()
    restamped = []
    for original in pending:
        stamped_value = wrap_with_tool_call_id(original.value, tool_call_id)
        restamped.append(Interrupt(value=stamped_value, id=original.id))
    stamped = tuple(restamped)
    logger.info(
        "[hitl_route] stamped %d subagent interrupt(s) with tool_call_id=%s",
        len(stamped),
        tool_call_id,
    )
    raise GraphInterrupt(stamped) from gi
def build_task_tool_with_parent_config(
subagents: list[dict[str, Any]],
task_description: str | None = None,
@ -161,13 +184,18 @@ def build_task_tool_with_parent_config(
# Prevent the parent's resume payload from leaking into subagent
# interrupts via langgraph's parent_scratchpad fallback.
drain_parent_null_resume(runtime)
result = subagent.invoke(
build_resume_command(resume_value, pending_id),
config=sub_config,
)
try:
result = subagent.invoke(
build_resume_command(resume_value, pending_id),
config=sub_config,
)
except GraphInterrupt as gi:
_reraise_stamped_subagent_interrupt(gi, runtime.tool_call_id)
else:
result = subagent.invoke(subagent_state, config=sub_config)
maybe_propagate_subagent_interrupt(subagent, sub_config, subagent_type)
try:
result = subagent.invoke(subagent_state, config=sub_config)
except GraphInterrupt as gi:
_reraise_stamped_subagent_interrupt(gi, runtime.tool_call_id)
return _return_command_with_state_update(result, runtime.tool_call_id)
async def atask(
@ -181,6 +209,11 @@ def build_task_tool_with_parent_config(
],
runtime: ToolRuntime,
) -> str | Command:
logger.info(
"[hitl_route] atask ENTRY: subagent_type=%r tool_call_id=%s",
subagent_type,
runtime.tool_call_id,
)
if subagent_type not in subagent_graphs:
allowed_types = ", ".join([f"`{k}`" for k in subagent_graphs])
return (
@ -228,13 +261,18 @@ def build_task_tool_with_parent_config(
# Prevent the parent's resume payload from leaking into subagent
# interrupts via langgraph's parent_scratchpad fallback.
drain_parent_null_resume(runtime)
result = await subagent.ainvoke(
build_resume_command(resume_value, pending_id),
config=sub_config,
)
try:
result = await subagent.ainvoke(
build_resume_command(resume_value, pending_id),
config=sub_config,
)
except GraphInterrupt as gi:
_reraise_stamped_subagent_interrupt(gi, runtime.tool_call_id)
else:
result = await subagent.ainvoke(subagent_state, config=sub_config)
await amaybe_propagate_subagent_interrupt(subagent, sub_config, subagent_type)
try:
result = await subagent.ainvoke(subagent_state, config=sub_config)
except GraphInterrupt as gi:
_reraise_stamped_subagent_interrupt(gi, runtime.tool_call_id)
return _return_command_with_state_update(result, runtime.tool_call_id)
return StructuredTool.from_function(

View file

@ -24,4 +24,5 @@ def build_knowledge_priority_mw(
available_connectors=available_connectors,
available_document_types=available_document_types,
mentioned_document_ids=mentioned_document_ids,
inject_system_message=False,
)

View file

@ -20,4 +20,5 @@ def build_knowledge_tree_mw(
search_space_id=search_space_id,
filesystem_mode=filesystem_mode,
llm=llm,
inject_system_message=False,
)

Some files were not shown because too many files have changed in this diff Show more