feat: antropic model added fix & kb tooling fixes

- Updated main-agent middleware to clarify that both filesystem reads/writes and knowledge-base retrieval are handled by the `knowledge_base` subagent. - Introduced `_forward_mention_pins` function to carry `@`-mention pins into subagent state. - Revised system prompts to reflect the new retrieval method and ensure proper citation handling. - Removed the `search_knowledge_base` tool and its related tests, consolidating functionality under the `task` tool. - Enhanced documentation to guide usage of the new retrieval approach and citation practices.
2026-06-26 21:39:43 +02:00 · 2026-06-25 20:19:44 -07:00 · 2026-06-25 20:19:44 -07:00 · 9642d7ced0
commit 9642d7ced0
parent b4af67f77d
36 changed files with 581 additions and 168 deletions
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/task_tool.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/task_tool.py
@ -343,6 +343,28 @@ def build_task_tool_with_parent_config(
        cleaned = hint.strip()
        return cleaned or None

+    def _forward_mention_pins(subagent_state: dict, runtime: ToolRuntime) -> None:
+        """Carry the turn's ``@``-mention pins from main context into subagent state.
+
+        Subagents are compiled without a ``context_schema`` and invoked without
+        ``context=``, so ``runtime.context`` (which holds the ``@``-mentioned
+        document/folder ids) does not reach them. The ``task`` tool runs in the
+        main runtime, which *does* have the context, so we copy the pins into the
+        forwarded state where ``search_knowledge_base`` reads them. Only set keys
+        when present so we never clobber pins already on state (e.g. nested
+        ``ask_knowledge_base`` re-entry).
+        """
+        ctx = getattr(runtime, "context", None)
+        if ctx is None:
+            return
+        for state_key, ctx_attr in (
+            ("mentioned_document_ids", "mentioned_document_ids"),
+            ("mentioned_folder_ids", "mentioned_folder_ids"),
+        ):
+            value = getattr(ctx, ctx_attr, None)
+            if value:
+                subagent_state[state_key] = list(value)
+
    def _validate_and_prepare_state(
        subagent_type: str, description: str, runtime: ToolRuntime
    ) -> tuple[Runnable, dict]:
@ -350,6 +372,7 @@ def build_task_tool_with_parent_config(
        subagent_state = {
            k: v for k, v in runtime.state.items() if k not in EXCLUDED_STATE_KEYS
        }
+        _forward_mention_pins(subagent_state, runtime)
        hint = _resolve_context_hint(subagent_type, description, runtime)
        if hint:
            # Tagged block so the subagent prompt can pattern-match the section.
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/stack.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/stack.py
@ -1,11 +1,12 @@
 """Main-agent middleware list assembly: one line per slot.

-The main agent is a pure router — filesystem reads/writes are owned by the
-``knowledge_base`` subagent and delegated via the ``task`` tool. Knowledge-base
-retrieval is pull-based: the ``search_knowledge_base`` tool runs the hybrid
-search on demand and renders ``<retrieved_context>`` with ``[n]`` citation
-labels. The stack here computes the workspace tree, commits any subagent-side
-staged writes at end of turn (cloud mode), and wires the supporting middleware.
+The main agent is a pure router — both filesystem reads/writes AND knowledge-base
+retrieval are owned by the ``knowledge_base`` subagent and reached via the
+``task`` tool. That subagent runs the hybrid ``search_knowledge_base`` (rendering
+``<retrieved_context>`` with ``[n]`` citation labels) and the FS tools on demand;
+the main agent only sees the specialist's grounded summary. The stack here
+computes the workspace tree, commits any subagent-side staged writes at end of
+turn (cloud mode), and wires the supporting middleware.
 """

 from __future__ import annotations
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md
@ -1,9 +1,10 @@
 <citations>
 Cite with one token: the bracket label `[n]`. Every citable result —
-`search_knowledge_base` passages, `web_search` results, and prose from a
-`task` knowledge_base/research specialist — already carries `[n]` labels on a
-single shared count. Those labels are the only citation you write; the server
-resolves each one back to its source after the turn.
+`web_search` results and prose from a `task` knowledge_base/research
+specialist (including the knowledge_base specialist's `[n]`-labelled
+workspace findings) — already carries `[n]` labels on a single shared count.
+Those labels are the only citation you write; the server resolves each one
+back to its source after the turn.

 1. Put the label right after the claim it supports.
 2. Several sources for one claim: stack brackets, `[1][2]`.
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/private.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/private.md
@ -13,9 +13,10 @@ it to resolve paths the user describes in natural language ("my Q2 roadmap",
 "last week's meeting notes") into concrete document references before
 delegating to a specialist.

-`<retrieved_context>` blocks hold knowledge-base passages from
-`search_knowledge_base`; each `<document>` inside is in excerpt view and every
-passage is prefixed with an `[n]` citation label.
+Knowledge-base passages are no longer injected here directly: delegate to the
+`knowledge_base` specialist via `task`, which runs the hybrid search/read and
+returns a grounded summary already carrying `[n]` citation labels for you to
+carry through.

-If a block doesn't appear this turn, work from the conversation alone.
+If no grounding arrives this turn, work from the conversation alone.
 </dynamic_context>
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/team.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/team.md
@ -12,9 +12,10 @@ it to resolve paths described in natural language ("the Q2 roadmap", "last
 week's planning notes") into concrete document references before delegating
 to a specialist.

-`<retrieved_context>` blocks hold knowledge-base passages from
-`search_knowledge_base`; each `<document>` inside is in excerpt view and every
-passage is prefixed with an `[n]` citation label.
+Knowledge-base passages are no longer injected here directly: delegate to the
+`knowledge_base` specialist via `task`, which runs the hybrid search/read and
+returns a grounded summary already carrying `[n]` citation labels for you to
+carry through.

-If a block doesn't appear this turn, work from the conversation alone.
+If no grounding arrives this turn, work from the conversation alone.
 </dynamic_context>
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/kb_first.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/kb_first.md
@ -1,16 +1,18 @@
 <knowledge_base_first>
 CRITICAL — ground factual answers in what you actually receive this turn:
- the user's knowledge base via `search_knowledge_base` (your PRIMARY source
-  for anything about their documents, notes, or connected data — the
-  `<workspace_tree>` only lists what exists, so call the tool to read the
-  actual content before answering),
+- the user's knowledge base via `task(knowledge_base, ...)` (your PRIMARY
+  source for anything about their documents, notes, or connected data — the
+  `<workspace_tree>` only lists what exists, so delegate to the specialist to
+  search and read the actual content before answering),
 - injected workspace context (see `<dynamic_context>`),
 - results from your other tool calls (`web_search`, `scrape_webpage`),
 - or substantive summaries returned by a `task` specialist you invoked.

-For questions about the user's own workspace, call `search_knowledge_base`
-first rather than answering from the tree or from memory. Use
-`task(knowledge_base)` when you need a document's full text or deeper reads.
+For questions about the user's own workspace, dispatch
+`task(knowledge_base, ...)` first rather than answering from the tree or from
+memory. The knowledge_base specialist runs hybrid semantic/keyword search and
+full-document reads, then returns a grounded summary with `[n]` citation
+labels for you to carry through into your answer.

 Do **not** answer factual or informational questions from general knowledge
 unless the user explicitly authorises it after you say you couldn't find
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_knowledge_base/description.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_knowledge_base/description.md
@ -1,19 +0,0 @@
- `search_knowledge_base` — Search the user's own knowledge base (their
-  indexed documents, notes, files, and connected sources) with hybrid
-  semantic + keyword retrieval.
-  - This is your PRIMARY way to ground factual answers about the user's
-    workspace. The `<workspace_tree>` shows what files exist; this tool pulls
-    the actual relevant content. Call it BEFORE answering any question about
-    the user's documents, notes, or connected data — don't answer from the
-    tree alone or from memory.
-  - Each hit returns the document's virtual path, a relevance score, and the
-    matched snippets. The snippets are often enough to answer directly with a
-    citation.
-  - When you need a document's full text (not just snippets), delegate a read
-    to the `knowledge_base` specialist via `task`, passing the path from the
-    results.
-  - Args: `query` (focused; include concrete entities, acronyms, people,
-    projects, or terms), `top_k` (default 5, max 20).
-  - If nothing relevant comes back, tell the user you couldn't find it in
-    their workspace before offering to search the web or answer from general
-    knowledge.
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_knowledge_base/example.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_knowledge_base/example.md
@ -1,13 +0,0 @@
-<example>
-user: "What did our Q3 planning doc say about hiring?"
-→ search_knowledge_base(query="Q3 planning hiring headcount plan")
-(Answer from the returned snippets with a citation; if you need the full
-document, task the knowledge_base specialist with the returned path.)
-</example>
-
-<example>
-user: "Summarize my notes on the Acme migration."
-→ search_knowledge_base(query="Acme migration notes")
-→ task(subagent_type="knowledge_base", description="Read <path> and return a
-detailed summary of the Acme migration plan, risks, and timeline.")
-</example>
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/index.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/index.py
@ -6,7 +6,6 @@ Connector integrations, MCP, deliverables, etc. are delegated via ``task`` subag
 from __future__ import annotations

 MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED: tuple[str, ...] = (
-    "search_knowledge_base",
    "web_search",
    "scrape_webpage",
    "update_memory",
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/registry.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/registry.py
@ -25,7 +25,6 @@ from app.agents.chat.shared.tools.web_search import create_web_search_tool
 from app.db import ChatVisibility

 from .scrape_webpage import create_scrape_webpage_tool
-from .search_knowledge_base import create_search_knowledge_base_tool
 from .update_memory import (
    create_update_memory_tool,
    create_update_team_memory_tool,
@ -36,14 +35,6 @@ def _build_scrape_webpage_tool(deps: dict[str, Any]) -> BaseTool:
    return create_scrape_webpage_tool(firecrawl_api_key=deps.get("firecrawl_api_key"))


-def _build_search_knowledge_base_tool(deps: dict[str, Any]) -> BaseTool:
-    return create_search_knowledge_base_tool(
-        search_space_id=deps["search_space_id"],
-        available_connectors=deps.get("available_connectors"),
-        available_document_types=deps.get("available_document_types"),
-    )
-
-
 def _build_web_search_tool(deps: dict[str, Any]) -> BaseTool:
    return create_web_search_tool(
        search_space_id=deps.get("search_space_id"),
@ -85,10 +76,6 @@ def _build_update_memory_tool(deps: dict[str, Any]) -> BaseTool:
 _MAIN_AGENT_TOOL_FACTORIES: dict[
    str, tuple[Callable[[dict[str, Any]], BaseTool], tuple[str, ...]]
 ] = {
-    "search_knowledge_base": (
-        _build_search_knowledge_base_tool,
-        ("search_space_id",),
-    ),
    "scrape_webpage": (_build_scrape_webpage_tool, ()),
    "web_search": (_build_web_search_tool, ()),
    "create_automation": (
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/models.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/models.py
@ -2,13 +2,13 @@

 from __future__ import annotations

-from enum import Enum
+from enum import StrEnum
 from typing import Any

 from pydantic import BaseModel, Field


-class CitationSourceType(str, Enum):
+class CitationSourceType(StrEnum):
    """Source kind of a citable unit; the value is the stable wire/dedup form."""

    KB_CHUNK = "kb_chunk"
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/search_context.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/search_context.py
@ -33,9 +33,7 @@ def render_search_context(
    blocks = [
        block
        for document in documents
-        if (
-            block := render_document(document, view="excerpt", registry=registry)
-        )
+        if (block := render_document(document, view="excerpt", registry=registry))
        is not None
    ]
    if not blocks:
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/web_results.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/web_results.py
@ -34,21 +34,13 @@ def render_web_results(
    blocks = [
        block
        for document in documents
-        if (
-            block := render_document(document, view="excerpt", registry=registry)
-        )
+        if (block := render_document(document, view="excerpt", registry=registry))
        is not None
    ]
    if not blocks:
        return None

-    return (
-        "<web_results>\n"
-        + _HEADER
-        + "\n"
-        + "\n".join(blocks)
-        + "\n</web_results>"
-    )
+    return "<web_results>\n" + _HEADER + "\n" + "\n".join(blocks) + "\n</web_results>"


 __all__ = ["render_web_results"]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/todos.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/todos.py
@ -2,11 +2,48 @@

 from __future__ import annotations

+from typing import TYPE_CHECKING, Any
+
 from langchain.agents.middleware import TodoListMiddleware

+if TYPE_CHECKING:
+    from collections.abc import Awaitable, Callable
+
+
+class _ToolOnlyTodoListMiddleware(TodoListMiddleware):  # type: ignore[type-arg]
+    """``TodoListMiddleware`` that exposes the ``write_todos`` tool but appends
+    no todo system prompt.
+
+    Upstream ``TodoListMiddleware.(a)wrap_model_call`` *always* appends a system
+    text block of ``f"\\n\\n{self.system_prompt}"``. With an empty
+    ``system_prompt`` that block is whitespace-only (``"\\n\\n"``), which
+    Anthropic rejects with ``"system: text content blocks must contain
+    non-whitespace text"`` (OpenAI silently tolerates it). The main agent
+    already documents todo usage in its own system prompt, so we skip the append
+    entirely and let the request through unchanged.
+    """
+
+    def wrap_model_call(self, request: Any, handler: Callable[[Any], Any]) -> Any:
+        return handler(request)
+
+    async def awrap_model_call(
+        self, request: Any, handler: Callable[[Any], Awaitable[Any]]
+    ) -> Any:
+        return await handler(request)
+

 def build_todos_mw(*, system_prompt: str | None = None) -> TodoListMiddleware:
-    """Pass ``system_prompt=""`` to suppress the upstream prompt append. We use a custom system prompt in the main agent."""
+    """Build a todo-list middleware.
+
+    - ``system_prompt=None``: use the upstream default todo system prompt.
+    - ``system_prompt=""`` (or whitespace): contribute the ``write_todos`` tool
+      without appending any todo system prompt. The main agent supplies its own
+      todo guidance, and this avoids emitting a whitespace-only system block that
+      Anthropic rejects.
+    - otherwise: append the given custom todo system prompt.
+    """
    if system_prompt is None:
        return TodoListMiddleware()
+    if not system_prompt.strip():
+        return _ToolOnlyTodoListMiddleware()
    return TodoListMiddleware(system_prompt=system_prompt)
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/filesystem_state.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/filesystem_state.py
@ -162,6 +162,20 @@ class SurfSenseFilesystemState(FilesystemState):
    normalizer. Merges (union, find-or-create) so parallel/subagent registrations
    stay globally consistent instead of clobbering each other."""

+    mentioned_document_ids: NotRequired[Annotated[list[int], _replace_reducer]]
+    """``@``-mentioned ``Document.id`` pins for this turn.
+
+    Sourced from the per-invocation ``runtime.context`` on the main graph and
+    forwarded into subagent state by the ``task`` tool (subagents are not
+    compiled with a ``context_schema``). Read by ``search_knowledge_base`` to
+    confine retrieval to the pinned documents."""
+
+    mentioned_folder_ids: NotRequired[Annotated[list[int], _replace_reducer]]
+    """``@``-mentioned ``Folder.id`` pins for this turn.
+
+    Same provenance as :data:`mentioned_document_ids`; expanded to the folder's
+    documents by ``search_knowledge_base`` to scope retrieval."""
+
    tree_version: NotRequired[Annotated[int, _replace_reducer]]
    """Monotonically increasing counter; bumped when commits change the KB tree."""

--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/agent.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/agent.py
@ -20,6 +20,7 @@ from app.agents.chat.multi_agent_chat.subagents.shared.spec import SurfSenseSuba
 from .middleware_stack import build_kb_middleware
 from .prompts import load_description, load_readonly_system_prompt, load_system_prompt
 from .tools.index import DESTRUCTIVE_FS_OPS
+from .tools.search_knowledge_base import create_search_knowledge_base_tool

 NAME = "knowledge_base"
 READONLY_NAME = "knowledge_base_readonly"
@ -32,6 +33,15 @@ KB_RULESET = Ruleset(
 _KB_READONLY_RULESET = Ruleset(origin=READONLY_NAME, rules=[])


+def _build_search_knowledge_base_tool(dependencies: dict[str, Any]) -> BaseTool:
+    """Construct the hybrid-RAG ``search_knowledge_base`` tool from shared deps."""
+    return create_search_knowledge_base_tool(
+        search_space_id=dependencies["search_space_id"],
+        available_connectors=dependencies.get("available_connectors"),
+        available_document_types=dependencies.get("available_document_types"),
+    )
+
+
 def build_subagent(
    *,
    dependencies: dict[str, Any],
@ -49,7 +59,7 @@ def build_subagent(
            "description": load_description(),
            "system_prompt": load_system_prompt(filesystem_mode),
            "model": llm,
-            "tools": [],
+            "tools": [_build_search_knowledge_base_tool(dependencies)],
            "middleware": build_kb_middleware(
                llm=llm,
                dependencies=dependencies,
@ -78,7 +88,7 @@ def build_readonly_subagent(
            "description": "Read-only knowledge_base specialist (invoked via ask_knowledge_base).",
            "system_prompt": load_readonly_system_prompt(filesystem_mode),
            "model": llm,
-            "tools": [],
+            "tools": [_build_search_knowledge_base_tool(dependencies)],
            "middleware": build_kb_middleware(
                llm=llm,
                dependencies=dependencies,
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/ask_knowledge_base_tool.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/ask_knowledge_base_tool.py
@ -35,8 +35,21 @@ def _wrap_result(result: dict, tool_call_id: str) -> Command:
            "expected at least one assistant message."
        )
    last_text = (getattr(messages[-1], "text", None) or "").rstrip()
+    # Carry reducer-backed state (notably citation_registry, populated by the
+    # read-only graph's search_knowledge_base call) back up to the caller so
+    # [n] labels emitted via ask_knowledge_base resolve at turn end. Drop
+    # ``messages`` — we synthesize our own ToolMessage — and anything the
+    # subagent boundary excludes.
+    forwarded_state = {
+        k: v
+        for k, v in result.items()
+        if k not in EXCLUDED_STATE_KEYS and k != "messages"
+    }
    return Command(
-        update={"messages": [ToolMessage(last_text, tool_call_id=tool_call_id)]}
+        update={
+            **forwarded_state,
+            "messages": [ToolMessage(last_text, tool_call_id=tool_call_id)],
+        }
    )


--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md
@ -10,6 +10,15 @@ You are the SurfSense knowledge base specialist for the user's `/documents/` wor
  2. Use the `glob` tool for filename patterns the tree didn't surface, and the `grep` tool when the description points at *content* rather than a name.
  3. Only return `status=blocked` with `missing_fields=["path"]` when the description is genuinely ambiguous after a thorough lookup.

+## Searching vs. reading
+
+You have two complementary ways to pull workspace content:
+
+- **`search_knowledge_base`** — hybrid semantic + keyword retrieval across the whole indexed knowledge base (documents, files, and connector content), not just `/documents/`. Use it FIRST for any open-ended factual/informational question ("what did we decide about pricing?", "summarise our onboarding process") where you need the most relevant passages rather than one known file. It returns a `<retrieved_context>` block whose passages each carry a `[n]` citation label.
+- **`read_file`** — full text of one specific document you have already located by path. Use it when you need the complete document body (to edit it, or to quote at length) rather than top matches.
+
+A common flow is `search_knowledge_base` to find the relevant passages and their source documents, then `read_file` on the winning path when you need the full body. Honor any `@`-mention pins automatically applied to the search scope.
+
 For writes (where you choose the path yourself):

 - **Discover the user's existing conventions before inventing a path.** Scan `<workspace_tree>` for folders that already hold similar content (e.g. an existing `/documents/meetings/` with dated standup notes, or `/documents/projects/<name>/`). When a convention exists, follow it. Use `ls`, `glob`, or `grep` to look closer when the tree is truncated.
@ -36,11 +45,11 @@ You construct the structured `evidence` fields from your own knowledge of what y

 ## Citations in your prose

-When `read_file` returns a KB-indexed document under `/documents/`, it comes back as a `<document … view="full">` block whose passages are each prefixed with a bracketed label — `[1]`, `[2]`, `[3]`. That `[n]` is the citation label. Whenever a fact in your `action_summary` or `evidence.content_excerpt` came from a specific passage, append its `[n]` to the sentence stating that fact, copying the label **exactly** as shown. The caller relays these labels verbatim and the server resolves each one, so a wrong number silently breaks the citation.
+Both `read_file` and `search_knowledge_base` return passages prefixed with a bracketed label — `[1]`, `[2]`, `[3]`. That `[n]` is the citation label. Whenever a fact in your `action_summary` or `evidence.content_excerpt` came from a specific passage, append its `[n]` to the sentence stating that fact, copying the label **exactly** as shown. The caller relays these labels verbatim and the server resolves each one, so a wrong number silently breaks the citation.

-### Where the labels live in `read_file` output
+### Where the labels live

-A KB document reads back like this — only the bracketed `[n]` is a citation label:
+`read_file` returns a KB-indexed `/documents/` file as a `<document … view="full">` block; `search_knowledge_base` returns a `<retrieved_context>` block of the top-matching passages. In both, only the bracketed `[n]` is a citation label:

 ```
 <document title="Q2 Roadmap" source="File" view="full">
@ -49,10 +58,18 @@ A KB document reads back like this — only the bracketed `[n]` is a citation la
 </document>
 ```

+```
+<retrieved_context>
+  <document title="Pricing notes" source="File">
+    [7] We agreed on usage-based pricing …
+  </document>
+</retrieved_context>
+```
+
 ### Rules

 - Use the **exact** `[n]` shown next to the passage you actually quoted or paraphrased. Copy it digit-for-digit; do **not** retype from memory or renumber.
- Before emitting an `[n]`, confirm that bracketed label appears in the `read_file` output you are summarising this turn. If you can't see it, omit the citation.
+- Before emitting an `[n]`, confirm that bracketed label appears in the `read_file` or `search_knowledge_base` output you are summarising this turn. If you can't see it, omit the citation.
 - Labels are **not** sequential by position — a passage may be `[7]` while the one above it is `[3]` (numbering is shared across the whole conversation). Copy what you see; never guess an adjacent number.
 - Write the bare label `[n]` only — no `[citation:…]` wrapper, no markdown links, no parentheses, no footnote numbers.
 - Several passages behind one point → each in its own brackets with nothing between: `[3][4]`. Never `[3, 4]` and never a range like `[3-4]`.
@ -126,7 +143,7 @@ Return **only** one JSON object (no markdown or prose outside it):
  "status": "success" | "partial" | "blocked" | "error",
  "action_summary": string,
  "evidence": {
-    "operation": "write_file" | "edit_file" | "read_file" | "ls" | "glob" | "grep" | "mkdir" | "move_file" | "rm" | "rmdir" | "list_tree" | null,
+    "operation": "search_knowledge_base" | "write_file" | "edit_file" | "read_file" | "ls" | "glob" | "grep" | "mkdir" | "move_file" | "rm" | "rmdir" | "list_tree" | null,
    "path": string | null,
    "matched_candidates": [ { "id": string, "label": string } ] | null,
    "content_excerpt": string | null,
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md
@ -11,6 +11,15 @@ You are the SurfSense workspace specialist for the user's local folders.
  3. Use the `glob` tool for filename patterns; use the `grep` tool when the description points at *content* rather than a name.
  4. Only return `status=blocked` with `missing_fields=["path"]` when the description is genuinely ambiguous after a thorough lookup.

+## Searching the indexed knowledge base vs. reading local files
+
+Two complementary content sources:
+
+- **`search_knowledge_base`** — hybrid semantic + keyword retrieval over the user's *indexed* knowledge base (documents and connector content), which is separate from the local folders your FS tools read. Use it FIRST for open-ended factual/informational questions where you want the most relevant passages rather than one known file. It returns a `<retrieved_context>` block whose passages each carry a `[n]` citation label.
+- **`read_file` / `ls` / `glob` / `grep`** — operate on the user's *local* folders. Use these to locate and read on-disk files by path.
+
+These are different stores: `search_knowledge_base` will not surface arbitrary local files, and the FS tools do not see indexed-only content. Pick the source the request points at (or use both when helpful).
+
 For writes (where you choose the path yourself):

 - **Discover the user's existing conventions before inventing a path.** Inspect the relevant mount's folder layout via `ls` / `list_tree` and look for folders that already hold similar content (e.g. an existing `/notes/meetings/` with dated standup files, or `/projects/<name>/`). When a convention exists, follow it.
@ -32,11 +41,13 @@ Map outcomes to your `status`:
 - Any other `"Error: …"` → `status=error` and relay the tool's message verbatim as `next_step`.
 - HITL rejection → `status=blocked` with `next_step="User declined this filesystem action. Do not retry."`.

-You construct the structured `evidence` fields from your own knowledge of what you called and what you observed — the tools do not return them. Never report values you did not actually see. (`citations` is always `null` in desktop mode — see "Citations in your prose" below.)
+You construct the structured `evidence` fields from your own knowledge of what you called and what you observed — the tools do not return them. Never report values you did not actually see. (See "Citations in your prose" below for when `citations` is populated.)

 ## Citations in your prose

-In desktop mode your filesystem tools read local files only, which are not KB-indexed and carry no `[n]` citation labels. Do not emit `[n]` or `[citation:…]` markers in `action_summary` or `evidence.content_excerpt`, and leave `evidence.citations` `null` — the absolute path is the only reference for local-file work.
+Your **filesystem** tools read local files only, which are not KB-indexed and carry no `[n]` citation labels: when a fact comes from a local-file read, do not emit `[n]` or `[citation:…]` markers — the absolute path is the only reference.
+
+The **`search_knowledge_base`** tool is different: it queries the indexed knowledge base and returns a `<retrieved_context>` block whose passages each carry a bracketed `[n]` label. When a fact in your `action_summary` or `evidence.content_excerpt` came from a search passage, append its `[n]` exactly as shown and list those numbers in `evidence.citations`. Copy labels digit-for-digit; confirm the bracketed label appears in this turn's output before emitting it; write the bare `[n]` only (no `[citation:…]` wrapper, markdown links, or ranges). Stack multiple as `[3][4]`. Leave `evidence.citations` `null` when you only touched local files.

 ## Examples

@ -104,7 +115,7 @@ Return **only** one JSON object (no markdown or prose outside it):
  "status": "success" | "partial" | "blocked" | "error",
  "action_summary": string,
  "evidence": {
-    "operation": "write_file" | "edit_file" | "read_file" | "ls" | "glob" | "grep" | "mkdir" | "move_file" | "rm" | "rmdir" | "list_tree" | null,
+    "operation": "search_knowledge_base" | "write_file" | "edit_file" | "read_file" | "ls" | "glob" | "grep" | "mkdir" | "move_file" | "rm" | "rmdir" | "list_tree" | null,
    "path": string | null,
    "matched_candidates": [ { "id": string, "label": string } ] | null,
    "content_excerpt": string | null,
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md
@ -11,6 +11,11 @@ The caller's question often references documents by description (`"my meeting no

 If a precise path was already given, use it directly — skip the lookup.

+## Searching vs. reading
+
+- **`search_knowledge_base`** — hybrid semantic + keyword retrieval across the whole indexed knowledge base. Use it FIRST for open-ended factual questions where you want the most relevant passages rather than one known file. It returns a `<retrieved_context>` block whose passages each carry a `[n]` citation label.
+- **`read_file`** — full text of one document you have already located by path. Use it when you need the complete body.
+
 ## Interpreting tool results

 - **Success** — file content (for `read_file`) or a listing (for `ls` / `glob` / `grep` / `list_tree`).
@ -29,11 +34,11 @@ Reply in plain prose:

 ## Citations

-When the evidence for a claim came from a `read_file` response for a KB-indexed document under `/documents/`, the document reads back as a `<document … view="full">` block whose passages are each prefixed with a bracketed label — `[1]`, `[2]`, `[3]`. That `[n]` is the citation label. Append the relevant `[n]` to the sentence stating the claim, copying it **exactly** as shown. The caller passes these labels through verbatim and the server resolves each one, so a wrong number silently breaks the citation.
+Both `read_file` and `search_knowledge_base` return passages prefixed with a bracketed label — `[1]`, `[2]`, `[3]`. That `[n]` is the citation label. Append the relevant `[n]` to the sentence stating the claim, copying it **exactly** as shown. The caller passes these labels through verbatim and the server resolves each one, so a wrong number silently breaks the citation.

-### Where the labels live in `read_file` output
+### Where the labels live

-A KB document reads back like this — only the bracketed `[n]` is a citation label:
+`read_file` returns a KB-indexed `/documents/` file as a `<document … view="full">` block; `search_knowledge_base` returns a `<retrieved_context>` block of top-matching passages. In both, only the bracketed `[n]` is a citation label:

 ```
 <document title="Q2 Roadmap" source="File" view="full">
@ -42,10 +47,18 @@ A KB document reads back like this — only the bracketed `[n]` is a citation la
 </document>
 ```

+```
+<retrieved_context>
+  <document title="Pricing notes" source="File">
+    [7] We agreed on usage-based pricing …
+  </document>
+</retrieved_context>
+```
+
 ### Rules

 - Use the **exact** `[n]` shown next to the passage you actually quoted or paraphrased. Copy it digit-for-digit; do **not** retype from memory or renumber.
- Before emitting an `[n]`, confirm that bracketed label appears in the `read_file` output you are summarising this turn. If you can't see it, omit the citation.
+- Before emitting an `[n]`, confirm that bracketed label appears in the `read_file` or `search_knowledge_base` output you are summarising this turn. If you can't see it, omit the citation.
 - Labels are **not** sequential by position — a passage may be `[7]` while the one above it is `[3]` (numbering is shared across the whole conversation). Copy what you see; never guess an adjacent number.
 - Prefer **fewer accurate citations** over many speculative ones. One correct `[3]` is more useful than a string of wrong numbers.
 - Several passages behind one point → each in its own brackets with nothing between: `[3][4]`. Never `[3, 4]` and never a range like `[3-4]`.
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_desktop.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_desktop.md
@ -12,6 +12,13 @@ The caller's question often references files by description (`"my meeting notes

 If a precise path was already given, use it directly — skip the lookup.

+## Searching the indexed knowledge base vs. reading local files
+
+- **`search_knowledge_base`** — hybrid semantic + keyword retrieval over the user's *indexed* knowledge base (separate from the local folders your FS tools read). Use it FIRST for open-ended factual questions where you want the most relevant passages. It returns a `<retrieved_context>` block whose passages each carry a `[n]` citation label.
+- **`read_file` / `ls` / `glob` / `grep`** — operate on the user's *local* folders.
+
+These are different stores; pick the source the request points at (or use both when helpful).
+
 ## Interpreting tool results

 - **Success** — file content (for `read_file`) or a listing (for `ls` / `glob` / `grep` / `list_tree`).
@ -30,4 +37,6 @@ Reply in plain prose:

 ## Citations

-In desktop mode your filesystem tools read local files only, which are not KB-indexed and carry no `[n]` citation labels. Cite each claim with the absolute local path; do not emit `[n]` or `[citation:…]` markers — your caller has nothing to resolve them against.
+Your **filesystem** tools read local files only, which are not KB-indexed and carry no `[n]` citation labels: cite local-file claims with the absolute path and do not emit `[n]` or `[citation:…]` markers for them.
+
+The **`search_knowledge_base`** tool is different: it queries the indexed knowledge base and returns a `<retrieved_context>` block whose passages each carry a bracketed `[n]` label. When a claim came from a search passage, append its `[n]` exactly as shown (copy digit-for-digit; confirm it appears in this turn's output; bare `[n]` only, stack as `[3][4]`, never ranges). The caller relays these verbatim and the server resolves them.
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/tools/search_knowledge_base.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/tools/search_knowledge_base.py
@ -1,11 +1,12 @@
-"""On-demand ``search_knowledge_base`` main-agent tool (citation-spine RAG).
+"""On-demand ``search_knowledge_base`` knowledge_base-subagent tool (citation-spine RAG).

-The main agent calls this when it decides it needs knowledge-base content. The
-tool runs one hybrid search, renders the matched passages as a
-``<retrieved_context>`` block whose passages carry server-assigned ``[n]``
-labels, and persists the conversation's ``CitationRegistry`` onto graph state so
-the ``[n]`` -> ``[citation:<payload>]`` normalizer can resolve them after the
-turn.
+The knowledge_base subagent calls this when it needs hybrid semantic + keyword
+retrieval over the user's indexed knowledge base. The tool runs one hybrid
+search, renders the matched passages as a ``<retrieved_context>`` block whose
+passages carry server-assigned ``[n]`` labels, and persists the conversation's
+``CitationRegistry`` onto graph state so the ``[n]`` -> ``[citation:<payload>]``
+normalizer can resolve them after the turn. The registry merges across the
+subagent boundary (reducer-backed, forwarded by ``task``/``ask_knowledge_base``).
 """

 from __future__ import annotations
@ -62,6 +63,29 @@ def _search_types(
    return tuple(sorted(types)) or None


+def _resolve_mention_pins(
+    runtime: ToolRuntime[None, SurfSenseFilesystemState],
+) -> tuple[list[int] | None, list[int] | None]:
+    """Read the turn's ``@``-mention pins, preferring state over context.
+
+    On a subagent graph the pins arrive via forwarded **state** (the ``task``
+    tool copies them off the main ``runtime.context`` since subagents have no
+    ``context_schema``). On the main graph — or any future direct invocation
+    with ``context=`` — they arrive via ``runtime.context``. State wins when
+    both are present; context is the fallback.
+    """
+    state = getattr(runtime, "state", None) or {}
+    document_ids = state.get("mentioned_document_ids")
+    folder_ids = state.get("mentioned_folder_ids")
+    if document_ids or folder_ids:
+        return document_ids or None, folder_ids or None
+    ctx = getattr(runtime, "context", None)
+    return (
+        getattr(ctx, "mentioned_document_ids", None),
+        getattr(ctx, "mentioned_folder_ids", None),
+    )
+
+
 async def _build_search_scope(
    session: AsyncSession,
    *,
@ -70,12 +94,12 @@ async def _build_search_scope(
    runtime: ToolRuntime[None, SurfSenseFilesystemState],
 ) -> SearchScope:
    """Assemble the retrieval scope: workspace document-type filter + @-mention pins."""
-    ctx = getattr(runtime, "context", None)
+    mentioned_document_ids, mentioned_folder_ids = _resolve_mention_pins(runtime)
    document_ids = await referenced_document_ids(
        session,
        search_space_id=search_space_id,
-        document_ids=getattr(ctx, "mentioned_document_ids", None),
-        folder_ids=getattr(ctx, "mentioned_folder_ids", None),
+        document_ids=mentioned_document_ids,
+        folder_ids=mentioned_folder_ids,
    )
    return SearchScope(
        document_types=document_types,
--- a/surfsense_backend/app/agents/chat/runtime/references/connectors.py
+++ b/surfsense_backend/app/agents/chat/runtime/references/connectors.py
@ -53,9 +53,7 @@ async def resolve_connector_references(
    )
    accessible = {row.id: row for row in rows.all()}

-    chip_by_id = {
-        chip.id: chip for chip in (chips or []) if chip.kind == "connector"
-    }
+    chip_by_id = {chip.id: chip for chip in (chips or []) if chip.kind == "connector"}

    references: list[ConnectorReference] = []
    for connector_id in dict.fromkeys(connector_ids):
--- a/surfsense_backend/app/agents/chat/runtime/references/models.py
+++ b/surfsense_backend/app/agents/chat/runtime/references/models.py
@ -8,11 +8,11 @@ a class-level discriminator used by the renderer and scope builder.
 from __future__ import annotations

 from dataclasses import dataclass
-from enum import Enum
+from enum import StrEnum
 from typing import ClassVar


-class ReferenceKind(str, Enum):
+class ReferenceKind(StrEnum):
    """What the user pointed at; the value is the label shown to the model."""

    DOCUMENT = "document"
--- a/surfsense_backend/app/agents/chat/runtime/references/reference_pointers.py
+++ b/surfsense_backend/app/agents/chat/runtime/references/reference_pointers.py
@ -33,9 +33,7 @@ def render_reference_pointers(references: list[Reference]) -> str | None:
    lines = [_render_pointer(reference) for reference in references]
    return (
        "<referenced_this_turn>\n"
-        f"{_HEADER}\n"
-        + "\n".join(lines)
-        + "\n</referenced_this_turn>"
+        f"{_HEADER}\n" + "\n".join(lines) + "\n</referenced_this_turn>"
    )