From 9642d7ced005cd4673947a05f4dcff747076eee1 Mon Sep 17 00:00:00 2001
From: "DESKTOP-RTLN3BA\\$punk" <vermarohanfinal@gmail.com>
Date: Thu, 25 Jun 2026 20:19:44 -0700
Subject: [PATCH] feat: antropic model added fix & kb tooling fixes

- Updated main-agent middleware to clarify that both filesystem reads/writes and knowledge-base retrieval are handled by the `knowledge_base` subagent.
- Introduced `_forward_mention_pins` function to carry `@`-mention pins into subagent state.
- Revised system prompts to reflect the new retrieval method and ensure proper citation handling.
- Removed the `search_knowledge_base` tool and its related tests, consolidating functionality under the `task` tool.
- Enhanced documentation to guide usage of the new retrieval approach and citation practices.
---
 .../task_tool.py                              |  23 ++++
 .../main_agent/middleware/stack.py            |  13 +-
 .../system_prompt/prompts/citations/on.md     |   9 +-
 .../prompts/dynamic_context/private.md        |   9 +-
 .../prompts/dynamic_context/team.md           |   9 +-
 .../system_prompt/prompts/kb_first.md         |  16 ++-
 .../search_knowledge_base/description.md      |  19 ---
 .../tools/search_knowledge_base/example.md    |  13 --
 .../main_agent/tools/index.py                 |   1 -
 .../main_agent/tools/registry.py              |  13 --
 .../shared/citations/models.py                |   4 +-
 .../shared/document_render/search_context.py  |   4 +-
 .../shared/document_render/web_results.py     |  12 +-
 .../shared/middleware/todos.py                |  39 +++++-
 .../shared/state/filesystem_state.py          |  14 ++
 .../builtins/knowledge_base/agent.py          |  14 +-
 .../knowledge_base/ask_knowledge_base_tool.py |  15 ++-
 .../knowledge_base/system_prompt_cloud.md     |  27 +++-
 .../knowledge_base/system_prompt_desktop.md   |  17 ++-
 .../system_prompt_readonly_cloud.md           |  21 ++-
 .../system_prompt_readonly_desktop.md         |  11 +-
 .../tools/search_knowledge_base.py            |  44 ++++--
 .../chat/runtime/references/connectors.py     |   4 +-
 .../agents/chat/runtime/references/models.py  |   4 +-
 .../runtime/references/reference_pointers.py  |   4 +-
 .../app/services/llm_router_service.py        |   5 +-
 .../app/services/model_resolver.py            |  29 +++-
 .../tools/test_search_knowledge_base.py       | 125 ++++++++++++++++--
 .../test_transcript.py                        |   6 +-
 .../chat/runtime/test_llm_config_sanitizer.py |  17 ++-
 .../middleware/shared/test_todos_mw.py        |  67 ++++++++++
 .../document_render/test_web_results.py       |   4 +-
 .../shared/retrieval/test_adapter.py          |   7 +-
 .../shared/retrieval/test_service.py          |   6 +-
 .../unit/services/test_model_connections.py   |  46 ++++++-
 .../components/providers/ZeroProvider.tsx     |  78 ++++++++---
 36 files changed, 581 insertions(+), 168 deletions(-)
 delete mode 100644 surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_knowledge_base/description.md
 delete mode 100644 surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_knowledge_base/example.md
 rename surfsense_backend/app/agents/chat/multi_agent_chat/{main_agent => subagents/builtins/knowledge_base}/tools/search_knowledge_base.py (74%)
 rename surfsense_backend/tests/integration/agents/multi_agent_chat/{main_agent => subagents/builtins/knowledge_base}/tools/test_search_knowledge_base.py (63%)
 create mode 100644 surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/shared/test_todos_mw.py

diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/task_tool.py b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/task_tool.py
index 644d3ef82..6698211f7 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/task_tool.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/task_tool.py
@@ -343,6 +343,28 @@ def build_task_tool_with_parent_config(
         cleaned = hint.strip()
         return cleaned or None
 
+    def _forward_mention_pins(subagent_state: dict, runtime: ToolRuntime) -> None:
+        """Carry the turn's ``@``-mention pins from main context into subagent state.
+
+        Subagents are compiled without a ``context_schema`` and invoked without
+        ``context=``, so ``runtime.context`` (which holds the ``@``-mentioned
+        document/folder ids) does not reach them. The ``task`` tool runs in the
+        main runtime, which *does* have the context, so we copy the pins into the
+        forwarded state where ``search_knowledge_base`` reads them. Only set keys
+        when present so we never clobber pins already on state (e.g. nested
+        ``ask_knowledge_base`` re-entry).
+        """
+        ctx = getattr(runtime, "context", None)
+        if ctx is None:
+            return
+        for state_key, ctx_attr in (
+            ("mentioned_document_ids", "mentioned_document_ids"),
+            ("mentioned_folder_ids", "mentioned_folder_ids"),
+        ):
+            value = getattr(ctx, ctx_attr, None)
+            if value:
+                subagent_state[state_key] = list(value)
+
     def _validate_and_prepare_state(
         subagent_type: str, description: str, runtime: ToolRuntime
     ) -> tuple[Runnable, dict]:
@@ -350,6 +372,7 @@ def build_task_tool_with_parent_config(
         subagent_state = {
             k: v for k, v in runtime.state.items() if k not in EXCLUDED_STATE_KEYS
         }
+        _forward_mention_pins(subagent_state, runtime)
         hint = _resolve_context_hint(subagent_type, description, runtime)
         if hint:
             # Tagged block so the subagent prompt can pattern-match the section.
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/stack.py b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/stack.py
index d766367de..83053954b 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/stack.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/stack.py
@@ -1,11 +1,12 @@
 """Main-agent middleware list assembly: one line per slot.
 
-The main agent is a pure router — filesystem reads/writes are owned by the
-``knowledge_base`` subagent and delegated via the ``task`` tool. Knowledge-base
-retrieval is pull-based: the ``search_knowledge_base`` tool runs the hybrid
-search on demand and renders ``<retrieved_context>`` with ``[n]`` citation
-labels. The stack here computes the workspace tree, commits any subagent-side
-staged writes at end of turn (cloud mode), and wires the supporting middleware.
+The main agent is a pure router — both filesystem reads/writes AND knowledge-base
+retrieval are owned by the ``knowledge_base`` subagent and reached via the
+``task`` tool. That subagent runs the hybrid ``search_knowledge_base`` (rendering
+``<retrieved_context>`` with ``[n]`` citation labels) and the FS tools on demand;
+the main agent only sees the specialist's grounded summary. The stack here
+computes the workspace tree, commits any subagent-side staged writes at end of
+turn (cloud mode), and wires the supporting middleware.
 """
 
 from __future__ import annotations
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md
index a42873fcb..a7c8f39b9 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md
@@ -1,9 +1,10 @@
 <citations>
 Cite with one token: the bracket label `[n]`. Every citable result —
-`search_knowledge_base` passages, `web_search` results, and prose from a
-`task` knowledge_base/research specialist — already carries `[n]` labels on a
-single shared count. Those labels are the only citation you write; the server
-resolves each one back to its source after the turn.
+`web_search` results and prose from a `task` knowledge_base/research
+specialist (including the knowledge_base specialist's `[n]`-labelled
+workspace findings) — already carries `[n]` labels on a single shared count.
+Those labels are the only citation you write; the server resolves each one
+back to its source after the turn.
 
 1. Put the label right after the claim it supports.
 2. Several sources for one claim: stack brackets, `[1][2]`.
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/private.md b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/private.md
index 6c47b03a9..07d5b56ee 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/private.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/private.md
@@ -13,9 +13,10 @@ it to resolve paths the user describes in natural language ("my Q2 roadmap",
 "last week's meeting notes") into concrete document references before
 delegating to a specialist.
 
-`<retrieved_context>` blocks hold knowledge-base passages from
-`search_knowledge_base`; each `<document>` inside is in excerpt view and every
-passage is prefixed with an `[n]` citation label.
+Knowledge-base passages are no longer injected here directly: delegate to the
+`knowledge_base` specialist via `task`, which runs the hybrid search/read and
+returns a grounded summary already carrying `[n]` citation labels for you to
+carry through.
 
-If a block doesn't appear this turn, work from the conversation alone.
+If no grounding arrives this turn, work from the conversation alone.
 </dynamic_context>
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/team.md b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/team.md
index fcce98fd0..ee4290774 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/team.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/team.md
@@ -12,9 +12,10 @@ it to resolve paths described in natural language ("the Q2 roadmap", "last
 week's planning notes") into concrete document references before delegating
 to a specialist.
 
-`<retrieved_context>` blocks hold knowledge-base passages from
-`search_knowledge_base`; each `<document>` inside is in excerpt view and every
-passage is prefixed with an `[n]` citation label.
+Knowledge-base passages are no longer injected here directly: delegate to the
+`knowledge_base` specialist via `task`, which runs the hybrid search/read and
+returns a grounded summary already carrying `[n]` citation labels for you to
+carry through.
 
-If a block doesn't appear this turn, work from the conversation alone.
+If no grounding arrives this turn, work from the conversation alone.
 </dynamic_context>
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/kb_first.md b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/kb_first.md
index 065b72983..9a35a8e55 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/kb_first.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/kb_first.md
@@ -1,16 +1,18 @@
 <knowledge_base_first>
 CRITICAL — ground factual answers in what you actually receive this turn:
-- the user's knowledge base via `search_knowledge_base` (your PRIMARY source
-  for anything about their documents, notes, or connected data — the
-  `<workspace_tree>` only lists what exists, so call the tool to read the
-  actual content before answering),
+- the user's knowledge base via `task(knowledge_base, ...)` (your PRIMARY
+  source for anything about their documents, notes, or connected data — the
+  `<workspace_tree>` only lists what exists, so delegate to the specialist to
+  search and read the actual content before answering),
 - injected workspace context (see `<dynamic_context>`),
 - results from your other tool calls (`web_search`, `scrape_webpage`),
 - or substantive summaries returned by a `task` specialist you invoked.
 
-For questions about the user's own workspace, call `search_knowledge_base`
-first rather than answering from the tree or from memory. Use
-`task(knowledge_base)` when you need a document's full text or deeper reads.
+For questions about the user's own workspace, dispatch
+`task(knowledge_base, ...)` first rather than answering from the tree or from
+memory. The knowledge_base specialist runs hybrid semantic/keyword search and
+full-document reads, then returns a grounded summary with `[n]` citation
+labels for you to carry through into your answer.
 
 Do **not** answer factual or informational questions from general knowledge
 unless the user explicitly authorises it after you say you couldn't find
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_knowledge_base/description.md b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_knowledge_base/description.md
deleted file mode 100644
index a4854dfff..000000000
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_knowledge_base/description.md
+++ /dev/null
@@ -1,19 +0,0 @@
-- `search_knowledge_base` — Search the user's own knowledge base (their
-  indexed documents, notes, files, and connected sources) with hybrid
-  semantic + keyword retrieval.
-  - This is your PRIMARY way to ground factual answers about the user's
-    workspace. The `<workspace_tree>` shows what files exist; this tool pulls
-    the actual relevant content. Call it BEFORE answering any question about
-    the user's documents, notes, or connected data — don't answer from the
-    tree alone or from memory.
-  - Each hit returns the document's virtual path, a relevance score, and the
-    matched snippets. The snippets are often enough to answer directly with a
-    citation.
-  - When you need a document's full text (not just snippets), delegate a read
-    to the `knowledge_base` specialist via `task`, passing the path from the
-    results.
-  - Args: `query` (focused; include concrete entities, acronyms, people,
-    projects, or terms), `top_k` (default 5, max 20).
-  - If nothing relevant comes back, tell the user you couldn't find it in
-    their workspace before offering to search the web or answer from general
-    knowledge.
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_knowledge_base/example.md b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_knowledge_base/example.md
deleted file mode 100644
index 2d9ec61eb..000000000
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_knowledge_base/example.md
+++ /dev/null
@@ -1,13 +0,0 @@
-<example>
-user: "What did our Q3 planning doc say about hiring?"
-→ search_knowledge_base(query="Q3 planning hiring headcount plan")
-(Answer from the returned snippets with a citation; if you need the full
-document, task the knowledge_base specialist with the returned path.)
-</example>
-
-<example>
-user: "Summarize my notes on the Acme migration."
-→ search_knowledge_base(query="Acme migration notes")
-→ task(subagent_type="knowledge_base", description="Read <path> and return a
-detailed summary of the Acme migration plan, risks, and timeline.")
-</example>
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/index.py b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/index.py
index 40c6f08de..70fb42c0d 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/index.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/index.py
@@ -6,7 +6,6 @@ Connector integrations, MCP, deliverables, etc. are delegated via ``task`` subag
 from __future__ import annotations
 
 MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED: tuple[str, ...] = (
-    "search_knowledge_base",
     "web_search",
     "scrape_webpage",
     "update_memory",
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/registry.py b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/registry.py
index 5e7c2d5d6..bdfa67c79 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/registry.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/registry.py
@@ -25,7 +25,6 @@ from app.agents.chat.shared.tools.web_search import create_web_search_tool
 from app.db import ChatVisibility
 
 from .scrape_webpage import create_scrape_webpage_tool
-from .search_knowledge_base import create_search_knowledge_base_tool
 from .update_memory import (
     create_update_memory_tool,
     create_update_team_memory_tool,
@@ -36,14 +35,6 @@ def _build_scrape_webpage_tool(deps: dict[str, Any]) -> BaseTool:
     return create_scrape_webpage_tool(firecrawl_api_key=deps.get("firecrawl_api_key"))
 
 
-def _build_search_knowledge_base_tool(deps: dict[str, Any]) -> BaseTool:
-    return create_search_knowledge_base_tool(
-        search_space_id=deps["search_space_id"],
-        available_connectors=deps.get("available_connectors"),
-        available_document_types=deps.get("available_document_types"),
-    )
-
-
 def _build_web_search_tool(deps: dict[str, Any]) -> BaseTool:
     return create_web_search_tool(
         search_space_id=deps.get("search_space_id"),
@@ -85,10 +76,6 @@ def _build_update_memory_tool(deps: dict[str, Any]) -> BaseTool:
 _MAIN_AGENT_TOOL_FACTORIES: dict[
     str, tuple[Callable[[dict[str, Any]], BaseTool], tuple[str, ...]]
 ] = {
-    "search_knowledge_base": (
-        _build_search_knowledge_base_tool,
-        ("search_space_id",),
-    ),
     "scrape_webpage": (_build_scrape_webpage_tool, ()),
     "web_search": (_build_web_search_tool, ()),
     "create_automation": (
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/models.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/models.py
index 5dccddc5c..1273271af 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/models.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/models.py
@@ -2,13 +2,13 @@
 
 from __future__ import annotations
 
-from enum import Enum
+from enum import StrEnum
 from typing import Any
 
 from pydantic import BaseModel, Field
 
 
-class CitationSourceType(str, Enum):
+class CitationSourceType(StrEnum):
     """Source kind of a citable unit; the value is the stable wire/dedup form."""
 
     KB_CHUNK = "kb_chunk"
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/search_context.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/search_context.py
index 418a2142d..9ab475f0c 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/search_context.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/search_context.py
@@ -33,9 +33,7 @@ def render_search_context(
     blocks = [
         block
         for document in documents
-        if (
-            block := render_document(document, view="excerpt", registry=registry)
-        )
+        if (block := render_document(document, view="excerpt", registry=registry))
         is not None
     ]
     if not blocks:
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/web_results.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/web_results.py
index b310c7b3a..c0ea7e167 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/web_results.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/web_results.py
@@ -34,21 +34,13 @@ def render_web_results(
     blocks = [
         block
         for document in documents
-        if (
-            block := render_document(document, view="excerpt", registry=registry)
-        )
+        if (block := render_document(document, view="excerpt", registry=registry))
         is not None
     ]
     if not blocks:
         return None
 
-    return (
-        "<web_results>\n"
-        + _HEADER
-        + "\n"
-        + "\n".join(blocks)
-        + "\n</web_results>"
-    )
+    return "<web_results>\n" + _HEADER + "\n" + "\n".join(blocks) + "\n</web_results>"
 
 
 __all__ = ["render_web_results"]
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/todos.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/todos.py
index dac149627..0316d6e2d 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/todos.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/todos.py
@@ -2,11 +2,48 @@
 
 from __future__ import annotations
 
+from typing import TYPE_CHECKING, Any
+
 from langchain.agents.middleware import TodoListMiddleware
 
+if TYPE_CHECKING:
+    from collections.abc import Awaitable, Callable
+
+
+class _ToolOnlyTodoListMiddleware(TodoListMiddleware):  # type: ignore[type-arg]
+    """``TodoListMiddleware`` that exposes the ``write_todos`` tool but appends
+    no todo system prompt.
+
+    Upstream ``TodoListMiddleware.(a)wrap_model_call`` *always* appends a system
+    text block of ``f"\\n\\n{self.system_prompt}"``. With an empty
+    ``system_prompt`` that block is whitespace-only (``"\\n\\n"``), which
+    Anthropic rejects with ``"system: text content blocks must contain
+    non-whitespace text"`` (OpenAI silently tolerates it). The main agent
+    already documents todo usage in its own system prompt, so we skip the append
+    entirely and let the request through unchanged.
+    """
+
+    def wrap_model_call(self, request: Any, handler: Callable[[Any], Any]) -> Any:
+        return handler(request)
+
+    async def awrap_model_call(
+        self, request: Any, handler: Callable[[Any], Awaitable[Any]]
+    ) -> Any:
+        return await handler(request)
+
 
 def build_todos_mw(*, system_prompt: str | None = None) -> TodoListMiddleware:
-    """Pass ``system_prompt=""`` to suppress the upstream prompt append. We use a custom system prompt in the main agent."""
+    """Build a todo-list middleware.
+
+    - ``system_prompt=None``: use the upstream default todo system prompt.
+    - ``system_prompt=""`` (or whitespace): contribute the ``write_todos`` tool
+      without appending any todo system prompt. The main agent supplies its own
+      todo guidance, and this avoids emitting a whitespace-only system block that
+      Anthropic rejects.
+    - otherwise: append the given custom todo system prompt.
+    """
     if system_prompt is None:
         return TodoListMiddleware()
+    if not system_prompt.strip():
+        return _ToolOnlyTodoListMiddleware()
     return TodoListMiddleware(system_prompt=system_prompt)
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/filesystem_state.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/filesystem_state.py
index b00670615..a82057759 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/filesystem_state.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/filesystem_state.py
@@ -162,6 +162,20 @@ class SurfSenseFilesystemState(FilesystemState):
     normalizer. Merges (union, find-or-create) so parallel/subagent registrations
     stay globally consistent instead of clobbering each other."""
 
+    mentioned_document_ids: NotRequired[Annotated[list[int], _replace_reducer]]
+    """``@``-mentioned ``Document.id`` pins for this turn.
+
+    Sourced from the per-invocation ``runtime.context`` on the main graph and
+    forwarded into subagent state by the ``task`` tool (subagents are not
+    compiled with a ``context_schema``). Read by ``search_knowledge_base`` to
+    confine retrieval to the pinned documents."""
+
+    mentioned_folder_ids: NotRequired[Annotated[list[int], _replace_reducer]]
+    """``@``-mentioned ``Folder.id`` pins for this turn.
+
+    Same provenance as :data:`mentioned_document_ids`; expanded to the folder's
+    documents by ``search_knowledge_base`` to scope retrieval."""
+
     tree_version: NotRequired[Annotated[int, _replace_reducer]]
     """Monotonically increasing counter; bumped when commits change the KB tree."""
 
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/agent.py b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/agent.py
index 2720589ef..f193c2404 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/agent.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/agent.py
@@ -20,6 +20,7 @@ from app.agents.chat.multi_agent_chat.subagents.shared.spec import SurfSenseSuba
 from .middleware_stack import build_kb_middleware
 from .prompts import load_description, load_readonly_system_prompt, load_system_prompt
 from .tools.index import DESTRUCTIVE_FS_OPS
+from .tools.search_knowledge_base import create_search_knowledge_base_tool
 
 NAME = "knowledge_base"
 READONLY_NAME = "knowledge_base_readonly"
@@ -32,6 +33,15 @@ KB_RULESET = Ruleset(
 _KB_READONLY_RULESET = Ruleset(origin=READONLY_NAME, rules=[])
 
 
+def _build_search_knowledge_base_tool(dependencies: dict[str, Any]) -> BaseTool:
+    """Construct the hybrid-RAG ``search_knowledge_base`` tool from shared deps."""
+    return create_search_knowledge_base_tool(
+        search_space_id=dependencies["search_space_id"],
+        available_connectors=dependencies.get("available_connectors"),
+        available_document_types=dependencies.get("available_document_types"),
+    )
+
+
 def build_subagent(
     *,
     dependencies: dict[str, Any],
@@ -49,7 +59,7 @@ def build_subagent(
             "description": load_description(),
             "system_prompt": load_system_prompt(filesystem_mode),
             "model": llm,
-            "tools": [],
+            "tools": [_build_search_knowledge_base_tool(dependencies)],
             "middleware": build_kb_middleware(
                 llm=llm,
                 dependencies=dependencies,
@@ -78,7 +88,7 @@ def build_readonly_subagent(
             "description": "Read-only knowledge_base specialist (invoked via ask_knowledge_base).",
             "system_prompt": load_readonly_system_prompt(filesystem_mode),
             "model": llm,
-            "tools": [],
+            "tools": [_build_search_knowledge_base_tool(dependencies)],
             "middleware": build_kb_middleware(
                 llm=llm,
                 dependencies=dependencies,
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/ask_knowledge_base_tool.py b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/ask_knowledge_base_tool.py
index 2c81ca7c2..8b728674f 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/ask_knowledge_base_tool.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/ask_knowledge_base_tool.py
@@ -35,8 +35,21 @@ def _wrap_result(result: dict, tool_call_id: str) -> Command:
             "expected at least one assistant message."
         )
     last_text = (getattr(messages[-1], "text", None) or "").rstrip()
+    # Carry reducer-backed state (notably citation_registry, populated by the
+    # read-only graph's search_knowledge_base call) back up to the caller so
+    # [n] labels emitted via ask_knowledge_base resolve at turn end. Drop
+    # ``messages`` — we synthesize our own ToolMessage — and anything the
+    # subagent boundary excludes.
+    forwarded_state = {
+        k: v
+        for k, v in result.items()
+        if k not in EXCLUDED_STATE_KEYS and k != "messages"
+    }
     return Command(
-        update={"messages": [ToolMessage(last_text, tool_call_id=tool_call_id)]}
+        update={
+            **forwarded_state,
+            "messages": [ToolMessage(last_text, tool_call_id=tool_call_id)],
+        }
     )
 
 
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md
index 04be2f321..27bb819f5 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md
@@ -10,6 +10,15 @@ You are the SurfSense knowledge base specialist for the user's `/documents/` wor
   2. Use the `glob` tool for filename patterns the tree didn't surface, and the `grep` tool when the description points at *content* rather than a name.
   3. Only return `status=blocked` with `missing_fields=["path"]` when the description is genuinely ambiguous after a thorough lookup.
 
+## Searching vs. reading
+
+You have two complementary ways to pull workspace content:
+
+- **`search_knowledge_base`** — hybrid semantic + keyword retrieval across the whole indexed knowledge base (documents, files, and connector content), not just `/documents/`. Use it FIRST for any open-ended factual/informational question ("what did we decide about pricing?", "summarise our onboarding process") where you need the most relevant passages rather than one known file. It returns a `<retrieved_context>` block whose passages each carry a `[n]` citation label.
+- **`read_file`** — full text of one specific document you have already located by path. Use it when you need the complete document body (to edit it, or to quote at length) rather than top matches.
+
+A common flow is `search_knowledge_base` to find the relevant passages and their source documents, then `read_file` on the winning path when you need the full body. Honor any `@`-mention pins automatically applied to the search scope.
+
 For writes (where you choose the path yourself):
 
 - **Discover the user's existing conventions before inventing a path.** Scan `<workspace_tree>` for folders that already hold similar content (e.g. an existing `/documents/meetings/` with dated standup notes, or `/documents/projects/<name>/`). When a convention exists, follow it. Use `ls`, `glob`, or `grep` to look closer when the tree is truncated.
@@ -36,11 +45,11 @@ You construct the structured `evidence` fields from your own knowledge of what y
 
 ## Citations in your prose
 
-When `read_file` returns a KB-indexed document under `/documents/`, it comes back as a `<document … view="full">` block whose passages are each prefixed with a bracketed label — `[1]`, `[2]`, `[3]`. That `[n]` is the citation label. Whenever a fact in your `action_summary` or `evidence.content_excerpt` came from a specific passage, append its `[n]` to the sentence stating that fact, copying the label **exactly** as shown. The caller relays these labels verbatim and the server resolves each one, so a wrong number silently breaks the citation.
+Both `read_file` and `search_knowledge_base` return passages prefixed with a bracketed label — `[1]`, `[2]`, `[3]`. That `[n]` is the citation label. Whenever a fact in your `action_summary` or `evidence.content_excerpt` came from a specific passage, append its `[n]` to the sentence stating that fact, copying the label **exactly** as shown. The caller relays these labels verbatim and the server resolves each one, so a wrong number silently breaks the citation.
 
-### Where the labels live in `read_file` output
+### Where the labels live
 
-A KB document reads back like this — only the bracketed `[n]` is a citation label:
+`read_file` returns a KB-indexed `/documents/` file as a `<document … view="full">` block; `search_knowledge_base` returns a `<retrieved_context>` block of the top-matching passages. In both, only the bracketed `[n]` is a citation label:
 
 ```
 <document title="Q2 Roadmap" source="File" view="full">
@@ -49,10 +58,18 @@ A KB document reads back like this — only the bracketed `[n]` is a citation la
 </document>
 ```
 
+```
+<retrieved_context>
+  <document title="Pricing notes" source="File">
+    [7] We agreed on usage-based pricing …
+  </document>
+</retrieved_context>
+```
+
 ### Rules
 
 - Use the **exact** `[n]` shown next to the passage you actually quoted or paraphrased. Copy it digit-for-digit; do **not** retype from memory or renumber.
-- Before emitting an `[n]`, confirm that bracketed label appears in the `read_file` output you are summarising this turn. If you can't see it, omit the citation.
+- Before emitting an `[n]`, confirm that bracketed label appears in the `read_file` or `search_knowledge_base` output you are summarising this turn. If you can't see it, omit the citation.
 - Labels are **not** sequential by position — a passage may be `[7]` while the one above it is `[3]` (numbering is shared across the whole conversation). Copy what you see; never guess an adjacent number.
 - Write the bare label `[n]` only — no `[citation:…]` wrapper, no markdown links, no parentheses, no footnote numbers.
 - Several passages behind one point → each in its own brackets with nothing between: `[3][4]`. Never `[3, 4]` and never a range like `[3-4]`.
@@ -126,7 +143,7 @@ Return **only** one JSON object (no markdown or prose outside it):
   "status": "success" | "partial" | "blocked" | "error",
   "action_summary": string,
   "evidence": {
-    "operation": "write_file" | "edit_file" | "read_file" | "ls" | "glob" | "grep" | "mkdir" | "move_file" | "rm" | "rmdir" | "list_tree" | null,
+    "operation": "search_knowledge_base" | "write_file" | "edit_file" | "read_file" | "ls" | "glob" | "grep" | "mkdir" | "move_file" | "rm" | "rmdir" | "list_tree" | null,
     "path": string | null,
     "matched_candidates": [ { "id": string, "label": string } ] | null,
     "content_excerpt": string | null,
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md
index e0f368bb2..894c856fe 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md
@@ -11,6 +11,15 @@ You are the SurfSense workspace specialist for the user's local folders.
   3. Use the `glob` tool for filename patterns; use the `grep` tool when the description points at *content* rather than a name.
   4. Only return `status=blocked` with `missing_fields=["path"]` when the description is genuinely ambiguous after a thorough lookup.
 
+## Searching the indexed knowledge base vs. reading local files
+
+Two complementary content sources:
+
+- **`search_knowledge_base`** — hybrid semantic + keyword retrieval over the user's *indexed* knowledge base (documents and connector content), which is separate from the local folders your FS tools read. Use it FIRST for open-ended factual/informational questions where you want the most relevant passages rather than one known file. It returns a `<retrieved_context>` block whose passages each carry a `[n]` citation label.
+- **`read_file` / `ls` / `glob` / `grep`** — operate on the user's *local* folders. Use these to locate and read on-disk files by path.
+
+These are different stores: `search_knowledge_base` will not surface arbitrary local files, and the FS tools do not see indexed-only content. Pick the source the request points at (or use both when helpful).
+
 For writes (where you choose the path yourself):
 
 - **Discover the user's existing conventions before inventing a path.** Inspect the relevant mount's folder layout via `ls` / `list_tree` and look for folders that already hold similar content (e.g. an existing `/notes/meetings/` with dated standup files, or `/projects/<name>/`). When a convention exists, follow it.
@@ -32,11 +41,13 @@ Map outcomes to your `status`:
 - Any other `"Error: …"` → `status=error` and relay the tool's message verbatim as `next_step`.
 - HITL rejection → `status=blocked` with `next_step="User declined this filesystem action. Do not retry."`.
 
-You construct the structured `evidence` fields from your own knowledge of what you called and what you observed — the tools do not return them. Never report values you did not actually see. (`citations` is always `null` in desktop mode — see "Citations in your prose" below.)
+You construct the structured `evidence` fields from your own knowledge of what you called and what you observed — the tools do not return them. Never report values you did not actually see. (See "Citations in your prose" below for when `citations` is populated.)
 
 ## Citations in your prose
 
-In desktop mode your filesystem tools read local files only, which are not KB-indexed and carry no `[n]` citation labels. Do not emit `[n]` or `[citation:…]` markers in `action_summary` or `evidence.content_excerpt`, and leave `evidence.citations` `null` — the absolute path is the only reference for local-file work.
+Your **filesystem** tools read local files only, which are not KB-indexed and carry no `[n]` citation labels: when a fact comes from a local-file read, do not emit `[n]` or `[citation:…]` markers — the absolute path is the only reference.
+
+The **`search_knowledge_base`** tool is different: it queries the indexed knowledge base and returns a `<retrieved_context>` block whose passages each carry a bracketed `[n]` label. When a fact in your `action_summary` or `evidence.content_excerpt` came from a search passage, append its `[n]` exactly as shown and list those numbers in `evidence.citations`. Copy labels digit-for-digit; confirm the bracketed label appears in this turn's output before emitting it; write the bare `[n]` only (no `[citation:…]` wrapper, markdown links, or ranges). Stack multiple as `[3][4]`. Leave `evidence.citations` `null` when you only touched local files.
 
 ## Examples
 
@@ -104,7 +115,7 @@ Return **only** one JSON object (no markdown or prose outside it):
   "status": "success" | "partial" | "blocked" | "error",
   "action_summary": string,
   "evidence": {
-    "operation": "write_file" | "edit_file" | "read_file" | "ls" | "glob" | "grep" | "mkdir" | "move_file" | "rm" | "rmdir" | "list_tree" | null,
+    "operation": "search_knowledge_base" | "write_file" | "edit_file" | "read_file" | "ls" | "glob" | "grep" | "mkdir" | "move_file" | "rm" | "rmdir" | "list_tree" | null,
     "path": string | null,
     "matched_candidates": [ { "id": string, "label": string } ] | null,
     "content_excerpt": string | null,
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md
index 10dd0c763..6c3979e7f 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md
@@ -11,6 +11,11 @@ The caller's question often references documents by description (`"my meeting no
 
 If a precise path was already given, use it directly — skip the lookup.
 
+## Searching vs. reading
+
+- **`search_knowledge_base`** — hybrid semantic + keyword retrieval across the whole indexed knowledge base. Use it FIRST for open-ended factual questions where you want the most relevant passages rather than one known file. It returns a `<retrieved_context>` block whose passages each carry a `[n]` citation label.
+- **`read_file`** — full text of one document you have already located by path. Use it when you need the complete body.
+
 ## Interpreting tool results
 
 - **Success** — file content (for `read_file`) or a listing (for `ls` / `glob` / `grep` / `list_tree`).
@@ -29,11 +34,11 @@ Reply in plain prose:
 
 ## Citations
 
-When the evidence for a claim came from a `read_file` response for a KB-indexed document under `/documents/`, the document reads back as a `<document … view="full">` block whose passages are each prefixed with a bracketed label — `[1]`, `[2]`, `[3]`. That `[n]` is the citation label. Append the relevant `[n]` to the sentence stating the claim, copying it **exactly** as shown. The caller passes these labels through verbatim and the server resolves each one, so a wrong number silently breaks the citation.
+Both `read_file` and `search_knowledge_base` return passages prefixed with a bracketed label — `[1]`, `[2]`, `[3]`. That `[n]` is the citation label. Append the relevant `[n]` to the sentence stating the claim, copying it **exactly** as shown. The caller passes these labels through verbatim and the server resolves each one, so a wrong number silently breaks the citation.
 
-### Where the labels live in `read_file` output
+### Where the labels live
 
-A KB document reads back like this — only the bracketed `[n]` is a citation label:
+`read_file` returns a KB-indexed `/documents/` file as a `<document … view="full">` block; `search_knowledge_base` returns a `<retrieved_context>` block of top-matching passages. In both, only the bracketed `[n]` is a citation label:
 
 ```
 <document title="Q2 Roadmap" source="File" view="full">
@@ -42,10 +47,18 @@ A KB document reads back like this — only the bracketed `[n]` is a citation la
 </document>
 ```
 
+```
+<retrieved_context>
+  <document title="Pricing notes" source="File">
+    [7] We agreed on usage-based pricing …
+  </document>
+</retrieved_context>
+```
+
 ### Rules
 
 - Use the **exact** `[n]` shown next to the passage you actually quoted or paraphrased. Copy it digit-for-digit; do **not** retype from memory or renumber.
-- Before emitting an `[n]`, confirm that bracketed label appears in the `read_file` output you are summarising this turn. If you can't see it, omit the citation.
+- Before emitting an `[n]`, confirm that bracketed label appears in the `read_file` or `search_knowledge_base` output you are summarising this turn. If you can't see it, omit the citation.
 - Labels are **not** sequential by position — a passage may be `[7]` while the one above it is `[3]` (numbering is shared across the whole conversation). Copy what you see; never guess an adjacent number.
 - Prefer **fewer accurate citations** over many speculative ones. One correct `[3]` is more useful than a string of wrong numbers.
 - Several passages behind one point → each in its own brackets with nothing between: `[3][4]`. Never `[3, 4]` and never a range like `[3-4]`.
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_desktop.md b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_desktop.md
index 6e11aea4f..f4edc39d4 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_desktop.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_desktop.md
@@ -12,6 +12,13 @@ The caller's question often references files by description (`"my meeting notes
 
 If a precise path was already given, use it directly — skip the lookup.
 
+## Searching the indexed knowledge base vs. reading local files
+
+- **`search_knowledge_base`** — hybrid semantic + keyword retrieval over the user's *indexed* knowledge base (separate from the local folders your FS tools read). Use it FIRST for open-ended factual questions where you want the most relevant passages. It returns a `<retrieved_context>` block whose passages each carry a `[n]` citation label.
+- **`read_file` / `ls` / `glob` / `grep`** — operate on the user's *local* folders.
+
+These are different stores; pick the source the request points at (or use both when helpful).
+
 ## Interpreting tool results
 
 - **Success** — file content (for `read_file`) or a listing (for `ls` / `glob` / `grep` / `list_tree`).
@@ -30,4 +37,6 @@ Reply in plain prose:
 
 ## Citations
 
-In desktop mode your filesystem tools read local files only, which are not KB-indexed and carry no `[n]` citation labels. Cite each claim with the absolute local path; do not emit `[n]` or `[citation:…]` markers — your caller has nothing to resolve them against.
+Your **filesystem** tools read local files only, which are not KB-indexed and carry no `[n]` citation labels: cite local-file claims with the absolute path and do not emit `[n]` or `[citation:…]` markers for them.
+
+The **`search_knowledge_base`** tool is different: it queries the indexed knowledge base and returns a `<retrieved_context>` block whose passages each carry a bracketed `[n]` label. When a claim came from a search passage, append its `[n]` exactly as shown (copy digit-for-digit; confirm it appears in this turn's output; bare `[n]` only, stack as `[3][4]`, never ranges). The caller relays these verbatim and the server resolves them.
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/search_knowledge_base.py b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/tools/search_knowledge_base.py
similarity index 74%
rename from surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/search_knowledge_base.py
rename to surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/tools/search_knowledge_base.py
index 9c667c9fe..c6559adee 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/search_knowledge_base.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/tools/search_knowledge_base.py
@@ -1,11 +1,12 @@
-"""On-demand ``search_knowledge_base`` main-agent tool (citation-spine RAG).
+"""On-demand ``search_knowledge_base`` knowledge_base-subagent tool (citation-spine RAG).
 
-The main agent calls this when it decides it needs knowledge-base content. The
-tool runs one hybrid search, renders the matched passages as a
-``<retrieved_context>`` block whose passages carry server-assigned ``[n]``
-labels, and persists the conversation's ``CitationRegistry`` onto graph state so
-the ``[n]`` -> ``[citation:<payload>]`` normalizer can resolve them after the
-turn.
+The knowledge_base subagent calls this when it needs hybrid semantic + keyword
+retrieval over the user's indexed knowledge base. The tool runs one hybrid
+search, renders the matched passages as a ``<retrieved_context>`` block whose
+passages carry server-assigned ``[n]`` labels, and persists the conversation's
+``CitationRegistry`` onto graph state so the ``[n]`` -> ``[citation:<payload>]``
+normalizer can resolve them after the turn. The registry merges across the
+subagent boundary (reducer-backed, forwarded by ``task``/``ask_knowledge_base``).
 """
 
 from __future__ import annotations
@@ -62,6 +63,29 @@ def _search_types(
     return tuple(sorted(types)) or None
 
 
+def _resolve_mention_pins(
+    runtime: ToolRuntime[None, SurfSenseFilesystemState],
+) -> tuple[list[int] | None, list[int] | None]:
+    """Read the turn's ``@``-mention pins, preferring state over context.
+
+    On a subagent graph the pins arrive via forwarded **state** (the ``task``
+    tool copies them off the main ``runtime.context`` since subagents have no
+    ``context_schema``). On the main graph — or any future direct invocation
+    with ``context=`` — they arrive via ``runtime.context``. State wins when
+    both are present; context is the fallback.
+    """
+    state = getattr(runtime, "state", None) or {}
+    document_ids = state.get("mentioned_document_ids")
+    folder_ids = state.get("mentioned_folder_ids")
+    if document_ids or folder_ids:
+        return document_ids or None, folder_ids or None
+    ctx = getattr(runtime, "context", None)
+    return (
+        getattr(ctx, "mentioned_document_ids", None),
+        getattr(ctx, "mentioned_folder_ids", None),
+    )
+
+
 async def _build_search_scope(
     session: AsyncSession,
     *,
@@ -70,12 +94,12 @@ async def _build_search_scope(
     runtime: ToolRuntime[None, SurfSenseFilesystemState],
 ) -> SearchScope:
     """Assemble the retrieval scope: workspace document-type filter + @-mention pins."""
-    ctx = getattr(runtime, "context", None)
+    mentioned_document_ids, mentioned_folder_ids = _resolve_mention_pins(runtime)
     document_ids = await referenced_document_ids(
         session,
         search_space_id=search_space_id,
-        document_ids=getattr(ctx, "mentioned_document_ids", None),
-        folder_ids=getattr(ctx, "mentioned_folder_ids", None),
+        document_ids=mentioned_document_ids,
+        folder_ids=mentioned_folder_ids,
     )
     return SearchScope(
         document_types=document_types,
diff --git a/surfsense_backend/app/agents/chat/runtime/references/connectors.py b/surfsense_backend/app/agents/chat/runtime/references/connectors.py
index 8d5f36133..ae2df15c3 100644
--- a/surfsense_backend/app/agents/chat/runtime/references/connectors.py
+++ b/surfsense_backend/app/agents/chat/runtime/references/connectors.py
@@ -53,9 +53,7 @@ async def resolve_connector_references(
     )
     accessible = {row.id: row for row in rows.all()}
 
-    chip_by_id = {
-        chip.id: chip for chip in (chips or []) if chip.kind == "connector"
-    }
+    chip_by_id = {chip.id: chip for chip in (chips or []) if chip.kind == "connector"}
 
     references: list[ConnectorReference] = []
     for connector_id in dict.fromkeys(connector_ids):
diff --git a/surfsense_backend/app/agents/chat/runtime/references/models.py b/surfsense_backend/app/agents/chat/runtime/references/models.py
index 8ae151772..362f411f3 100644
--- a/surfsense_backend/app/agents/chat/runtime/references/models.py
+++ b/surfsense_backend/app/agents/chat/runtime/references/models.py
@@ -8,11 +8,11 @@ a class-level discriminator used by the renderer and scope builder.
 from __future__ import annotations
 
 from dataclasses import dataclass
-from enum import Enum
+from enum import StrEnum
 from typing import ClassVar
 
 
-class ReferenceKind(str, Enum):
+class ReferenceKind(StrEnum):
     """What the user pointed at; the value is the label shown to the model."""
 
     DOCUMENT = "document"
diff --git a/surfsense_backend/app/agents/chat/runtime/references/reference_pointers.py b/surfsense_backend/app/agents/chat/runtime/references/reference_pointers.py
index 894d844b1..36167e09a 100644
--- a/surfsense_backend/app/agents/chat/runtime/references/reference_pointers.py
+++ b/surfsense_backend/app/agents/chat/runtime/references/reference_pointers.py
@@ -33,9 +33,7 @@ def render_reference_pointers(references: list[Reference]) -> str | None:
     lines = [_render_pointer(reference) for reference in references]
     return (
         "<referenced_this_turn>\n"
-        f"{_HEADER}\n"
-        + "\n".join(lines)
-        + "\n</referenced_this_turn>"
+        f"{_HEADER}\n" + "\n".join(lines) + "\n</referenced_this_turn>"
     )
 
 
diff --git a/surfsense_backend/app/services/llm_router_service.py b/surfsense_backend/app/services/llm_router_service.py
index 3affdcce7..06050d124 100644
--- a/surfsense_backend/app/services/llm_router_service.py
+++ b/surfsense_backend/app/services/llm_router_service.py
@@ -83,7 +83,10 @@ def _sanitize_content(content: Any) -> Any:
             block_type = block.get("type", "text")
             if block_type not in _UNIVERSAL_CONTENT_TYPES:
                 continue
-            if block_type == "text" and not block.get("text"):
+            # Drop blank text blocks. Anthropic rejects whitespace-only system
+            # blocks ("text content blocks must contain non-whitespace text"),
+            # so treat whitespace-only as empty rather than only "".
+            if block_type == "text" and not str(block.get("text") or "").strip():
                 continue
             filtered.append(block)
 
diff --git a/surfsense_backend/app/services/model_resolver.py b/surfsense_backend/app/services/model_resolver.py
index 628c9f473..f31b658a4 100644
--- a/surfsense_backend/app/services/model_resolver.py
+++ b/surfsense_backend/app/services/model_resolver.py
@@ -24,6 +24,21 @@ def ensure_v1(base_url: str | None) -> str | None:
     return f"{stripped}/v1"
 
 
+def strip_version_suffix(base_url: str | None) -> str | None:
+    """Drop a trailing ``/v1`` segment from a base URL.
+
+    Native SDK transports (e.g. Anthropic) expect the API root and append the
+    version path (``/v1/messages``) themselves. A base URL that already carries
+    ``/v1`` would otherwise produce ``/v1/v1/messages`` and a 404.
+    """
+    if not base_url:
+        return None
+    stripped = base_url.rstrip("/")
+    if stripped.endswith("/v1"):
+        return stripped[: -len("/v1")]
+    return stripped
+
+
 def _conn_value(conn: Connection | Mapping[str, Any], key: str) -> Any:
     if isinstance(conn, Mapping):
         return conn.get(key)
@@ -48,11 +63,14 @@ def to_litellm(
     prefix = spec.litellm_prefix or str(provider)
     model_string = f"{prefix}/{model_id}" if prefix else model_id
     if base_url:
-        api_base = (
-            ensure_v1(base_url)
-            if spec.transport == Transport.OPENAI_COMPATIBLE
-            else base_url.rstrip("/")
-        )
+        if spec.transport == Transport.OPENAI_COMPATIBLE:
+            api_base = ensure_v1(base_url)
+        elif provider == "anthropic":
+            # LiteLLM's Anthropic handler appends ``/v1/messages`` to api_base,
+            # so a base URL ending in ``/v1`` must be reduced to the API root.
+            api_base = strip_version_suffix(base_url)
+        else:
+            api_base = base_url.rstrip("/")
         kwargs["api_base"] = api_base
 
     if api_version := extra.get("api_version"):
@@ -90,5 +108,6 @@ def native_connection_from_config(config: Mapping[str, Any]) -> dict[str, Any]:
 __all__ = [
     "ensure_v1",
     "native_connection_from_config",
+    "strip_version_suffix",
     "to_litellm",
 ]
diff --git a/surfsense_backend/tests/integration/agents/multi_agent_chat/main_agent/tools/test_search_knowledge_base.py b/surfsense_backend/tests/integration/agents/multi_agent_chat/subagents/builtins/knowledge_base/tools/test_search_knowledge_base.py
similarity index 63%
rename from surfsense_backend/tests/integration/agents/multi_agent_chat/main_agent/tools/test_search_knowledge_base.py
rename to surfsense_backend/tests/integration/agents/multi_agent_chat/subagents/builtins/knowledge_base/tools/test_search_knowledge_base.py
index b25e8eeeb..09e5f0abf 100644
--- a/surfsense_backend/tests/integration/agents/multi_agent_chat/main_agent/tools/test_search_knowledge_base.py
+++ b/surfsense_backend/tests/integration/agents/multi_agent_chat/subagents/builtins/knowledge_base/tools/test_search_knowledge_base.py
@@ -1,4 +1,4 @@
-"""Behavior tests for the ``search_knowledge_base`` main-agent tool.
+"""Behavior tests for the ``search_knowledge_base`` knowledge_base-subagent tool.
 
 These exercise the tool through its public contract: seed a real document,
 invoke the tool, and assert on the ``Command`` it returns — the rendered
@@ -6,6 +6,12 @@ invoke the tool, and assert on the ``Command`` it returns — the rendered
 back on state is populated.
 The tool's own DB session is redirected to the test session, and the embedding
 leg is pinned so the search is deterministic without a live model.
+
+``@``-mention scoping is covered along BOTH delivery paths: via ``runtime.state``
+(the real subagent path — the ``task`` tool forwards the mentions into state
+because subagents have no ``context_schema``) and via ``runtime.context`` (the
+fallback for any direct main-graph invocation). State takes precedence when both
+are present.
 """
 
 from __future__ import annotations
@@ -18,11 +24,13 @@ import pytest
 from langchain_core.messages import ToolMessage
 from langgraph.types import Command
 
-from app.agents.chat.multi_agent_chat.main_agent.tools import search_knowledge_base
-from app.agents.chat.multi_agent_chat.main_agent.tools.search_knowledge_base import (
+from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
+from app.agents.chat.multi_agent_chat.subagents.builtins.knowledge_base.tools import (
+    search_knowledge_base,
+)
+from app.agents.chat.multi_agent_chat.subagents.builtins.knowledge_base.tools.search_knowledge_base import (
     create_search_knowledge_base_tool,
 )
-from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
 from app.config import config
 from app.db import Chunk, Document, DocumentType, Folder
 
@@ -89,9 +97,7 @@ def _pinned_embedding(monkeypatch):
 
 
 async def _invoke(tool, query: str, state: dict | None = None, context=None):
-    runtime = SimpleNamespace(
-        state=state or {}, tool_call_id="call-1", context=context
-    )
+    runtime = SimpleNamespace(state=state or {}, tool_call_id="call-1", context=context)
     return await tool.coroutine(query, runtime)
 
 
@@ -198,9 +204,7 @@ async def test_document_mention_confines_search_to_pinned_doc(
     )
     tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)
 
-    result = await _invoke(
-        tool, "asyncio", context=_mentions(document_ids=[pinned.id])
-    )
+    result = await _invoke(tool, "asyncio", context=_mentions(document_ids=[pinned.id]))
 
     # Search is confined to the pinned doc: only its content is rendered.
     content = result.update["messages"][0].content
@@ -227,11 +231,106 @@ async def test_folder_mention_confines_search_to_folder_documents(
     )
     tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)
 
-    result = await _invoke(
-        tool, "asyncio", context=_mentions(folder_ids=[folder.id])
-    )
+    result = await _invoke(tool, "asyncio", context=_mentions(folder_ids=[folder.id]))
 
     # Search is confined to the folder's document: only its content is rendered.
     content = result.update["messages"][0].content
     assert "Inside" in content
     assert "Outside" not in content
+
+
+async def test_document_mention_via_state_confines_search(
+    db_session, db_search_space, _tool_uses_test_session, _pinned_embedding
+):
+    """The real subagent path: mentions arrive on ``runtime.state`` (no context).
+
+    The ``task`` tool forwards ``mentioned_document_ids`` into subagent state
+    because subagents are compiled without a ``context_schema``. This asserts
+    the tool honors that state-delivered pin without any ``runtime.context``.
+    """
+    pinned = await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        title="Pinned",
+        text="asyncio appears in the pinned doc.",
+    )
+    await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        title="Other",
+        text="asyncio appears in the other doc.",
+    )
+    tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)
+
+    result = await _invoke(
+        tool,
+        "asyncio",
+        state={"mentioned_document_ids": [pinned.id]},
+        context=None,
+    )
+
+    content = result.update["messages"][0].content
+    assert "Pinned" in content
+    assert "Other" not in content
+
+
+async def test_folder_mention_via_state_confines_search(
+    db_session, db_search_space, _tool_uses_test_session, _pinned_embedding
+):
+    """Folder pins delivered via state (subagent path) scope to the folder's docs."""
+    folder = await _add_folder(db_session, search_space_id=db_search_space.id)
+    await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        title="Inside",
+        text="asyncio appears inside the folder.",
+        folder_id=folder.id,
+    )
+    await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        title="Outside",
+        text="asyncio appears outside the folder.",
+    )
+    tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)
+
+    result = await _invoke(
+        tool,
+        "asyncio",
+        state={"mentioned_folder_ids": [folder.id]},
+        context=None,
+    )
+
+    content = result.update["messages"][0].content
+    assert "Inside" in content
+    assert "Outside" not in content
+
+
+async def test_state_mentions_take_precedence_over_context(
+    db_session, db_search_space, _tool_uses_test_session, _pinned_embedding
+):
+    """When both carry pins, state wins (the forwarded subagent pin is authoritative)."""
+    state_doc = await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        title="StatePinned",
+        text="asyncio appears in the state-pinned doc.",
+    )
+    context_doc = await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        title="ContextPinned",
+        text="asyncio appears in the context-pinned doc.",
+    )
+    tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)
+
+    result = await _invoke(
+        tool,
+        "asyncio",
+        state={"mentioned_document_ids": [state_doc.id]},
+        context=_mentions(document_ids=[context_doc.id]),
+    )
+
+    content = result.update["messages"][0].content
+    assert "StatePinned" in content
+    assert "ContextPinned" not in content
diff --git a/surfsense_backend/tests/unit/agents/chat/runtime/referenced_chat_context/test_transcript.py b/surfsense_backend/tests/unit/agents/chat/runtime/referenced_chat_context/test_transcript.py
index b111617cc..c54559271 100644
--- a/surfsense_backend/tests/unit/agents/chat/runtime/referenced_chat_context/test_transcript.py
+++ b/surfsense_backend/tests/unit/agents/chat/runtime/referenced_chat_context/test_transcript.py
@@ -7,8 +7,8 @@ import pytest
 from app.agents.chat.runtime.referenced_chat_context import (
     ReferencedChat,
     render_referenced_chats_block,
+    transcript as transcript_mod,
 )
-from app.agents.chat.runtime.referenced_chat_context import transcript as transcript_mod
 from app.agents.chat.runtime.referenced_chat_context.models import ReferencedChatTurn
 
 pytestmark = pytest.mark.unit
@@ -77,9 +77,7 @@ def test_oversized_single_turn_is_partially_filled_to_use_budget(
 ) -> None:
     monkeypatch.setattr(transcript_mod, "_MAX_CHARS_PER_REFERENCE", 40)
 
-    block = render_referenced_chats_block(
-        [_chat(1, "T", [("assistant", "x" * 500)])]
-    )
+    block = render_referenced_chats_block([_chat(1, "T", [("assistant", "x" * 500)])])
 
     assert block is not None
     # The turn is too big to keep whole, so its tail fills the budget with a
diff --git a/surfsense_backend/tests/unit/agents/chat/runtime/test_llm_config_sanitizer.py b/surfsense_backend/tests/unit/agents/chat/runtime/test_llm_config_sanitizer.py
index e987f8441..191b0a6d6 100644
--- a/surfsense_backend/tests/unit/agents/chat/runtime/test_llm_config_sanitizer.py
+++ b/surfsense_backend/tests/unit/agents/chat/runtime/test_llm_config_sanitizer.py
@@ -3,13 +3,28 @@
 from __future__ import annotations
 
 import pytest
-from langchain_core.messages import AIMessage
+from langchain_core.messages import AIMessage, SystemMessage
 
 from app.agents.chat.runtime.llm_config import _sanitize_messages
 
 pytestmark = pytest.mark.unit
 
 
+def test_sanitize_messages_drops_whitespace_only_system_text_block() -> None:
+    # Mirrors TodoListMiddleware appending ``{"type":"text","text":"\n\n"}`` to
+    # the system message: Anthropic rejects whitespace-only system blocks.
+    original = SystemMessage(
+        content=[
+            {"type": "text", "text": "real system prompt"},
+            {"type": "text", "text": "\n\n"},
+        ]
+    )
+
+    sanitized = _sanitize_messages([original])
+
+    assert sanitized[0].content == "real system prompt"
+
+
 def test_sanitize_messages_strips_provider_specific_thinking_blocks() -> None:
     original = AIMessage(
         content=[
diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/shared/test_todos_mw.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/shared/test_todos_mw.py
new file mode 100644
index 000000000..b8f69d50d
--- /dev/null
+++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/shared/test_todos_mw.py
@@ -0,0 +1,67 @@
+"""Regression tests for ``build_todos_mw``.
+
+langchain's ``TodoListMiddleware.(a)wrap_model_call`` always appends a system
+text block ``f"\\n\\n{self.system_prompt}"``. With an empty ``system_prompt``
+that block is whitespace-only (``"\\n\\n"``), which Anthropic rejects:
+``"system: text content blocks must contain non-whitespace text"``. The main
+agent supplies its own todo guidance and wants the tool only, so an empty
+prompt must NOT mutate the request's system message.
+"""
+
+from __future__ import annotations
+
+import pytest
+from langchain.agents.middleware import TodoListMiddleware
+
+from app.agents.chat.multi_agent_chat.shared.middleware.todos import (
+    _ToolOnlyTodoListMiddleware,
+    build_todos_mw,
+)
+
+pytestmark = pytest.mark.unit
+
+
+class _Request:
+    def __init__(self) -> None:
+        self.override_called = False
+
+    def override(self, **_kwargs: object) -> _Request:
+        self.override_called = True
+        return self
+
+
+@pytest.mark.parametrize("blank", ["", "   ", "\n\n"])
+def test_blank_prompt_returns_tool_only_middleware(blank: str) -> None:
+    mw = build_todos_mw(system_prompt=blank)
+    assert isinstance(mw, _ToolOnlyTodoListMiddleware)
+    # Still contributes the write_todos tool.
+    assert any(getattr(t, "name", None) == "write_todos" for t in mw.tools)
+
+
+async def test_tool_only_middleware_does_not_touch_system_message() -> None:
+    mw = build_todos_mw(system_prompt="")
+    request = _Request()
+    captured: dict[str, object] = {}
+
+    async def handler(req: _Request) -> str:
+        captured["req"] = req
+        return "ok"
+
+    result = await mw.awrap_model_call(request, handler)
+
+    assert result == "ok"
+    assert captured["req"] is request
+    assert request.override_called is False
+
+
+def test_custom_prompt_uses_upstream_middleware() -> None:
+    mw = build_todos_mw(system_prompt="custom todo guidance")
+    assert isinstance(mw, TodoListMiddleware)
+    assert not isinstance(mw, _ToolOnlyTodoListMiddleware)
+    assert mw.system_prompt == "custom todo guidance"
+
+
+def test_none_prompt_uses_upstream_default() -> None:
+    mw = build_todos_mw()
+    assert isinstance(mw, TodoListMiddleware)
+    assert not isinstance(mw, _ToolOnlyTodoListMiddleware)
diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/document_render/test_web_results.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/document_render/test_web_results.py
index 75cf0e1fb..f96473667 100644
--- a/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/document_render/test_web_results.py
+++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/document_render/test_web_results.py
@@ -49,7 +49,9 @@ def test_wraps_in_web_results_container() -> None:
     assert block.startswith("<web_results>")
     assert block.endswith("</web_results>")
     assert "cite a result with its [n]" in block
-    assert '<document title="Example" source="Web · example.com" view="excerpt">' in block
+    assert (
+        '<document title="Example" source="Web · example.com" view="excerpt">' in block
+    )
     assert "[1] the answer is 42" in block
 
 
diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_adapter.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_adapter.py
index fd700dd1d..3650133c2 100644
--- a/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_adapter.py
+++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_adapter.py
@@ -32,9 +32,10 @@ def test_maps_identity_source_and_passages() -> None:
 
     assert document.title == "Q3 Launch Notes"
     assert document.source == "Slack"
-    assert [
-        (p.locator["chunk_id"], p.content) for p in document.passages
-    ] == [(880, "a"), (881, "b")]
+    assert [(p.locator["chunk_id"], p.content) for p in document.passages] == [
+        (880, "a"),
+        (881, "b"),
+    ]
     assert all(p.locator["document_id"] == 42 for p in document.passages)
 
 
diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_service.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_service.py
index bd44f5dc2..85f77a84e 100644
--- a/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_service.py
+++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_service.py
@@ -23,7 +23,11 @@ def _hit(document_id: int, chunk_id: int) -> DocumentHit:
         document_type="FILE",
         metadata={},
         score=1.0 / document_id,
-        chunks=[ChunkHit(chunk_id=chunk_id, content=f"text {chunk_id}", position=0, score=1.0)],
+        chunks=[
+            ChunkHit(
+                chunk_id=chunk_id, content=f"text {chunk_id}", position=0, score=1.0
+            )
+        ],
     )
 
 
diff --git a/surfsense_backend/tests/unit/services/test_model_connections.py b/surfsense_backend/tests/unit/services/test_model_connections.py
index 937eda806..b4e7c18d7 100644
--- a/surfsense_backend/tests/unit/services/test_model_connections.py
+++ b/surfsense_backend/tests/unit/services/test_model_connections.py
@@ -1,5 +1,49 @@
 from app.services.global_model_catalog import materialize_global_model_catalog
-from app.services.model_resolver import ensure_v1, to_litellm
+from app.services.model_resolver import ensure_v1, strip_version_suffix, to_litellm
+
+
+def test_anthropic_resolver_strips_trailing_v1_from_api_base() -> None:
+    # LiteLLM's Anthropic handler appends ``/v1/messages``; a base URL ending in
+    # ``/v1`` (the frontend default) would otherwise yield ``/v1/v1/messages``.
+    model, kwargs = to_litellm(
+        {
+            "provider": "anthropic",
+            "base_url": "https://api.anthropic.com/v1",
+            "api_key": "sk-ant-test",
+            "extra": {},
+        },
+        "claude-opus-4-8",
+    )
+
+    assert model == "anthropic/claude-opus-4-8"
+    assert kwargs["api_base"] == "https://api.anthropic.com"
+
+
+def test_anthropic_resolver_keeps_root_api_base() -> None:
+    _model, kwargs = to_litellm(
+        {
+            "provider": "anthropic",
+            "base_url": "https://api.anthropic.com",
+            "api_key": "sk-ant-test",
+            "extra": {},
+        },
+        "claude-opus-4-8",
+    )
+
+    assert kwargs["api_base"] == "https://api.anthropic.com"
+
+
+def test_strip_version_suffix() -> None:
+    assert strip_version_suffix("https://api.anthropic.com/v1") == (
+        "https://api.anthropic.com"
+    )
+    assert strip_version_suffix("https://api.anthropic.com/v1/") == (
+        "https://api.anthropic.com"
+    )
+    assert strip_version_suffix("https://api.anthropic.com") == (
+        "https://api.anthropic.com"
+    )
+    assert strip_version_suffix(None) is None
 
 
 def test_openai_compatible_resolver_uses_explicit_api_base() -> None:
diff --git a/surfsense_web/components/providers/ZeroProvider.tsx b/surfsense_web/components/providers/ZeroProvider.tsx
index 530e9c958..1a157a854 100644
--- a/surfsense_web/components/providers/ZeroProvider.tsx
+++ b/surfsense_web/components/providers/ZeroProvider.tsx
@@ -6,7 +6,7 @@ import {
 	ZeroProvider as ZeroReactProvider,
 } from "@rocicorp/zero/react";
 import { usePathname } from "next/navigation";
-import { useEffect, useMemo, useState } from "react";
+import { useEffect, useMemo, useRef, useState } from "react";
 import { useSession } from "@/hooks/use-session";
 import { getDesktopAccessToken } from "@/lib/auth-fetch";
 import { handleUnauthorized, isPublicRoute, refreshSession } from "@/lib/auth-utils";
@@ -54,30 +54,74 @@ async function fetchZeroContext(isDesktop: boolean): Promise<LoadedZeroContext |
 	};
 }
 
+// Cap how many times we will refresh the session in response to Zero's
+// `needs-auth` state before giving up. Without this, a persistent auth failure
+// in zero-cache makes the connection cycle needs-auth -> connecting -> needs-auth
+// indefinitely, each cycle firing a `/auth/jwt/refresh` and quickly tripping the
+// backend rate limiter (HTTP 429).
+const MAX_ZERO_AUTH_REFRESH_ATTEMPTS = 3;
+const ZERO_AUTH_REFRESH_BASE_DELAY_MS = 1_000;
+const ZERO_AUTH_REFRESH_MAX_DELAY_MS = 30_000;
+
 function ZeroAuthSync({ isDesktop }: { isDesktop: boolean }) {
 	const zero = useZero();
 	const connectionState = useConnectionState();
+	const refreshAttemptsRef = useRef(0);
+	const refreshInFlightRef = useRef(false);
+
+	// Once a connection is established, clear the backoff so future
+	// auth expirations get a fresh set of refresh attempts.
+	useEffect(() => {
+		if (connectionState.name === "connected") {
+			refreshAttemptsRef.current = 0;
+		}
+	}, [connectionState.name]);
 
 	useEffect(() => {
 		if (connectionState.name !== "needs-auth") return;
+		if (refreshInFlightRef.current) return;
 
-		refreshSession().then(async (refreshed) => {
-			if (!refreshed) {
-				handleUnauthorized();
-				return;
-			}
+		if (refreshAttemptsRef.current >= MAX_ZERO_AUTH_REFRESH_ATTEMPTS) {
+			handleUnauthorized();
+			return;
+		}
 
-			if (isDesktop) {
-				const newToken = await getDesktopAccessToken();
-				if (!newToken) {
-					handleUnauthorized();
-					return;
-				}
-				zero.connection.connect({ auth: newToken });
-			} else {
-				zero.connection.connect();
-			}
-		});
+		const attempt = refreshAttemptsRef.current;
+		const delayMs =
+			attempt === 0
+				? 0
+				: Math.min(
+						ZERO_AUTH_REFRESH_BASE_DELAY_MS * 2 ** (attempt - 1),
+						ZERO_AUTH_REFRESH_MAX_DELAY_MS
+					);
+
+		refreshInFlightRef.current = true;
+		const timer = setTimeout(() => {
+			refreshAttemptsRef.current += 1;
+			refreshSession()
+				.then(async (refreshed) => {
+					if (!refreshed) {
+						handleUnauthorized();
+						return;
+					}
+
+					if (isDesktop) {
+						const newToken = await getDesktopAccessToken();
+						if (!newToken) {
+							handleUnauthorized();
+							return;
+						}
+						zero.connection.connect({ auth: newToken });
+					} else {
+						zero.connection.connect();
+					}
+				})
+				.finally(() => {
+					refreshInFlightRef.current = false;
+				});
+		}, delayMs);
+
+		return () => clearTimeout(timer);
 	}, [connectionState.name, isDesktop, zero]);
 
 	useEffect(() => {