feat: Anthropic model fix & KB tooling fixes

- Updated main-agent middleware to clarify that both filesystem reads/writes and knowledge-base retrieval are handled by the `knowledge_base` subagent.
- Introduced `_forward_mention_pins` function to carry `@`-mention pins into subagent state.
- Revised system prompts to reflect the new retrieval method and ensure proper citation handling.
- Removed the `search_knowledge_base` tool from the main agent (it is now registered on the `knowledge_base` subagent) and its related tests, consolidating retrieval under the `task` tool.
- Enhanced documentation to guide usage of the new retrieval approach and citation practices.
2026-06-26 21:39:43 +02:00 · 2026-06-25 20:19:44 -07:00 · 2026-06-25 20:19:44 -07:00 · 9642d7ced0
commit 9642d7ced0
parent b4af67f77d
36 changed files with 581 additions and 168 deletions
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/task_tool.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/checkpointed_subagent_middleware/task_tool.py
@ -343,6 +343,28 @@ def build_task_tool_with_parent_config(
        cleaned = hint.strip()
        return cleaned or None
    def _forward_mention_pins(subagent_state: dict, runtime: ToolRuntime) -> None:
        """Carry the turn's ``@``-mention pins from main context into subagent state.
        Subagents are compiled without a ``context_schema`` and invoked without
        ``context=``, so ``runtime.context`` (which holds the ``@``-mentioned
        document/folder ids) does not reach them. The ``task`` tool runs in the
        main runtime, which *does* have the context, so we copy the pins into the
        forwarded state where ``search_knowledge_base`` reads them. Only set keys
        when present so we never clobber pins already on state (e.g. nested
        ``ask_knowledge_base`` re-entry).
        """
        ctx = getattr(runtime, "context", None)
        if ctx is None:
            return
        for state_key, ctx_attr in (
            ("mentioned_document_ids", "mentioned_document_ids"),
            ("mentioned_folder_ids", "mentioned_folder_ids"),
        ):
            value = getattr(ctx, ctx_attr, None)
            if value:
                subagent_state[state_key] = list(value)
    def _validate_and_prepare_state(
        subagent_type: str, description: str, runtime: ToolRuntime
    ) -> tuple[Runnable, dict]:
@ -350,6 +372,7 @@ def build_task_tool_with_parent_config(
        subagent_state = {
            k: v for k, v in runtime.state.items() if k not in EXCLUDED_STATE_KEYS
        }
        _forward_mention_pins(subagent_state, runtime)
        hint = _resolve_context_hint(subagent_type, description, runtime)
        if hint:
            # Tagged block so the subagent prompt can pattern-match the section.
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/stack.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/stack.py
@ -1,11 +1,12 @@
 """Main-agent middleware list assembly: one line per slot.
-The main agent is a pure router — filesystem reads/writes are owned by the
+The main agent is a pure router — both filesystem reads/writes AND knowledge-base
-``knowledge_base`` subagent and delegated via the ``task`` tool. Knowledge-base
+retrieval are owned by the ``knowledge_base`` subagent and reached via the
-retrieval is pull-based: the ``search_knowledge_base`` tool runs the hybrid
+``task`` tool. That subagent runs the hybrid ``search_knowledge_base`` (rendering
-search on demand and renders ``<retrieved_context>`` with ``[n]`` citation
+``<retrieved_context>`` with ``[n]`` citation labels) and the FS tools on demand;
-labels. The stack here computes the workspace tree, commits any subagent-side
+the main agent only sees the specialist's grounded summary. The stack here
-staged writes at end of turn (cloud mode), and wires the supporting middleware.
+computes the workspace tree, commits any subagent-side staged writes at end of
 turn (cloud mode), and wires the supporting middleware.
 """
 from __future__ import annotations
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md
@ -1,9 +1,10 @@
 <citations>
 Cite with one token: the bracket label `[n]`. Every citable result —
-`search_knowledge_base` passages, `web_search` results, and prose from a
+`web_search` results and prose from a `task` knowledge_base/research
-`task` knowledge_base/research specialist — already carries `[n]` labels on a
+specialist (including the knowledge_base specialist's `[n]`-labelled
-single shared count. Those labels are the only citation you write; the server
+workspace findings) — already carries `[n]` labels on a single shared count.
-resolves each one back to its source after the turn.
+Those labels are the only citation you write; the server resolves each one
 back to its source after the turn.
 1. Put the label right after the claim it supports.
 2. Several sources for one claim: stack brackets, `[1][2]`.
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/private.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/private.md
@ -13,9 +13,10 @@ it to resolve paths the user describes in natural language ("my Q2 roadmap",
 "last week's meeting notes") into concrete document references before
 delegating to a specialist.
-`<retrieved_context>` blocks hold knowledge-base passages from
+Knowledge-base passages are no longer injected here directly: delegate to the
-`search_knowledge_base`; each `<document>` inside is in excerpt view and every
+`knowledge_base` specialist via `task`, which runs the hybrid search/read and
-passage is prefixed with an `[n]` citation label.
+returns a grounded summary already carrying `[n]` citation labels for you to
 carry through.
-If a block doesn't appear this turn, work from the conversation alone.
+If no grounding arrives this turn, work from the conversation alone.
 </dynamic_context>
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/team.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/team.md
@ -12,9 +12,10 @@ it to resolve paths described in natural language ("the Q2 roadmap", "last
 week's planning notes") into concrete document references before delegating
 to a specialist.
-`<retrieved_context>` blocks hold knowledge-base passages from
+Knowledge-base passages are no longer injected here directly: delegate to the
-`search_knowledge_base`; each `<document>` inside is in excerpt view and every
+`knowledge_base` specialist via `task`, which runs the hybrid search/read and
-passage is prefixed with an `[n]` citation label.
+returns a grounded summary already carrying `[n]` citation labels for you to
 carry through.
-If a block doesn't appear this turn, work from the conversation alone.
+If no grounding arrives this turn, work from the conversation alone.
 </dynamic_context>
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/kb_first.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/kb_first.md
@ -1,16 +1,18 @@
 <knowledge_base_first>
 CRITICAL — ground factual answers in what you actually receive this turn:
- the user's knowledge base via `search_knowledge_base` (your PRIMARY source
+- the user's knowledge base via `task(knowledge_base, ...)` (your PRIMARY
-  for anything about their documents, notes, or connected data — the
+  source for anything about their documents, notes, or connected data — the
-  `<workspace_tree>` only lists what exists, so call the tool to read the
+  `<workspace_tree>` only lists what exists, so delegate to the specialist to
-  actual content before answering),
+  search and read the actual content before answering),
 - injected workspace context (see `<dynamic_context>`),
 - results from your other tool calls (`web_search`, `scrape_webpage`),
 - or substantive summaries returned by a `task` specialist you invoked.
-For questions about the user's own workspace, call `search_knowledge_base`
+For questions about the user's own workspace, dispatch
-first rather than answering from the tree or from memory. Use
+`task(knowledge_base, ...)` first rather than answering from the tree or from
-`task(knowledge_base)` when you need a document's full text or deeper reads.
+memory. The knowledge_base specialist runs hybrid semantic/keyword search and
 full-document reads, then returns a grounded summary with `[n]` citation
 labels for you to carry through into your answer.
 Do **not** answer factual or informational questions from general knowledge
 unless the user explicitly authorises it after you say you couldn't find
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_knowledge_base/description.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_knowledge_base/description.md
@ -1,19 +0,0 @@
 - `search_knowledge_base` — Search the user's own knowledge base (their
  indexed documents, notes, files, and connected sources) with hybrid
  semantic + keyword retrieval.
  - This is your PRIMARY way to ground factual answers about the user's
    workspace. The `<workspace_tree>` shows what files exist; this tool pulls
    the actual relevant content. Call it BEFORE answering any question about
    the user's documents, notes, or connected data — don't answer from the
    tree alone or from memory.
  - Each hit returns the document's virtual path, a relevance score, and the
    matched snippets. The snippets are often enough to answer directly with a
    citation.
  - When you need a document's full text (not just snippets), delegate a read
    to the `knowledge_base` specialist via `task`, passing the path from the
    results.
  - Args: `query` (focused; include concrete entities, acronyms, people,
    projects, or terms), `top_k` (default 5, max 20).
  - If nothing relevant comes back, tell the user you couldn't find it in
    their workspace before offering to search the web or answer from general
    knowledge.
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_knowledge_base/example.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/tools/search_knowledge_base/example.md
@ -1,13 +0,0 @@
 <example>
 user: "What did our Q3 planning doc say about hiring?"
 → search_knowledge_base(query="Q3 planning hiring headcount plan")
 (Answer from the returned snippets with a citation; if you need the full
 document, task the knowledge_base specialist with the returned path.)
 </example>
 <example>
 user: "Summarize my notes on the Acme migration."
 → search_knowledge_base(query="Acme migration notes")
 → task(subagent_type="knowledge_base", description="Read <path> and return a
 detailed summary of the Acme migration plan, risks, and timeline.")
 </example>
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/index.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/index.py
@ -6,7 +6,6 @@ Connector integrations, MCP, deliverables, etc. are delegated via ``task`` subag
 from __future__ import annotations
 MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED: tuple[str, ...] = (
    "search_knowledge_base",
    "web_search",
    "scrape_webpage",
    "update_memory",
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/registry.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/registry.py
@ -25,7 +25,6 @@ from app.agents.chat.shared.tools.web_search import create_web_search_tool
 from app.db import ChatVisibility
 from .scrape_webpage import create_scrape_webpage_tool
 from .search_knowledge_base import create_search_knowledge_base_tool
 from .update_memory import (
    create_update_memory_tool,
    create_update_team_memory_tool,
@ -36,14 +35,6 @@ def _build_scrape_webpage_tool(deps: dict[str, Any]) -> BaseTool:
    return create_scrape_webpage_tool(firecrawl_api_key=deps.get("firecrawl_api_key"))
 def _build_search_knowledge_base_tool(deps: dict[str, Any]) -> BaseTool:
    return create_search_knowledge_base_tool(
        search_space_id=deps["search_space_id"],
        available_connectors=deps.get("available_connectors"),
        available_document_types=deps.get("available_document_types"),
    )
 def _build_web_search_tool(deps: dict[str, Any]) -> BaseTool:
    return create_web_search_tool(
        search_space_id=deps.get("search_space_id"),
@ -85,10 +76,6 @@ def _build_update_memory_tool(deps: dict[str, Any]) -> BaseTool:
 _MAIN_AGENT_TOOL_FACTORIES: dict[
    str, tuple[Callable[[dict[str, Any]], BaseTool], tuple[str, ...]]
 ] = {
    "search_knowledge_base": (
        _build_search_knowledge_base_tool,
        ("search_space_id",),
    ),
    "scrape_webpage": (_build_scrape_webpage_tool, ()),
    "web_search": (_build_web_search_tool, ()),
    "create_automation": (
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/models.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/models.py
@ -2,13 +2,13 @@
 from __future__ import annotations
-from enum import Enum
+from enum import StrEnum
 from typing import Any
 from pydantic import BaseModel, Field
-class CitationSourceType(str, Enum):
+class CitationSourceType(StrEnum):
    """Source kind of a citable unit; the value is the stable wire/dedup form."""
    KB_CHUNK = "kb_chunk"
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/search_context.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/search_context.py
@ -33,9 +33,7 @@ def render_search_context(
    blocks = [
        block
        for document in documents
-        if (
+        if (block := render_document(document, view="excerpt", registry=registry))
            block := render_document(document, view="excerpt", registry=registry)
        )
        is not None
    ]
    if not blocks:
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/web_results.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/web_results.py
@ -34,21 +34,13 @@ def render_web_results(
    blocks = [
        block
        for document in documents
-        if (
+        if (block := render_document(document, view="excerpt", registry=registry))
            block := render_document(document, view="excerpt", registry=registry)
        )
        is not None
    ]
    if not blocks:
        return None
-    return (
+    return "<web_results>\n" + _HEADER + "\n" + "\n".join(blocks) + "\n</web_results>"
        "<web_results>\n"
        + _HEADER
        + "\n"
        + "\n".join(blocks)
        + "\n</web_results>"
    )
 __all__ = ["render_web_results"]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/todos.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/todos.py
@ -2,11 +2,48 @@
 from __future__ import annotations
 from typing import TYPE_CHECKING, Any
 from langchain.agents.middleware import TodoListMiddleware
 if TYPE_CHECKING:
    from collections.abc import Awaitable, Callable
 class _ToolOnlyTodoListMiddleware(TodoListMiddleware):  # type: ignore[type-arg]
    """Variant of ``TodoListMiddleware`` that keeps the ``write_todos`` tool
    but never adds a todo system prompt to the request.
    The upstream ``TodoListMiddleware.(a)wrap_model_call`` unconditionally
    appends a system text block built as ``f"\\n\\n{self.system_prompt}"``. When
    ``system_prompt`` is empty that block is just ``"\\n\\n"``, and Anthropic
    rejects whitespace-only blocks with ``"system: text content blocks must
    contain non-whitespace text"`` (OpenAI silently tolerates them). Todo usage
    is already documented in the main agent's own system prompt, so both hooks
    below forward the request unchanged instead of appending anything.
    """
    def wrap_model_call(self, request: Any, handler: Callable[[Any], Any]) -> Any:
        # Sync hook: no prompt injection, just delegate to the next handler.
        response = handler(request)
        return response
    async def awrap_model_call(
        self, request: Any, handler: Callable[[Any], Awaitable[Any]]
    ) -> Any:
        # Async hook: same pass-through as the sync variant.
        response = await handler(request)
        return response
 def build_todos_mw(*, system_prompt: str | None = None) -> TodoListMiddleware:
-    """Pass ``system_prompt=""`` to suppress the upstream prompt append. We use a custom system prompt in the main agent."""
+    """Build a todo-list middleware.
    - ``system_prompt=None``: use the upstream default todo system prompt.
    - ``system_prompt=""`` (or whitespace): contribute the ``write_todos`` tool
      without appending any todo system prompt. The main agent supplies its own
      todo guidance, and this avoids emitting a whitespace-only system block that
      Anthropic rejects.
    - otherwise: append the given custom todo system prompt.
    """
    if system_prompt is None:
        return TodoListMiddleware()
    if not system_prompt.strip():
        return _ToolOnlyTodoListMiddleware()
    return TodoListMiddleware(system_prompt=system_prompt)
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/filesystem_state.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/filesystem_state.py
@ -162,6 +162,20 @@ class SurfSenseFilesystemState(FilesystemState):
    normalizer. Merges (union, find-or-create) so parallel/subagent registrations
    stay globally consistent instead of clobbering each other."""
    mentioned_document_ids: NotRequired[Annotated[list[int], _replace_reducer]]
    """``@``-mentioned ``Document.id`` pins for this turn.
    Sourced from the per-invocation ``runtime.context`` on the main graph and
    forwarded into subagent state by the ``task`` tool (subagents are not
    compiled with a ``context_schema``). Read by ``search_knowledge_base`` to
    confine retrieval to the pinned documents."""
    mentioned_folder_ids: NotRequired[Annotated[list[int], _replace_reducer]]
    """``@``-mentioned ``Folder.id`` pins for this turn.
    Same provenance as :data:`mentioned_document_ids`; expanded to the folder's
    documents by ``search_knowledge_base`` to scope retrieval."""
    tree_version: NotRequired[Annotated[int, _replace_reducer]]
    """Monotonically increasing counter; bumped when commits change the KB tree."""
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/agent.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/agent.py
@ -20,6 +20,7 @@ from app.agents.chat.multi_agent_chat.subagents.shared.spec import SurfSenseSuba
 from .middleware_stack import build_kb_middleware
 from .prompts import load_description, load_readonly_system_prompt, load_system_prompt
 from .tools.index import DESTRUCTIVE_FS_OPS
 from .tools.search_knowledge_base import create_search_knowledge_base_tool
 NAME = "knowledge_base"
 READONLY_NAME = "knowledge_base_readonly"
@ -32,6 +33,15 @@ KB_RULESET = Ruleset(
 _KB_READONLY_RULESET = Ruleset(origin=READONLY_NAME, rules=[])
 def _build_search_knowledge_base_tool(dependencies: dict[str, Any]) -> BaseTool:
    """Build the hybrid-RAG ``search_knowledge_base`` tool from shared ``dependencies``.
    ``search_space_id`` is mandatory (a missing key raises ``KeyError``), while
    ``available_connectors`` and ``available_document_types`` are optional and
    are passed as ``None`` when absent.
    """
    # Same evaluation order as the keyword call: required key first, then optionals.
    tool_kwargs: dict[str, Any] = {
        "search_space_id": dependencies["search_space_id"],
        "available_connectors": dependencies.get("available_connectors"),
        "available_document_types": dependencies.get("available_document_types"),
    }
    return create_search_knowledge_base_tool(**tool_kwargs)
 def build_subagent(
    *,
    dependencies: dict[str, Any],
@ -49,7 +59,7 @@ def build_subagent(
            "description": load_description(),
            "system_prompt": load_system_prompt(filesystem_mode),
            "model": llm,
-            "tools": [],
+            "tools": [_build_search_knowledge_base_tool(dependencies)],
            "middleware": build_kb_middleware(
                llm=llm,
                dependencies=dependencies,
@ -78,7 +88,7 @@ def build_readonly_subagent(
            "description": "Read-only knowledge_base specialist (invoked via ask_knowledge_base).",
            "system_prompt": load_readonly_system_prompt(filesystem_mode),
            "model": llm,
-            "tools": [],
+            "tools": [_build_search_knowledge_base_tool(dependencies)],
            "middleware": build_kb_middleware(
                llm=llm,
                dependencies=dependencies,
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/ask_knowledge_base_tool.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/ask_knowledge_base_tool.py
@ -35,8 +35,21 @@ def _wrap_result(result: dict, tool_call_id: str) -> Command:
            "expected at least one assistant message."
        )
    last_text = (getattr(messages[-1], "text", None) or "").rstrip()
    # Carry reducer-backed state (notably citation_registry, populated by the
    # read-only graph's search_knowledge_base call) back up to the caller so
    # [n] labels emitted via ask_knowledge_base resolve at turn end. Drop
    # ``messages`` — we synthesize our own ToolMessage — and anything the
    # subagent boundary excludes.
    forwarded_state = {
        k: v
        for k, v in result.items()
        if k not in EXCLUDED_STATE_KEYS and k != "messages"
    }
    return Command(
-        update={"messages": [ToolMessage(last_text, tool_call_id=tool_call_id)]}
+        update={
            **forwarded_state,
            "messages": [ToolMessage(last_text, tool_call_id=tool_call_id)],
        }
    )
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md
@ -10,6 +10,15 @@ You are the SurfSense knowledge base specialist for the user's `/documents/` wor
  2. Use the `glob` tool for filename patterns the tree didn't surface, and the `grep` tool when the description points at *content* rather than a name.
  3. Only return `status=blocked` with `missing_fields=["path"]` when the description is genuinely ambiguous after a thorough lookup.
 ## Searching vs. reading
 You have two complementary ways to pull workspace content:
 - **`search_knowledge_base`** — hybrid semantic + keyword retrieval across the whole indexed knowledge base (documents, files, and connector content), not just `/documents/`. Use it FIRST for any open-ended factual/informational question ("what did we decide about pricing?", "summarise our onboarding process") where you need the most relevant passages rather than one known file. It returns a `<retrieved_context>` block whose passages each carry a `[n]` citation label.
 - **`read_file`** — full text of one specific document you have already located by path. Use it when you need the complete document body (to edit it, or to quote at length) rather than top matches.
 A common flow is `search_knowledge_base` to find the relevant passages and their source documents, then `read_file` on the winning path when you need the full body. Honor any `@`-mention pins automatically applied to the search scope.
 For writes (where you choose the path yourself):
 - **Discover the user's existing conventions before inventing a path.** Scan `<workspace_tree>` for folders that already hold similar content (e.g. an existing `/documents/meetings/` with dated standup notes, or `/documents/projects/<name>/`). When a convention exists, follow it. Use `ls`, `glob`, or `grep` to look closer when the tree is truncated.
@ -36,11 +45,11 @@ You construct the structured `evidence` fields from your own knowledge of what y
 ## Citations in your prose
-When `read_file` returns a KB-indexed document under `/documents/`, it comes back as a `<document … view="full">` block whose passages are each prefixed with a bracketed label — `[1]`, `[2]`, `[3]`. That `[n]` is the citation label. Whenever a fact in your `action_summary` or `evidence.content_excerpt` came from a specific passage, append its `[n]` to the sentence stating that fact, copying the label **exactly** as shown. The caller relays these labels verbatim and the server resolves each one, so a wrong number silently breaks the citation.
+Both `read_file` and `search_knowledge_base` return passages prefixed with a bracketed label — `[1]`, `[2]`, `[3]`. That `[n]` is the citation label. Whenever a fact in your `action_summary` or `evidence.content_excerpt` came from a specific passage, append its `[n]` to the sentence stating that fact, copying the label **exactly** as shown. The caller relays these labels verbatim and the server resolves each one, so a wrong number silently breaks the citation.
-### Where the labels live in `read_file` output
+### Where the labels live
-A KB document reads back like this — only the bracketed `[n]` is a citation label:
+`read_file` returns a KB-indexed `/documents/` file as a `<document … view="full">` block; `search_knowledge_base` returns a `<retrieved_context>` block of the top-matching passages. In both, only the bracketed `[n]` is a citation label:
 ```
 <document title="Q2 Roadmap" source="File" view="full">
@ -49,10 +58,18 @@ A KB document reads back like this — only the bracketed `[n]` is a citation la
 </document>
 ```
 ```
 <retrieved_context>
  <document title="Pricing notes" source="File">
    [7] We agreed on usage-based pricing …
  </document>
 </retrieved_context>
 ```
 ### Rules
 - Use the **exact** `[n]` shown next to the passage you actually quoted or paraphrased. Copy it digit-for-digit; do **not** retype from memory or renumber.
- Before emitting an `[n]`, confirm that bracketed label appears in the `read_file` output you are summarising this turn. If you can't see it, omit the citation.
+- Before emitting an `[n]`, confirm that bracketed label appears in the `read_file` or `search_knowledge_base` output you are summarising this turn. If you can't see it, omit the citation.
 - Labels are **not** sequential by position — a passage may be `[7]` while the one above it is `[3]` (numbering is shared across the whole conversation). Copy what you see; never guess an adjacent number.
 - Write the bare label `[n]` only — no `[citation:…]` wrapper, no markdown links, no parentheses, no footnote numbers.
 - Several passages behind one point → each in its own brackets with nothing between: `[3][4]`. Never `[3, 4]` and never a range like `[3-4]`.
@ -126,7 +143,7 @@ Return **only** one JSON object (no markdown or prose outside it):
  "status": "success" | "partial" | "blocked" | "error",
  "action_summary": string,
  "evidence": {
-    "operation": "write_file" | "edit_file" | "read_file" | "ls" | "glob" | "grep" | "mkdir" | "move_file" | "rm" | "rmdir" | "list_tree" | null,
+    "operation": "search_knowledge_base" | "write_file" | "edit_file" | "read_file" | "ls" | "glob" | "grep" | "mkdir" | "move_file" | "rm" | "rmdir" | "list_tree" | null,
    "path": string | null,
    "matched_candidates": [ { "id": string, "label": string } ] | null,
    "content_excerpt": string | null,
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md
@ -11,6 +11,15 @@ You are the SurfSense workspace specialist for the user's local folders.
  3. Use the `glob` tool for filename patterns; use the `grep` tool when the description points at *content* rather than a name.
  4. Only return `status=blocked` with `missing_fields=["path"]` when the description is genuinely ambiguous after a thorough lookup.
 ## Searching the indexed knowledge base vs. reading local files
 Two complementary content sources:
 - **`search_knowledge_base`** — hybrid semantic + keyword retrieval over the user's *indexed* knowledge base (documents and connector content), which is separate from the local folders your FS tools read. Use it FIRST for open-ended factual/informational questions where you want the most relevant passages rather than one known file. It returns a `<retrieved_context>` block whose passages each carry a `[n]` citation label.
 - **`read_file` / `ls` / `glob` / `grep`** — operate on the user's *local* folders. Use these to locate and read on-disk files by path.
 These are different stores: `search_knowledge_base` will not surface arbitrary local files, and the FS tools do not see indexed-only content. Pick the source the request points at (or use both when helpful).
 For writes (where you choose the path yourself):
 - **Discover the user's existing conventions before inventing a path.** Inspect the relevant mount's folder layout via `ls` / `list_tree` and look for folders that already hold similar content (e.g. an existing `/notes/meetings/` with dated standup files, or `/projects/<name>/`). When a convention exists, follow it.
@ -32,11 +41,13 @@ Map outcomes to your `status`:
 - Any other `"Error: …"` → `status=error` and relay the tool's message verbatim as `next_step`.
 - HITL rejection → `status=blocked` with `next_step="User declined this filesystem action. Do not retry."`.
-You construct the structured `evidence` fields from your own knowledge of what you called and what you observed — the tools do not return them. Never report values you did not actually see. (`citations` is always `null` in desktop mode — see "Citations in your prose" below.)
+You construct the structured `evidence` fields from your own knowledge of what you called and what you observed — the tools do not return them. Never report values you did not actually see. (See "Citations in your prose" below for when `citations` is populated.)
 ## Citations in your prose
-In desktop mode your filesystem tools read local files only, which are not KB-indexed and carry no `[n]` citation labels. Do not emit `[n]` or `[citation:…]` markers in `action_summary` or `evidence.content_excerpt`, and leave `evidence.citations` `null` — the absolute path is the only reference for local-file work.
+Your **filesystem** tools read local files only, which are not KB-indexed and carry no `[n]` citation labels: when a fact comes from a local-file read, do not emit `[n]` or `[citation:…]` markers — the absolute path is the only reference.
 The **`search_knowledge_base`** tool is different: it queries the indexed knowledge base and returns a `<retrieved_context>` block whose passages each carry a bracketed `[n]` label. When a fact in your `action_summary` or `evidence.content_excerpt` came from a search passage, append its `[n]` exactly as shown and list those numbers in `evidence.citations`. Copy labels digit-for-digit; confirm the bracketed label appears in this turn's output before emitting it; write the bare `[n]` only (no `[citation:…]` wrapper, markdown links, or ranges). Stack multiple as `[3][4]`. Leave `evidence.citations` `null` when you only touched local files.
 ## Examples
@ -104,7 +115,7 @@ Return **only** one JSON object (no markdown or prose outside it):
  "status": "success" | "partial" | "blocked" | "error",
  "action_summary": string,
  "evidence": {
-    "operation": "write_file" | "edit_file" | "read_file" | "ls" | "glob" | "grep" | "mkdir" | "move_file" | "rm" | "rmdir" | "list_tree" | null,
+    "operation": "search_knowledge_base" | "write_file" | "edit_file" | "read_file" | "ls" | "glob" | "grep" | "mkdir" | "move_file" | "rm" | "rmdir" | "list_tree" | null,
    "path": string | null,
    "matched_candidates": [ { "id": string, "label": string } ] | null,
    "content_excerpt": string | null,
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md
@ -11,6 +11,11 @@ The caller's question often references documents by description (`"my meeting no
 If a precise path was already given, use it directly — skip the lookup.
 ## Searching vs. reading
 - **`search_knowledge_base`** — hybrid semantic + keyword retrieval across the whole indexed knowledge base. Use it FIRST for open-ended factual questions where you want the most relevant passages rather than one known file. It returns a `<retrieved_context>` block whose passages each carry a `[n]` citation label.
 - **`read_file`** — full text of one document you have already located by path. Use it when you need the complete body.
 ## Interpreting tool results
 - **Success** — file content (for `read_file`) or a listing (for `ls` / `glob` / `grep` / `list_tree`).
@ -29,11 +34,11 @@ Reply in plain prose:
 ## Citations
-When the evidence for a claim came from a `read_file` response for a KB-indexed document under `/documents/`, the document reads back as a `<document … view="full">` block whose passages are each prefixed with a bracketed label — `[1]`, `[2]`, `[3]`. That `[n]` is the citation label. Append the relevant `[n]` to the sentence stating the claim, copying it **exactly** as shown. The caller passes these labels through verbatim and the server resolves each one, so a wrong number silently breaks the citation.
+Both `read_file` and `search_knowledge_base` return passages prefixed with a bracketed label — `[1]`, `[2]`, `[3]`. That `[n]` is the citation label. Append the relevant `[n]` to the sentence stating the claim, copying it **exactly** as shown. The caller passes these labels through verbatim and the server resolves each one, so a wrong number silently breaks the citation.
-### Where the labels live in `read_file` output
+### Where the labels live
-A KB document reads back like this — only the bracketed `[n]` is a citation label:
+`read_file` returns a KB-indexed `/documents/` file as a `<document … view="full">` block; `search_knowledge_base` returns a `<retrieved_context>` block of top-matching passages. In both, only the bracketed `[n]` is a citation label:
 ```
 <document title="Q2 Roadmap" source="File" view="full">
@ -42,10 +47,18 @@ A KB document reads back like this — only the bracketed `[n]` is a citation la
 </document>
 ```
 ```
 <retrieved_context>
  <document title="Pricing notes" source="File">
    [7] We agreed on usage-based pricing …
  </document>
 </retrieved_context>
 ```
 ### Rules
 - Use the **exact** `[n]` shown next to the passage you actually quoted or paraphrased. Copy it digit-for-digit; do **not** retype from memory or renumber.
- Before emitting an `[n]`, confirm that bracketed label appears in the `read_file` output you are summarising this turn. If you can't see it, omit the citation.
+- Before emitting an `[n]`, confirm that bracketed label appears in the `read_file` or `search_knowledge_base` output you are summarising this turn. If you can't see it, omit the citation.
 - Labels are **not** sequential by position — a passage may be `[7]` while the one above it is `[3]` (numbering is shared across the whole conversation). Copy what you see; never guess an adjacent number.
 - Prefer **fewer accurate citations** over many speculative ones. One correct `[3]` is more useful than a string of wrong numbers.
 - Several passages behind one point → each in its own brackets with nothing between: `[3][4]`. Never `[3, 4]` and never a range like `[3-4]`.
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_desktop.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_desktop.md
@ -12,6 +12,13 @@ The caller's question often references files by description (`"my meeting notes
 If a precise path was already given, use it directly — skip the lookup.
 ## Searching the indexed knowledge base vs. reading local files
 - **`search_knowledge_base`** — hybrid semantic + keyword retrieval over the user's *indexed* knowledge base (separate from the local folders your FS tools read). Use it FIRST for open-ended factual questions where you want the most relevant passages. It returns a `<retrieved_context>` block whose passages each carry a `[n]` citation label.
 - **`read_file` / `ls` / `glob` / `grep`** — operate on the user's *local* folders.
 These are different stores; pick the source the request points at (or use both when helpful).
 ## Interpreting tool results
 - **Success** — file content (for `read_file`) or a listing (for `ls` / `glob` / `grep` / `list_tree`).
@ -30,4 +37,6 @@ Reply in plain prose:
 ## Citations
-In desktop mode your filesystem tools read local files only, which are not KB-indexed and carry no `[n]` citation labels. Cite each claim with the absolute local path; do not emit `[n]` or `[citation:…]` markers — your caller has nothing to resolve them against.
+Your **filesystem** tools read local files only, which are not KB-indexed and carry no `[n]` citation labels: cite local-file claims with the absolute path and do not emit `[n]` or `[citation:…]` markers for them.
 The **`search_knowledge_base`** tool is different: it queries the indexed knowledge base and returns a `<retrieved_context>` block whose passages each carry a bracketed `[n]` label. When a claim came from a search passage, append its `[n]` exactly as shown (copy digit-for-digit; confirm it appears in this turn's output; bare `[n]` only, stack as `[3][4]`, never ranges). The caller relays these verbatim and the server resolves them.
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/tools/search_knowledge_base.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/tools/search_knowledge_base.py
@ -1,11 +1,12 @@
-"""On-demand ``search_knowledge_base`` main-agent tool (citation-spine RAG).
+"""On-demand ``search_knowledge_base`` knowledge_base-subagent tool (citation-spine RAG).
-The main agent calls this when it decides it needs knowledge-base content. The
+The knowledge_base subagent calls this when it needs hybrid semantic + keyword
-tool runs one hybrid search, renders the matched passages as a
+retrieval over the user's indexed knowledge base. The tool runs one hybrid
-``<retrieved_context>`` block whose passages carry server-assigned ``[n]``
+search, renders the matched passages as a ``<retrieved_context>`` block whose
-labels, and persists the conversation's ``CitationRegistry`` onto graph state so
+passages carry server-assigned ``[n]`` labels, and persists the conversation's
-the ``[n]`` -> ``[citation:<payload>]`` normalizer can resolve them after the
+``CitationRegistry`` onto graph state so the ``[n]`` -> ``[citation:<payload>]``
-turn.
+normalizer can resolve them after the turn. The registry merges across the
 subagent boundary (reducer-backed, forwarded by ``task``/``ask_knowledge_base``).
 """
 from __future__ import annotations
@ -62,6 +63,29 @@ def _search_types(
    return tuple(sorted(types)) or None
 def _resolve_mention_pins(
    runtime: ToolRuntime[None, SurfSenseFilesystemState],
 ) -> tuple[list[int] | None, list[int] | None]:
    """Read the turn's ``@``-mention pins, preferring state over context.
    On a subagent graph the pins arrive via forwarded **state** (the ``task``
    tool copies them off the main ``runtime.context`` since subagents have no
    ``context_schema``). On the main graph — or any future direct invocation
    with ``context=`` — they arrive via ``runtime.context``. State wins when
    both are present; context is the fallback.
    """
    state = getattr(runtime, "state", None) or {}
    document_ids = state.get("mentioned_document_ids")
    folder_ids = state.get("mentioned_folder_ids")
    if document_ids or folder_ids:
        return document_ids or None, folder_ids or None
    ctx = getattr(runtime, "context", None)
    return (
        getattr(ctx, "mentioned_document_ids", None),
        getattr(ctx, "mentioned_folder_ids", None),
    )
 async def _build_search_scope(
    session: AsyncSession,
    *,
@ -70,12 +94,12 @@ async def _build_search_scope(
    runtime: ToolRuntime[None, SurfSenseFilesystemState],
 ) -> SearchScope:
    """Assemble the retrieval scope: workspace document-type filter + @-mention pins."""
-    ctx = getattr(runtime, "context", None)
+    mentioned_document_ids, mentioned_folder_ids = _resolve_mention_pins(runtime)
    document_ids = await referenced_document_ids(
        session,
        search_space_id=search_space_id,
-        document_ids=getattr(ctx, "mentioned_document_ids", None),
+        document_ids=mentioned_document_ids,
-        folder_ids=getattr(ctx, "mentioned_folder_ids", None),
+        folder_ids=mentioned_folder_ids,
    )
    return SearchScope(
        document_types=document_types,
--- a/surfsense_backend/app/agents/chat/runtime/references/connectors.py
+++ b/surfsense_backend/app/agents/chat/runtime/references/connectors.py
@ -53,9 +53,7 @@ async def resolve_connector_references(
    )
    accessible = {row.id: row for row in rows.all()}
-    chip_by_id = {
+    chip_by_id = {chip.id: chip for chip in (chips or []) if chip.kind == "connector"}
        chip.id: chip for chip in (chips or []) if chip.kind == "connector"
    }
    references: list[ConnectorReference] = []
    for connector_id in dict.fromkeys(connector_ids):
--- a/surfsense_backend/app/agents/chat/runtime/references/models.py
+++ b/surfsense_backend/app/agents/chat/runtime/references/models.py
@ -8,11 +8,11 @@ a class-level discriminator used by the renderer and scope builder.
 from __future__ import annotations
 from dataclasses import dataclass
-from enum import Enum
+from enum import StrEnum
 from typing import ClassVar
-class ReferenceKind(str, Enum):
+class ReferenceKind(StrEnum):
    """What the user pointed at; the value is the label shown to the model."""
    DOCUMENT = "document"
--- a/surfsense_backend/app/agents/chat/runtime/references/reference_pointers.py
+++ b/surfsense_backend/app/agents/chat/runtime/references/reference_pointers.py
@ -33,9 +33,7 @@ def render_reference_pointers(references: list[Reference]) -> str | None:
    lines = [_render_pointer(reference) for reference in references]
    return (
        "<referenced_this_turn>\n"
-        f"{_HEADER}\n"
+        f"{_HEADER}\n" + "\n".join(lines) + "\n</referenced_this_turn>"
        + "\n".join(lines)
        + "\n</referenced_this_turn>"
    )
--- a/surfsense_backend/app/services/llm_router_service.py
+++ b/surfsense_backend/app/services/llm_router_service.py
@ -83,7 +83,10 @@ def _sanitize_content(content: Any) -> Any:
            block_type = block.get("type", "text")
            if block_type not in _UNIVERSAL_CONTENT_TYPES:
                continue
-            if block_type == "text" and not block.get("text"):
+            # Drop blank text blocks. Anthropic rejects whitespace-only system
            # blocks ("text content blocks must contain non-whitespace text"),
            # so treat whitespace-only as empty rather than only "".
            if block_type == "text" and not str(block.get("text") or "").strip():
                continue
            filtered.append(block)
--- a/surfsense_backend/app/services/model_resolver.py
+++ b/surfsense_backend/app/services/model_resolver.py
@ -24,6 +24,21 @@ def ensure_v1(base_url: str | None) -> str | None:
    return f"{stripped}/v1"
 def strip_version_suffix(base_url: str | None) -> str | None:
    """Return *base_url* without a trailing ``/v1`` path segment.

    Native SDK transports (e.g. Anthropic) are handed the API root and add the
    version path (``/v1/messages``) on their own, so a base URL that already
    ends in ``/v1`` would resolve to ``/v1/v1/messages`` and a 404.

    Trailing slashes are removed before the suffix check. A falsy *base_url*
    (``None`` or ``""``) yields ``None``.
    """
    if base_url:
        return base_url.rstrip("/").removesuffix("/v1")
    return None
 def _conn_value(conn: Connection | Mapping[str, Any], key: str) -> Any:
    if isinstance(conn, Mapping):
        return conn.get(key)
@ -48,11 +63,14 @@ def to_litellm(
    prefix = spec.litellm_prefix or str(provider)
    model_string = f"{prefix}/{model_id}" if prefix else model_id
    if base_url:
-        api_base = (
+        if spec.transport == Transport.OPENAI_COMPATIBLE:
-            ensure_v1(base_url)
+            api_base = ensure_v1(base_url)
-            if spec.transport == Transport.OPENAI_COMPATIBLE
+        elif provider == "anthropic":
-            else base_url.rstrip("/")
+            # LiteLLM's Anthropic handler appends ``/v1/messages`` to api_base,
-        )
+            # so a base URL ending in ``/v1`` must be reduced to the API root.
            api_base = strip_version_suffix(base_url)
        else:
            api_base = base_url.rstrip("/")
        kwargs["api_base"] = api_base
    if api_version := extra.get("api_version"):
@ -90,5 +108,6 @@ def native_connection_from_config(config: Mapping[str, Any]) -> dict[str, Any]:
 __all__ = [
    "ensure_v1",
    "native_connection_from_config",
    "strip_version_suffix",
    "to_litellm",
 ]
--- a/surfsense_backend/tests/integration/agents/multi_agent_chat/subagents/builtins/knowledge_base/tools/test_search_knowledge_base.py
+++ b/surfsense_backend/tests/integration/agents/multi_agent_chat/subagents/builtins/knowledge_base/tools/test_search_knowledge_base.py
@ -1,4 +1,4 @@
-"""Behavior tests for the ``search_knowledge_base`` main-agent tool.
+"""Behavior tests for the ``search_knowledge_base`` knowledge_base-subagent tool.
 These exercise the tool through its public contract: seed a real document,
 invoke the tool, and assert on the ``Command`` it returns — the rendered
@ -6,6 +6,12 @@ invoke the tool, and assert on the ``Command`` it returns — the rendered
 back on state is populated.
 The tool's own DB session is redirected to the test session, and the embedding
 leg is pinned so the search is deterministic without a live model.
 ``@``-mention scoping is covered along BOTH delivery paths: via ``runtime.state``
 (the real subagent path — the ``task`` tool forwards the mentions into state
 because subagents have no ``context_schema``) and via ``runtime.context`` (the
 fallback for any direct main-graph invocation). State takes precedence when both
 are present.
 """
 from __future__ import annotations
@ -18,11 +24,13 @@ import pytest
 from langchain_core.messages import ToolMessage
 from langgraph.types import Command
-from app.agents.chat.multi_agent_chat.main_agent.tools import search_knowledge_base
+from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
-from app.agents.chat.multi_agent_chat.main_agent.tools.search_knowledge_base import (
+from app.agents.chat.multi_agent_chat.subagents.builtins.knowledge_base.tools import (
    search_knowledge_base,
 )
 from app.agents.chat.multi_agent_chat.subagents.builtins.knowledge_base.tools.search_knowledge_base import (
    create_search_knowledge_base_tool,
 )
 from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
 from app.config import config
 from app.db import Chunk, Document, DocumentType, Folder
@ -89,9 +97,7 @@ def _pinned_embedding(monkeypatch):
 async def _invoke(tool, query: str, state: dict | None = None, context=None):
-    runtime = SimpleNamespace(
+    runtime = SimpleNamespace(state=state or {}, tool_call_id="call-1", context=context)
        state=state or {}, tool_call_id="call-1", context=context
    )
    return await tool.coroutine(query, runtime)
@ -198,9 +204,7 @@ async def test_document_mention_confines_search_to_pinned_doc(
    )
    tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)
-    result = await _invoke(
+    result = await _invoke(tool, "asyncio", context=_mentions(document_ids=[pinned.id]))
        tool, "asyncio", context=_mentions(document_ids=[pinned.id])
    )
    # Search is confined to the pinned doc: only its content is rendered.
    content = result.update["messages"][0].content
@ -227,11 +231,106 @@ async def test_folder_mention_confines_search_to_folder_documents(
    )
    tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)
-    result = await _invoke(
+    result = await _invoke(tool, "asyncio", context=_mentions(folder_ids=[folder.id]))
        tool, "asyncio", context=_mentions(folder_ids=[folder.id])
    )
    # Search is confined to the folder's document: only its content is rendered.
    content = result.update["messages"][0].content
    assert "Inside" in content
    assert "Outside" not in content
 async def test_document_mention_via_state_confines_search(
    db_session, db_search_space, _tool_uses_test_session, _pinned_embedding
 ):
    """A document pin delivered on ``runtime.state`` alone scopes the search.

    This is the real subagent path: the ``task`` tool forwards
    ``mentioned_document_ids`` into subagent state because subagents are
    compiled without a ``context_schema``. No ``runtime.context`` is supplied,
    so the tool must honor the state-delivered pin by itself.
    """
    space_id = db_search_space.id
    pinned = await _add_document(
        db_session,
        search_space_id=space_id,
        title="Pinned",
        text="asyncio appears in the pinned doc.",
    )
    # A second matching document that the pin must exclude.
    await _add_document(
        db_session,
        search_space_id=space_id,
        title="Other",
        text="asyncio appears in the other doc.",
    )
    tool = create_search_knowledge_base_tool(search_space_id=space_id)
    forwarded_state = {"mentioned_document_ids": [pinned.id]}
    command = await _invoke(tool, "asyncio", state=forwarded_state, context=None)
    rendered = command.update["messages"][0].content
    assert "Pinned" in rendered
    assert "Other" not in rendered
 async def test_folder_mention_via_state_confines_search(
    db_session, db_search_space, _tool_uses_test_session, _pinned_embedding
 ):
    """A folder pin delivered on state (subagent path) scopes to that folder's docs."""
    space_id = db_search_space.id
    folder = await _add_folder(db_session, search_space_id=space_id)
    await _add_document(
        db_session,
        search_space_id=space_id,
        title="Inside",
        text="asyncio appears inside the folder.",
        folder_id=folder.id,
    )
    # Same query term, but filed outside the pinned folder.
    await _add_document(
        db_session,
        search_space_id=space_id,
        title="Outside",
        text="asyncio appears outside the folder.",
    )
    tool = create_search_knowledge_base_tool(search_space_id=space_id)
    forwarded_state = {"mentioned_folder_ids": [folder.id]}
    command = await _invoke(tool, "asyncio", state=forwarded_state, context=None)
    rendered = command.update["messages"][0].content
    assert "Inside" in rendered
    assert "Outside" not in rendered
 async def test_state_mentions_take_precedence_over_context(
    db_session, db_search_space, _tool_uses_test_session, _pinned_embedding
 ):
    """With pins on both state and context, the state pin decides the scope."""
    space_id = db_search_space.id
    state_doc = await _add_document(
        db_session,
        search_space_id=space_id,
        title="StatePinned",
        text="asyncio appears in the state-pinned doc.",
    )
    context_doc = await _add_document(
        db_session,
        search_space_id=space_id,
        title="ContextPinned",
        text="asyncio appears in the context-pinned doc.",
    )
    tool = create_search_knowledge_base_tool(search_space_id=space_id)
    # Each delivery path pins a different document; only the state one may win.
    forwarded_state = {"mentioned_document_ids": [state_doc.id]}
    main_context = _mentions(document_ids=[context_doc.id])
    command = await _invoke(
        tool, "asyncio", state=forwarded_state, context=main_context
    )
    rendered = command.update["messages"][0].content
    assert "StatePinned" in rendered
    assert "ContextPinned" not in rendered
--- a/surfsense_backend/tests/unit/agents/chat/runtime/referenced_chat_context/test_transcript.py
+++ b/surfsense_backend/tests/unit/agents/chat/runtime/referenced_chat_context/test_transcript.py
@ -7,8 +7,8 @@ import pytest
 from app.agents.chat.runtime.referenced_chat_context import (
    ReferencedChat,
    render_referenced_chats_block,
    transcript as transcript_mod,
 )
 from app.agents.chat.runtime.referenced_chat_context import transcript as transcript_mod
 from app.agents.chat.runtime.referenced_chat_context.models import ReferencedChatTurn
 pytestmark = pytest.mark.unit
@ -77,9 +77,7 @@ def test_oversized_single_turn_is_partially_filled_to_use_budget(
 ) -> None:
    monkeypatch.setattr(transcript_mod, "_MAX_CHARS_PER_REFERENCE", 40)
-    block = render_referenced_chats_block(
+    block = render_referenced_chats_block([_chat(1, "T", [("assistant", "x" * 500)])])
        [_chat(1, "T", [("assistant", "x" * 500)])]
    )
    assert block is not None
    # The turn is too big to keep whole, so its tail fills the budget with a
--- a/surfsense_backend/tests/unit/agents/chat/runtime/test_llm_config_sanitizer.py
+++ b/surfsense_backend/tests/unit/agents/chat/runtime/test_llm_config_sanitizer.py
@ -3,13 +3,28 @@
 from __future__ import annotations
 import pytest
-from langchain_core.messages import AIMessage
+from langchain_core.messages import AIMessage, SystemMessage
 from app.agents.chat.runtime.llm_config import _sanitize_messages
 pytestmark = pytest.mark.unit
 def test_sanitize_messages_drops_whitespace_only_system_text_block() -> None:
    # Reproduces the shape TodoListMiddleware creates when it appends
    # ``{"type":"text","text":"\n\n"}`` to the system message; Anthropic
    # rejects whitespace-only system blocks, so the sanitizer must drop it.
    prompt_block = {"type": "text", "text": "real system prompt"}
    blank_block = {"type": "text", "text": "\n\n"}
    system_message = SystemMessage(content=[prompt_block, blank_block])

    sanitized = _sanitize_messages([system_message])

    assert sanitized[0].content == "real system prompt"
 def test_sanitize_messages_strips_provider_specific_thinking_blocks() -> None:
    original = AIMessage(
        content=[
--- a/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/shared/test_todos_mw.py
+++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/shared/test_todos_mw.py
@ -0,0 +1,67 @@
 """Regression tests for ``build_todos_mw``.
 langchain's ``TodoListMiddleware.(a)wrap_model_call`` always appends a system
 text block ``f"\\n\\n{self.system_prompt}"``. With an empty ``system_prompt``
 that block is whitespace-only (``"\\n\\n"``), which Anthropic rejects:
 ``"system: text content blocks must contain non-whitespace text"``. The main
 agent supplies its own todo guidance and wants the tool only, so an empty
 prompt must NOT mutate the request's system message.
 """
 from __future__ import annotations
 import pytest
 from langchain.agents.middleware import TodoListMiddleware
 from app.agents.chat.multi_agent_chat.shared.middleware.todos import (
    _ToolOnlyTodoListMiddleware,
    build_todos_mw,
 )
 pytestmark = pytest.mark.unit
 class _Request:
    def __init__(self) -> None:
        self.override_called = False
    def override(self, **_kwargs: object) -> _Request:
        self.override_called = True
        return self
@pytest.mark.parametrize("blank", ["", "   ", "\n\n"])
def test_blank_prompt_returns_tool_only_middleware(blank: str) -> None:
    """Empty and whitespace-only prompts select the tool-only middleware."""
    middleware = build_todos_mw(system_prompt=blank)
    assert isinstance(middleware, _ToolOnlyTodoListMiddleware)
    # The tool-only variant must still contribute the write_todos tool.
    tool_names = [getattr(tool, "name", None) for tool in middleware.tools]
    assert "write_todos" in tool_names
 async def test_tool_only_middleware_does_not_touch_system_message() -> None:
    """With an empty prompt the request reaches the handler without ``override``."""
    middleware = build_todos_mw(system_prompt="")
    request = _Request()
    seen: list[_Request] = []

    async def handler(req: _Request) -> str:
        # Remember exactly which request object the middleware passed along.
        seen.append(req)
        return "ok"

    outcome = await middleware.awrap_model_call(request, handler)

    assert outcome == "ok"
    assert seen[-1] is request
    assert request.override_called is False
 def test_custom_prompt_uses_upstream_middleware() -> None:
    """A non-blank prompt yields the stock middleware carrying that prompt."""
    middleware = build_todos_mw(system_prompt="custom todo guidance")
    is_upstream = isinstance(middleware, TodoListMiddleware)
    is_tool_only = isinstance(middleware, _ToolOnlyTodoListMiddleware)
    assert is_upstream
    assert not is_tool_only
    assert middleware.system_prompt == "custom todo guidance"
 def test_none_prompt_uses_upstream_default() -> None:
    """Omitting the prompt yields the stock middleware, not the tool-only one."""
    middleware = build_todos_mw()
    is_upstream = isinstance(middleware, TodoListMiddleware)
    is_tool_only = isinstance(middleware, _ToolOnlyTodoListMiddleware)
    assert is_upstream
    assert not is_tool_only
--- a/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/document_render/test_web_results.py
+++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/document_render/test_web_results.py
@ -49,7 +49,9 @@ def test_wraps_in_web_results_container() -> None:
    assert block.startswith("<web_results>")
    assert block.endswith("</web_results>")
    assert "cite a result with its [n]" in block
-    assert '<document title="Example" source="Web · example.com" view="excerpt">' in block
+    assert (
        '<document title="Example" source="Web · example.com" view="excerpt">' in block
    )
    assert "[1] the answer is 42" in block
--- a/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_adapter.py
+++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_adapter.py
@ -32,9 +32,10 @@ def test_maps_identity_source_and_passages() -> None:
    assert document.title == "Q3 Launch Notes"
    assert document.source == "Slack"
-    assert [
+    assert [(p.locator["chunk_id"], p.content) for p in document.passages] == [
-        (p.locator["chunk_id"], p.content) for p in document.passages
+        (880, "a"),
-    ] == [(880, "a"), (881, "b")]
+        (881, "b"),
    ]
    assert all(p.locator["document_id"] == 42 for p in document.passages)
--- a/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_service.py
+++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_service.py
@ -23,7 +23,11 @@ def _hit(document_id: int, chunk_id: int) -> DocumentHit:
        document_type="FILE",
        metadata={},
        score=1.0 / document_id,
-        chunks=[ChunkHit(chunk_id=chunk_id, content=f"text {chunk_id}", position=0, score=1.0)],
+        chunks=[
            ChunkHit(
                chunk_id=chunk_id, content=f"text {chunk_id}", position=0, score=1.0
            )
        ],
    )
--- a/surfsense_backend/tests/unit/services/test_model_connections.py
+++ b/surfsense_backend/tests/unit/services/test_model_connections.py
@ -1,5 +1,49 @@
 from app.services.global_model_catalog import materialize_global_model_catalog
-from app.services.model_resolver import ensure_v1, to_litellm
+from app.services.model_resolver import ensure_v1, strip_version_suffix, to_litellm
 def test_anthropic_resolver_strips_trailing_v1_from_api_base() -> None:
    # LiteLLM's Anthropic handler adds ``/v1/messages`` itself, so a base URL
    # ending in ``/v1`` (the frontend default) would otherwise become
    # ``/v1/v1/messages``.
    connection = {
        "provider": "anthropic",
        "base_url": "https://api.anthropic.com/v1",
        "api_key": "sk-ant-test",
        "extra": {},
    }

    model, kwargs = to_litellm(connection, "claude-opus-4-8")

    assert model == "anthropic/claude-opus-4-8"
    assert kwargs["api_base"] == "https://api.anthropic.com"
 def test_anthropic_resolver_keeps_root_api_base() -> None:
    """A base URL that is already the API root is passed through unchanged."""
    connection = {
        "provider": "anthropic",
        "base_url": "https://api.anthropic.com",
        "api_key": "sk-ant-test",
        "extra": {},
    }

    _model, kwargs = to_litellm(connection, "claude-opus-4-8")

    assert kwargs["api_base"] == "https://api.anthropic.com"
 def test_strip_version_suffix() -> None:
    """``/v1`` (with or without a trailing slash) is dropped; ``None`` stays ``None``."""
    api_root = "https://api.anthropic.com"
    for raw_url in (
        "https://api.anthropic.com/v1",
        "https://api.anthropic.com/v1/",
        "https://api.anthropic.com",
    ):
        assert strip_version_suffix(raw_url) == api_root
    assert strip_version_suffix(None) is None
 def test_openai_compatible_resolver_uses_explicit_api_base() -> None:
--- a/surfsense_web/components/providers/ZeroProvider.tsx
+++ b/surfsense_web/components/providers/ZeroProvider.tsx
@ -6,7 +6,7 @@ import {
 	ZeroProvider as ZeroReactProvider,
 } from "@rocicorp/zero/react";
 import { usePathname } from "next/navigation";
-import { useEffect, useMemo, useState } from "react";
+import { useEffect, useMemo, useRef, useState } from "react";
 import { useSession } from "@/hooks/use-session";
 import { getDesktopAccessToken } from "@/lib/auth-fetch";
 import { handleUnauthorized, isPublicRoute, refreshSession } from "@/lib/auth-utils";
@ -54,30 +54,74 @@ async function fetchZeroContext(isDesktop: boolean): Promise<LoadedZeroContext |
 	};
 }
 // Cap how many times we will refresh the session in response to Zero's
 // `needs-auth` state before giving up. Without this, a persistent auth failure
 // in zero-cache makes the connection cycle needs-auth -> connecting -> needs-auth
 // indefinitely, each cycle firing a `/auth/jwt/refresh` and quickly tripping the
 // backend rate limiter (HTTP 429).
 const MAX_ZERO_AUTH_REFRESH_ATTEMPTS = 3;
 // Backoff between refresh attempts: the first attempt runs with no delay; each
 // later attempt waits BASE_DELAY * 2^(attempt - 1) ms, capped at MAX_DELAY.
 const ZERO_AUTH_REFRESH_BASE_DELAY_MS = 1_000;
 const ZERO_AUTH_REFRESH_MAX_DELAY_MS = 30_000;
 function ZeroAuthSync({ isDesktop }: { isDesktop: boolean }) {
 	const zero = useZero();
 	const connectionState = useConnectionState();
 	const refreshAttemptsRef = useRef(0);
 	const refreshInFlightRef = useRef(false);
 	// Once a connection is established, clear the backoff so future
 	// auth expirations get a fresh set of refresh attempts.
 	useEffect(() => {
 		if (connectionState.name === "connected") {
 			refreshAttemptsRef.current = 0;
 		}
 	}, [connectionState.name]);
 	useEffect(() => {
 		if (connectionState.name !== "needs-auth") return;
 		if (refreshInFlightRef.current) return;
-		refreshSession().then(async (refreshed) => {
+		if (refreshAttemptsRef.current >= MAX_ZERO_AUTH_REFRESH_ATTEMPTS) {
-			if (!refreshed) {
+			handleUnauthorized();
-				handleUnauthorized();
+			return;
-				return;
+		}
 			}
-			if (isDesktop) {
+		const attempt = refreshAttemptsRef.current;
-				const newToken = await getDesktopAccessToken();
+		const delayMs =
-				if (!newToken) {
+			attempt === 0
-					handleUnauthorized();
+				? 0
-					return;
+				: Math.min(
-				}
+						ZERO_AUTH_REFRESH_BASE_DELAY_MS * 2 ** (attempt - 1),
-				zero.connection.connect({ auth: newToken });
+						ZERO_AUTH_REFRESH_MAX_DELAY_MS
-			} else {
+					);
-				zero.connection.connect();
+
-			}
+		refreshInFlightRef.current = true;
-		});
+		const timer = setTimeout(() => {
 			refreshAttemptsRef.current += 1;
 			refreshSession()
 				.then(async (refreshed) => {
 					if (!refreshed) {
 						handleUnauthorized();
 						return;
 					}
 					if (isDesktop) {
 						const newToken = await getDesktopAccessToken();
 						if (!newToken) {
 							handleUnauthorized();
 							return;
 						}
 						zero.connection.connect({ auth: newToken });
 					} else {
 						zero.connection.connect();
 					}
 				})
 				.finally(() => {
 					refreshInFlightRef.current = false;
 				});
 		}, delayMs);
 		return () => clearTimeout(timer);
 	}, [connectionState.name, isDesktop, zero]);
 	useEffect(() => {