diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/search_knowledge_base.py b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/search_knowledge_base.py
index 9236e9121..9c667c9fe 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/search_knowledge_base.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/search_knowledge_base.py
@@ -1,12 +1,11 @@
-"""On-demand ``search_knowledge_base`` main-agent tool (OpenCode-style lazy RAG).
+"""On-demand ``search_knowledge_base`` main-agent tool (citation-spine RAG).
 
-The main agent no longer receives eagerly pre-injected KB context on every
-turn (see :class:`KnowledgePriorityMiddleware`, now gated off by default).
-Instead it calls this tool only when it decides it needs knowledge-base
-content. The tool runs a single hybrid search (embed + DB search, ~0.5s),
-formats the top matches for the model, and writes ``kb_matched_chunk_ids``
-into graph state so matched-section highlighting is preserved when the agent
-later reads a document via ``task(knowledge_base)``.
+The main agent calls this when it decides it needs knowledge-base content. The
+tool runs one hybrid search, renders the matched passages as a
+``<retrieved_context>`` block whose passages carry server-assigned ``[n]``
+labels, and persists the conversation's ``CitationRegistry`` onto graph state so
+the ``[n]`` -> ``[citation:<payload>]`` normalizer can resolve those labels
+after the turn.
 """
 
 from __future__ import annotations
@@ -18,153 +17,70 @@ from langchain.tools import ToolRuntime
 from langchain_core.messages import ToolMessage
 from langchain_core.tools import BaseTool, StructuredTool
 from langgraph.types import Command
-from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
 
-from app.agents.chat.multi_agent_chat.shared.middleware.knowledge_search import (
-    search_knowledge_base as _hybrid_search_kb,
+from app.agents.chat.multi_agent_chat.shared.citations import load_registry
+from app.agents.chat.multi_agent_chat.shared.retrieval import SearchScope, build_context
+from app.agents.chat.multi_agent_chat.shared.retrieval.hybrid_search import (
+    search_chunks,
 )
 from app.agents.chat.multi_agent_chat.shared.state.filesystem_state import (
     SurfSenseFilesystemState,
 )
-from app.agents.chat.runtime.path_resolver import (
-    PathIndex,
-    build_path_index,
-    doc_to_virtual_path,
-)
-from app.db import Document, shielded_async_session
+from app.agents.chat.runtime.references import referenced_document_ids
+from app.db import shielded_async_session
 from app.utils.perf import get_perf_logger
 
 _perf_log = get_perf_logger()
 
 _DEFAULT_TOP_K = 5
 _MAX_TOP_K = 20
-_PER_DOC_SNIPPET_CHARS = 1200
-_MAX_TOTAL_CHARS = 16_000
 
 _TOOL_DESCRIPTION = (
     "Search the user's knowledge base (their indexed documents, files, and "
     "connector content) for passages relevant to a query, using hybrid "
     "semantic + keyword retrieval.\n\n"
     "Use this FIRST to ground any factual or informational answer about the "
-    "user's own documents, notes, or connected sources. The workspace tree "
-    "shows which files exist; this tool pulls the actual relevant content. "
-    "Each hit returns the document's virtual path, a relevance score, and the "
-    "matched snippets. If you need a document's full text, delegate a read to "
-    "the knowledge_base specialist via `task` using the returned path.\n\n"
+    "user's own documents, notes, or connected sources. It returns a "
+    "<retrieved_context> block: each matched passage is labelled [n]. Cite a "
+    "passage by writing that [n] after the statement it supports.\n\n"
     "Write a focused, specific query containing the concrete entities, "
     "acronyms, people, projects, or terms you are looking for."
 )
 
 
-async def _resolve_virtual_paths(
-    results: list[dict[str, Any]],
+def _search_types(
+    available_connectors: list[str] | None,
+    available_document_types: list[str] | None,
+) -> tuple[str, ...] | None:
+    """Merge connector + document-type filters, sorted; ``None`` if unrestricted."""
+    types: set[str] = set()
+    if available_document_types:
+        types.update(available_document_types)
+    if available_connectors:
+        types.update(available_connectors)
+    return tuple(sorted(types)) or None
+
+
+async def _build_search_scope(
+    session: AsyncSession,
     *,
     search_space_id: int,
-) -> dict[int, str]:
-    """Resolve ``Document.id`` -> canonical virtual path for the search hits."""
-    doc_ids = [
-        doc_id
-        for doc_id in (
-            (doc.get("document") or {}).get("id")
-            for doc in results
-            if isinstance(doc, dict)
-        )
-        if isinstance(doc_id, int)
-    ]
-    if not doc_ids:
-        return {}
-
-    async with shielded_async_session() as session:
-        index: PathIndex = await build_path_index(session, search_space_id)
-        folder_rows = await session.execute(
-            select(Document.id, Document.folder_id).where(
-                Document.search_space_id == search_space_id,
-                Document.id.in_(doc_ids),
-            )
-        )
-        folder_by_doc_id = {row.id: row.folder_id for row in folder_rows.all()}
-
-    paths: dict[int, str] = {}
-    for doc in results:
-        doc_meta = doc.get("document") or {}
-        doc_id = doc_meta.get("id")
-        if not isinstance(doc_id, int):
-            continue
-        folder_id = folder_by_doc_id.get(doc_id, doc_meta.get("folder_id"))
-        paths[doc_id] = doc_to_virtual_path(
-            doc_id=doc_id,
-            title=str(doc_meta.get("title") or "untitled"),
-            folder_id=folder_id if isinstance(folder_id, int) else None,
-            index=index,
-        )
-    return paths
-
-
-def _format_hits(
-    results: list[dict[str, Any]],
-    *,
-    paths: dict[int, str],
-    query: str,
-) -> str:
-    """Render search hits as a compact, model-readable block."""
-    if not results:
-        return (
-            f"No knowledge-base matches found for query: {query!r}.\n"
-            "Tell the user nothing relevant was found in their workspace, or "
-            "try a different query."
-        )
-
-    lines: list[str] = [f"<knowledge_base_results query={query!r}>"]
-    total = len(lines[0])
-    for rank, doc in enumerate(results, start=1):
-        doc_meta = doc.get("document") or {}
-        doc_id = doc_meta.get("id")
-        title = str(doc_meta.get("title") or "untitled")
-        doc_type = doc_meta.get("document_type") or doc.get("source") or "document"
-        score = doc.get("score")
-        score_str = f"{score:.3f}" if isinstance(score, int | float) else "n/a"
-        path = paths.get(doc_id) if isinstance(doc_id, int) else None
-
-        header = f"\n{rank}. {title} (type={doc_type}, score={score_str})" + (
-            f"\n   path: {path}" if path else ""
-        )
-
-        content = (doc.get("content") or "").strip()
-        if content:
-            snippet = content[:_PER_DOC_SNIPPET_CHARS].strip()
-            if len(content) > _PER_DOC_SNIPPET_CHARS:
-                snippet += " ..."
-            body = "\n   " + snippet.replace("\n", "\n   ")
-        else:
-            body = "\n   (no preview available; read the document for details)"
-
-        entry = header + body
-        if total + len(entry) > _MAX_TOTAL_CHARS:
-            lines.append("\n<!-- additional matches truncated to fit context -->")
-            break
-        lines.append(entry)
-        total += len(entry)
-
-    lines.append(
-        "\n\nTo read a full document, delegate to the knowledge_base specialist "
-        "with `task`, referencing the path above."
+    document_types: tuple[str, ...] | None,
+    runtime: ToolRuntime[None, SurfSenseFilesystemState],
+) -> SearchScope:
+    """Assemble the retrieval scope: workspace document-type filter + @-mention pins."""
+    ctx = getattr(runtime, "context", None)
+    document_ids = await referenced_document_ids(
+        session,
+        search_space_id=search_space_id,
+        document_ids=getattr(ctx, "mentioned_document_ids", None),
+        folder_ids=getattr(ctx, "mentioned_folder_ids", None),
+    )
+    return SearchScope(
+        document_types=document_types,
+        document_ids=document_ids or None,
     )
-    lines.append("\n</knowledge_base_results>")
-    return "".join(lines)
-
-
-def _matched_chunk_ids(results: list[dict[str, Any]]) -> dict[int, list[int]]:
-    """Extract ``Document.id`` -> matched chunk ids for state hand-off."""
-    matched: dict[int, list[int]] = {}
-    for doc in results:
-        doc_id = (doc.get("document") or {}).get("id")
-        if not isinstance(doc_id, int):
-            continue
-        chunk_ids = doc.get("matched_chunk_ids") or []
-        normalized = [int(cid) for cid in chunk_ids if isinstance(cid, int | str)]
-        if normalized:
-            matched[doc_id] = normalized
-    return matched
 
 
 def create_search_knowledge_base_tool(
@@ -176,8 +92,7 @@ def create_search_knowledge_base_tool(
     """Factory for the on-demand ``search_knowledge_base`` tool."""
 
     _space_id = search_space_id
-    _connectors = available_connectors
-    _doc_types = available_document_types
+    _document_types = _search_types(available_connectors, available_document_types)
 
     async def _impl(
         query: Annotated[
@@ -195,34 +110,45 @@ def create_search_knowledge_base_tool(
             return "Error: provide a non-empty search query."
 
         clamped_top_k = min(max(1, top_k), _MAX_TOP_K)
-        t0 = time.perf_counter()
-        results = await _hybrid_search_kb(
-            query=cleaned_query,
-            search_space_id=_space_id,
-            available_connectors=_connectors,
-            available_document_types=_doc_types,
-            top_k=clamped_top_k,
-        )
+        registry = load_registry(getattr(runtime, "state", None))
 
-        paths = await _resolve_virtual_paths(results, search_space_id=_space_id)
-        rendered = _format_hits(results, paths=paths, query=cleaned_query)
-        matched = _matched_chunk_ids(results)
+        t0 = time.perf_counter()
+        async with shielded_async_session() as session:
+            scope = await _build_search_scope(
+                session,
+                search_space_id=_space_id,
+                document_types=_document_types,
+                runtime=runtime,
+            )
+            hits = await search_chunks(
+                session,
+                search_space_id=_space_id,
+                query=cleaned_query,
+                scope=scope,
+                top_k=clamped_top_k,
+            )
+            rendered = build_context(cleaned_query, hits, registry)
 
         _perf_log.info(
-            "[search_knowledge_base] tool query=%r results=%d chars=%d in %.3fs",
+            "[search_knowledge_base] tool query=%r docs=%d in %.3fs",
             cleaned_query[:60],
-            len(results),
-            len(rendered),
+            len(hits),
             time.perf_counter() - t0,
         )
 
+        if rendered is None:
+            return (
+                f"No knowledge-base matches found for query: {cleaned_query!r}.\n"
+                "Tell the user nothing relevant was found in their workspace, or "
+                "try a different query."
+            )
+
         update: dict[str, Any] = {
             "messages": [
                 ToolMessage(content=rendered, tool_call_id=runtime.tool_call_id)
             ],
+            "citation_registry": registry,
         }
-        if matched:
-            update["kb_matched_chunk_ids"] = matched
         return Command(update=update)
 
     return StructuredTool.from_function(
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/knowledge_search.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/knowledge_search.py
index 9ef601791..efb85a785 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/knowledge_search.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/knowledge_search.py
@@ -5,11 +5,6 @@ This middleware runs ``before_agent`` on every turn and writes:
 * ``state["kb_priority"]`` — the top-K most relevant documents for the
   current user message, used to render a ``<priority_documents>`` system
   message immediately before the user turn.
-* ``state["kb_matched_chunk_ids"]`` — internal hand-off mapping
-  (``Document.id`` → matched chunk IDs) consumed by
-  :class:`KBPostgresBackend._load_file_data` when the agent first reads each
-  document, so the XML wrapper can flag matched sections in
-  ``<chunk_index>``.
 
 The previous "scoped filesystem" behaviour (synthetic ``ls`` + state
 ``files`` seeding) is intentionally removed: documents are now lazy-loaded
@@ -816,7 +811,6 @@ class KnowledgePriorityMiddleware(AgentMiddleware):  # type: ignore[type-arg]
         ]
         update: dict[str, Any] = {
             "kb_priority": priority,
-            "kb_matched_chunk_ids": {},
         }
         if self.inject_system_message:
             new_messages = list(state.get("messages") or [])
@@ -930,7 +924,7 @@ class KnowledgePriorityMiddleware(AgentMiddleware):  # type: ignore[type-arg]
             merged.append(doc)
 
         _t_materialize = time.perf_counter()
-        priority, matched_chunk_ids = await self._materialize_priority(merged)
+        priority = await self._materialize_priority(merged)
 
         if folder_mention_ids:
             folder_entries = await self._materialize_folder_priority(folder_mention_ids)
@@ -957,7 +951,6 @@ class KnowledgePriorityMiddleware(AgentMiddleware):  # type: ignore[type-arg]
 
         update: dict[str, Any] = {
             "kb_priority": priority,
-            "kb_matched_chunk_ids": matched_chunk_ids,
         }
         if self.inject_system_message:
             new_messages = list(messages)
@@ -1016,13 +1009,12 @@ class KnowledgePriorityMiddleware(AgentMiddleware):  # type: ignore[type-arg]
 
     async def _materialize_priority(
         self, merged: list[dict[str, Any]]
-    ) -> tuple[list[dict[str, Any]], dict[int, list[int]]]:
-        """Resolve canonical paths and matched chunk ids for the priority list."""
+    ) -> list[dict[str, Any]]:
+        """Resolve canonical paths for the priority list."""
         priority: list[dict[str, Any]] = []
-        matched_chunk_ids: dict[int, list[int]] = {}
 
         if not merged:
-            return priority, matched_chunk_ids
+            return priority
 
         _t0 = time.perf_counter()
         async with shielded_async_session() as session:
@@ -1067,18 +1059,12 @@ class KnowledgePriorityMiddleware(AgentMiddleware):  # type: ignore[type-arg]
                     "mentioned": bool(doc.get("_user_mentioned")),
                 }
             )
-            if isinstance(doc_id, int):
-                chunk_ids = doc.get("matched_chunk_ids") or []
-                if chunk_ids:
-                    matched_chunk_ids[doc_id] = [
-                        int(cid) for cid in chunk_ids if isinstance(cid, int | str)
-                    ]
         _perf_log.info(
             "[kb_priority.materialize] db=%.3fs docs=%d",
             time.perf_counter() - _t0,
             len(merged),
         )
-        return priority, matched_chunk_ids
+        return priority
 
 
 __all__ = [
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/filesystem_state.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/filesystem_state.py
index 41bed9d62..f0708ccaf 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/filesystem_state.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/filesystem_state.py
@@ -14,8 +14,8 @@ extra fields needed to implement Postgres-backed virtual filesystem semantics:
 * ``dirty_path_tool_calls`` — sidecar map ``path -> latest tool_call_id`` for
   dirty paths; used to bind the per-path snapshot to an action_id.
 * ``kb_priority`` — top-K priority hints rendered into a system message.
-* ``kb_matched_chunk_ids`` — internal hand-off for matched-chunk highlighting.
 * ``kb_anon_doc`` — Redis-loaded anonymous document (if any).
+* ``citation_registry`` — per-conversation ``[n]`` -> source map for citations.
 * ``tree_version`` — bumped by persistence; invalidates the tree render cache.
 * ``workspace_tree_text`` — pre-rendered ``<workspace_tree>`` body for the turn.
 
@@ -30,9 +30,11 @@ from typing import Annotated, Any, NotRequired
 from deepagents.middleware.filesystem import FilesystemState
 from typing_extensions import TypedDict
 
+from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
 from app.agents.chat.multi_agent_chat.shared.receipts.receipt import Receipt
 from app.agents.chat.multi_agent_chat.shared.state.reducers import (
     _add_unique_reducer,
+    _citation_registry_merge_reducer,
     _dict_merge_with_tombstones_reducer,
     _int_counter_merge_reducer,
     _list_append_reducer,
@@ -162,12 +164,16 @@ class SurfSenseFilesystemState(FilesystemState):
     kb_priority: NotRequired[Annotated[list[KbPriorityEntry], _replace_reducer]]
     """Top-K priority hints rendered as a system message before the user turn."""
 
-    kb_matched_chunk_ids: NotRequired[Annotated[dict[int, list[int]], _replace_reducer]]
-    """Internal: ``Document.id`` -> list of matched chunk IDs from hybrid search."""
-
     kb_anon_doc: NotRequired[Annotated[KbAnonDoc | None, _replace_reducer]]
     """Anonymous-session document loaded from Redis (read-only, no DB row)."""
 
+    citation_registry: NotRequired[
+        Annotated[CitationRegistry, _citation_registry_merge_reducer]
+    ]
+    """Per-conversation ``[n]`` -> source map; written by retrieval, read by the
+    normalizer. Merges (union, find-or-create) so parallel/subagent registrations
+    stay globally consistent instead of clobbering each other."""
+
     tree_version: NotRequired[Annotated[int, _replace_reducer]]
     """Monotonically increasing counter; bumped when commits change the KB tree."""
 
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/reducers.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/reducers.py
index c7b7685f0..8a9590723 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/reducers.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/reducers.py
@@ -2,7 +2,7 @@
 
 These reducers back the extra state fields used by the cloud-mode filesystem
 agent (`cwd`, `staged_dirs`, `pending_moves`, `dirty_paths`, `doc_id_by_path`,
-`kb_priority`, `kb_matched_chunk_ids`, `kb_anon_doc`, `tree_version`).
+`kb_priority`, `kb_anon_doc`, `tree_version`).
 
 Tools mutate these fields ONLY via `Command(update={...})` returns; the
 reducers are responsible for merging successive updates atomically and for
@@ -20,6 +20,8 @@ from __future__ import annotations
 
 from typing import Any, Final, TypeVar
 
+from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
+
 _CLEAR: Final[str] = "\x00__SURFSENSE_FILESYSTEM_CLEAR__\x00"
 """Reset sentinel; pass it inside a list/dict update to request a reset.
 
@@ -204,6 +206,41 @@ def _int_counter_merge_reducer(
     return base
 
 
+def _as_registry(value: Any) -> CitationRegistry | None:
+    """Coerce a state value into a ``CitationRegistry``.
+
+    The checkpointer serializes ``Command.update`` via ``ormsgpack`` *before*
+    reducers run, so an update can arrive as a plain ``dict`` rather than a model.
+    """
+    if value is None:
+        return None
+    if isinstance(value, CitationRegistry):
+        return value
+    if isinstance(value, dict):
+        return CitationRegistry.model_validate(value)
+    return None
+
+
+def _citation_registry_merge_reducer(
+    left: Any,
+    right: Any,
+) -> CitationRegistry | None:
+    """Union two citation registries instead of replacing.
+
+    Find-or-create across both sides so ``[n]`` stays globally consistent when
+    branches (parent + subagents, parallel tool calls) each register into a
+    registry forked from the same base. Collisions re-mint rather than drop. See
+    :meth:`CitationRegistry.merge`.
+    """
+    right_reg = _as_registry(right)
+    left_reg = _as_registry(left)
+    if right_reg is None:
+        return left_reg
+    if left_reg is None:
+        return right_reg
+    return left_reg.merge(right_reg)
+
+
 def _initial_filesystem_state() -> dict[str, Any]:
     """Default empty values for SurfSense filesystem state fields.
 
@@ -222,7 +259,6 @@ def _initial_filesystem_state() -> dict[str, Any]:
         "dirty_paths": [],
         "dirty_path_tool_calls": {},
         "kb_priority": [],
-        "kb_matched_chunk_ids": {},
         "kb_anon_doc": None,
         "tree_version": 0,
     }
@@ -231,6 +267,7 @@ def _initial_filesystem_state() -> dict[str, Any]:
 __all__ = [
     "_CLEAR",
     "_add_unique_reducer",
+    "_citation_registry_merge_reducer",
     "_dict_merge_with_tombstones_reducer",
     "_initial_filesystem_state",
     "_int_counter_merge_reducer",
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/description_readonly.md b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/description_readonly.md
index e989e3ee6..11dcc5d11 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/description_readonly.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/description_readonly.md
@@ -2,4 +2,4 @@ Read-only specialist for the user's workspace (documents and folders). Use to fi
 
 Pass your full question as one string. The specialist runs in isolation: it cannot see this thread, so include any path hints, filters, or constraints it needs.
 
-The specialist returns plain prose with absolute paths and `[citation:<chunk_id>]` markers when claims came from KB-indexed chunks. Preserve those markers verbatim if you forward the answer.
+The specialist returns plain prose with absolute paths and `[n]` citation labels when claims came from KB-indexed documents. Preserve those labels verbatim if you forward the answer.
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md
index c4e36fc73..c77bd5bb4 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md
@@ -35,42 +35,31 @@ Map outcomes to your `status`:
 
 You construct the structured `evidence` fields from your own knowledge of what you called and what you observed — the tools do not return them. Never report values you did not actually see.
 
-## Chunk citations in your prose
+## Citations in your prose
 
-When `read_file` returns a KB-indexed document under `/documents/`, the response includes `<chunk id='…'>` blocks. Whenever a fact in your `action_summary` or `evidence.content_excerpt` came from a specific chunk, append `[citation:<chunk_id>]` to the sentence stating that fact, using the **exact** id from the `<chunk id='…'>` tag. The caller relays these markers to the end user verbatim, and the UI resolves each id by exact match against the database, so a wrong id silently breaks the citation.
+When `read_file` returns a KB-indexed document under `/documents/`, it comes back as a `<document … view="full">` block whose passages are each prefixed with a bracketed label — `[1]`, `[2]`, `[3]`. That `[n]` is the citation label. Whenever a fact in your `action_summary` or `evidence.content_excerpt` came from a specific passage, append its `[n]` to the sentence stating that fact, copying the label **exactly** as shown. The caller relays these labels verbatim and the server resolves each one, so a wrong number silently breaks the citation.
 
-### Where chunk ids live in `read_file` output
+### Where the labels live in `read_file` output
 
-A KB document's XML has three numeric attributes — only **one** is a citation source:
+A KB document reads back like this — only the bracketed `[n]` is a citation label:
 
 ```
-<document>
-<document_metadata>
-  <document_id>42</document_id>          ← NOT a citation. Parent doc id; ignore for citations.
-  ...
-</document_metadata>
-<chunk_index>
-  <entry chunk_id="128" lines="14-22"/>  ← Index hint; the same id also appears below.
-  <entry chunk_id="129" lines="23-30" matched="true"/>
-</chunk_index>
-<document_content>
-  <chunk id='128'><![CDATA[…]]></chunk>  ← This is the citation source.
-  <chunk id='129'><![CDATA[…]]></chunk>
-</document_content>
+<document title="Q2 Roadmap" source="File" view="full">
+  [3] First milestone is …
+  [4] Second milestone is …
 </document>
 ```
 
 ### Rules
 
-- Use the **exact** id from a `<chunk id='…'>` tag whose content you actually quoted or paraphrased. Copy digit-for-digit; do **not** retype from memory.
-- Before emitting `[citation:N]`, confirm the literal substring `<chunk id='N'>` (or its index twin `chunk_id="N"`) appears in the tool result you are summarising this turn. If you can't see it, omit the citation.
-- Never cite `<document_id>` — that's the parent doc, not a chunk.
-- Never invent, normalise, shorten, or guess at adjacent ids. If unsure between two candidates, omit rather than pick.
+- Use the **exact** `[n]` shown next to the passage you actually quoted or paraphrased. Copy it digit-for-digit; do **not** retype from memory or renumber.
+- Before emitting an `[n]`, confirm that bracketed label appears in the `read_file` output you are summarising this turn. If you can't see it, omit the citation.
+- Labels are **not** sequential by position — a passage may be `[7]` while the one above it is `[3]` (numbering is shared across the whole conversation). Copy what you see; never guess an adjacent number.
+- Write the bare label `[n]` only — no `[citation:…]` wrapper, no markdown links, no parentheses, no footnote numbers.
+- Several passages behind one point → each in its own brackets with nothing between: `[3][4]`. Never `[3, 4]` and never a range like `[3-4]`.
 - Prefer **fewer accurate citations** over many speculative ones.
-- Multiple chunks supporting the same point → comma-separated and copied individually: `[citation:128], [citation:129]`.
-- Plain square brackets only — no markdown links, no parentheses, no footnote numbers.
-- Tool results without `<chunk id='…'>` (write/edit/move confirmations, `ls` / `glob` / `grep` listings, error strings) carry no chunk id and need none.
-- Populate `evidence.chunk_ids` with **only** ids you actually emitted in `[citation:…]` markers — same set, same digits.
+- Tool results without `[n]` labels (write/edit/move confirmations, `ls` / `glob` / `grep` listings, error strings) carry no label and need none.
+- Populate `evidence.citations` with **only** the labels you actually emitted, as bare numbers — same numbers (prose `[3][4]` → `"citations": [3, 4]`).
 
 ## Examples
 
@@ -89,7 +78,7 @@ A KB document's XML has three numeric attributes — only **one** is a citation
       "path": "/documents/meetings/2026-05-11-meeting.md",
       "matched_candidates": null,
       "content_excerpt": null,
-      "chunk_ids": null
+      "citations": null
     },
     "next_step": null,
     "missing_fields": null,
@@ -121,7 +110,7 @@ A KB document's XML has three numeric attributes — only **one** is a citation
         { "id": "/documents/design/auth-rework.md", "label": "Auth Rework" }
       ],
       "content_excerpt": null,
-      "chunk_ids": null
+      "citations": null
     },
     "next_step": "Ask the user which design doc to update.",
     "missing_fields": ["path"],
@@ -142,7 +131,7 @@ Return **only** one JSON object (no markdown or prose outside it):
     "path": string | null,
     "matched_candidates": [ { "id": string, "label": string } ] | null,
     "content_excerpt": string | null,
-    "chunk_ids": string[] | null
+    "citations": number[] | null
   },
   "next_step": string | null,
   "missing_fields": string[] | null,
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md
index 25dafa3df..d10a08282 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md
@@ -33,11 +33,11 @@ Map outcomes to your `status`:
 - Any other `"Error: …"` → `status=error` and relay the tool's message verbatim as `next_step`.
 - HITL rejection → `status=blocked` with `next_step="User declined this filesystem action. Do not retry."`.
 
-You construct the structured `evidence` fields from your own knowledge of what you called and what you observed — the tools do not return them. Never report values you did not actually see. (`chunk_ids` is always `null` in desktop mode — see "Chunk citations in your prose" below.)
+You construct the structured `evidence` fields from your own knowledge of what you called and what you observed — the tools do not return them. Never report values you did not actually see. (`citations` is always `null` in desktop mode — see "Citations in your prose" below.)
 
-## Chunk citations in your prose
+## Citations in your prose
 
-In desktop mode your filesystem tools read local files only, and local-file tool results do **not** carry `<chunk id='…'>` tags. Do not emit `[citation:…]` markers in `action_summary` or `evidence.content_excerpt`, and leave `evidence.chunk_ids` `null` — the absolute path is the only reference for local-file work.
+In desktop mode your filesystem tools read local files only, which are not KB-indexed and carry no `[n]` citation labels. Do not emit `[n]` or `[citation:…]` markers in `action_summary` or `evidence.content_excerpt`, and leave `evidence.citations` `null` — the absolute path is the only reference for local-file work.
 
 ## Examples
 
@@ -56,7 +56,7 @@ In desktop mode your filesystem tools read local files only, and local-file tool
       "path": "/notes/meetings/2026-05-11-meeting.md",
       "matched_candidates": null,
       "content_excerpt": null,
-      "chunk_ids": null
+      "citations": null
     },
     "next_step": null,
     "missing_fields": null,
@@ -88,7 +88,7 @@ In desktop mode your filesystem tools read local files only, and local-file tool
         { "id": "/projects/web/design/auth-rework.md", "label": "Auth Rework" }
       ],
       "content_excerpt": null,
-      "chunk_ids": null
+      "citations": null
     },
     "next_step": "Ask the user which design doc to update.",
     "missing_fields": ["path"],
@@ -109,7 +109,7 @@ Return **only** one JSON object (no markdown or prose outside it):
     "path": string | null,
     "matched_candidates": [ { "id": string, "label": string } ] | null,
     "content_excerpt": string | null,
-    "chunk_ids": string[] | null
+    "citations": number[] | null
   },
   "next_step": string | null,
   "missing_fields": string[] | null,
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md
index c7813e71d..ae6ba3cfb 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md
@@ -28,41 +28,30 @@ Reply in plain prose:
 - If the workspace does not contain the requested information, say so explicitly. Do not fabricate paths or content.
 - If the question is genuinely ambiguous after a thorough lookup, list the candidates with their paths and stop.
 
-## Chunk citations
+## Citations
 
-When the evidence for a claim came from a `read_file` response that included `<chunk id='…'>` blocks (i.e. a KB-indexed document under `/documents/`), append `[citation:<chunk_id>]` to the sentence stating that claim. The caller passes these markers through to the end user verbatim, and the UI resolves each id by exact match against the database, so a wrong id silently breaks the citation.
+When the evidence for a claim came from a `read_file` response for a KB-indexed document under `/documents/`, the document reads back as a `<document … view="full">` block whose passages are each prefixed with a bracketed label — `[1]`, `[2]`, `[3]`. That `[n]` is the citation label. Append the relevant `[n]` to the sentence stating the claim, copying it **exactly** as shown. The caller passes these labels through verbatim and the server resolves each one, so a wrong number silently breaks the citation.
 
-### Where chunk ids live in `read_file` output
+### Where the labels live in `read_file` output
 
-A KB document's XML has three numeric attributes — only **one** is a citation source:
+A KB document reads back like this — only the bracketed `[n]` is a citation label:
 
 ```
-<document>
-<document_metadata>
-  <document_id>42</document_id>          ← NOT a citation. Parent doc id; ignore for citations.
-  ...
-</document_metadata>
-<chunk_index>
-  <entry chunk_id="128" lines="14-22"/>  ← Index hint; the same id also appears below.
-  <entry chunk_id="129" lines="23-30" matched="true"/>
-</chunk_index>
-<document_content>
-  <chunk id='128'><![CDATA[…]]></chunk>  ← This is the citation source.
-  <chunk id='129'><![CDATA[…]]></chunk>
-</document_content>
+<document title="Q2 Roadmap" source="File" view="full">
+  [3] First milestone is …
+  [4] Second milestone is …
 </document>
 ```
 
 ### Rules
 
-- Use the **exact** id from a `<chunk id='…'>` tag whose content you actually quoted or paraphrased. Copy digit-for-digit; do **not** retype from memory.
-- Before emitting `[citation:N]`, confirm the literal substring `<chunk id='N'>` (or its index twin `chunk_id="N"`) appears in the tool result you are summarising this turn. If you can't see it, omit the citation.
-- Never cite `<document_id>` — that's the parent doc, not a chunk.
-- Never invent, normalise, shorten, or guess at adjacent ids. If unsure between two candidates, omit rather than pick.
-- Prefer **fewer accurate citations** over many speculative ones. One correct `[citation:128]` is more useful than a string of wrong ids.
-- Multiple chunks supporting the same point → comma-separated and copied individually: `[citation:128], [citation:129]`.
-- Plain square brackets only — no markdown links, no parentheses, no footnote numbers.
-- If a claim came from a tool result that did **not** carry a chunk id (`ls`, `glob`, `grep` listings, error strings, or files without `<chunk id='…'>`), skip the citation.
-- The absolute path under `/documents/` is always required; chunk citations are additive, they do not replace the path reference.
+- Use the **exact** `[n]` shown next to the passage you actually quoted or paraphrased. Copy it digit-for-digit; do **not** retype from memory or renumber.
+- Before emitting an `[n]`, confirm that bracketed label appears in the `read_file` output you are summarising this turn. If you can't see it, omit the citation.
+- Labels are **not** sequential by position — a passage may be `[7]` while the one above it is `[3]` (numbering is shared across the whole conversation). Copy what you see; never guess an adjacent number.
+- Prefer **fewer accurate citations** over many speculative ones. One correct `[3]` is more useful than a string of wrong numbers.
+- When several passages support one point, give each its own brackets with nothing between them: `[3][4]`. Never `[3, 4]` and never a range like `[3-4]`.
+- Write the bare label `[n]` only — no `[citation:…]` wrapper, no markdown links, no parentheses, no footnote numbers.
+- If a claim came from a tool result that did **not** carry `[n]` labels (`ls`, `glob`, `grep` listings, error strings), skip the citation.
+- The absolute path under `/documents/` is always required; `[n]` labels are additive, they do not replace the path reference.
 
-Example: `The Q2 roadmap lists three milestones (/documents/planning/q2-roadmap.md) [citation:128], [citation:129].`
+Example: `The Q2 roadmap lists three milestones (/documents/planning/q2-roadmap.md) [3][4].`
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_desktop.md b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_desktop.md
index 2ea711e44..8704754a2 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_desktop.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_desktop.md
@@ -29,6 +29,6 @@ Reply in plain prose:
 - If the workspace does not contain the requested information, say so explicitly. Do not fabricate paths or content.
 - If the question is genuinely ambiguous after a thorough lookup, list the candidates with their paths and stop.
 
-## Chunk citations
+## Citations
 
-In desktop mode your filesystem tools read local files only, and local-file `read_file` responses do **not** carry `<chunk id='…'>` tags. Cite each claim with the absolute local path; do not emit `[citation:…]` markers — your caller has nothing to resolve them against.
+In desktop mode your filesystem tools read local files only, which are not KB-indexed and carry no `[n]` citation labels. Cite each claim with the absolute local path; do not emit `[n]` or `[citation:…]` markers — your caller has nothing to resolve them against.
diff --git a/surfsense_backend/app/agents/chat/runtime/references/__init__.py b/surfsense_backend/app/agents/chat/runtime/references/__init__.py
index 51e543ccc..62530fd71 100644
--- a/surfsense_backend/app/agents/chat/runtime/references/__init__.py
+++ b/surfsense_backend/app/agents/chat/runtime/references/__init__.py
@@ -13,7 +13,7 @@ from app.schemas.new_chat import MentionedDocumentInfo
 
 from .chat import resolve_chat_references
 from .connectors import resolve_connector_references
-from .documents import resolve_document_references
+from .documents import referenced_document_ids, resolve_document_references
 from .folders import resolve_folder_references
 from .models import (
     ChatReference,
@@ -89,6 +89,7 @@ __all__ = [
     "FolderReference",
     "Reference",
     "ReferenceKind",
+    "referenced_document_ids",
     "render_reference_pointers",
     "resolve_references",
 ]
diff --git a/surfsense_backend/app/agents/chat/runtime/references/documents/__init__.py b/surfsense_backend/app/agents/chat/runtime/references/documents/__init__.py
new file mode 100644
index 000000000..4250ee119
--- /dev/null
+++ b/surfsense_backend/app/agents/chat/runtime/references/documents/__init__.py
@@ -0,0 +1,13 @@
+"""Resolve ``@document`` references.
+
+Two concerns, one subject: ``resolver`` turns document ids into pointer
+references for the model, ``referenced`` turns ``@document`` / ``@folder``
+mentions into the document ids a retrieval is confined to.
+"""
+
+from __future__ import annotations
+
+from .referenced import referenced_document_ids
+from .resolver import resolve_document_references
+
+__all__ = ["referenced_document_ids", "resolve_document_references"]
diff --git a/surfsense_backend/app/agents/chat/runtime/references/documents/referenced.py b/surfsense_backend/app/agents/chat/runtime/references/documents/referenced.py
new file mode 100644
index 000000000..4e05fd324
--- /dev/null
+++ b/surfsense_backend/app/agents/chat/runtime/references/documents/referenced.py
@@ -0,0 +1,39 @@
+"""Resolve ``@document`` / ``@folder`` mentions to the documents they point at.
+
+Reference resolution, not retrieval: this answers "which knowledge-base
+documents did the user point at this turn?". ``@document`` ids pass through;
+``@folder`` ids expand to the documents directly inside each folder within this
+search space (direct children only, not nested subfolders). The caller turns the
+returned ids into a retrieval ``SearchScope``.
+"""
+
+from __future__ import annotations
+
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.db import Document
+
+
+async def referenced_document_ids(
+    session: AsyncSession,
+    *,
+    search_space_id: int,
+    document_ids: list[int] | None = None,
+    folder_ids: list[int] | None = None,
+) -> tuple[int, ...]:
+    """Sorted document ids the user pointed at (empty = nothing referenced)."""
+    doc_ids = set(document_ids or [])
+    folders = list(folder_ids or [])
+    if folders:
+        rows = await session.execute(
+            select(Document.id).where(
+                Document.search_space_id == search_space_id,
+                Document.folder_id.in_(folders),
+            )
+        )
+        doc_ids.update(rows.scalars().all())
+    return tuple(sorted(doc_ids))
+
+
+__all__ = ["referenced_document_ids"]
diff --git a/surfsense_backend/app/agents/chat/runtime/references/documents.py b/surfsense_backend/app/agents/chat/runtime/references/documents/resolver.py
similarity index 97%
rename from surfsense_backend/app/agents/chat/runtime/references/documents.py
rename to surfsense_backend/app/agents/chat/runtime/references/documents/resolver.py
index b2a3b1fe4..72a459eb9 100644
--- a/surfsense_backend/app/agents/chat/runtime/references/documents.py
+++ b/surfsense_backend/app/agents/chat/runtime/references/documents/resolver.py
@@ -8,7 +8,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
 from app.agents.chat.runtime.path_resolver import PathIndex, doc_to_virtual_path
 from app.db import Document
 
-from .models import DocumentReference
+from ..models import DocumentReference
 
 
 async def resolve_document_references(
diff --git a/surfsense_backend/tests/integration/agents/multi_agent_chat/main_agent/tools/test_search_knowledge_base.py b/surfsense_backend/tests/integration/agents/multi_agent_chat/main_agent/tools/test_search_knowledge_base.py
new file mode 100644
index 000000000..b25e8eeeb
--- /dev/null
+++ b/surfsense_backend/tests/integration/agents/multi_agent_chat/main_agent/tools/test_search_knowledge_base.py
@@ -0,0 +1,237 @@
+"""Behavior tests for the ``search_knowledge_base`` main-agent tool.
+
+These exercise the tool through its public contract: seed a real document,
+invoke the tool, and assert on the ``Command`` it returns — the rendered
+``<retrieved_context>`` carries ``[n]`` labels and the citation registry handed
+back on state is populated.
+The tool's own DB session is redirected to the test session, and the embedding
+leg is pinned so the search is deterministic without a live model.
+"""
+
+from __future__ import annotations
+
+import contextlib
+import uuid
+from types import SimpleNamespace
+
+import pytest
+from langchain_core.messages import ToolMessage
+from langgraph.types import Command
+
+from app.agents.chat.multi_agent_chat.main_agent.tools import search_knowledge_base
+from app.agents.chat.multi_agent_chat.main_agent.tools.search_knowledge_base import (
+    create_search_knowledge_base_tool,
+)
+from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
+from app.config import config
+from app.db import Chunk, Document, DocumentType, Folder
+
+pytestmark = pytest.mark.integration
+
+_DIM = config.embedding_model_instance.dimension
+
+
+def _axis(index: int) -> list[float]:
+    vector = [0.0] * _DIM
+    vector[index] = 1.0
+    return vector
+
+
+async def _add_document(
+    db_session,
+    *,
+    search_space_id: int,
+    title: str,
+    text: str,
+    folder_id: int | None = None,
+):
+    document = Document(
+        title=title,
+        document_type=DocumentType.FILE,
+        content=text,
+        content_hash=uuid.uuid4().hex,
+        search_space_id=search_space_id,
+        folder_id=folder_id,
+        status={"state": "ready"},
+    )
+    db_session.add(document)
+    await db_session.flush()
+    db_session.add(
+        Chunk(content=text, document_id=document.id, position=0, embedding=_axis(0))
+    )
+    await db_session.flush()
+    return document
+
+
+async def _add_folder(db_session, *, search_space_id: int, name: str = "Folder"):
+    folder = Folder(name=name, position="0", search_space_id=search_space_id)
+    db_session.add(folder)
+    await db_session.flush()
+    return folder
+
+
+@pytest.fixture
+def _tool_uses_test_session(db_session, monkeypatch):
+    """Redirect the tool's ``shielded_async_session`` to the test transaction."""
+
+    @contextlib.asynccontextmanager
+    async def _session():
+        yield db_session
+
+    monkeypatch.setattr(search_knowledge_base, "shielded_async_session", _session)
+
+
+@pytest.fixture
+def _pinned_embedding(monkeypatch):
+    monkeypatch.setattr(
+        config.embedding_model_instance, "embed", lambda _query: _axis(0)
+    )
+
+
+async def _invoke(tool, query: str, state: dict | None = None, context=None):
+    runtime = SimpleNamespace(
+        state=state or {}, tool_call_id="call-1", context=context
+    )
+    return await tool.coroutine(query, runtime)
+
+
+def _mentions(*, document_ids=(), folder_ids=()):
+    return SimpleNamespace(
+        mentioned_document_ids=list(document_ids),
+        mentioned_folder_ids=list(folder_ids),
+    )
+
+
+async def test_tool_returns_retrieved_context_with_numbered_passages(
+    db_session, db_search_space, _tool_uses_test_session, _pinned_embedding
+):
+    await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        title="Asyncio Guide",
+        text="The asyncio library enables concurrency.",
+    )
+    tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)
+
+    result = await _invoke(tool, "asyncio")
+
+    assert isinstance(result, Command)
+    message = result.update["messages"][0]
+    assert isinstance(message, ToolMessage)
+    assert "<retrieved_context>" in message.content
+    assert "[1]" in message.content
+
+
+async def test_tool_populates_citation_registry_on_state(
+    db_session, db_search_space, _tool_uses_test_session, _pinned_embedding
+):
+    await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        title="Asyncio Guide",
+        text="The asyncio library enables concurrency.",
+    )
+    tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)
+
+    result = await _invoke(tool, "asyncio")
+
+    registry = result.update["citation_registry"]
+    assert isinstance(registry, CitationRegistry)
+    assert registry.by_n  # at least one passage was registered as [n]
+
+
+async def test_tool_reuses_existing_registry_numbering(
+    db_session, db_search_space, _tool_uses_test_session, _pinned_embedding
+):
+    await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        title="Asyncio Guide",
+        text="The asyncio library enables concurrency.",
+    )
+    tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)
+
+    first = await _invoke(tool, "asyncio")
+    carried = first.update["citation_registry"]
+    second = await _invoke(tool, "asyncio", state={"citation_registry": carried})
+
+    # Same passage searched twice keeps a single [n] (find-or-create).
+    assert len(second.update["citation_registry"].by_n) == 1
+
+
+async def test_tool_reports_no_matches_without_touching_state(
+    db_session, db_search_space, _tool_uses_test_session, _pinned_embedding
+):
+    tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)
+
+    result = await _invoke(tool, "nonexistent-term-zzz")
+
+    assert isinstance(result, str)
+    assert "No knowledge-base matches" in result
+
+
+async def test_tool_rejects_empty_query(
+    db_search_space, _tool_uses_test_session, _pinned_embedding
+):
+    tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)
+
+    result = await _invoke(tool, "   ")
+
+    assert isinstance(result, str)
+    assert "non-empty" in result
+
+
+async def test_document_mention_confines_search_to_pinned_doc(
+    db_session, db_search_space, _tool_uses_test_session, _pinned_embedding
+):
+    pinned = await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        title="Pinned",
+        text="asyncio appears in the pinned doc.",
+    )
+    await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        title="Other",
+        text="asyncio appears in the other doc.",
+    )
+    tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)
+
+    result = await _invoke(
+        tool, "asyncio", context=_mentions(document_ids=[pinned.id])
+    )
+
+    # Search is confined to the pinned doc: only its content is rendered.
+    content = result.update["messages"][0].content
+    assert "Pinned" in content
+    assert "Other" not in content
+
+
+async def test_folder_mention_confines_search_to_folder_documents(
+    db_session, db_search_space, _tool_uses_test_session, _pinned_embedding
+):
+    folder = await _add_folder(db_session, search_space_id=db_search_space.id)
+    await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        title="Inside",
+        text="asyncio appears inside the folder.",
+        folder_id=folder.id,
+    )
+    await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        title="Outside",
+        text="asyncio appears outside the folder.",
+    )
+    tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)
+
+    result = await _invoke(
+        tool, "asyncio", context=_mentions(folder_ids=[folder.id])
+    )
+
+    # Search is confined to the folder's document: only its content is rendered.
+    content = result.update["messages"][0].content
+    assert "Inside" in content
+    assert "Outside" not in content
diff --git a/surfsense_backend/tests/unit/agents/new_chat/test_state_reducers.py b/surfsense_backend/tests/unit/agents/new_chat/test_state_reducers.py
index 637a10704..7398fce6a 100644
--- a/surfsense_backend/tests/unit/agents/new_chat/test_state_reducers.py
+++ b/surfsense_backend/tests/unit/agents/new_chat/test_state_reducers.py
@@ -4,9 +4,14 @@ from __future__ import annotations
 
 import pytest
 
+from app.agents.chat.multi_agent_chat.shared.citations import (
+    CitationRegistry,
+    CitationSourceType,
+)
 from app.agents.chat.multi_agent_chat.shared.state.reducers import (
     _CLEAR,
     _add_unique_reducer,
+    _citation_registry_merge_reducer,
     _dict_merge_with_tombstones_reducer,
     _initial_filesystem_state,
     _list_append_reducer,
@@ -93,6 +98,57 @@ class TestDictMergeWithTombstones:
         }
 
 
+def _kb_registry(chunk_id: int) -> CitationRegistry:
+    registry = CitationRegistry()
+    registry.register(
+        CitationSourceType.KB_CHUNK, {"document_id": 1, "chunk_id": chunk_id}
+    )
+    return registry
+
+
+class TestCitationRegistryMergeReducer:
+    def test_none_left_returns_right(self):
+        right = _kb_registry(10)
+        assert _citation_registry_merge_reducer(None, right) is right
+
+    def test_none_right_returns_left(self):
+        left = _kb_registry(10)
+        assert _citation_registry_merge_reducer(left, None) is left
+
+    def test_both_none_returns_none(self):
+        assert _citation_registry_merge_reducer(None, None) is None
+
+    def test_unions_two_registries(self):
+        left = _kb_registry(10)
+        right = _kb_registry(11)
+
+        merged = _citation_registry_merge_reducer(left, right)
+
+        chunk_ids = {entry.locator["chunk_id"] for entry in merged.by_n.values()}
+        assert chunk_ids == {10, 11}
+
+    def test_coerces_serialized_dict_update(self):
+        # The checkpointer serializes Command.update via ormsgpack before the
+        # reducer runs, so `right` can arrive as a plain dict.
+        left = _kb_registry(10)
+        right = _kb_registry(11).model_dump()
+
+        merged = _citation_registry_merge_reducer(left, right)
+
+        chunk_ids = {entry.locator["chunk_id"] for entry in merged.by_n.values()}
+        assert chunk_ids == {10, 11}
+
+    def test_coerces_both_sides_from_dict(self):
+        left = _kb_registry(10).model_dump()
+        right = _kb_registry(11).model_dump()
+
+        merged = _citation_registry_merge_reducer(left, right)
+
+        assert isinstance(merged, CitationRegistry)
+        chunk_ids = {entry.locator["chunk_id"] for entry in merged.by_n.values()}
+        assert chunk_ids == {10, 11}
+
+
 class TestInitialFilesystemState:
     def test_default_shape(self):
         state = _initial_filesystem_state()
@@ -106,7 +162,6 @@ class TestInitialFilesystemState:
         assert state["dirty_paths"] == []
         assert state["dirty_path_tool_calls"] == {}
         assert state["kb_priority"] == []
-        assert state["kb_matched_chunk_ids"] == {}
         assert state["kb_anon_doc"] is None
         assert state["tree_version"] == 0
 
diff --git a/surfsense_backend/tests/unit/middleware/test_knowledge_search.py b/surfsense_backend/tests/unit/middleware/test_knowledge_search.py
index 027738fba..b128c35e7 100644
--- a/surfsense_backend/tests/unit/middleware/test_knowledge_search.py
+++ b/surfsense_backend/tests/unit/middleware/test_knowledge_search.py
@@ -6,9 +6,6 @@ import pytest
 from langchain_core.messages import AIMessage, HumanMessage
 
 from app.agents.chat.multi_agent_chat.shared.middleware import knowledge_search as ks
-from app.agents.chat.multi_agent_chat.shared.middleware.filesystem.backends.document_xml import (
-    build_document_xml as _build_document_xml,
-)
 from app.agents.chat.multi_agent_chat.shared.middleware.knowledge_search import (
     KBSearchPlan,
     KnowledgePriorityMiddleware,
@@ -59,88 +56,6 @@ class TestResolveSearchTypes:
         assert result.count("FILE") == 1
 
 
-# ── _build_document_xml ────────────────────────────────────────────────
-
-
-class TestBuildDocumentXml:
-    @pytest.fixture
-    def sample_document(self):
-        return {
-            "document_id": 42,
-            "document": {
-                "id": 42,
-                "document_type": "FILE",
-                "title": "Test Doc",
-                "metadata": {"url": "https://example.com"},
-            },
-            "chunks": [
-                {"chunk_id": 101, "content": "First chunk content"},
-                {"chunk_id": 102, "content": "Second chunk content"},
-                {"chunk_id": 103, "content": "Third chunk content"},
-            ],
-        }
-
-    def test_contains_document_metadata(self, sample_document):
-        xml = _build_document_xml(sample_document)
-        assert "<document_id>42</document_id>" in xml
-        assert "<document_type>FILE</document_type>" in xml
-        assert "Test Doc" in xml
-
-    def test_contains_chunk_index(self, sample_document):
-        xml = _build_document_xml(sample_document)
-        assert "<chunk_index>" in xml
-        assert "</chunk_index>" in xml
-        assert 'chunk_id="101"' in xml
-        assert 'chunk_id="102"' in xml
-        assert 'chunk_id="103"' in xml
-
-    def test_matched_chunks_flagged_in_index(self, sample_document):
-        xml = _build_document_xml(sample_document, matched_chunk_ids={101, 103})
-        lines = xml.split("\n")
-        for line in lines:
-            if 'chunk_id="101"' in line:
-                assert 'matched="true"' in line
-            if 'chunk_id="102"' in line:
-                assert 'matched="true"' not in line
-            if 'chunk_id="103"' in line:
-                assert 'matched="true"' in line
-
-    def test_chunk_content_in_document_content_section(self, sample_document):
-        xml = _build_document_xml(sample_document)
-        assert "<document_content>" in xml
-        assert "First chunk content" in xml
-        assert "Second chunk content" in xml
-        assert "Third chunk content" in xml
-
-    def test_line_numbers_in_chunk_index_are_accurate(self, sample_document):
-        """Verify that the line ranges in chunk_index actually point to the right content."""
-        xml = _build_document_xml(sample_document, matched_chunk_ids={101})
-        xml_lines = xml.split("\n")
-
-        for line in xml_lines:
-            if 'chunk_id="101"' in line and "lines=" in line:
-                import re
-
-                m = re.search(r'lines="(\d+)-(\d+)"', line)
-                assert m, f"No lines= attribute found in: {line}"
-                start, _end = int(m.group(1)), int(m.group(2))
-                target_line = xml_lines[start - 1]
-                assert "101" in target_line
-                assert "First chunk content" in target_line
-                break
-        else:
-            pytest.fail("chunk_id=101 entry not found in chunk_index")
-
-    def test_splits_into_lines_correctly(self, sample_document):
-        """Each chunk occupies exactly one line (no embedded newlines)."""
-        xml = _build_document_xml(sample_document)
-        lines = xml.split("\n")
-        chunk_lines = [
-            line for line in lines if "<![CDATA[" in line and "<chunk" in line
-        ]
-        assert len(chunk_lines) == 3
-
-
 # ── planner parsing / date normalization ───────────────────────────────