search-kb: on-demand KB tool on the [n] spine; drop kb_matched_chunk_ids

The main agent's search_knowledge_base tool now runs the hybrid-search spine, renders a <retrieved_context> block of numbered [n] passages, and persists the conversation's CitationRegistry onto graph state. The KB subagent prompts now teach citing the [n] labels shown in <document view="full"> reads, and the structured evidence field is renamed accordingly (evidence.chunk_ids -> evidence.citations). Delete the now-unused search->read highlighting hand-off: the kb_matched_chunk_ids state field, its reducer default, the tool's _matched_chunk_ids writer, and the dead KnowledgePriorityMiddleware writes.
2026-06-26 21:39:43 +02:00 · 2026-06-25 15:26:39 +02:00 · 2026-06-25 15:26:39 +02:00 · c98bdea5cf
commit c98bdea5cf
parent 04a76b163b
16 changed files with 518 additions and 325 deletions
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/search_knowledge_base.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/search_knowledge_base.py
@ -1,12 +1,11 @@
-"""On-demand ``search_knowledge_base`` main-agent tool (OpenCode-style lazy RAG).
+"""On-demand ``search_knowledge_base`` main-agent tool (citation-spine RAG).
-The main agent no longer receives eagerly pre-injected KB context on every
+The main agent calls this when it decides it needs knowledge-base content. The
-turn (see :class:`KnowledgePriorityMiddleware`, now gated off by default).
+tool runs one hybrid search, renders the matched passages as a
-Instead it calls this tool only when it decides it needs knowledge-base
+``<retrieved_context>`` block whose passages carry server-assigned ``[n]``
-content. The tool runs a single hybrid search (embed + DB search, ~0.5s),
+labels, and persists the conversation's ``CitationRegistry`` onto graph state so
-formats the top matches for the model, and writes ``kb_matched_chunk_ids``
+the ``[n]`` -> ``[citation:<payload>]`` normalizer can resolve them after the
-into graph state so matched-section highlighting is preserved when the agent
+turn.
-later reads a document via ``task(knowledge_base)``.
 """
 from __future__ import annotations
@ -18,153 +17,70 @@ from langchain.tools import ToolRuntime
 from langchain_core.messages import ToolMessage
 from langchain_core.tools import BaseTool, StructuredTool
 from langgraph.types import Command
-from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
-from app.agents.chat.multi_agent_chat.shared.middleware.knowledge_search import (
+from app.agents.chat.multi_agent_chat.shared.citations import load_registry
-    search_knowledge_base as _hybrid_search_kb,
+from app.agents.chat.multi_agent_chat.shared.retrieval import SearchScope, build_context
 from app.agents.chat.multi_agent_chat.shared.retrieval.hybrid_search import (
    search_chunks,
 )
 from app.agents.chat.multi_agent_chat.shared.state.filesystem_state import (
    SurfSenseFilesystemState,
 )
-from app.agents.chat.runtime.path_resolver import (
+from app.agents.chat.runtime.references import referenced_document_ids
-    PathIndex,
+from app.db import shielded_async_session
    build_path_index,
    doc_to_virtual_path,
 )
 from app.db import Document, shielded_async_session
 from app.utils.perf import get_perf_logger
 _perf_log = get_perf_logger()
 _DEFAULT_TOP_K = 5
 _MAX_TOP_K = 20
 _PER_DOC_SNIPPET_CHARS = 1200
 _MAX_TOTAL_CHARS = 16_000
 _TOOL_DESCRIPTION = (
    "Search the user's knowledge base (their indexed documents, files, and "
    "connector content) for passages relevant to a query, using hybrid "
    "semantic + keyword retrieval.\n\n"
    "Use this FIRST to ground any factual or informational answer about the "
-    "user's own documents, notes, or connected sources. The workspace tree "
+    "user's own documents, notes, or connected sources. It returns a "
-    "shows which files exist; this tool pulls the actual relevant content. "
+    "<retrieved_context> block: each matched passage is labelled [n]. Cite a "
-    "Each hit returns the document's virtual path, a relevance score, and the "
+    "passage by writing that [n] after the statement it supports.\n\n"
    "matched snippets. If you need a document's full text, delegate a read to "
    "the knowledge_base specialist via `task` using the returned path.\n\n"
    "Write a focused, specific query containing the concrete entities, "
    "acronyms, people, projects, or terms you are looking for."
 )
-async def _resolve_virtual_paths(
+def _search_types(
-    results: list[dict[str, Any]],
+    available_connectors: list[str] | None,
    available_document_types: list[str] | None,
 ) -> tuple[str, ...] | None:
    """Merge connector + document-type filters into a scope; ``None`` if unrestricted."""
    types: set[str] = set()
    if available_document_types:
        types.update(available_document_types)
    if available_connectors:
        types.update(available_connectors)
    return tuple(sorted(types)) or None
 async def _build_search_scope(
    session: AsyncSession,
    *,
    search_space_id: int,
-) -> dict[int, str]:
+    document_types: tuple[str, ...] | None,
-    """Resolve ``Document.id`` -> canonical virtual path for the search hits."""
+    runtime: ToolRuntime[None, SurfSenseFilesystemState],
-    doc_ids = [
+) -> SearchScope:
-        doc_id
+    """Assemble the retrieval scope: workspace document-type filter + @-mention pins."""
-        for doc_id in (
+    ctx = getattr(runtime, "context", None)
-            (doc.get("document") or {}).get("id")
+    document_ids = await referenced_document_ids(
-            for doc in results
+        session,
-            if isinstance(doc, dict)
+        search_space_id=search_space_id,
-        )
+        document_ids=getattr(ctx, "mentioned_document_ids", None),
-        if isinstance(doc_id, int)
+        folder_ids=getattr(ctx, "mentioned_folder_ids", None),
-    ]
+    )
-    if not doc_ids:
+    return SearchScope(
-        return {}
+        document_types=document_types,
-
+        document_ids=document_ids or None,
    async with shielded_async_session() as session:
        index: PathIndex = await build_path_index(session, search_space_id)
        folder_rows = await session.execute(
            select(Document.id, Document.folder_id).where(
                Document.search_space_id == search_space_id,
                Document.id.in_(doc_ids),
            )
        )
        folder_by_doc_id = {row.id: row.folder_id for row in folder_rows.all()}
    paths: dict[int, str] = {}
    for doc in results:
        doc_meta = doc.get("document") or {}
        doc_id = doc_meta.get("id")
        if not isinstance(doc_id, int):
            continue
        folder_id = folder_by_doc_id.get(doc_id, doc_meta.get("folder_id"))
        paths[doc_id] = doc_to_virtual_path(
            doc_id=doc_id,
            title=str(doc_meta.get("title") or "untitled"),
            folder_id=folder_id if isinstance(folder_id, int) else None,
            index=index,
        )
    return paths
 def _format_hits(
    results: list[dict[str, Any]],
    *,
    paths: dict[int, str],
    query: str,
 ) -> str:
    """Render search hits as a compact, model-readable block."""
    if not results:
        return (
            f"No knowledge-base matches found for query: {query!r}.\n"
            "Tell the user nothing relevant was found in their workspace, or "
            "try a different query."
        )
    lines: list[str] = [f"<knowledge_base_results query={query!r}>"]
    total = len(lines[0])
    for rank, doc in enumerate(results, start=1):
        doc_meta = doc.get("document") or {}
        doc_id = doc_meta.get("id")
        title = str(doc_meta.get("title") or "untitled")
        doc_type = doc_meta.get("document_type") or doc.get("source") or "document"
        score = doc.get("score")
        score_str = f"{score:.3f}" if isinstance(score, int | float) else "n/a"
        path = paths.get(doc_id) if isinstance(doc_id, int) else None
        header = f"\n{rank}. {title} (type={doc_type}, score={score_str})" + (
            f"\n   path: {path}" if path else ""
        )
        content = (doc.get("content") or "").strip()
        if content:
            snippet = content[:_PER_DOC_SNIPPET_CHARS].strip()
            if len(content) > _PER_DOC_SNIPPET_CHARS:
                snippet += " ..."
            body = "\n   " + snippet.replace("\n", "\n   ")
        else:
            body = "\n   (no preview available; read the document for details)"
        entry = header + body
        if total + len(entry) > _MAX_TOTAL_CHARS:
            lines.append("\n<!-- additional matches truncated to fit context -->")
            break
        lines.append(entry)
        total += len(entry)
    lines.append(
        "\n\nTo read a full document, delegate to the knowledge_base specialist "
        "with `task`, referencing the path above."
    )
    lines.append("\n</knowledge_base_results>")
    return "".join(lines)
 def _matched_chunk_ids(results: list[dict[str, Any]]) -> dict[int, list[int]]:
    """Extract ``Document.id`` -> matched chunk ids for state hand-off."""
    matched: dict[int, list[int]] = {}
    for doc in results:
        doc_id = (doc.get("document") or {}).get("id")
        if not isinstance(doc_id, int):
            continue
        chunk_ids = doc.get("matched_chunk_ids") or []
        normalized = [int(cid) for cid in chunk_ids if isinstance(cid, int | str)]
        if normalized:
            matched[doc_id] = normalized
    return matched
 def create_search_knowledge_base_tool(
@ -176,8 +92,7 @@ def create_search_knowledge_base_tool(
    """Factory for the on-demand ``search_knowledge_base`` tool."""
    _space_id = search_space_id
-    _connectors = available_connectors
+    _document_types = _search_types(available_connectors, available_document_types)
    _doc_types = available_document_types
    async def _impl(
        query: Annotated[
@ -195,34 +110,45 @@ def create_search_knowledge_base_tool(
            return "Error: provide a non-empty search query."
        clamped_top_k = min(max(1, top_k), _MAX_TOP_K)
-        t0 = time.perf_counter()
+        registry = load_registry(getattr(runtime, "state", None))
        results = await _hybrid_search_kb(
            query=cleaned_query,
            search_space_id=_space_id,
            available_connectors=_connectors,
            available_document_types=_doc_types,
            top_k=clamped_top_k,
        )
-        paths = await _resolve_virtual_paths(results, search_space_id=_space_id)
+        t0 = time.perf_counter()
-        rendered = _format_hits(results, paths=paths, query=cleaned_query)
+        async with shielded_async_session() as session:
-        matched = _matched_chunk_ids(results)
+            scope = await _build_search_scope(
                session,
                search_space_id=_space_id,
                document_types=_document_types,
                runtime=runtime,
            )
            hits = await search_chunks(
                session,
                search_space_id=_space_id,
                query=cleaned_query,
                scope=scope,
                top_k=clamped_top_k,
            )
            rendered = build_context(cleaned_query, hits, registry)
        _perf_log.info(
-            "[search_knowledge_base] tool query=%r results=%d chars=%d in %.3fs",
+            "[search_knowledge_base] tool query=%r docs=%d in %.3fs",
            cleaned_query[:60],
-            len(results),
+            len(hits),
            len(rendered),
            time.perf_counter() - t0,
        )
        if rendered is None:
            return (
                f"No knowledge-base matches found for query: {cleaned_query!r}.\n"
                "Tell the user nothing relevant was found in their workspace, or "
                "try a different query."
            )
        update: dict[str, Any] = {
            "messages": [
                ToolMessage(content=rendered, tool_call_id=runtime.tool_call_id)
            ],
            "citation_registry": registry,
        }
        if matched:
            update["kb_matched_chunk_ids"] = matched
        return Command(update=update)
    return StructuredTool.from_function(
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/knowledge_search.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/knowledge_search.py
@ -5,11 +5,6 @@ This middleware runs ``before_agent`` on every turn and writes:
 * ``state["kb_priority"]`` — the top-K most relevant documents for the
  current user message, used to render a ``<priority_documents>`` system
  message immediately before the user turn.
 * ``state["kb_matched_chunk_ids"]`` — internal hand-off mapping
  (``Document.id`` → matched chunk IDs) consumed by
  :class:`KBPostgresBackend._load_file_data` when the agent first reads each
  document, so the XML wrapper can flag matched sections in
  ``<chunk_index>``.
 The previous "scoped filesystem" behaviour (synthetic ``ls`` + state
 ``files`` seeding) is intentionally removed: documents are now lazy-loaded
@ -816,7 +811,6 @@ class KnowledgePriorityMiddleware(AgentMiddleware):  # type: ignore[type-arg]
        ]
        update: dict[str, Any] = {
            "kb_priority": priority,
            "kb_matched_chunk_ids": {},
        }
        if self.inject_system_message:
            new_messages = list(state.get("messages") or [])
@ -930,7 +924,7 @@ class KnowledgePriorityMiddleware(AgentMiddleware):  # type: ignore[type-arg]
            merged.append(doc)
        _t_materialize = time.perf_counter()
-        priority, matched_chunk_ids = await self._materialize_priority(merged)
+        priority = await self._materialize_priority(merged)
        if folder_mention_ids:
            folder_entries = await self._materialize_folder_priority(folder_mention_ids)
@ -957,7 +951,6 @@ class KnowledgePriorityMiddleware(AgentMiddleware):  # type: ignore[type-arg]
        update: dict[str, Any] = {
            "kb_priority": priority,
            "kb_matched_chunk_ids": matched_chunk_ids,
        }
        if self.inject_system_message:
            new_messages = list(messages)
@ -1016,13 +1009,12 @@ class KnowledgePriorityMiddleware(AgentMiddleware):  # type: ignore[type-arg]
    async def _materialize_priority(
        self, merged: list[dict[str, Any]]
-    ) -> tuple[list[dict[str, Any]], dict[int, list[int]]]:
+    ) -> list[dict[str, Any]]:
-        """Resolve canonical paths and matched chunk ids for the priority list."""
+        """Resolve canonical paths for the priority list."""
        priority: list[dict[str, Any]] = []
        matched_chunk_ids: dict[int, list[int]] = {}
        if not merged:
-            return priority, matched_chunk_ids
+            return priority
        _t0 = time.perf_counter()
        async with shielded_async_session() as session:
@ -1067,18 +1059,12 @@ class KnowledgePriorityMiddleware(AgentMiddleware):  # type: ignore[type-arg]
                    "mentioned": bool(doc.get("_user_mentioned")),
                }
            )
            if isinstance(doc_id, int):
                chunk_ids = doc.get("matched_chunk_ids") or []
                if chunk_ids:
                    matched_chunk_ids[doc_id] = [
                        int(cid) for cid in chunk_ids if isinstance(cid, int | str)
                    ]
        _perf_log.info(
            "[kb_priority.materialize] db=%.3fs docs=%d",
            time.perf_counter() - _t0,
            len(merged),
        )
-        return priority, matched_chunk_ids
+        return priority
 __all__ = [
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/filesystem_state.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/filesystem_state.py
@ -14,8 +14,8 @@ extra fields needed to implement Postgres-backed virtual filesystem semantics:
 * ``dirty_path_tool_calls`` — sidecar map ``path -> latest tool_call_id`` for
  dirty paths; used to bind the per-path snapshot to an action_id.
 * ``kb_priority`` — top-K priority hints rendered into a system message.
 * ``kb_matched_chunk_ids`` — internal hand-off for matched-chunk highlighting.
 * ``kb_anon_doc`` — Redis-loaded anonymous document (if any).
 * ``citation_registry`` — per-conversation ``[n]`` -> source map for citations.
 * ``tree_version`` — bumped by persistence; invalidates the tree render cache.
 * ``workspace_tree_text`` — pre-rendered ``<workspace_tree>`` body for the turn.
@ -30,9 +30,11 @@ from typing import Annotated, Any, NotRequired
 from deepagents.middleware.filesystem import FilesystemState
 from typing_extensions import TypedDict
 from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
 from app.agents.chat.multi_agent_chat.shared.receipts.receipt import Receipt
 from app.agents.chat.multi_agent_chat.shared.state.reducers import (
    _add_unique_reducer,
    _citation_registry_merge_reducer,
    _dict_merge_with_tombstones_reducer,
    _int_counter_merge_reducer,
    _list_append_reducer,
@ -162,12 +164,16 @@ class SurfSenseFilesystemState(FilesystemState):
    kb_priority: NotRequired[Annotated[list[KbPriorityEntry], _replace_reducer]]
    """Top-K priority hints rendered as a system message before the user turn."""
    kb_matched_chunk_ids: NotRequired[Annotated[dict[int, list[int]], _replace_reducer]]
    """Internal: ``Document.id`` -> list of matched chunk IDs from hybrid search."""
    kb_anon_doc: NotRequired[Annotated[KbAnonDoc | None, _replace_reducer]]
    """Anonymous-session document loaded from Redis (read-only, no DB row)."""
    citation_registry: NotRequired[
        Annotated[CitationRegistry, _citation_registry_merge_reducer]
    ]
    """Per-conversation ``[n]`` -> source map; written by retrieval, read by the
    normalizer. Merges (union, find-or-create) so parallel/subagent registrations
    stay globally consistent instead of clobbering each other."""
    tree_version: NotRequired[Annotated[int, _replace_reducer]]
    """Monotonically increasing counter; bumped when commits change the KB tree."""
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/reducers.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/reducers.py
@ -2,7 +2,7 @@
 These reducers back the extra state fields used by the cloud-mode filesystem
 agent (`cwd`, `staged_dirs`, `pending_moves`, `dirty_paths`, `doc_id_by_path`,
-`kb_priority`, `kb_matched_chunk_ids`, `kb_anon_doc`, `tree_version`).
+`kb_priority`, `kb_anon_doc`, `tree_version`).
 Tools mutate these fields ONLY via `Command(update={...})` returns; the
 reducers are responsible for merging successive updates atomically and for
@ -20,6 +20,8 @@ from __future__ import annotations
 from typing import Any, Final, TypeVar
 from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
 _CLEAR: Final[str] = "\x00__SURFSENSE_FILESYSTEM_CLEAR__\x00"
 """Reset sentinel; pass it inside a list/dict update to request a reset.
@ -204,6 +206,41 @@ def _int_counter_merge_reducer(
    return base
 def _as_registry(value: Any) -> CitationRegistry | None:
    """Coerce a state value into a ``CitationRegistry``.
    The checkpointer serializes ``Command.update`` via ``ormsgpack`` *before*
    reducers run, so an update can arrive as a plain ``dict`` rather than a model.
    """
    if value is None:
        return None
    if isinstance(value, CitationRegistry):
        return value
    if isinstance(value, dict):
        return CitationRegistry.model_validate(value)
    return None
 def _citation_registry_merge_reducer(
    left: Any,
    right: Any,
 ) -> CitationRegistry | None:
    """Union two citation registries instead of replacing.
    Find-or-create across both sides so ``[n]`` stays globally consistent when
    branches (parent + subagents, parallel tool calls) each register into a
    registry forked from the same base. Collisions re-mint rather than drop. See
    :meth:`CitationRegistry.merge`.
    """
    right_reg = _as_registry(right)
    left_reg = _as_registry(left)
    if right_reg is None:
        return left_reg
    if left_reg is None:
        return right_reg
    return left_reg.merge(right_reg)
 def _initial_filesystem_state() -> dict[str, Any]:
    """Default empty values for SurfSense filesystem state fields.
@ -222,7 +259,6 @@ def _initial_filesystem_state() -> dict[str, Any]:
        "dirty_paths": [],
        "dirty_path_tool_calls": {},
        "kb_priority": [],
        "kb_matched_chunk_ids": {},
        "kb_anon_doc": None,
        "tree_version": 0,
    }
@ -231,6 +267,7 @@ def _initial_filesystem_state() -> dict[str, Any]:
 __all__ = [
    "_CLEAR",
    "_add_unique_reducer",
    "_citation_registry_merge_reducer",
    "_dict_merge_with_tombstones_reducer",
    "_initial_filesystem_state",
    "_int_counter_merge_reducer",
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/description_readonly.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/description_readonly.md
@ -2,4 +2,4 @@ Read-only specialist for the user's workspace (documents and folders). Use to fi
 Pass your full question as one string. The specialist runs in isolation: it cannot see this thread, so include any path hints, filters, or constraints it needs.
-The specialist returns plain prose with absolute paths and `[citation:<chunk_id>]` markers when claims came from KB-indexed chunks. Preserve those markers verbatim if you forward the answer.
+The specialist returns plain prose with absolute paths and `[n]` citation labels when claims came from KB-indexed documents. Preserve those labels verbatim if you forward the answer.
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md
@ -35,42 +35,31 @@ Map outcomes to your `status`:
 You construct the structured `evidence` fields from your own knowledge of what you called and what you observed — the tools do not return them. Never report values you did not actually see.
-## Chunk citations in your prose
+## Citations in your prose
-When `read_file` returns a KB-indexed document under `/documents/`, the response includes `<chunk id='…'>` blocks. Whenever a fact in your `action_summary` or `evidence.content_excerpt` came from a specific chunk, append `[citation:<chunk_id>]` to the sentence stating that fact, using the **exact** id from the `<chunk id='…'>` tag. The caller relays these markers to the end user verbatim, and the UI resolves each id by exact match against the database, so a wrong id silently breaks the citation.
+When `read_file` returns a KB-indexed document under `/documents/`, it comes back as a `<document … view="full">` block whose passages are each prefixed with a bracketed label — `[1]`, `[2]`, `[3]`. That `[n]` is the citation label. Whenever a fact in your `action_summary` or `evidence.content_excerpt` came from a specific passage, append its `[n]` to the sentence stating that fact, copying the label **exactly** as shown. The caller relays these labels verbatim and the server resolves each one, so a wrong number silently breaks the citation.
-### Where chunk ids live in `read_file` output
+### Where the labels live in `read_file` output
-A KB document's XML has three numeric attributes — only **one** is a citation source:
+A KB document reads back like this — only the bracketed `[n]` is a citation label:
 ```
-<document>
+<document title="Q2 Roadmap" source="File" view="full">
-<document_metadata>
+  [3] First milestone is …
-  <document_id>42</document_id>          ← NOT a citation. Parent doc id; ignore for citations.
+  [4] Second milestone is …
  ...
 </document_metadata>
 <chunk_index>
  <entry chunk_id="128" lines="14-22"/>  ← Index hint; the same id also appears below.
  <entry chunk_id="129" lines="23-30" matched="true"/>
 </chunk_index>
 <document_content>
  <chunk id='128'><![CDATA[…]]></chunk>  ← This is the citation source.
  <chunk id='129'><![CDATA[…]]></chunk>
 </document_content>
 </document>
 ```
 ### Rules
- Use the **exact** id from a `<chunk id='…'>` tag whose content you actually quoted or paraphrased. Copy digit-for-digit; do **not** retype from memory.
+- Use the **exact** `[n]` shown next to the passage you actually quoted or paraphrased. Copy it digit-for-digit; do **not** retype from memory or renumber.
- Before emitting `[citation:N]`, confirm the literal substring `<chunk id='N'>` (or its index twin `chunk_id="N"`) appears in the tool result you are summarising this turn. If you can't see it, omit the citation.
+- Before emitting an `[n]`, confirm that bracketed label appears in the `read_file` output you are summarising this turn. If you can't see it, omit the citation.
- Never cite `<document_id>` — that's the parent doc, not a chunk.
+- Labels are **not** sequential by position — a passage may be `[7]` while the one above it is `[3]` (numbering is shared across the whole conversation). Copy what you see; never guess an adjacent number.
- Never invent, normalise, shorten, or guess at adjacent ids. If unsure between two candidates, omit rather than pick.
+- Write the bare label `[n]` only — no `[citation:…]` wrapper, no markdown links, no parentheses, no footnote numbers.
 - Several passages behind one point → each in its own brackets with nothing between: `[3][4]`. Never `[3, 4]` and never a range like `[3-4]`.
 - Prefer **fewer accurate citations** over many speculative ones.
- Multiple chunks supporting the same point → comma-separated and copied individually: `[citation:128], [citation:129]`.
+- Tool results without `[n]` labels (write/edit/move confirmations, `ls` / `glob` / `grep` listings, error strings) carry no label and need none.
- Plain square brackets only — no markdown links, no parentheses, no footnote numbers.
+- Populate `evidence.citations` with **only** the labels you actually emitted — same numbers.
 - Tool results without `<chunk id='…'>` (write/edit/move confirmations, `ls` / `glob` / `grep` listings, error strings) carry no chunk id and need none.
 - Populate `evidence.chunk_ids` with **only** ids you actually emitted in `[citation:…]` markers — same set, same digits.
 ## Examples
@ -89,7 +78,7 @@ A KB document's XML has three numeric attributes — only **one** is a citation
      "path": "/documents/meetings/2026-05-11-meeting.md",
      "matched_candidates": null,
      "content_excerpt": null,
-      "chunk_ids": null
+      "citations": null
    },
    "next_step": null,
    "missing_fields": null,
@ -121,7 +110,7 @@ A KB document's XML has three numeric attributes — only **one** is a citation
        { "id": "/documents/design/auth-rework.md", "label": "Auth Rework" }
      ],
      "content_excerpt": null,
-      "chunk_ids": null
+      "citations": null
    },
    "next_step": "Ask the user which design doc to update.",
    "missing_fields": ["path"],
@ -142,7 +131,7 @@ Return **only** one JSON object (no markdown or prose outside it):
    "path": string | null,
    "matched_candidates": [ { "id": string, "label": string } ] | null,
    "content_excerpt": string | null,
-    "chunk_ids": string[] | null
+    "citations": number[] | null
  },
  "next_step": string | null,
  "missing_fields": string[] | null,
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md
@ -33,11 +33,11 @@ Map outcomes to your `status`:
 - Any other `"Error: …"` → `status=error` and relay the tool's message verbatim as `next_step`.
 - HITL rejection → `status=blocked` with `next_step="User declined this filesystem action. Do not retry."`.
-You construct the structured `evidence` fields from your own knowledge of what you called and what you observed — the tools do not return them. Never report values you did not actually see. (`chunk_ids` is always `null` in desktop mode — see "Chunk citations in your prose" below.)
+You construct the structured `evidence` fields from your own knowledge of what you called and what you observed — the tools do not return them. Never report values you did not actually see. (`citations` is always `null` in desktop mode — see "Citations in your prose" below.)
-## Chunk citations in your prose
+## Citations in your prose
-In desktop mode your filesystem tools read local files only, and local-file tool results do **not** carry `<chunk id='…'>` tags. Do not emit `[citation:…]` markers in `action_summary` or `evidence.content_excerpt`, and leave `evidence.chunk_ids` `null` — the absolute path is the only reference for local-file work.
+In desktop mode your filesystem tools read local files only, which are not KB-indexed and carry no `[n]` citation labels. Do not emit `[n]` or `[citation:…]` markers in `action_summary` or `evidence.content_excerpt`, and leave `evidence.citations` `null` — the absolute path is the only reference for local-file work.
 ## Examples
@ -56,7 +56,7 @@ In desktop mode your filesystem tools read local files only, and local-file tool
      "path": "/notes/meetings/2026-05-11-meeting.md",
      "matched_candidates": null,
      "content_excerpt": null,
-      "chunk_ids": null
+      "citations": null
    },
    "next_step": null,
    "missing_fields": null,
@ -88,7 +88,7 @@ In desktop mode your filesystem tools read local files only, and local-file tool
        { "id": "/projects/web/design/auth-rework.md", "label": "Auth Rework" }
      ],
      "content_excerpt": null,
-      "chunk_ids": null
+      "citations": null
    },
    "next_step": "Ask the user which design doc to update.",
    "missing_fields": ["path"],
@ -109,7 +109,7 @@ Return **only** one JSON object (no markdown or prose outside it):
    "path": string | null,
    "matched_candidates": [ { "id": string, "label": string } ] | null,
    "content_excerpt": string | null,
-    "chunk_ids": string[] | null
+    "citations": number[] | null
  },
  "next_step": string | null,
  "missing_fields": string[] | null,
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md
@ -28,41 +28,30 @@ Reply in plain prose:
 - If the workspace does not contain the requested information, say so explicitly. Do not fabricate paths or content.
 - If the question is genuinely ambiguous after a thorough lookup, list the candidates with their paths and stop.
-## Chunk citations
+## Citations
-When the evidence for a claim came from a `read_file` response that included `<chunk id='…'>` blocks (i.e. a KB-indexed document under `/documents/`), append `[citation:<chunk_id>]` to the sentence stating that claim. The caller passes these markers through to the end user verbatim, and the UI resolves each id by exact match against the database, so a wrong id silently breaks the citation.
+When the evidence for a claim came from a `read_file` response for a KB-indexed document under `/documents/`, the document reads back as a `<document … view="full">` block whose passages are each prefixed with a bracketed label — `[1]`, `[2]`, `[3]`. That `[n]` is the citation label. Append the relevant `[n]` to the sentence stating the claim, copying it **exactly** as shown. The caller passes these labels through verbatim and the server resolves each one, so a wrong number silently breaks the citation.
-### Where chunk ids live in `read_file` output
+### Where the labels live in `read_file` output
-A KB document's XML has three numeric attributes — only **one** is a citation source:
+A KB document reads back like this — only the bracketed `[n]` is a citation label:
 ```
-<document>
+<document title="Q2 Roadmap" source="File" view="full">
-<document_metadata>
+  [3] First milestone is …
-  <document_id>42</document_id>          ← NOT a citation. Parent doc id; ignore for citations.
+  [4] Second milestone is …
  ...
 </document_metadata>
 <chunk_index>
  <entry chunk_id="128" lines="14-22"/>  ← Index hint; the same id also appears below.
  <entry chunk_id="129" lines="23-30" matched="true"/>
 </chunk_index>
 <document_content>
  <chunk id='128'><![CDATA[…]]></chunk>  ← This is the citation source.
  <chunk id='129'><![CDATA[…]]></chunk>
 </document_content>
 </document>
 ```
 ### Rules
- Use the **exact** id from a `<chunk id='…'>` tag whose content you actually quoted or paraphrased. Copy digit-for-digit; do **not** retype from memory.
+- Use the **exact** `[n]` shown next to the passage you actually quoted or paraphrased. Copy it digit-for-digit; do **not** retype from memory or renumber.
- Before emitting `[citation:N]`, confirm the literal substring `<chunk id='N'>` (or its index twin `chunk_id="N"`) appears in the tool result you are summarising this turn. If you can't see it, omit the citation.
+- Before emitting an `[n]`, confirm that bracketed label appears in the `read_file` output you are summarising this turn. If you can't see it, omit the citation.
- Never cite `<document_id>` — that's the parent doc, not a chunk.
+- Labels are **not** sequential by position — a passage may be `[7]` while the one above it is `[3]` (numbering is shared across the whole conversation). Copy what you see; never guess an adjacent number.
- Never invent, normalise, shorten, or guess at adjacent ids. If unsure between two candidates, omit rather than pick.
+- Prefer **fewer accurate citations** over many speculative ones. One correct `[3]` is more useful than a string of wrong numbers.
- Prefer **fewer accurate citations** over many speculative ones. One correct `[citation:128]` is more useful than a string of wrong ids.
+- Several passages behind one point → each in its own brackets with nothing between: `[3][4]`. Never `[3, 4]` and never a range like `[3-4]`.
- Multiple chunks supporting the same point → comma-separated and copied individually: `[citation:128], [citation:129]`.
+- Write the bare label `[n]` only — no `[citation:…]` wrapper, no markdown links, no parentheses, no footnote numbers.
- Plain square brackets only — no markdown links, no parentheses, no footnote numbers.
+- If a claim came from a tool result that did **not** carry `[n]` labels (`ls`, `glob`, `grep` listings, error strings), skip the citation.
- If a claim came from a tool result that did **not** carry a chunk id (`ls`, `glob`, `grep` listings, error strings, or files without `<chunk id='…'>`), skip the citation.
+- The absolute path under `/documents/` is always required; `[n]` labels are additive, they do not replace the path reference.
 - The absolute path under `/documents/` is always required; chunk citations are additive, they do not replace the path reference.
-Example: `The Q2 roadmap lists three milestones (/documents/planning/q2-roadmap.md) [citation:128], [citation:129].`
+Example: `The Q2 roadmap lists three milestones (/documents/planning/q2-roadmap.md) [3][4].`
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_desktop.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_desktop.md
@ -29,6 +29,6 @@ Reply in plain prose:
 - If the workspace does not contain the requested information, say so explicitly. Do not fabricate paths or content.
 - If the question is genuinely ambiguous after a thorough lookup, list the candidates with their paths and stop.
-## Chunk citations
+## Citations
-In desktop mode your filesystem tools read local files only, and local-file `read_file` responses do **not** carry `<chunk id='…'>` tags. Cite each claim with the absolute local path; do not emit `[citation:…]` markers — your caller has nothing to resolve them against.
+In desktop mode your filesystem tools read local files only; local files are not KB-indexed and carry no `[n]` citation labels. Cite each claim with the absolute local path; do not emit `[n]` or `[citation:…]` markers — your caller has nothing to resolve them against.
--- a/surfsense_backend/app/agents/chat/runtime/references/init.py
+++ b/surfsense_backend/app/agents/chat/runtime/references/init.py
@ -13,7 +13,7 @@ from app.schemas.new_chat import MentionedDocumentInfo
 from .chat import resolve_chat_references
 from .connectors import resolve_connector_references
-from .documents import resolve_document_references
+from .documents import referenced_document_ids, resolve_document_references
 from .folders import resolve_folder_references
 from .models import (
    ChatReference,
@ -89,6 +89,7 @@ __all__ = [
    "FolderReference",
    "Reference",
    "ReferenceKind",
    "referenced_document_ids",
    "render_reference_pointers",
    "resolve_references",
 ]
--- a/surfsense_backend/app/agents/chat/runtime/references/documents/init.py
+++ b/surfsense_backend/app/agents/chat/runtime/references/documents/init.py
@ -0,0 +1,13 @@
 """Resolve ``@document`` references.
 Two concerns, one subject: ``resolver`` turns document ids into pointer
 references for the model, ``referenced`` turns ``@document`` / ``@folder``
 mentions into the document ids a retrieval is confined to.
 """
 from __future__ import annotations
 from .referenced import referenced_document_ids
 from .resolver import resolve_document_references
 __all__ = ["referenced_document_ids", "resolve_document_references"]
--- a/surfsense_backend/app/agents/chat/runtime/references/documents/referenced.py
+++ b/surfsense_backend/app/agents/chat/runtime/references/documents/referenced.py
@ -0,0 +1,39 @@
 """Resolve ``@document`` / ``@folder`` mentions to the documents they point at.
 Reference resolution, not retrieval: this answers "which knowledge-base
 documents did the user point at this turn?". ``@document`` ids pass through;
 ``@folder`` ids expand to the documents directly inside each folder within this
 search space (direct children only, not nested subfolders). The caller turns the
 returned ids into a retrieval ``SearchScope``.
 """
 from __future__ import annotations
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession
 from app.db import Document
 async def referenced_document_ids(
    session: AsyncSession,
    *,
    search_space_id: int,
    document_ids: list[int] | None = None,
    folder_ids: list[int] | None = None,
 ) -> tuple[int, ...]:
    """Collect the ids of every document the user referenced this turn.

    ``document_ids`` are taken as given. Each id in ``folder_ids`` is expanded
    to the ids of the documents whose ``folder_id`` matches it inside
    ``search_space_id``.

    Returns the de-duplicated ids as an ascending tuple; an empty tuple means
    nothing was referenced.
    """
    referenced: set[int] = set(document_ids) if document_ids else set()
    folder_scope = list(folder_ids) if folder_ids else []
    if not folder_scope:
        # No folders to expand, so the session is never queried.
        return tuple(sorted(referenced))

    in_folder_stmt = select(Document.id).where(
        Document.search_space_id == search_space_id,
        Document.folder_id.in_(folder_scope),
    )
    result = await session.execute(in_folder_stmt)
    referenced.update(result.scalars().all())
    return tuple(sorted(referenced))
 __all__ = ["referenced_document_ids"]
--- a/surfsense_backend/app/agents/chat/runtime/references/documents/resolver.py
+++ b/surfsense_backend/app/agents/chat/runtime/references/documents/resolver.py
@ -8,7 +8,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
 from app.agents.chat.runtime.path_resolver import PathIndex, doc_to_virtual_path
 from app.db import Document
-from .models import DocumentReference
+from ..models import DocumentReference
 async def resolve_document_references(
--- a/surfsense_backend/tests/integration/agents/multi_agent_chat/main_agent/tools/test_search_knowledge_base.py
+++ b/surfsense_backend/tests/integration/agents/multi_agent_chat/main_agent/tools/test_search_knowledge_base.py
@ -0,0 +1,237 @@
 """Behavior tests for the ``search_knowledge_base`` main-agent tool.
 These exercise the tool through its public contract: seed a real document,
 invoke the tool, and assert on the ``Command`` it returns — the rendered
 ``<retrieved_context>`` carries ``[n]`` labels and the citation registry handed
 back on state is populated.
 The tool's own DB session is redirected to the test session, and the embedding
 leg is pinned so the search is deterministic without a live model.
 """
 from __future__ import annotations
 import contextlib
 import uuid
 from types import SimpleNamespace
 import pytest
 from langchain_core.messages import ToolMessage
 from langgraph.types import Command
 from app.agents.chat.multi_agent_chat.main_agent.tools import search_knowledge_base
 from app.agents.chat.multi_agent_chat.main_agent.tools.search_knowledge_base import (
    create_search_knowledge_base_tool,
 )
 from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
 from app.config import config
 from app.db import Chunk, Document, DocumentType, Folder
 pytestmark = pytest.mark.integration
 _DIM = config.embedding_model_instance.dimension
 def _axis(index: int) -> list[float]:
    """Return a ``_DIM``-long vector of zeros with ``1.0`` at ``index``."""
    one_hot = [0.0 for _ in range(_DIM)]
    one_hot[index] = 1.0
    return one_hot
 async def _add_document(
    db_session,
    *,
    search_space_id: int,
    title: str,
    text: str,
    folder_id: int | None = None,
 ):
    """Insert a ready FILE document plus one chunk and return the document.

    The single chunk holds ``text`` at position 0 with the ``_axis(0)``
    embedding. The session is flushed after each insert.
    """
    seeded = Document(
        title=title,
        document_type=DocumentType.FILE,
        content=text,
        content_hash=uuid.uuid4().hex,
        search_space_id=search_space_id,
        folder_id=folder_id,
        status={"state": "ready"},
    )
    db_session.add(seeded)
    # Flush before building the chunk, which reads ``seeded.id``.
    await db_session.flush()

    only_chunk = Chunk(
        content=text, document_id=seeded.id, position=0, embedding=_axis(0)
    )
    db_session.add(only_chunk)
    await db_session.flush()
    return seeded
 async def _add_folder(db_session, *, search_space_id: int, name: str = "Folder"):
    """Insert a folder at position ``"0"`` in the search space and return it."""
    created = Folder(name=name, position="0", search_space_id=search_space_id)
    db_session.add(created)
    await db_session.flush()
    return created
@pytest.fixture
 def _tool_uses_test_session(db_session, monkeypatch):
    """Patch the tool module's ``shielded_async_session`` to yield ``db_session``."""

    @contextlib.asynccontextmanager
    async def _yield_test_session():
        # Hand back the test's own session instead of opening a new one.
        yield db_session

    monkeypatch.setattr(
        search_knowledge_base, "shielded_async_session", _yield_test_session
    )
@pytest.fixture
 def _pinned_embedding(monkeypatch):
    """Replace the embedding model's ``embed`` so every query maps to ``_axis(0)``."""

    def _fixed_embed(_query):
        return _axis(0)

    monkeypatch.setattr(config.embedding_model_instance, "embed", _fixed_embed)
 async def _invoke(tool, query: str, state: dict | None = None, context=None):
    runtime = SimpleNamespace(
        state=state or {}, tool_call_id="call-1", context=context
    )
    return await tool.coroutine(query, runtime)
 def _mentions(*, document_ids=(), folder_ids=()):
    return SimpleNamespace(
        mentioned_document_ids=list(document_ids),
        mentioned_folder_ids=list(folder_ids),
    )
 async def test_tool_returns_retrieved_context_with_numbered_passages(
    db_session, db_search_space, _tool_uses_test_session, _pinned_embedding
 ):
    """A matching search yields a ``Command`` whose tool message shows ``[1]``."""
    space_id = db_search_space.id
    await _add_document(
        db_session,
        search_space_id=space_id,
        title="Asyncio Guide",
        text="The asyncio library enables concurrency.",
    )
    kb_tool = create_search_knowledge_base_tool(search_space_id=space_id)

    outcome = await _invoke(kb_tool, "asyncio")

    assert isinstance(outcome, Command)
    tool_message = outcome.update["messages"][0]
    assert isinstance(tool_message, ToolMessage)
    rendered = tool_message.content
    assert "<retrieved_context>" in rendered
    assert "[1]" in rendered
 async def test_tool_populates_citation_registry_on_state(
    db_session, db_search_space, _tool_uses_test_session, _pinned_embedding
 ):
    """The state update carries a non-empty ``CitationRegistry``."""
    space_id = db_search_space.id
    await _add_document(
        db_session,
        search_space_id=space_id,
        title="Asyncio Guide",
        text="The asyncio library enables concurrency.",
    )
    kb_tool = create_search_knowledge_base_tool(search_space_id=space_id)

    outcome = await _invoke(kb_tool, "asyncio")

    handed_back = outcome.update["citation_registry"]
    assert isinstance(handed_back, CitationRegistry)
    # At least one passage must have been registered under an [n] label.
    assert handed_back.by_n
 async def test_tool_reuses_existing_registry_numbering(
    db_session, db_search_space, _tool_uses_test_session, _pinned_embedding
 ):
    """Searching the same passage twice keeps a single registry entry."""
    space_id = db_search_space.id
    await _add_document(
        db_session,
        search_space_id=space_id,
        title="Asyncio Guide",
        text="The asyncio library enables concurrency.",
    )
    kb_tool = create_search_knowledge_base_tool(search_space_id=space_id)

    first_run = await _invoke(kb_tool, "asyncio")
    prior_registry = first_run.update["citation_registry"]
    second_run = await _invoke(
        kb_tool, "asyncio", state={"citation_registry": prior_registry}
    )

    # Find-or-create: the repeated passage must not receive a second [n].
    registry_after = second_run.update["citation_registry"]
    assert len(registry_after.by_n) == 1
 async def test_tool_reports_no_matches_without_touching_state(
    db_session, db_search_space, _tool_uses_test_session, _pinned_embedding
 ):
    """With nothing seeded, the tool returns a plain string rather than a ``Command``."""
    kb_tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)

    reply = await _invoke(kb_tool, "nonexistent-term-zzz")

    assert isinstance(reply, str)
    assert "No knowledge-base matches" in reply
 async def test_tool_rejects_empty_query(
    db_search_space, _tool_uses_test_session, _pinned_embedding
 ):
    """A whitespace-only query is answered with a "non-empty" string message."""
    kb_tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)

    reply = await _invoke(kb_tool, "   ")

    assert isinstance(reply, str)
    assert "non-empty" in reply
 async def test_document_mention_confines_search_to_pinned_doc(
    db_session, db_search_space, _tool_uses_test_session, _pinned_embedding
 ):
    """Mentioning one document keeps the other matching document out of the output."""
    space_id = db_search_space.id
    pinned_doc = await _add_document(
        db_session,
        search_space_id=space_id,
        title="Pinned",
        text="asyncio appears in the pinned doc.",
    )
    await _add_document(
        db_session,
        search_space_id=space_id,
        title="Other",
        text="asyncio appears in the other doc.",
    )
    kb_tool = create_search_knowledge_base_tool(search_space_id=space_id)
    mention_context = _mentions(document_ids=[pinned_doc.id])

    outcome = await _invoke(kb_tool, "asyncio", context=mention_context)

    # Only the pinned document's content may be rendered.
    rendered = outcome.update["messages"][0].content
    assert "Pinned" in rendered
    assert "Other" not in rendered
 async def test_folder_mention_confines_search_to_folder_documents(
    db_session, db_search_space, _tool_uses_test_session, _pinned_embedding
 ):
    """Mentioning a folder keeps a matching document outside it out of the output."""
    space_id = db_search_space.id
    mentioned_folder = await _add_folder(db_session, search_space_id=space_id)
    await _add_document(
        db_session,
        search_space_id=space_id,
        title="Inside",
        text="asyncio appears inside the folder.",
        folder_id=mentioned_folder.id,
    )
    await _add_document(
        db_session,
        search_space_id=space_id,
        title="Outside",
        text="asyncio appears outside the folder.",
    )
    kb_tool = create_search_knowledge_base_tool(search_space_id=space_id)
    mention_context = _mentions(folder_ids=[mentioned_folder.id])

    outcome = await _invoke(kb_tool, "asyncio", context=mention_context)

    # Only the document stored in the mentioned folder may be rendered.
    rendered = outcome.update["messages"][0].content
    assert "Inside" in rendered
    assert "Outside" not in rendered
--- a/surfsense_backend/tests/unit/agents/new_chat/test_state_reducers.py
+++ b/surfsense_backend/tests/unit/agents/new_chat/test_state_reducers.py
@ -4,9 +4,14 @@ from __future__ import annotations
 import pytest
 from app.agents.chat.multi_agent_chat.shared.citations import (
    CitationRegistry,
    CitationSourceType,
 )
 from app.agents.chat.multi_agent_chat.shared.state.reducers import (
    _CLEAR,
    _add_unique_reducer,
    _citation_registry_merge_reducer,
    _dict_merge_with_tombstones_reducer,
    _initial_filesystem_state,
    _list_append_reducer,
@ -93,6 +98,57 @@ class TestDictMergeWithTombstones:
        }
 def _kb_registry(chunk_id: int) -> CitationRegistry:
    """Return a fresh registry holding one KB-chunk entry for ``chunk_id``."""
    locator = {"document_id": 1, "chunk_id": chunk_id}
    fresh = CitationRegistry()
    fresh.register(CitationSourceType.KB_CHUNK, locator)
    return fresh
 class TestCitationRegistryMergeReducer:
    """Exercise ``_citation_registry_merge_reducer`` with ``None``, registry and dict inputs."""

    @staticmethod
    def _chunk_ids(merged) -> set:
        """Collect the ``chunk_id`` of every entry registered in ``merged``."""
        return {entry.locator["chunk_id"] for entry in merged.by_n.values()}

    def test_none_left_returns_right(self):
        incoming = _kb_registry(10)
        assert _citation_registry_merge_reducer(None, incoming) is incoming

    def test_none_right_returns_left(self):
        existing = _kb_registry(10)
        assert _citation_registry_merge_reducer(existing, None) is existing

    def test_both_none_returns_none(self):
        assert _citation_registry_merge_reducer(None, None) is None

    def test_unions_two_registries(self):
        merged = _citation_registry_merge_reducer(_kb_registry(10), _kb_registry(11))
        assert self._chunk_ids(merged) == {10, 11}

    def test_coerces_serialized_dict_update(self):
        # The checkpointer serializes Command.update via ormsgpack before the
        # reducer runs, so the right-hand side can arrive as a plain dict.
        existing = _kb_registry(10)
        incoming_dump = _kb_registry(11).model_dump()
        merged = _citation_registry_merge_reducer(existing, incoming_dump)
        assert self._chunk_ids(merged) == {10, 11}

    def test_coerces_both_sides_from_dict(self):
        existing_dump = _kb_registry(10).model_dump()
        incoming_dump = _kb_registry(11).model_dump()
        merged = _citation_registry_merge_reducer(existing_dump, incoming_dump)
        assert isinstance(merged, CitationRegistry)
        assert self._chunk_ids(merged) == {10, 11}
 class TestInitialFilesystemState:
    def test_default_shape(self):
        state = _initial_filesystem_state()
@ -106,7 +162,6 @@ class TestInitialFilesystemState:
        assert state["dirty_paths"] == []
        assert state["dirty_path_tool_calls"] == {}
        assert state["kb_priority"] == []
        assert state["kb_matched_chunk_ids"] == {}
        assert state["kb_anon_doc"] is None
        assert state["tree_version"] == 0
--- a/surfsense_backend/tests/unit/middleware/test_knowledge_search.py
+++ b/surfsense_backend/tests/unit/middleware/test_knowledge_search.py
@ -6,9 +6,6 @@ import pytest
 from langchain_core.messages import AIMessage, HumanMessage
 from app.agents.chat.multi_agent_chat.shared.middleware import knowledge_search as ks
 from app.agents.chat.multi_agent_chat.shared.middleware.filesystem.backends.document_xml import (
    build_document_xml as _build_document_xml,
 )
 from app.agents.chat.multi_agent_chat.shared.middleware.knowledge_search import (
    KBSearchPlan,
    KnowledgePriorityMiddleware,
@ -59,88 +56,6 @@ class TestResolveSearchTypes:
        assert result.count("FILE") == 1
 # ── _build_document_xml ────────────────────────────────────────────────
 class TestBuildDocumentXml:
    """Checks on the XML string that ``_build_document_xml`` renders for a document."""

    @pytest.fixture
    def sample_document(self):
        """A FILE document (id 42) with three chunks, ids 101-103."""
        return {
            "document_id": 42,
            "document": {
                "id": 42,
                "document_type": "FILE",
                "title": "Test Doc",
                "metadata": {"url": "https://example.com"},
            },
            "chunks": [
                {"chunk_id": 101, "content": "First chunk content"},
                {"chunk_id": 102, "content": "Second chunk content"},
                {"chunk_id": 103, "content": "Third chunk content"},
            ],
        }

    def test_contains_document_metadata(self, sample_document):
        """The document id, type and title all appear in the output."""
        xml = _build_document_xml(sample_document)
        assert "<document_id>42</document_id>" in xml
        assert "<document_type>FILE</document_type>" in xml
        assert "Test Doc" in xml

    def test_contains_chunk_index(self, sample_document):
        """A ``<chunk_index>`` section lists every chunk id."""
        xml = _build_document_xml(sample_document)
        assert "<chunk_index>" in xml
        assert "</chunk_index>" in xml
        assert 'chunk_id="101"' in xml
        assert 'chunk_id="102"' in xml
        assert 'chunk_id="103"' in xml

    def test_matched_chunks_flagged_in_index(self, sample_document):
        """Only the chunk ids passed as matched carry ``matched="true"``."""
        xml = _build_document_xml(sample_document, matched_chunk_ids={101, 103})
        lines = xml.split("\n")
        for line in lines:
            if 'chunk_id="101"' in line:
                assert 'matched="true"' in line
            if 'chunk_id="102"' in line:
                assert 'matched="true"' not in line
            if 'chunk_id="103"' in line:
                assert 'matched="true"' in line

    def test_chunk_content_in_document_content_section(self, sample_document):
        """Every chunk's text appears and a ``<document_content>`` tag is present."""
        xml = _build_document_xml(sample_document)
        assert "<document_content>" in xml
        assert "First chunk content" in xml
        assert "Second chunk content" in xml
        assert "Third chunk content" in xml

    def test_line_numbers_in_chunk_index_are_accurate(self, sample_document):
        """Verify that the line ranges in chunk_index actually point to the right content."""
        # Imported once per test call rather than inside the scan loop below.
        import re

        xml = _build_document_xml(sample_document, matched_chunk_ids={101})
        xml_lines = xml.split("\n")
        for line in xml_lines:
            if 'chunk_id="101"' in line and "lines=" in line:
                m = re.search(r'lines="(\d+)-(\d+)"', line)
                assert m, f"No lines= attribute found in: {line}"
                start, _end = int(m.group(1)), int(m.group(2))
                # ``lines`` is 1-based, so shift by one to index the split output.
                target_line = xml_lines[start - 1]
                assert "101" in target_line
                assert "First chunk content" in target_line
                break
        else:
            pytest.fail("chunk_id=101 entry not found in chunk_index")

    def test_splits_into_lines_correctly(self, sample_document):
        """Each chunk occupies exactly one line (no embedded newlines)."""
        xml = _build_document_xml(sample_document)
        lines = xml.split("\n")
        chunk_lines = [
            line for line in lines if "<![CDATA[" in line and "<chunk" in line
        ]
        assert len(chunk_lines) == 3
 # ── planner parsing / date normalization ───────────────────────────────
`@ -2,4 +2,4 @@ Read-only specialist for the user's workspace (documents and folders). Use to fi`

	`Pass your full question as one string. The specialist runs in isolation: it cannot see this thread, so include any path hints, filters, or constraints it needs.`	`Pass your full question as one string. The specialist runs in isolation: it cannot see this thread, so include any path hints, filters, or constraints it needs.`

	The specialist returns plain prose with absolute paths and `[citation:<chunk_id>]` markers when claims came from KB-indexed chunks. Preserve those markers verbatim if you forward the answer.	The specialist returns plain prose with absolute paths and `[n]` citation labels when claims came from KB-indexed documents. Preserve those labels verbatim if you forward the answer.