diff --git a/.gitignore b/.gitignore
index 507709dca..929f44aec 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,6 +11,10 @@ debug.log
 references/
 references
 
+# Real source/test packages named "references": re-include them despite the broad scratch-folder ignore above.
+!surfsense_backend/app/agents/chat/runtime/references/
+!surfsense_backend/tests/unit/agents/chat/runtime/references/
+
 # Playwright (E2E test artifacts)
 surfsense_web/playwright/.auth/
 surfsense_web/playwright-report/
diff --git a/docker/.env.example b/docker/.env.example
index d2f713492..18142c614 100644
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -412,7 +412,6 @@ SURFSENSE_ENABLE_TOOL_CALL_REPAIR=true
 SURFSENSE_ENABLE_BUSY_MUTEX=true
 SURFSENSE_ENABLE_SKILLS=true
 SURFSENSE_ENABLE_SPECIALIZED_SUBAGENTS=true
-SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE=true
 SURFSENSE_ENABLE_ACTION_LOG=true
 SURFSENSE_ENABLE_REVERT_ROUTE=true
 SURFSENSE_ENABLE_PERMISSION=true
diff --git a/surfsense_backend/.env.example b/surfsense_backend/.env.example
index aee79c09f..a1d410eef 100644
--- a/surfsense_backend/.env.example
+++ b/surfsense_backend/.env.example
@@ -433,14 +433,6 @@ LANGSMITH_PROJECT=surfsense
 # Skills + subagents
 # SURFSENSE_ENABLE_SKILLS=false
 # SURFSENSE_ENABLE_SPECIALIZED_SUBAGENTS=false
-# SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE=false
-
-# KB retrieval mode (default OFF = lazy). When OFF, the main agent retrieves
-# KB content on demand via the `search_knowledge_base` tool and skips the
-# expensive per-turn pre-injection (planner LLM + embed + hybrid search,
-# ~2.3s); explicit @-mentions are still surfaced cheaply. Set to true to
-# restore the original eager `<priority_documents>` pre-injection.
-# SURFSENSE_ENABLE_KB_PRIORITY_PREINJECTION=false
 
 # Snapshot / revert
 # SURFSENSE_ENABLE_ACTION_LOG=false
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/anonymous_document/middleware.py b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/anonymous_document/middleware.py
index d29c31230..2bae0742a 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/anonymous_document/middleware.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/anonymous_document/middleware.py
@@ -6,8 +6,6 @@ read-only). This middleware loads it once on the first turn into
 
 * :class:`KnowledgeTreeMiddleware` can render the synthetic ``/documents``
   view without touching the DB.
-* :class:`KnowledgePriorityMiddleware` skips hybrid search and emits a
-  degenerate priority list.
 * :class:`KBPostgresBackend` (``als_info`` / ``aread`` / ``_load_file_data``)
   recognises the synthetic path.
 
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/knowledge_priority.py b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/knowledge_priority.py
deleted file mode 100644
index 787dbe402..000000000
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/knowledge_priority.py
+++ /dev/null
@@ -1,42 +0,0 @@
-"""KB priority planner: <priority_documents> injection."""
-
-from __future__ import annotations
-
-from langchain_core.language_models import BaseChatModel
-
-from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
-from app.agents.chat.multi_agent_chat.shared.middleware.knowledge_search import (
-    KnowledgePriorityMiddleware,
-)
-from app.services.llm_service import get_planner_llm
-
-
-def build_knowledge_priority_mw(
-    *,
-    llm: BaseChatModel,
-    search_space_id: int,
-    filesystem_mode: FilesystemMode,
-    available_connectors: list[str] | None,
-    available_document_types: list[str] | None,
-    mentioned_document_ids: list[int] | None,
-    preinjection_enabled: bool = True,
-) -> KnowledgePriorityMiddleware:
-    """Build the KB priority middleware.
-
-    When ``preinjection_enabled`` is False (the lazy default), the middleware
-    runs in mentions-only mode: it skips the expensive planner LLM + embedding
-    + hybrid search and only surfaces explicit @-mentions. The main agent is
-    expected to pull relevant KB content on demand via the
-    ``search_knowledge_base`` tool instead.
-    """
-    return KnowledgePriorityMiddleware(
-        llm=llm,
-        planner_llm=get_planner_llm(),
-        search_space_id=search_space_id,
-        filesystem_mode=filesystem_mode,
-        available_connectors=available_connectors,
-        available_document_types=available_document_types,
-        mentioned_document_ids=mentioned_document_ids,
-        inject_system_message=False,
-        mentions_only=not preinjection_enabled,
-    )
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/stack.py b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/stack.py
index 675898d4c..d766367de 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/stack.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/stack.py
@@ -1,10 +1,11 @@
 """Main-agent middleware list assembly: one line per slot.
 
 The main agent is a pure router — filesystem reads/writes are owned by the
-``knowledge_base`` subagent and delegated via the ``task`` tool. The stack
-here only renders KB context (workspace tree + priority docs), projects it
-into system messages, and commits any subagent-side staged writes at end of
-turn (cloud mode).
+``knowledge_base`` subagent and delegated via the ``task`` tool. Knowledge-base
+retrieval is pull-based: the ``search_knowledge_base`` tool runs the hybrid
+search on demand and renders ``<retrieved_context>`` with ``[n]`` citation
+labels. The stack here computes the workspace tree, commits any subagent-side
+staged writes at end of turn (cloud mode), and wires the supporting middleware.
 """
 
 from __future__ import annotations
@@ -33,9 +34,6 @@ from app.agents.chat.multi_agent_chat.shared.middleware.anthropic_cache import (
 from app.agents.chat.multi_agent_chat.shared.middleware.compaction import (
     build_compaction_mw,
 )
-from app.agents.chat.multi_agent_chat.shared.middleware.kb_context_projection import (
-    build_kb_context_projection_mw,
-)
 from app.agents.chat.multi_agent_chat.shared.middleware.patch_tool_calls import (
     build_patch_tool_calls_mw,
 )
@@ -84,7 +82,6 @@ from .context_editing import build_context_editing_mw
 from .dedup_hitl import build_dedup_hitl_mw
 from .doom_loop import build_doom_loop_mw
 from .kb_persistence import build_kb_persistence_mw
-from .knowledge_priority import build_knowledge_priority_mw
 from .knowledge_tree import build_knowledge_tree_mw
 from .noop_injection import build_noop_injection_mw
 from .otel_span import build_otel_mw
@@ -237,16 +234,6 @@ def build_main_agent_deepagent_middleware(
             search_space_id=search_space_id,
             llm=llm,
         ),
-        build_knowledge_priority_mw(
-            llm=llm,
-            search_space_id=search_space_id,
-            filesystem_mode=filesystem_mode,
-            available_connectors=available_connectors,
-            available_document_types=available_document_types,
-            mentioned_document_ids=mentioned_document_ids,
-            preinjection_enabled=flags.enable_kb_priority_preinjection,
-        ),
-        build_kb_context_projection_mw(),
         build_kb_persistence_mw(
             filesystem_mode=filesystem_mode,
             search_space_id=search_space_id,
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/skills/builtin/kb-research/SKILL.md b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/skills/builtin/kb-research/SKILL.md
index 0f0b5ffbb..5730c3122 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/skills/builtin/kb-research/SKILL.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/skills/builtin/kb-research/SKILL.md
@@ -15,7 +15,7 @@ allowed-tools: scrape_webpage, read_file, ls_tree, grep, web_search
 1. Decompose the user's question into 2-4 specific, citation-worthy sub-questions.
 2. For each sub-question, run **one** targeted KB search (focused on terms the user would have written, not synonyms). Open the most relevant 2-3 documents fully via `read_file` if their excerpts are too short.
 3. Use `grep` to find supporting passages in long files instead of re-reading them end to end.
-4. Cite every claim with `[citation:chunk_id]` exactly as the chunk tag specifies.
+4. Cite every claim with the `[n]` label shown on the passage you used (search results and `read_file` output both carry them); never write a chunk id, URL, or title yourself.
 
 ## What good output looks like
 - Short paragraphs with inline citations.
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/citations/off.md b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/citations/off.md
index 42cb099a6..ce80cf7e2 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/citations/off.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/citations/off.md
@@ -1,12 +1,13 @@
 <citations>
 Citation markers are **disabled** in this configuration.
 
-Do NOT include `[citation:…]` markers anywhere, even if tool descriptions or
+Do NOT include `[n]` citation labels or `[citation:…]` markers anywhere, even if
+tool output (`<retrieved_context>`, `<web_results>`), tool descriptions, or
 examples reference them. Ignore citation-format reminders elsewhere in this
 prompt when they conflict with this block.
 
 1. Answer in plain prose. Optional markdown links to public URLs when
    sources are URLs.
 2. Do not expose raw chunk ids, document ids, or internal ids to the user.
-3. Present KB or docs facts naturally without attribution markers.
+3. Present KB, web, or docs facts naturally without attribution markers.
 </citations>
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md
index 2abd95d5a..a42873fcb 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md
@@ -1,42 +1,16 @@
 <citations>
-Citations reach the answer through two channels. Use whichever applies — and
-never invent ids you didn't see. Citation ids are resolved by exact-match
-lookup; a wrong id silently breaks the link, so when in doubt, omit.
+Cite with one token: the bracket label `[n]`. Every citable result —
+`search_knowledge_base` passages, `web_search` results, and prose from a
+`task` knowledge_base/research specialist — already carries `[n]` labels drawn
+from one shared numbering. Those labels are the only citation you write; the
+server resolves each one back to its source after the turn.
 
-### Channel A — chunk blocks injected this turn
-When `web_search` returns `<document>` / `<chunk id='…'>` blocks in this
-turn:
-
-1. For each factual statement taken from those chunks, add
-   `[citation:chunk_id]` using the **exact** id from a visible
-   `<chunk id='…'>` tag. Copy digit-for-digit (or the URL verbatim);
-   do not retype from memory.
-2. `<document_id>` is the parent doc id, **not** a citation source —
-   only ids inside `<chunk id='…'>` count.
-3. Multiple chunks → `[citation:id1], [citation:id2]` (comma-separated,
-   each id copied individually).
-4. Never invent, normalise, or guess at adjacent ids; if unsure, omit.
-5. Plain brackets only — no markdown links, no footnote numbering.
-
-### Channel B — citations relayed by a `task` specialist
-A `task(...)` tool message may contain `[citation:<chunk_id>]` markers
-the specialist already attached to its prose. The specialist saw the
-underlying `<chunk id='…'>` blocks; you didn't. So:
-
-1. **Preserve those markers verbatim** in your final answer — do not
-   reformat, renumber, drop, or wrap them in markdown links. When you
-   paraphrase a specialist sentence, copy the marker character-for-
-   character; do not regenerate the id from memory (LLMs reliably
-   corrupt nearby digits).
-2. Keep each marker attached to the sentence the specialist attached
-   it to.
-3. Do **not** add new `[citation:…]` markers of your own to a
-   specialist's prose; if a fact has no marker, the specialist
-   couldn't tie it to a chunk and neither can you.
-4. When a specialist returns JSON, the citation markers live inside
-   the prose-bearing fields (e.g. a summary or excerpt). Pull them
-   along with the surrounding sentence when you quote.
-
-If neither channel surfaces citation markers this turn, do not fabricate
-them.
+1. Put the label right after the claim it supports.
+2. Several sources for one claim: stack brackets, `[1][2]`.
+3. Copy labels exactly as shown, a specialist's included — never renumber them,
+   add your own, or write the underlying title, date, id, or URL instead.
+4. Write the bare `[n]` and nothing else: no `[citation:…]`, no markdown links,
+   no footnote marks, no "References" section.
+5. Only label claims the sources support. If nothing shown backs a claim — or you
+   never saw a label — leave it uncited; never invent one.
 </citations>
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/private.md b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/private.md
index 8f2bfca4e..6c47b03a9 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/private.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/private.md
@@ -8,20 +8,14 @@ standing instructions. It also reports current character usage versus the
 hard limit so you can manage the budget. Treat it as background colour for
 your answer, not as the task itself.
 
-`<priority_documents>` lists the workspace documents most relevant to the
-latest user message, ranked by relevance score, with `[USER-MENTIONED]`
-flagged on anything the user explicitly referenced. When the task is about
-workspace content, read these first; matched passages inside each document
-are flagged via `<chunk_index>` so you can jump straight to them.
-
 `<workspace_tree>` shows the full `/documents/` folder and file layout. Use
 it to resolve paths the user describes in natural language ("my Q2 roadmap",
 "last week's meeting notes") into concrete document references before
 delegating to a specialist.
 
-`<document>` and `<chunk id='…'>` blocks are chunked indexed content returned
-by KB search (backing `<priority_documents>`). Each chunk carries a stable
-`id` attribute.
+`<retrieved_context>` blocks hold knowledge-base passages from
+`search_knowledge_base`; each `<document>` inside is in excerpt view and every
+passage is prefixed with an `[n]` citation label.
 
 If a block doesn't appear this turn, work from the conversation alone.
 </dynamic_context>
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/team.md b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/team.md
index a5892c23a..fcce98fd0 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/team.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/team.md
@@ -7,21 +7,14 @@ decisions, conventions, architecture notes, processes, key facts. It also
 reports current character usage versus the hard limit so you can manage the
 budget. Treat it as background colour for your answer, not as the task itself.
 
-`<priority_documents>` lists the workspace documents most relevant to the
-latest user message, ranked by relevance score, with `[USER-MENTIONED]`
-flagged on anything someone in the thread explicitly referenced. When the
-task is about workspace content, read these first; matched passages inside
-each document are flagged via `<chunk_index>` so you can jump straight to
-them.
-
 `<workspace_tree>` shows the full `/documents/` folder and file layout. Use
 it to resolve paths described in natural language ("the Q2 roadmap", "last
 week's planning notes") into concrete document references before delegating
 to a specialist.
 
-`<document>` and `<chunk id='…'>` blocks are chunked indexed content returned
-by KB search (backing `<priority_documents>`). Each chunk carries a stable
-`id` attribute.
+`<retrieved_context>` blocks hold knowledge-base passages from
+`search_knowledge_base`; each `<document>` inside is in excerpt view and every
+passage is prefixed with an `[n]` citation label.
 
 If a block doesn't appear this turn, work from the conversation alone.
 </dynamic_context>
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/providers/google.md b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/providers/google.md
index 32ed959c1..2539becce 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/providers/google.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/providers/google.md
@@ -14,5 +14,5 @@ Workflow (Understand → Plan → Act → Verify):
 
 Discipline:
 - Do not imply access to connectors, MCP tools, or deliverable generators except via **task**.
-- Pass paths to **task(knowledge_base, …)** only when you saw them in `<workspace_tree>` or `<priority_documents>`. Otherwise describe the document in natural language and let the subagent resolve it.
+- Pass paths to **task(knowledge_base, …)** only when you saw them in `<workspace_tree>`. Otherwise describe the document in natural language and let the subagent resolve it.
 </provider_hints>
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/providers/grok.md b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/providers/grok.md
index 3219e10d3..3a68fba16 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/providers/grok.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/providers/grok.md
@@ -8,8 +8,8 @@ Tool discipline:
 - Typically one investigative tool per turn unless several independent read-only queries are clearly needed; don’t repeat identical calls.
 
 Attribution:
-- When citations are **enabled** (see citation block above) and you answer from chunk-tagged documents, use `[citation:chunk_id]` exactly as specified there.
-- When citations are **disabled**, never emit `[citation:…]` — plain prose and links per tool guidance.
+- When citations are **enabled** (see citation block above) and you answer from labelled passages, cite with the bare `[n]` label exactly as specified there.
+- When citations are **disabled**, never emit `[n]` or `[citation:…]` — plain prose and links per tool guidance.
 
 Style:
 - No emojis unless asked; flat lists for short answers.
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/providers/openai_codex.md b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/providers/openai_codex.md
index aad52f995..79689ab80 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/providers/openai_codex.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/providers/openai_codex.md
@@ -3,7 +3,7 @@ You are running on an OpenAI Codex-class model (SurfSense **main agent**).
 
 Output style:
 - Concise; don’t paste huge fetch blobs — summarize.
-- When citations are **enabled** and you rely on chunk-tagged docs, references may use `[citation:chunk_id]` per the citation block above; when **disabled**, use prose and URLs only.
+- When citations are **enabled** and you rely on labelled passages, cite with the bare `[n]` label per the citation block above; when **disabled**, use prose and URLs only.
 - Numbered lists work well when the user should reply with a single option index.
 - No emojis; single-level bullets.
 
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/tools/web_search/description.md b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/tools/web_search/description.md
index df15a6284..aad604e47 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/tools/web_search/description.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/tools/web_search/description.md
@@ -4,7 +4,10 @@
     facts, anything outside SurfSense docs and the workspace KB. Reach for
     it whenever freshness matters or you'd otherwise guess from memory.
   - Don't refuse with "I lack network access" — call the tool.
+  - Returns a `<web_results>` block: each result is labelled `[n]`. Cite a
+    result by writing that `[n]` after the statement it supports (when
+    citations are enabled) — do not hand-write the URL as a markdown link.
   - If results are thin, say so and offer to refine the query.
   - Args: `query`, `top_k` (default 10, max 50).
   - Follow up with `scrape_webpage` on the best URL when snippets are too
-    shallow. Present sources with `[label](url)` markdown links.
+    shallow.
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/search_knowledge_base.py b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/search_knowledge_base.py
index 9236e9121..9c667c9fe 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/search_knowledge_base.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/search_knowledge_base.py
@@ -1,12 +1,11 @@
-"""On-demand ``search_knowledge_base`` main-agent tool (OpenCode-style lazy RAG).
+"""On-demand ``search_knowledge_base`` main-agent tool (citation-spine RAG).
 
-The main agent no longer receives eagerly pre-injected KB context on every
-turn (see :class:`KnowledgePriorityMiddleware`, now gated off by default).
-Instead it calls this tool only when it decides it needs knowledge-base
-content. The tool runs a single hybrid search (embed + DB search, ~0.5s),
-formats the top matches for the model, and writes ``kb_matched_chunk_ids``
-into graph state so matched-section highlighting is preserved when the agent
-later reads a document via ``task(knowledge_base)``.
+The main agent calls this when it decides it needs knowledge-base content. The
+tool runs one hybrid search, renders the matched passages as a
+``<retrieved_context>`` block whose passages carry server-assigned ``[n]``
+labels, and persists the conversation's ``CitationRegistry`` onto graph state so
+the ``[n]`` -> ``[citation:<payload>]`` normalizer can resolve those labels
+after the turn.
 """
 
 from __future__ import annotations
@@ -18,153 +17,70 @@ from langchain.tools import ToolRuntime
 from langchain_core.messages import ToolMessage
 from langchain_core.tools import BaseTool, StructuredTool
 from langgraph.types import Command
-from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
 
-from app.agents.chat.multi_agent_chat.shared.middleware.knowledge_search import (
-    search_knowledge_base as _hybrid_search_kb,
+from app.agents.chat.multi_agent_chat.shared.citations import load_registry
+from app.agents.chat.multi_agent_chat.shared.retrieval import SearchScope, build_context
+from app.agents.chat.multi_agent_chat.shared.retrieval.hybrid_search import (
+    search_chunks,
 )
 from app.agents.chat.multi_agent_chat.shared.state.filesystem_state import (
     SurfSenseFilesystemState,
 )
-from app.agents.chat.runtime.path_resolver import (
-    PathIndex,
-    build_path_index,
-    doc_to_virtual_path,
-)
-from app.db import Document, shielded_async_session
+from app.agents.chat.runtime.references import referenced_document_ids
+from app.db import shielded_async_session
 from app.utils.perf import get_perf_logger
 
 _perf_log = get_perf_logger()
 
 _DEFAULT_TOP_K = 5
 _MAX_TOP_K = 20
-_PER_DOC_SNIPPET_CHARS = 1200
-_MAX_TOTAL_CHARS = 16_000
 
 _TOOL_DESCRIPTION = (
     "Search the user's knowledge base (their indexed documents, files, and "
     "connector content) for passages relevant to a query, using hybrid "
     "semantic + keyword retrieval.\n\n"
     "Use this FIRST to ground any factual or informational answer about the "
-    "user's own documents, notes, or connected sources. The workspace tree "
-    "shows which files exist; this tool pulls the actual relevant content. "
-    "Each hit returns the document's virtual path, a relevance score, and the "
-    "matched snippets. If you need a document's full text, delegate a read to "
-    "the knowledge_base specialist via `task` using the returned path.\n\n"
+    "user's own documents, notes, or connected sources. It returns a "
+    "<retrieved_context> block: each matched passage is labelled [n]. Cite a "
+    "passage by writing that [n] after the statement it supports.\n\n"
     "Write a focused, specific query containing the concrete entities, "
     "acronyms, people, projects, or terms you are looking for."
 )
 
 
-async def _resolve_virtual_paths(
-    results: list[dict[str, Any]],
+def _search_types(
+    available_connectors: list[str] | None,
+    available_document_types: list[str] | None,
+) -> tuple[str, ...] | None:
+    """Merge connector + document-type filters into a sorted tuple; ``None`` if empty."""
+    types: set[str] = set()
+    if available_document_types:
+        types.update(available_document_types)
+    if available_connectors:
+        types.update(available_connectors)
+    return tuple(sorted(types)) or None  # () is falsy, so no filters -> None
+
+
+async def _build_search_scope(
+    session: AsyncSession,
     *,
     search_space_id: int,
-) -> dict[int, str]:
-    """Resolve ``Document.id`` -> canonical virtual path for the search hits."""
-    doc_ids = [
-        doc_id
-        for doc_id in (
-            (doc.get("document") or {}).get("id")
-            for doc in results
-            if isinstance(doc, dict)
-        )
-        if isinstance(doc_id, int)
-    ]
-    if not doc_ids:
-        return {}
-
-    async with shielded_async_session() as session:
-        index: PathIndex = await build_path_index(session, search_space_id)
-        folder_rows = await session.execute(
-            select(Document.id, Document.folder_id).where(
-                Document.search_space_id == search_space_id,
-                Document.id.in_(doc_ids),
-            )
-        )
-        folder_by_doc_id = {row.id: row.folder_id for row in folder_rows.all()}
-
-    paths: dict[int, str] = {}
-    for doc in results:
-        doc_meta = doc.get("document") or {}
-        doc_id = doc_meta.get("id")
-        if not isinstance(doc_id, int):
-            continue
-        folder_id = folder_by_doc_id.get(doc_id, doc_meta.get("folder_id"))
-        paths[doc_id] = doc_to_virtual_path(
-            doc_id=doc_id,
-            title=str(doc_meta.get("title") or "untitled"),
-            folder_id=folder_id if isinstance(folder_id, int) else None,
-            index=index,
-        )
-    return paths
-
-
-def _format_hits(
-    results: list[dict[str, Any]],
-    *,
-    paths: dict[int, str],
-    query: str,
-) -> str:
-    """Render search hits as a compact, model-readable block."""
-    if not results:
-        return (
-            f"No knowledge-base matches found for query: {query!r}.\n"
-            "Tell the user nothing relevant was found in their workspace, or "
-            "try a different query."
-        )
-
-    lines: list[str] = [f"<knowledge_base_results query={query!r}>"]
-    total = len(lines[0])
-    for rank, doc in enumerate(results, start=1):
-        doc_meta = doc.get("document") or {}
-        doc_id = doc_meta.get("id")
-        title = str(doc_meta.get("title") or "untitled")
-        doc_type = doc_meta.get("document_type") or doc.get("source") or "document"
-        score = doc.get("score")
-        score_str = f"{score:.3f}" if isinstance(score, int | float) else "n/a"
-        path = paths.get(doc_id) if isinstance(doc_id, int) else None
-
-        header = f"\n{rank}. {title} (type={doc_type}, score={score_str})" + (
-            f"\n   path: {path}" if path else ""
-        )
-
-        content = (doc.get("content") or "").strip()
-        if content:
-            snippet = content[:_PER_DOC_SNIPPET_CHARS].strip()
-            if len(content) > _PER_DOC_SNIPPET_CHARS:
-                snippet += " ..."
-            body = "\n   " + snippet.replace("\n", "\n   ")
-        else:
-            body = "\n   (no preview available; read the document for details)"
-
-        entry = header + body
-        if total + len(entry) > _MAX_TOTAL_CHARS:
-            lines.append("\n<!-- additional matches truncated to fit context -->")
-            break
-        lines.append(entry)
-        total += len(entry)
-
-    lines.append(
-        "\n\nTo read a full document, delegate to the knowledge_base specialist "
-        "with `task`, referencing the path above."
+    document_types: tuple[str, ...] | None,
+    runtime: ToolRuntime[None, SurfSenseFilesystemState],
+) -> SearchScope:
+    """Assemble the retrieval scope: workspace document-type filter + @-mention pins."""
+    ctx = getattr(runtime, "context", None)
+    document_ids = await referenced_document_ids(
+        session,
+        search_space_id=search_space_id,
+        document_ids=getattr(ctx, "mentioned_document_ids", None),
+        folder_ids=getattr(ctx, "mentioned_folder_ids", None),
+    )
+    return SearchScope(
+        document_types=document_types,
+        document_ids=document_ids or None,
     )
-    lines.append("\n</knowledge_base_results>")
-    return "".join(lines)
-
-
-def _matched_chunk_ids(results: list[dict[str, Any]]) -> dict[int, list[int]]:
-    """Extract ``Document.id`` -> matched chunk ids for state hand-off."""
-    matched: dict[int, list[int]] = {}
-    for doc in results:
-        doc_id = (doc.get("document") or {}).get("id")
-        if not isinstance(doc_id, int):
-            continue
-        chunk_ids = doc.get("matched_chunk_ids") or []
-        normalized = [int(cid) for cid in chunk_ids if isinstance(cid, int | str)]
-        if normalized:
-            matched[doc_id] = normalized
-    return matched
 
 
 def create_search_knowledge_base_tool(
@@ -176,8 +92,7 @@ def create_search_knowledge_base_tool(
     """Factory for the on-demand ``search_knowledge_base`` tool."""
 
     _space_id = search_space_id
-    _connectors = available_connectors
-    _doc_types = available_document_types
+    _document_types = _search_types(available_connectors, available_document_types)
 
     async def _impl(
         query: Annotated[
@@ -195,34 +110,45 @@ def create_search_knowledge_base_tool(
             return "Error: provide a non-empty search query."
 
         clamped_top_k = min(max(1, top_k), _MAX_TOP_K)
-        t0 = time.perf_counter()
-        results = await _hybrid_search_kb(
-            query=cleaned_query,
-            search_space_id=_space_id,
-            available_connectors=_connectors,
-            available_document_types=_doc_types,
-            top_k=clamped_top_k,
-        )
+        registry = load_registry(getattr(runtime, "state", None))
 
-        paths = await _resolve_virtual_paths(results, search_space_id=_space_id)
-        rendered = _format_hits(results, paths=paths, query=cleaned_query)
-        matched = _matched_chunk_ids(results)
+        t0 = time.perf_counter()
+        async with shielded_async_session() as session:
+            scope = await _build_search_scope(
+                session,
+                search_space_id=_space_id,
+                document_types=_document_types,
+                runtime=runtime,
+            )
+            hits = await search_chunks(
+                session,
+                search_space_id=_space_id,
+                query=cleaned_query,
+                scope=scope,
+                top_k=clamped_top_k,
+            )
+            rendered = build_context(cleaned_query, hits, registry)
 
         _perf_log.info(
-            "[search_knowledge_base] tool query=%r results=%d chars=%d in %.3fs",
+            "[search_knowledge_base] tool query=%r docs=%d in %.3fs",
             cleaned_query[:60],
-            len(results),
-            len(rendered),
+            len(hits),
             time.perf_counter() - t0,
         )
 
+        if rendered is None:
+            return (
+                f"No knowledge-base matches found for query: {cleaned_query!r}.\n"
+                "Tell the user nothing relevant was found in their workspace, or "
+                "try a different query."
+            )
+
         update: dict[str, Any] = {
             "messages": [
                 ToolMessage(content=rendered, tool_call_id=runtime.tool_call_id)
             ],
+            "citation_registry": registry,
         }
-        if matched:
-            update["kb_matched_chunk_ids"] = matched
         return Command(update=update)
 
     return StructuredTool.from_function(
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/__init__.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/__init__.py
new file mode 100644
index 000000000..a329d6042
--- /dev/null
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/__init__.py
@@ -0,0 +1,22 @@
+"""Citation registry: maps model-facing ``[n]`` labels to real sources.
+
+Server-side only; the model sees only the bare ``[n]``.
+"""
+
+from __future__ import annotations
+
+from .markers import to_frontend_payload
+from .models import CitationEntry, CitationSourceType
+from .normalizer import normalize_citations
+from .registry import CitationRegistry, make_key
+from .state import load_registry
+
+__all__ = [
+    "CitationEntry",
+    "CitationRegistry",
+    "CitationSourceType",
+    "load_registry",
+    "make_key",
+    "normalize_citations",
+    "to_frontend_payload",
+]
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/markers.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/markers.py
new file mode 100644
index 000000000..025d364f6
--- /dev/null
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/markers.py
@@ -0,0 +1,39 @@
+"""Map a registered citation to the frontend ``[citation:<payload>]`` payload.
+
+The citation renderer understands a chunk id (``42``), a negative chunk id for
+anonymous uploads (``-3``), and a URL. This is the seam that turns a server-side
+source into one the renderer can resolve; it grows as more source kinds become
+renderable. Kinds with no renderable form yet return ``None`` so the marker is
+dropped rather than emitted broken.
+"""
+
+from __future__ import annotations
+
+from .models import CitationEntry, CitationSourceType
+
+
+def to_frontend_payload(entry: CitationEntry) -> str | None:
+    """Return the text placed inside ``[citation:<payload>]`` for ``entry``.
+
+    Chunk-backed entries yield their ``chunk_id`` as a string and web results
+    yield their ``url``. ``None`` means there is nothing renderable: the locator
+    lacks the needed field, the URL is empty, or the kind has no renderer.
+    """
+    kind = entry.source_type
+    locator = entry.locator
+
+    if kind in (CitationSourceType.KB_CHUNK, CitationSourceType.ANON_CHUNK):
+        chunk_id = locator.get("chunk_id")
+        return None if chunk_id is None else str(chunk_id)
+
+    if kind == CitationSourceType.WEB_RESULT:
+        # An empty URL is as unrenderable as a missing one.
+        return locator.get("url") or None
+
+    # Connector items and chat turns have no client-side renderer yet
+    # (the frontend resolves only chunk ids and URLs), so they stay
+    # unmarked until both a registration path and a renderer exist.
+    return None
+
+
+__all__ = ["to_frontend_payload"]
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/models.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/models.py
new file mode 100644
index 000000000..5dccddc5c
--- /dev/null
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/models.py
@@ -0,0 +1,36 @@
+"""Data shapes for the citation registry."""
+
+from __future__ import annotations
+
+from enum import Enum
+from typing import Any
+
+from pydantic import BaseModel, Field
+
+
+class CitationSourceType(str, Enum):
+    """Source kind of a citable unit; the value is the stable wire/dedup form."""
+
+    # The ``str`` mixin makes each member compare equal to its plain string value.
+    KB_CHUNK = "kb_chunk"
+    KB_DOCUMENT = "kb_document"
+    CONNECTOR_ITEM = "connector_item"
+    WEB_RESULT = "web_result"
+    CHAT_TURN = "chat_turn"
+    ANON_CHUNK = "anon_chunk"
+
+
+class CitationEntry(BaseModel):
+    """A registered unit: ``n`` (the label), ``locator`` (identity), ``display`` (UI only)."""
+
+    # Ordinal the model writes as ``[n]``.
+    n: int
+    # Kind of source the locator points into.
+    source_type: CitationSourceType
+    # Source identity; which keys are present depends on ``source_type``.
+    locator: dict[str, Any]
+    # Presentation-only data; each entry gets its own empty dict by default.
+    display: dict[str, Any] = Field(default_factory=dict)
+
+
+__all__ = ["CitationEntry", "CitationSourceType"]
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/normalizer.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/normalizer.py
new file mode 100644
index 000000000..fd1773e40
--- /dev/null
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/normalizer.py
@@ -0,0 +1,79 @@
+"""Rewrite model ``[n]`` citations into frontend ``[citation:<payload>]`` markers.
+
+The model cites with tiny ordinals ``[n]`` — one per bracket. Several citations
+are just several brackets (``[1][2]`` or ``[1], [2]``). Each ordinal is resolved
+through the registry and replaced with a marker the citation renderer
+understands. Unknown or not-yet-renderable ordinals are dropped, so a bad
+citation disappears rather than misleads. Code spans are left untouched.
+"""
+
+from __future__ import annotations
+
+import re
+from collections.abc import Callable
+
+from .markers import to_frontend_payload
+from .registry import CitationRegistry
+
+# Fenced (```...```) and inline (`...`) code; mirrors the frontend's single
+# code-region pattern so ordinals inside examples are never rewritten.
+_CODE_REGION = re.compile(r"```[\s\S]*?```|`[^`\n]+`")
+
+# A single ordinal in a bracket: `[1]`, `[12]`. We deliberately match even when
+# glued to the preceding word (`docs[17]`) because the model very frequently
+# writes citations that way — requiring a non-word char before `[` (to dodge
+# `arr[1]`) silently dropped those citations, leaving raw `[n]` that both fails to
+# render and reads like array indexing. Genuine code/array syntax is instead
+# protected by the code-region carve-out below; an unresolved ordinal drops
+# harmlessly. Adjacent citations `[1][2]` are each rewritten.
+_ORDINAL = re.compile(r"\[\s*(\d+)\s*\]")
+
+
+def normalize_citations(text: str, registry: CitationRegistry) -> str:
+    """Rewrite every ``[n]`` outside code into its marker, dropping unresolved ones.
+
+    Empty input is returned as-is. Fenced and inline code spans are copied
+    through verbatim.
+    """
+    if not text:
+        return text
+
+    substitute = _ordinal_rewriter(registry)
+
+    def rewrite_prose(span: str) -> str:
+        return _ORDINAL.sub(substitute, span)
+
+    return _outside_code(text, rewrite_prose)
+
+
+def _ordinal_rewriter(registry: CitationRegistry) -> Callable[[re.Match[str]], str]:
+    """Return the ``re.sub`` callback that maps one ordinal match to its marker."""
+
+    def rewrite(match: re.Match[str]) -> str:
+        entry = registry.resolve(int(match.group(1)))
+        if not entry:
+            # Ordinal was never registered: remove the bracket entirely.
+            return ""
+        payload = to_frontend_payload(entry)
+        if payload is None:
+            # Registered, but the frontend has no way to render this kind yet.
+            return ""
+        return f"[citation:{payload}]"
+
+    return rewrite
+
+
+def _outside_code(text: str, transform: Callable[[str], str]) -> str:
+    """Run ``transform`` over the prose between code regions; keep code verbatim."""
+    pieces: list[str] = []
+    cursor = 0
+    for code in _CODE_REGION.finditer(text):
+        start, end = code.span()
+        pieces.append(transform(text[cursor:start]))
+        pieces.append(text[start:end])
+        cursor = end
+    pieces.append(transform(text[cursor:]))
+    return "".join(pieces)
+
+
+__all__ = ["normalize_citations"]
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/registry.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/registry.py
new file mode 100644
index 000000000..4d56bc088
--- /dev/null
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/registry.py
@@ -0,0 +1,108 @@
+"""Maps the model-facing ``[n]`` to its source.
+
+Pydantic for reliable serialization in checkpointed, cross-agent state.
+"""
+
+from __future__ import annotations
+
+import json
+from typing import Any
+
+from pydantic import BaseModel, Field
+
+from .models import CitationEntry, CitationSourceType
+
+
+def make_key(source_type: CitationSourceType, locator: dict[str, Any]) -> str:
+    """Stable, order-insensitive dedup key; ``source_type`` prefix avoids cross-kind collisions.
+
+    The key is ``"<type>|<json>"``. The locator is dumped with sorted keys so key
+    order does not matter; values JSON cannot encode natively are stringified
+    through ``default=str``.
+    """
+    # Tolerate a plain string as well as a ``CitationSourceType`` member.
+    type_value = (
+        source_type.value
+        if isinstance(source_type, CitationSourceType)
+        else str(source_type)
+    )
+    return f"{type_value}|{json.dumps(locator, sort_keys=True, default=str)}"
+
+
+class CitationRegistry(BaseModel):
+    """Per-conversation ``[n]`` ↔ unit map (find-or-create, monotonic)."""
+
+    # ``[n]`` -> registered entry.
+    by_n: dict[int, CitationEntry] = Field(default_factory=dict)
+    # Dedup key (see ``make_key``) -> the ``[n]`` already minted for that unit.
+    by_key: dict[str, int] = Field(default_factory=dict)
+    # Next ordinal ``register`` will mint; numbering starts at 1.
+    next_n: int = 1
+
+    def register(
+        self,
+        source_type: CitationSourceType,
+        locator: dict[str, Any],
+        display: dict[str, Any] | None = None,
+    ) -> int:
+        """Return the ``[n]`` for this unit, minting a new one only if unseen.
+
+        ``display`` is stored only when the unit is first registered; a repeat
+        registration returns the existing ``[n]`` and leaves the entry untouched.
+        """
+        key = make_key(source_type, locator)
+        existing = self.by_key.get(key)
+        if existing is not None:
+            return existing
+
+        n = self.next_n
+        # Shallow-copy both dicts so the entry does not alias the caller's dicts.
+        self.by_n[n] = CitationEntry(
+            n=n,
+            source_type=source_type,
+            locator=dict(locator),
+            display=dict(display or {}),
+        )
+        self.by_key[key] = n
+        self.next_n = n + 1
+        return n
+
+    def resolve(self, n: int) -> CitationEntry | None:
+        """Map ``[n]`` back to its source; unknown → ``None`` so bad citations drop."""
+        return self.by_n.get(n)
+
+    def merge(self, other: CitationRegistry) -> CitationRegistry:
+        """Union ``self`` with ``other`` (find-or-create), returning a new registry.
+
+        Needed because separate branches (parent + subagents, parallel tool calls)
+        each register into a registry forked from the same base. A plain replace
+        would drop one branch's mappings; this unions them so ``[n]`` stays globally
+        consistent and no source is lost:
+
+        - A source already in ``self`` keeps its existing ``[n]``.
+        - A source only in ``other`` keeps its ``[n]`` when that slot is free.
+        - A collision (same ``[n]``, different source on each side) re-mints the
+          ``other`` entry to a fresh ``[n]`` and advances ``next_n`` past both.
+
+        Pure: neither registry is mutated. Entries are folded in ascending ``[n]``
+        order so the result is deterministic.
+        """
+        merged = self.model_copy(deep=True)
+        for n in sorted(other.by_n):
+            entry = other.by_n[n]
+            key = make_key(entry.source_type, entry.locator)
+            if key in merged.by_key:
+                # Same source already present: its existing ``[n]`` wins.
+                continue
+            if n in merged.by_n:
+                # Slot taken by a different source: re-mint at ``next_n``.
+                merged.register(entry.source_type, entry.locator, entry.display)
+            else:
+                # Slot is free: keep the label ``other`` assigned.
+                merged.by_n[n] = entry.model_copy(deep=True)
+                merged.by_key[key] = n
+                merged.next_n = max(merged.next_n, n + 1)
+        return merged
+
+
+__all__ = ["CitationRegistry", "make_key"]
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/state.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/state.py
new file mode 100644
index 000000000..0df103a54
--- /dev/null
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/state.py
@@ -0,0 +1,34 @@
+"""Read the conversation's ``CitationRegistry`` out of graph state.
+
+The registry is checkpointed, so it may come back as a live ``CitationRegistry``
+or a plain dict (after (de)serialization). Both the search tool and the read
+path load it the same way before registering new ``[n]`` and writing it back.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import Any
+
+from .registry import CitationRegistry
+
+
+def load_registry(state: Mapping[str, Any] | None) -> CitationRegistry:
+    """Fetch ``citation_registry`` from ``state`` as a ``CitationRegistry``.
+
+    A live registry is returned as the same object (not copied); a plain dict is
+    validated into one. Missing or empty state, an absent key, or any other
+    value type yields a fresh empty registry.
+    """
+    if not state:
+        return CitationRegistry()
+
+    stored = state.get("citation_registry")
+    if isinstance(stored, CitationRegistry):
+        return stored
+    if isinstance(stored, dict):
+        return CitationRegistry.model_validate(stored)
+    return CitationRegistry()
+
+
+__all__ = ["load_registry"]
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/__init__.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/__init__.py
new file mode 100644
index 000000000..42368891d
--- /dev/null
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/__init__.py
@@ -0,0 +1,25 @@
+"""Render citable documents for the model: one shape for search, read, and web.
+
+``render_document`` emits one ``<document title=… source=… view="excerpt|full">``
+block whose passages carry server-assigned ``[n]`` labels. ``render_search_context``
+wraps KB excerpt blocks in ``<retrieved_context>``; ``render_web_results`` wraps web
+excerpt blocks in ``<web_results>``. Both cite with the same ``[n]`` spine.
+"""
+
+from __future__ import annotations
+
+from .document import render_document
+from .models import DocumentView, RenderableDocument, RenderablePassage
+from .search_context import render_search_context
+from .source_label import source_label
+from .web_results import render_web_results
+
+__all__ = [
+    "DocumentView",
+    "RenderableDocument",
+    "RenderablePassage",
+    "render_document",
+    "render_search_context",
+    "render_web_results",
+    "source_label",
+]
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/document.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/document.py
new file mode 100644
index 000000000..83181ff69
--- /dev/null
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/document.py
@@ -0,0 +1,74 @@
+"""Render one citable document as a ``<document>`` block.
+
+Every citable surface (KB search excerpts, KB full reads, web results) uses the
+same block; ``view`` and the passages shown are what differ. Each passage is
+registered for citation as it renders, so its ``[n]`` resolves back to its source
+later.
+"""
+
+from __future__ import annotations
+
+from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
+
+from .models import DocumentView, RenderableDocument, RenderablePassage
+
+# Single-pass equivalent of escaping ``&`` first, then ``<``, ``>`` and ``"``.
+_ATTR_ESCAPES = str.maketrans(
+    {"&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;"}
+)
+
+
+def render_document(
+    document: RenderableDocument,
+    *,
+    view: DocumentView,
+    registry: CitationRegistry,
+) -> str | None:
+    """Build the ``<document>`` block for ``document``, or ``None`` if it is empty.
+
+    Passages are registered in ``registry`` in order as they render, so the
+    registry is mutated (find-or-create) as a side effect.
+    """
+    if not document.passages:
+        return None
+
+    header = _open_tag(document, view)
+    passages = [
+        _render_passage(document, passage, registry) for passage in document.passages
+    ]
+    return "\n".join([header, *passages, "</document>"])
+
+
+def _open_tag(document: RenderableDocument, view: DocumentView) -> str:
+    """Opening tag with ``title``, optional ``source``, then ``view`` attributes."""
+    title_attr = f'title="{_attr(document.title)}"'
+    view_attr = f'view="{view}"'
+    if document.source:
+        source_attr = f'source="{_attr(document.source)}"'
+        return f"<document {title_attr} {source_attr} {view_attr}>"
+    return f"<document {title_attr} {view_attr}>"
+
+
+def _render_passage(
+    document: RenderableDocument,
+    passage: RenderablePassage,
+    registry: CitationRegistry,
+) -> str:
+    """Register ``passage`` and render it as ``  [n] text`` with a hanging indent."""
+    ordinal = registry.register(
+        passage.source_type,
+        passage.locator,
+        {"title": document.title, "source": document.source},
+    )
+    prefix = f"  [{ordinal}] "
+    # Continuation lines align under the first character after the label.
+    hanging_indent = "\n" + " " * len(prefix)
+    return prefix + passage.content.strip().replace("\n", hanging_indent)
+
+
+def _attr(value: str) -> str:
+    """Collapse whitespace runs and escape ``& < > "`` for a quoted attribute."""
+    return " ".join(str(value).split()).translate(_ATTR_ESCAPES)
+
+
+__all__ = ["render_document"]
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/models.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/models.py
new file mode 100644
index 000000000..45cdb1865
--- /dev/null
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/models.py
@@ -0,0 +1,48 @@
+"""Inputs for rendering a citable document for the model.
+
+A passage is one citable unit — what the model cites with ``[n]``. A document
+groups the passages shown from one source. The same shapes feed every citable
+surface: KB search excerpts, KB full reads, and web results.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any, Literal
+
+from app.agents.chat.multi_agent_chat.shared.citations import CitationSourceType
+
+DocumentView = Literal["excerpt", "full"]
+"""How much of the source is shown: a search slice, or the whole object."""
+
+
+@dataclass(frozen=True)
+class RenderablePassage:
+    """One citable unit: what the model cites with ``[n]``.
+
+    ``locator`` is the source-specific identity registered for this passage (a KB
+    chunk's ``{document_id, chunk_id}``, a web result's ``{url}``). ``source_type``
+    selects how that locator resolves to a frontend payload.
+    """
+
+    # Passage text shown to the model.
+    content: str
+    # Identity registered for citation (see the class docstring for examples).
+    locator: dict[str, Any]
+    # Defaults to a knowledge-base chunk.
+    source_type: CitationSourceType = CitationSourceType.KB_CHUNK
+
+
+@dataclass(frozen=True)
+class RenderableDocument:
+    """A source document and the passages to render from it, in order."""
+
+    # Document title.
+    title: str
+    # Optional short origin label; ``None`` when nothing is known.
+    source: str | None = None
+    # Passages to render, in display order; a fresh empty list by default.
+    passages: list[RenderablePassage] = field(default_factory=list)
+
+
+__all__ = ["DocumentView", "RenderableDocument", "RenderablePassage"]
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/search_context.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/search_context.py
new file mode 100644
index 000000000..418a2142d
--- /dev/null
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/search_context.py
@@ -0,0 +1,45 @@
+"""Wrap search excerpts in the ``<retrieved_context>`` block.
+
+Each document renders through the shared ``render_document``; this module adds the
+container and the one-time header that teaches the model how to read and cite.
+"""
+
+from __future__ import annotations
+
+from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
+
+from .document import render_document
+from .models import RenderableDocument
+
+_HEADER = (
+    "These are excerpts from the user's knowledge base, selected for this query.\n"
+    "A document is a full source (a file, a Slack thread, a Notion page); each\n"
+    "<document> below is in excerpt view, so you are seeing only the chunks that\n"
+    "matched this query, not the whole source. Cite a chunk with its [n]. Read the\n"
+    "document for full context before claiming it only says X."
+)
+
+
+def render_search_context(
+    documents: list[RenderableDocument],
+    registry: CitationRegistry,
+) -> str | None:
+    """Wrap each document's excerpt block in one ``<retrieved_context>`` element.
+
+    Documents without passages are skipped; if none render, ``None`` is returned
+    so the caller can omit the block. ``registry`` is mutated (find-or-create),
+    so a passage seen again in a later turn keeps its original ``[n]``.
+    """
+    blocks: list[str] = []
+    for document in documents:
+        block = render_document(document, view="excerpt", registry=registry)
+        if block is not None:
+            blocks.append(block)
+    if not blocks:
+        return None
+
+    body = "\n".join(blocks)
+    return f"<retrieved_context>\n{_HEADER}\n{body}\n</retrieved_context>"
+
+
+__all__ = ["render_search_context"]
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/source_label.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/source_label.py
new file mode 100644
index 000000000..03878b2f4
--- /dev/null
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/source_label.py
@@ -0,0 +1,100 @@
+"""Build a short, honest source label for a knowledge-base document.
+
+A label orients the model about where a passage came from — e.g. ``Slack`` or
+``Web · docs.python.org``. It is derived only from the document's type and any
+URL in its metadata, so it never asserts detail we don't actually have. Search
+hits and full reads both build their ``<document source=…>`` from here, so the
+label a passage carries is identical whichever surface it arrives through.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+from urllib.parse import urlparse
+
+_FRIENDLY_NAMES = {
+    "FILE": "File",
+    "NOTE": "Note",
+    "EXTENSION": "Saved page",
+    "CRAWLED_URL": "Web",
+    "YOUTUBE_VIDEO": "YouTube",
+    "SLACK_CONNECTOR": "Slack",
+    "TEAMS_CONNECTOR": "Teams",
+    "DISCORD_CONNECTOR": "Discord",
+    "NOTION_CONNECTOR": "Notion",
+    "GITHUB_CONNECTOR": "GitHub",
+    "LINEAR_CONNECTOR": "Linear",
+    "JIRA_CONNECTOR": "Jira",
+    "CONFLUENCE_CONNECTOR": "Confluence",
+    "CLICKUP_CONNECTOR": "ClickUp",
+    "AIRTABLE_CONNECTOR": "Airtable",
+    "OBSIDIAN_CONNECTOR": "Obsidian",
+    "BOOKSTACK_CONNECTOR": "BookStack",
+}
+
+_URL_KEYS = ("url", "source_url", "link", "source")
+
+# Type-name suffixes dropped, in this order, when prettifying an unmapped type.
+_TYPE_SUFFIXES = ("_CONNECTOR", "_FILE")
+
+
+def source_label(
+    document_type: str | None,
+    metadata: dict[str, Any] | None,
+) -> str | None:
+    """``Source`` or ``Source · host``; ``None`` when nothing is known.
+
+    Args:
+        document_type: Document type name such as ``"SLACK_CONNECTOR"``.
+        metadata: Document metadata; may be ``None`` or lack any URL.
+    """
+    name = _friendly_name(document_type)
+    host = _url_host(metadata)
+    if name and host:
+        return f"{name} · {host}"
+    return name or host
+
+
+def _friendly_name(document_type: str | None) -> str | None:
+    """Curated display name, or a prettified fallback for unmapped types."""
+    if not document_type:
+        return None
+    known = _FRIENDLY_NAMES.get(document_type)
+    return known if known is not None else _prettify(document_type)
+
+
+def _prettify(document_type: str) -> str:
+    """Fallback name for unmapped types: ``GOOGLE_DRIVE_FILE`` → ``Google Drive``.
+
+    Only trailing ``_CONNECTOR`` / ``_FILE`` markers are removed, so a type such
+    as ``LOCAL_FILES_CONNECTOR`` keeps its ``FILES`` word intact.
+    """
+    stem = document_type
+    for suffix in _TYPE_SUFFIXES:
+        stem = stem.removesuffix(suffix)
+    return " ".join(word.capitalize() for word in stem.split("_") if word)
+
+
+def _url_host(metadata: dict[str, Any] | None) -> str | None:
+    """Host of the first usable http(s) URL in ``metadata``, without ``www.``.
+
+    Uses ``hostname`` rather than ``netloc`` so credentials and ports embedded in
+    a URL never reach the label. Malformed URLs are skipped instead of raising.
+    """
+    if not metadata:
+        return None
+    for key in _URL_KEYS:
+        value = metadata.get(key)
+        if not (isinstance(value, str) and value.startswith(("http://", "https://"))):
+            continue
+        try:
+            host = urlparse(value).hostname
+        except ValueError:
+            # e.g. an unbalanced ``[`` in the authority ("Invalid IPv6 URL").
+            continue
+        if host:
+            return host.removeprefix("www.")
+    return None
+
+
+__all__ = ["source_label"]
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/web_results.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/web_results.py
new file mode 100644
index 000000000..b310c7b3a
--- /dev/null
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/web_results.py
@@ -0,0 +1,45 @@
+"""Wrap live web-search results in a ``<web_results>`` block.
+
+Each result renders through the shared ``render_document`` (excerpt view), so a
+web result is cited with ``[n]`` exactly like a knowledge-base passage. Only the
+container and header differ — they tell the model these came from the public web,
+not the user's workspace.
+"""
+
+from __future__ import annotations
+
+from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
+
+from .document import render_document
+from .models import RenderableDocument
+
+_HEADER = (
+    "These are live results from a public web search for this query. Each\n"
+    "<document> below is one result in excerpt view; cite a result with its [n]\n"
+    "after the statement it supports. Scrape the URL for full context before\n"
+    "making a definitive claim from a snippet."
+)
+
+
+def render_web_results(
+    documents: list[RenderableDocument],
+    registry: CitationRegistry,
+) -> str | None:
+    """Wrap each web result's excerpt block in one ``<web_results>`` element.
+
+    Results without content are skipped; if none render, ``None`` is returned so
+    the caller can omit the block. ``registry`` is mutated (find-or-create), so
+    a URL seen again keeps its original ``[n]``.
+    """
+    rendered = (
+        render_document(document, view="excerpt", registry=registry)
+        for document in documents
+    )
+    blocks = [block for block in rendered if block is not None]
+    if not blocks:
+        return None
+
+    return "\n".join(["<web_results>", _HEADER, *blocks, "</web_results>"])
+
+
+__all__ = ["render_web_results"]
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/feature_flags.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/feature_flags.py
index f5233c7d3..91ee2a4c6 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/feature_flags.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/feature_flags.py
@@ -53,14 +53,6 @@ class AgentFeatureFlags:
     # Skills + subagents
     enable_skills: bool = True
     enable_specialized_subagents: bool = True
-    enable_kb_planner_runnable: bool = True
-
-    # KB retrieval mode — when False (default), the main agent retrieves KB
-    # content lazily via the on-demand ``search_knowledge_base`` tool and the
-    # expensive per-turn pre-injection (planner LLM + embed + hybrid search,
-    # ~2.3s) is skipped; explicit @-mentions are still surfaced cheaply. Set
-    # True to restore the original eager ``<priority_documents>`` pre-injection.
-    enable_kb_priority_preinjection: bool = False
 
     # Snapshot / revert
     enable_action_log: bool = True
@@ -118,9 +110,6 @@ class AgentFeatureFlags:
                 enable_llm_tool_selector=False,
                 enable_skills=False,
                 enable_specialized_subagents=False,
-                enable_kb_planner_runnable=False,
-                # Full rollback restores the original eager KB pre-injection.
-                enable_kb_priority_preinjection=True,
                 enable_action_log=False,
                 enable_revert_route=False,
                 enable_plugin_loader=False,
@@ -156,12 +145,6 @@ class AgentFeatureFlags:
             enable_specialized_subagents=_env_bool(
                 "SURFSENSE_ENABLE_SPECIALIZED_SUBAGENTS", True
             ),
-            enable_kb_planner_runnable=_env_bool(
-                "SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE", True
-            ),
-            enable_kb_priority_preinjection=_env_bool(
-                "SURFSENSE_ENABLE_KB_PRIORITY_PREINJECTION", False
-            ),
             # Snapshot / revert
             enable_action_log=_env_bool("SURFSENSE_ENABLE_ACTION_LOG", True),
             enable_revert_route=_env_bool("SURFSENSE_ENABLE_REVERT_ROUTE", True),
@@ -198,7 +181,6 @@ class AgentFeatureFlags:
                 self.enable_llm_tool_selector,
                 self.enable_skills,
                 self.enable_specialized_subagents,
-                self.enable_kb_planner_runnable,
                 self.enable_action_log,
                 self.enable_revert_route,
                 self.enable_plugin_loader,
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/citation_state.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/citation_state.py
new file mode 100644
index 000000000..e9cb54957
--- /dev/null
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/citation_state.py
@@ -0,0 +1,54 @@
+"""Contribute the ``citation_registry`` state channel to a subagent.
+
+The conversation's ``[n]`` -> source registry lives on graph state behind a
+merge reducer (see :mod:`app.agents.chat.multi_agent_chat.shared.state.reducers`).
+The orchestrator and the KB subagent get that channel for free via the filesystem
+state schema, but a citable subagent that does *not* use the filesystem (e.g.
+``research``) still needs the channel declared so its tools can register ``[n]``
+via ``Command(update={"citation_registry": ...})`` and have it merge back up.
+
+This middleware adds *only* that channel — no tools, no behavior — so any subagent
+that mints citations can opt in without inheriting filesystem semantics.
+"""
+
+from __future__ import annotations
+
+from typing import Annotated, NotRequired
+
+from langchain.agents.middleware import AgentMiddleware
+from typing_extensions import TypedDict
+
+from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
+from app.agents.chat.multi_agent_chat.shared.state.reducers import (
+    _citation_registry_merge_reducer,
+)
+
+
+class CitationState(TypedDict):
+    """State carrying just the per-conversation ``[n]`` -> source registry."""
+
+    # Optional key; updates to it pass through the annotated merge reducer.
+    citation_registry: NotRequired[
+        Annotated[CitationRegistry, _citation_registry_merge_reducer]
+    ]
+
+
+class CitationStateMiddleware(AgentMiddleware):  # type: ignore[type-arg]
+    """Declare the ``citation_registry`` channel; no tools, no hooks."""
+
+    # Contributes no tools of its own.
+    tools = ()
+    # The only contribution: the schema that declares ``citation_registry``.
+    state_schema = CitationState
+
+
+def build_citation_state_mw() -> CitationStateMiddleware:
+    """Return a new ``CitationStateMiddleware``; it takes no configuration."""
+    return CitationStateMiddleware()
+
+
+__all__ = [
+    "CitationState",
+    "CitationStateMiddleware",
+    "build_citation_state_mw",
+]
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/backends/document_xml.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/backends/document_xml.py
deleted file mode 100644
index 60e586ae1..000000000
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/backends/document_xml.py
+++ /dev/null
@@ -1,103 +0,0 @@
-"""Shared XML builder for KB documents.
-
-Produces the citation-friendly XML used by every read of a knowledge-base
-document (lazy-loaded by :class:`KBPostgresBackend` and synthetic anonymous
-files). The XML carries a ``<chunk_index>`` near the top so the LLM can jump
-directly to matched-chunk line ranges via ``read_file(offset=…, limit=…)``.
-
-Extracted from the original ``knowledge_search.py`` so the backend, the
-priority middleware, and any future renderer share a single implementation.
-"""
-
-from __future__ import annotations
-
-import json
-from typing import Any
-
-
-def build_document_xml(
-    document: dict[str, Any],
-    matched_chunk_ids: set[int] | None = None,
-) -> str:
-    """Build citation-friendly XML with a ``<chunk_index>`` for smart seeking.
-
-    Args:
-        document: Dict shape produced by hybrid search / lazy-load helpers.
-            Expected keys: ``document`` (with ``id``, ``title``,
-            ``document_type``, ``metadata``) and ``chunks``
-            (list of ``{chunk_id, content}``).
-        matched_chunk_ids: Optional set of chunk IDs to flag as
-            ``matched="true"`` in the chunk index.
-    """
-    matched = matched_chunk_ids or set()
-
-    doc_meta = document.get("document") or {}
-    metadata = (doc_meta.get("metadata") or {}) if isinstance(doc_meta, dict) else {}
-    document_id = doc_meta.get("id", document.get("document_id", "unknown"))
-    document_type = doc_meta.get("document_type", document.get("source", "UNKNOWN"))
-    title = doc_meta.get("title") or metadata.get("title") or "Untitled Document"
-    url = (
-        metadata.get("url") or metadata.get("source") or metadata.get("page_url") or ""
-    )
-    metadata_json = json.dumps(metadata, ensure_ascii=False)
-
-    metadata_lines: list[str] = [
-        "<document>",
-        "<document_metadata>",
-        f"  <document_id>{document_id}</document_id>",
-        f"  <document_type>{document_type}</document_type>",
-        f"  <title><![CDATA[{title}]]></title>",
-        f"  <url><![CDATA[{url}]]></url>",
-        f"  <metadata_json><![CDATA[{metadata_json}]]></metadata_json>",
-        "</document_metadata>",
-        "",
-    ]
-
-    chunks = document.get("chunks") or []
-    chunk_entries: list[tuple[int | None, str]] = []
-    if isinstance(chunks, list):
-        for chunk in chunks:
-            if not isinstance(chunk, dict):
-                continue
-            chunk_id = chunk.get("chunk_id") or chunk.get("id")
-            chunk_content = str(chunk.get("content", "")).strip()
-            if not chunk_content:
-                continue
-            if chunk_id is None:
-                xml = f"  <chunk><![CDATA[{chunk_content}]]></chunk>"
-            else:
-                xml = f"  <chunk id='{chunk_id}'><![CDATA[{chunk_content}]]></chunk>"
-            chunk_entries.append((chunk_id, xml))
-
-    index_overhead = 1 + len(chunk_entries) + 1 + 1 + 1
-    first_chunk_line = len(metadata_lines) + index_overhead + 1
-
-    current_line = first_chunk_line
-    index_entry_lines: list[str] = []
-    for cid, xml_str in chunk_entries:
-        num_lines = xml_str.count("\n") + 1
-        end_line = current_line + num_lines - 1
-        matched_attr = ' matched="true"' if cid is not None and cid in matched else ""
-        if cid is not None:
-            index_entry_lines.append(
-                f'  <entry chunk_id="{cid}" lines="{current_line}-{end_line}"{matched_attr}/>'
-            )
-        else:
-            index_entry_lines.append(
-                f'  <entry lines="{current_line}-{end_line}"{matched_attr}/>'
-            )
-        current_line = end_line + 1
-
-    lines = metadata_lines.copy()
-    lines.append("<chunk_index>")
-    lines.extend(index_entry_lines)
-    lines.append("</chunk_index>")
-    lines.append("")
-    lines.append("<document_content>")
-    for _, xml_str in chunk_entries:
-        lines.append(xml_str)
-    lines.extend(["</document_content>", "</document>"])
-    return "\n".join(lines)
-
-
-__all__ = ["build_document_xml"]
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/backends/kb_postgres.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/backends/kb_postgres.py
index e13196537..cb0f4cc69 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/backends/kb_postgres.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/backends/kb_postgres.py
@@ -42,8 +42,15 @@ from langchain.tools import ToolRuntime
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession
 
-from app.agents.chat.multi_agent_chat.shared.middleware.filesystem.backends.document_xml import (
-    build_document_xml,
+from app.agents.chat.multi_agent_chat.shared.citations import (
+    CitationRegistry,
+    CitationSourceType,
+)
+from app.agents.chat.multi_agent_chat.shared.document_render import (
+    RenderableDocument,
+    RenderablePassage,
+    render_document,
+    source_label,
 )
 from app.agents.chat.runtime.path_resolver import (
     DOCUMENTS_ROOT,
@@ -59,6 +66,21 @@ _TEMP_PREFIX = "temp_"
 _GREP_MAX_TOTAL_MATCHES = 50
 _GREP_MAX_PER_DOC = 5
 
+_EMPTY_DOCUMENT_NOTICE = "(This document has no readable content.)"
+
+
+def render_full_document(
+    document: RenderableDocument,
+    registry: CitationRegistry,
+) -> str:
+    """Produce the ``view="full"`` rendering of a KB document, registering each ``[n]``.
+
+    ``render_document`` may return ``None``; in that case the fixed
+    empty-document notice is returned so a read never comes back blank.
+    """
+    body = render_document(document, view="full", registry=registry)
+    return _EMPTY_DOCUMENT_NOTICE if body is None else body
+
 
 def _basename(path: str) -> str:
     return path.rsplit("/", 1)[-1]
@@ -127,13 +149,6 @@ class KBPostgresBackend(BackendProtocol):
         anon = self.state.get("kb_anon_doc")
         return anon if isinstance(anon, dict) else None
 
-    def _matched_chunk_ids(self, doc_id: int) -> set[int]:
-        mapping = self.state.get("kb_matched_chunk_ids") or {}
-        try:
-            return set(mapping.get(doc_id, []) or [])
-        except TypeError:
-            return set()
-
     @staticmethod
     def _file_data_size(file_data: dict[str, Any]) -> int:
         try:
@@ -466,80 +481,93 @@ class KBPostgresBackend(BackendProtocol):
     def read(self, file_path: str, offset: int = 0, limit: int = 2000) -> str:  # type: ignore[override]
         return asyncio.run(self.aread(file_path, offset, limit))
 
-    async def _load_file_data(
+    async def aload_document(
         self,
         path: str,
-    ) -> tuple[dict[str, Any], int | None] | None:
-        """Lazy-load a virtual KB document into a deepagents ``FileData``.
+    ) -> tuple[RenderableDocument, int | None] | None:
+        """Lazy-load a virtual KB document as a :class:`RenderableDocument`.
 
-        Returns ``(file_data, doc_id)`` or ``None`` if the path doesn't map
-        to any known document. ``doc_id`` is ``None`` for the synthetic
-        anonymous document so the caller doesn't track it as a DB-backed file.
+        Returns ``(document, doc_id)`` with chunks in document order, or ``None`` if
+        the path maps to no known document. ``doc_id`` is ``None`` for the synthetic
+        anonymous upload (its chunks without a ``chunk_id`` are skipped) so the caller
+        doesn't track it as a DB-backed file. Pure data — rendering and citation
+        registration happen in the caller (see :meth:`_load_file_data`, ``read_file``).
         """
         anon = self._kb_anon_doc()
         if anon and str(anon.get("path") or "") == path:
-            doc_payload = {
-                "document_id": -1,
-                "chunks": list(anon.get("chunks") or []),
-                "matched_chunk_ids": [],
-                "document": {
-                    "id": -1,
-                    "title": anon.get("title") or "uploaded_document",
-                    "document_type": "FILE",
-                    "metadata": {"source": "anonymous_upload"},
-                },
-                "source": "FILE",
-            }
-            xml = build_document_xml(doc_payload, matched_chunk_ids=set())
-            file_data = create_file_data(xml)
-            return file_data, None
+            document = RenderableDocument(
+                title=str(anon.get("title") or "uploaded_document"),
+                source="Uploaded file",
+                passages=[
+                    RenderablePassage(
+                        content=str(chunk.get("content", "")),
+                        locator={
+                            "document_id": -1,
+                            "chunk_id": int(chunk["chunk_id"]),
+                        },
+                        source_type=CitationSourceType.ANON_CHUNK,
+                    )
+                    for chunk in (anon.get("chunks") or [])
+                    if isinstance(chunk, dict) and chunk.get("chunk_id") is not None
+                ],
+            )
+            return document, None
 
         if not path.startswith(DOCUMENTS_ROOT):
             return None
 
         async with shielded_async_session() as session:
-            document = await virtual_path_to_doc(
+            document_row = await virtual_path_to_doc(
                 session,
                 search_space_id=self.search_space_id,
                 virtual_path=path,
             )
-            if document is None:
+            if document_row is None:
                 return None
             chunk_rows = await session.execute(
                 select(Chunk.id, Chunk.content)
-                .where(Chunk.document_id == document.id)
+                .where(Chunk.document_id == document_row.id)
                 .order_by(Chunk.position, Chunk.id)
             )
-            chunks = [
-                {"chunk_id": row.id, "content": row.content} for row in chunk_rows.all()
-            ]
+            chunks = chunk_rows.all()
 
-        doc_payload = {
-            "document_id": document.id,
-            "chunks": chunks,
-            "matched_chunk_ids": list(self._matched_chunk_ids(document.id)),
-            "document": {
-                "id": document.id,
-                "title": document.title,
-                "document_type": (
-                    document.document_type.value
-                    if getattr(document, "document_type", None) is not None
-                    else "UNKNOWN"
-                ),
-                "metadata": dict(document.document_metadata or {}),
-            },
-            "source": (
-                document.document_type.value
-                if getattr(document, "document_type", None) is not None
-                else "UNKNOWN"
-            ),
-        }
-        xml = build_document_xml(
-            doc_payload,
-            matched_chunk_ids=self._matched_chunk_ids(document.id),
+        document_type = (
+            document_row.document_type.value
+            if getattr(document_row, "document_type", None) is not None
+            else None
         )
-        file_data = create_file_data(xml)
-        return file_data, document.id
+        metadata = dict(document_row.document_metadata or {})
+        document = RenderableDocument(
+            title=document_row.title,
+            source=source_label(document_type, metadata),
+            passages=[
+                RenderablePassage(
+                    content=row.content,
+                    locator={"document_id": document_row.id, "chunk_id": row.id},
+                )
+                for row in chunks
+            ],
+        )
+        return document, document_row.id
+
+    async def _load_file_data(
+        self,
+        path: str,
+    ) -> tuple[dict[str, Any], int | None] | None:
+        """Load and render a virtual KB document as a deepagents ``FileData``.
+
+        Serves the filesystem ops (move/edit existence + content staging) and the
+        backend's own ``aread``/``aedit``, which have no conversation registry to
+        persist into: ``[n]`` labels are minted into a throwaway registry. The
+        citation-persisting read is the ``read_file`` tool (via :meth:`aload_document`).
+        """
+        found = await self.aload_document(path)
+        if found is not None:
+            document, doc_id = found
+            # Fresh registry per call: labels minted here are discarded afterwards.
+            text = render_full_document(document, CitationRegistry())
+            return create_file_data(text), doc_id
+        return None
 
     # ------------------------------------------------------------------ writes
 
@@ -1037,4 +1065,5 @@ __all__ = [
     "KBPostgresBackend",
     "list_tree_listing",
     "paginate_listing",
+    "render_full_document",
 ]
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/backends/resolver.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/backends/resolver.py
index 6c35f369f..4553df7ff 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/backends/resolver.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/backends/resolver.py
@@ -37,8 +37,8 @@ def build_backend_resolver(
 
     In cloud mode the resolver returns a fresh :class:`KBPostgresBackend`
     bound to the current ``runtime`` so the backend can read staging state
-    (``staged_dirs``, ``pending_moves``, ``files`` cache, ``kb_anon_doc``,
-    ``kb_matched_chunk_ids``) for each tool call. When no ``search_space_id``
+    (``staged_dirs``, ``pending_moves``, ``files`` cache, ``kb_anon_doc``)
+    for each tool call. When no ``search_space_id``
     is provided, the resolver falls back to :class:`StateBackend` (used by
     sub-agents and tests that don't need DB-backed reads).
 
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/system_prompt/cloud.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/system_prompt/cloud.py
index 98dbbaaab..1520668ad 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/system_prompt/cloud.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/system_prompt/cloud.py
@@ -35,26 +35,14 @@ current working directory (`cwd`, default `/documents`).
   turn alongside any new/edited documents. Snapshot/revert is enabled
   for every destructive operation when action logging is on.
 
-## Reading Documents Efficiently
+## Reading Documents
 
-Documents are formatted as XML. Each document contains:
-- `<document_metadata>` — title, type, URL, etc.
-- `<chunk_index>` — a table of every chunk with its **line range** and a
-  `matched="true"` flag for chunks that matched the search query.
-- `<document_content>` — the actual chunks in original document order.
-
-**Workflow**: when reading a large document, read the first ~20 lines to see
-the `<chunk_index>`, identify chunks marked `matched="true"`, then use
-`read_file(path, offset=<start_line>, limit=<lines>)` to jump directly to
-those sections instead of reading the entire file sequentially.
-
-Use `<chunk id='...'>` values as citation IDs in your answers.
-
-## Priority List
-
-You receive a `<priority_documents>` system message each turn listing the
-top-K paths most relevant to the user's query (by hybrid search). Read those
-first — matched sections are flagged inside each document's `<chunk_index>`.
+A knowledge-base document is returned as a `<document … view="full">` block —
+the whole source, with each passage labelled `[n]`. `view="full"` means you are
+seeing the complete document, not an excerpt. Use `read_file(path, offset, limit)`
+to page through a large document. Cite a passage by writing its `[n]` after the
+statement it supports — the same `[n]` that passage had in
+`search_knowledge_base` results.
 
 ## Workspace Tree
 
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/system_prompt/desktop.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/system_prompt/desktop.py
index 712b51c26..d4cae99f0 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/system_prompt/desktop.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/system_prompt/desktop.py
@@ -37,13 +37,4 @@ directory (`cwd`).
 - Cross-mount moves are not supported.
 - Desktop deletes hit disk immediately and cannot be undone via the
   agent's revert flow — confirm before calling `rm`/`rmdir`.
-
-## Priority List
-
-You may receive a `<priority_documents>` system message listing the top-K
-documents from the user's SurfSense knowledge base — these are cloud-ingested
-via connectors (Notion, Slack, etc.), not local files. Treat it as a hint:
-consult it when the task spans both local and cloud sources (e.g. drafting a
-local note from a Notion summary); skip when the task is purely about local
-files.
 """
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/read_file/description.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/read_file/description.py
index b10ca4acc..3d1c6b69f 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/read_file/description.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/read_file/description.py
@@ -10,11 +10,11 @@ Usage:
 - By default, reads up to 100 lines from the beginning.
 - Use `offset` and `limit` for pagination when files are large.
 - Results include line numbers.
-- Documents contain a `<chunk_index>` near the top listing every chunk with
-  its line range and a `matched="true"` flag for search-relevant chunks.
-  Read the index first, then jump to matched chunks with
-  `read_file(path, offset=<start_line>, limit=<num_lines>)`.
-- Use chunk IDs (`<chunk id='...'>`) as citations in answers.
+- A knowledge-base document is returned as a `<document … view="full">` block:
+  the whole source, with each passage labelled `[n]`. `view="full"` means you are
+  seeing the complete document, not an excerpt.
+- Cite a passage by writing its `[n]` after the statement it supports — the same
+  `[n]` you would use for that passage from `search_knowledge_base`.
 """
 
 
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/read_file/index.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/read_file/index.py
index 5c20619d6..07dfec57e 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/read_file/index.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/read_file/index.py
@@ -4,14 +4,20 @@ from __future__ import annotations
 
 from typing import TYPE_CHECKING, Annotated, Any
 
-from deepagents.backends.utils import format_read_response, validate_path
+from deepagents.backends.utils import (
+    create_file_data,
+    format_read_response,
+    validate_path,
+)
 from langchain.tools import ToolRuntime
 from langchain_core.messages import ToolMessage
 from langchain_core.tools import BaseTool, StructuredTool
 from langgraph.types import Command
 
+from app.agents.chat.multi_agent_chat.shared.citations import load_registry
 from app.agents.chat.multi_agent_chat.shared.middleware.filesystem.backends.kb_postgres import (
     KBPostgresBackend,
+    render_full_document,
 )
 from app.agents.chat.multi_agent_chat.shared.state.filesystem_state import (
     SurfSenseFilesystemState,
@@ -55,10 +61,12 @@ def create_read_file_tool(mw: SurfSenseFilesystemMiddleware) -> BaseTool:
 
         backend = mw._get_backend(runtime)
         if isinstance(backend, KBPostgresBackend):
-            loaded = await backend._load_file_data(validated)
+            loaded = await backend.aload_document(validated)
             if loaded is None:
                 return f"Error: File '{validated}' not found"
-            file_data, doc_id = loaded
+            document, doc_id = loaded
+            registry = load_registry(runtime.state)
+            file_data = create_file_data(render_full_document(document, registry))
             rendered = format_read_response(file_data, offset, limit)
             update: dict[str, Any] = {
                 "files": {validated: file_data},
@@ -68,6 +76,7 @@ def create_read_file_tool(mw: SurfSenseFilesystemMiddleware) -> BaseTool:
                         tool_call_id=runtime.tool_call_id,
                     )
                 ],
+                "citation_registry": registry,
             }
             if doc_id is not None:
                 update["doc_id_by_path"] = {validated: doc_id}
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/kb_context_projection.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/kb_context_projection.py
index 4667441ab..f15c918be 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/kb_context_projection.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/kb_context_projection.py
@@ -1,4 +1,4 @@
-"""Project ``workspace_tree_text`` + ``kb_priority`` from state into SystemMessages."""
+"""Project ``workspace_tree_text`` from state into a SystemMessage."""
 
 from __future__ import annotations
 
@@ -14,18 +14,15 @@ from app.agents.chat.multi_agent_chat.shared.state.filesystem_state import (
 )
 from app.utils.perf import get_perf_logger
 
-from .knowledge_search import _render_priority_message
-
 _perf_log = get_perf_logger()
 
 
 class KbContextProjectionMiddleware(AgentMiddleware):  # type: ignore[type-arg]
-    """Emit ``<workspace_tree>`` + ``<priority_documents>`` from shared state.
+    """Emit the ``<workspace_tree>`` from shared state.
 
     Read-only consumer: no DB, no LLM, no state writes. The orchestrator's
-    renderer middlewares populate the source fields; this projection lets any
-    agent (orchestrator or subagent) put the same content in front of its
-    own LLM call.
+    ``KnowledgeTreeMiddleware`` populates ``workspace_tree_text``; this
+    projection lets a subagent put the same tree in front of its own LLM call.
     """
 
     tools = ()
@@ -39,28 +36,19 @@ class KbContextProjectionMiddleware(AgentMiddleware):  # type: ignore[type-arg]
         del runtime
         start = time.perf_counter()
         tree_text = state.get("workspace_tree_text")
-        priority = state.get("kb_priority")
-        if not tree_text and not priority:
+        if not tree_text:
             _perf_log.info(
-                "[kb_context_projection] tree=0 priority=0 elapsed=%.3fs",
+                "[kb_context_projection] tree=0 elapsed=%.3fs",
                 time.perf_counter() - start,
             )
             return None
 
         messages = list(state.get("messages") or [])
         insert_at = max(len(messages) - 1, 0)
-        tree_chars = 0
-        if tree_text:
-            tree_chars = len(tree_text)
-            messages.insert(insert_at, SystemMessage(content=tree_text))
-        priority_count = 0
-        if priority:
-            priority_count = len(priority) if hasattr(priority, "__len__") else 1
-            messages.insert(insert_at, _render_priority_message(priority))
+        messages.insert(insert_at, SystemMessage(content=tree_text))
         _perf_log.info(
-            "[kb_context_projection] tree_chars=%d priority_items=%d elapsed=%.3fs",
-            tree_chars,
-            priority_count,
+            "[kb_context_projection] tree_chars=%d elapsed=%.3fs",
+            len(tree_text),
             time.perf_counter() - start,
         )
         return {"messages": messages}
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/knowledge_search.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/knowledge_search.py
deleted file mode 100644
index 9ef601791..000000000
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/knowledge_search.py
+++ /dev/null
@@ -1,1089 +0,0 @@
-"""Hybrid-search priority middleware for the SurfSense new chat agent.
-
-This middleware runs ``before_agent`` on every turn and writes:
-
-* ``state["kb_priority"]`` — the top-K most relevant documents for the
-  current user message, used to render a ``<priority_documents>`` system
-  message immediately before the user turn.
-* ``state["kb_matched_chunk_ids"]`` — internal hand-off mapping
-  (``Document.id`` → matched chunk IDs) consumed by
-  :class:`KBPostgresBackend._load_file_data` when the agent first reads each
-  document, so the XML wrapper can flag matched sections in
-  ``<chunk_index>``.
-
-The previous "scoped filesystem" behaviour (synthetic ``ls`` + state
-``files`` seeding) is intentionally removed: documents are now lazy-loaded
-from Postgres on demand, with the full workspace tree rendered separately
-by :class:`KnowledgeTreeMiddleware`.
-
-In anonymous mode the middleware skips hybrid search entirely and emits a
-single-entry priority list pointing at the Redis-loaded document
-(``state["kb_anon_doc"]``).
-"""
-
-from __future__ import annotations
-
-import asyncio
-import json
-import logging
-import re
-import time
-from collections.abc import Sequence
-from datetime import UTC, datetime
-from typing import Any
-
-from langchain.agents import create_agent
-from langchain.agents.middleware import AgentMiddleware, AgentState
-from langchain_core.language_models import BaseChatModel
-from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage
-from langchain_core.runnables import Runnable
-from langgraph.runtime import Runtime
-from litellm import token_counter
-from pydantic import BaseModel, Field, ValidationError
-from sqlalchemy import select
-
-from app.agents.chat.multi_agent_chat.shared.date_filters import (
-    parse_date_or_datetime,
-    resolve_date_range,
-)
-from app.agents.chat.multi_agent_chat.shared.feature_flags import get_flags
-from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
-from app.agents.chat.multi_agent_chat.shared.state.filesystem_state import (
-    SurfSenseFilesystemState,
-)
-from app.agents.chat.runtime.path_resolver import (
-    PathIndex,
-    build_path_index,
-    doc_to_virtual_path,
-)
-from app.db import (
-    NATIVE_TO_LEGACY_DOCTYPE,
-    Chunk,
-    Document,
-    Folder,
-    shielded_async_session,
-)
-from app.retriever.chunks_hybrid_search import ChucksHybridSearchRetriever
-from app.utils.document_converters import embed_texts
-from app.utils.perf import get_perf_logger
-
-logger = logging.getLogger(__name__)
-_perf_log = get_perf_logger()
-
-
-class KBSearchPlan(BaseModel):
-    """Structured internal plan for KB retrieval."""
-
-    optimized_query: str = Field(
-        min_length=1,
-        description="Optimized retrieval query preserving the user's intent.",
-    )
-    start_date: str | None = Field(
-        default=None,
-        description="Optional ISO start date or datetime for KB search filtering.",
-    )
-    end_date: str | None = Field(
-        default=None,
-        description="Optional ISO end date or datetime for KB search filtering.",
-    )
-    is_recency_query: bool = Field(
-        default=False,
-        description=(
-            "True when the user's intent is primarily about recency or temporal "
-            "ordering (e.g. 'latest', 'newest', 'most recent', 'last uploaded') "
-            "rather than topical relevance."
-        ),
-    )
-
-
-def _extract_text_from_message(message: BaseMessage) -> str:
-    content = getattr(message, "content", "")
-    if isinstance(content, str):
-        return content
-    if isinstance(content, list):
-        parts: list[str] = []
-        for item in content:
-            if isinstance(item, str):
-                parts.append(item)
-            elif isinstance(item, dict) and item.get("type") == "text":
-                parts.append(str(item.get("text", "")))
-        return "\n".join(p for p in parts if p)
-    return str(content)
-
-
-def _render_recent_conversation(
-    messages: Sequence[BaseMessage],
-    *,
-    llm: BaseChatModel | None = None,
-    user_text: str = "",
-    max_messages: int = 6,
-) -> str:
-    """Render recent dialogue for internal planning under a token budget.
-
-    Filters to ``HumanMessage`` and ``AIMessage`` (without tool_calls) so that
-    injected ``SystemMessage`` artefacts (priority list, workspace tree,
-    file-write contract) don't pollute the planner prompt.
-    """
-    rendered: list[tuple[str, str]] = []
-    for message in messages:
-        role: str | None = None
-        if isinstance(message, HumanMessage):
-            role = "user"
-        elif isinstance(message, AIMessage):
-            if getattr(message, "tool_calls", None):
-                continue
-            role = "assistant"
-        else:
-            continue
-
-        text = _extract_text_from_message(message).strip()
-        if not text:
-            continue
-        text = re.sub(r"\s+", " ", text)
-        rendered.append((role, text))
-
-    if not rendered:
-        return ""
-
-    if rendered and rendered[-1][0] == "user" and rendered[-1][1] == user_text.strip():
-        rendered = rendered[:-1]
-
-    if not rendered:
-        return ""
-
-    def _legacy_render() -> str:
-        legacy_lines: list[str] = []
-        for role, text in rendered[-max_messages:]:
-            clipped = text[:400].rstrip() + "..." if len(text) > 400 else text
-            legacy_lines.append(f"{role}: {clipped}")
-        return "\n".join(legacy_lines)
-
-    def _count_prompt_tokens(conversation_text: str) -> int | None:
-        prompt = _build_kb_planner_prompt(
-            recent_conversation=conversation_text or "(none)",
-            user_text=user_text,
-        )
-        message_payload = [{"role": "user", "content": prompt}]
-
-        count_fn = getattr(llm, "_count_tokens", None) if llm is not None else None
-        if callable(count_fn):
-            try:
-                return count_fn(message_payload)
-            except Exception:
-                pass
-
-        profile = getattr(llm, "profile", None) if llm is not None else None
-        model_names: list[str] = []
-        if isinstance(profile, dict):
-            tcms = profile.get("token_count_models")
-            if isinstance(tcms, list):
-                model_names.extend(
-                    name for name in tcms if isinstance(name, str) and name
-                )
-            tcm = profile.get("token_count_model")
-            if isinstance(tcm, str) and tcm and tcm not in model_names:
-                model_names.append(tcm)
-        model_name = model_names[0] if model_names else getattr(llm, "model", None)
-        if not isinstance(model_name, str) or not model_name:
-            return None
-        try:
-            return token_counter(messages=message_payload, model=model_name)
-        except Exception:
-            return None
-
-    get_max_input_tokens = getattr(llm, "_get_max_input_tokens", None) if llm else None
-    if callable(get_max_input_tokens):
-        try:
-            max_input_tokens = int(get_max_input_tokens())
-        except Exception:
-            max_input_tokens = None
-    else:
-        profile = getattr(llm, "profile", None) if llm is not None else None
-        max_input_tokens = (
-            profile.get("max_input_tokens")
-            if isinstance(profile, dict)
-            and isinstance(profile.get("max_input_tokens"), int)
-            else None
-        )
-
-    if not isinstance(max_input_tokens, int) or max_input_tokens <= 0:
-        return _legacy_render()
-
-    output_reserve = min(max(int(max_input_tokens * 0.02), 256), 1024)
-    budget = max_input_tokens - output_reserve
-    if budget <= 0:
-        return _legacy_render()
-
-    selected_lines: list[str] = []
-    for role, text in reversed(rendered):
-        candidate_line = f"{role}: {text}"
-        candidate_lines = [candidate_line, *selected_lines]
-        candidate_conversation = "\n".join(candidate_lines)
-        token_count = _count_prompt_tokens(candidate_conversation)
-        if token_count is None:
-            return _legacy_render()
-        if token_count <= budget:
-            selected_lines = candidate_lines
-            continue
-
-        lo, hi = 1, len(text)
-        best_line: str | None = None
-        while lo <= hi:
-            mid = (lo + hi) // 2
-            clipped_text = text[:mid].rstrip() + "..."
-            clipped_line = f"{role}: {clipped_text}"
-            clipped_conversation = "\n".join([clipped_line, *selected_lines])
-            clipped_tokens = _count_prompt_tokens(clipped_conversation)
-            if clipped_tokens is None:
-                break
-            if clipped_tokens <= budget:
-                best_line = clipped_line
-                lo = mid + 1
-            else:
-                hi = mid - 1
-
-        if best_line is not None:
-            selected_lines = [best_line, *selected_lines]
-        break
-
-    if not selected_lines:
-        return _legacy_render()
-
-    return "\n".join(selected_lines)
-
-
-def _build_kb_planner_prompt(
-    *,
-    recent_conversation: str,
-    user_text: str,
-) -> str:
-    today = datetime.now(UTC).date().isoformat()
-    return (
-        "You optimize internal knowledge-base search inputs for document retrieval.\n"
-        "Return JSON only with this exact shape:\n"
-        '{"optimized_query":"string","start_date":"ISO string or null","end_date":"ISO string or null","is_recency_query":bool}\n\n'
-        "Rules:\n"
-        "- Preserve the user's intent.\n"
-        "- Rewrite the query to improve retrieval using concrete entities, acronyms, projects, tools, people, and document-specific terms when helpful.\n"
-        "- Keep the query concise and retrieval-focused.\n"
-        "- Only use date filters when the latest user request or recent dialogue clearly implies a time range.\n"
-        "- If you use date filters, prefer returning both bounds.\n"
-        "- If no date filter is useful, return null for both dates.\n"
-        '- Set "is_recency_query" to true ONLY when the user\'s primary intent is about '
-        "recency or temporal ordering rather than topical relevance. Examples: "
-        '"latest file", "newest upload", "most recent document", "what did I save last", '
-        '"show me files from today", "last thing I added". '
-        "When true, results will be sorted by date instead of relevance.\n"
-        "- Do not include markdown, prose, or explanations.\n\n"
-        f"Today's UTC date: {today}\n\n"
-        f"Recent conversation:\n{recent_conversation or '(none)'}\n\n"
-        f"Latest user message:\n{user_text}"
-    )
-
-
-def _extract_json_payload(text: str) -> str:
-    stripped = text.strip()
-    fenced = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", stripped, re.DOTALL)
-    if fenced:
-        return fenced.group(1)
-    start = stripped.find("{")
-    end = stripped.rfind("}")
-    if start != -1 and end != -1 and end > start:
-        return stripped[start : end + 1]
-    return stripped
-
-
-def _parse_kb_search_plan_response(response_text: str) -> KBSearchPlan:
-    payload = json.loads(_extract_json_payload(response_text))
-    return KBSearchPlan.model_validate(payload)
-
-
-def _normalize_optional_date_range(
-    start_date: str | None,
-    end_date: str | None,
-) -> tuple[datetime | None, datetime | None]:
-    parsed_start = parse_date_or_datetime(start_date) if start_date else None
-    parsed_end = parse_date_or_datetime(end_date) if end_date else None
-
-    if parsed_start is None and parsed_end is None:
-        return None, None
-
-    return resolve_date_range(parsed_start, parsed_end)
-
-
-def _resolve_search_types(
-    available_connectors: list[str] | None,
-    available_document_types: list[str] | None,
-) -> list[str] | None:
-    types: set[str] = set()
-    if available_document_types:
-        types.update(available_document_types)
-    if available_connectors:
-        types.update(available_connectors)
-    if not types:
-        return None
-
-    expanded: set[str] = set(types)
-    for t in types:
-        legacy = NATIVE_TO_LEGACY_DOCTYPE.get(t)
-        if legacy:
-            expanded.add(legacy)
-    return list(expanded) if expanded else None
-
-
-_RECENCY_MAX_CHUNKS_PER_DOC = 5
-
-
-async def browse_recent_documents(
-    *,
-    search_space_id: int,
-    document_type: list[str] | None = None,
-    top_k: int = 10,
-    start_date: datetime | None = None,
-    end_date: datetime | None = None,
-) -> list[dict[str, Any]]:
-    """Return documents ordered by recency (newest first), no relevance ranking."""
-    from sqlalchemy import func
-
-    from app.db import DocumentType
-
-    _t0 = time.perf_counter()
-    async with shielded_async_session() as session:
-        base_conditions = [
-            Document.search_space_id == search_space_id,
-            func.coalesce(Document.status["state"].astext, "ready") != "deleting",
-        ]
-
-        if document_type is not None:
-            import contextlib
-
-            doc_type_enums = []
-            for dt in document_type:
-                if isinstance(dt, str):
-                    with contextlib.suppress(KeyError):
-                        doc_type_enums.append(DocumentType[dt])
-                else:
-                    doc_type_enums.append(dt)
-            if doc_type_enums:
-                if len(doc_type_enums) == 1:
-                    base_conditions.append(Document.document_type == doc_type_enums[0])
-                else:
-                    base_conditions.append(Document.document_type.in_(doc_type_enums))
-
-        if start_date is not None:
-            base_conditions.append(Document.updated_at >= start_date)
-        if end_date is not None:
-            base_conditions.append(Document.updated_at <= end_date)
-
-        doc_query = (
-            select(Document)
-            .where(*base_conditions)
-            .order_by(Document.updated_at.desc())
-            .limit(top_k)
-        )
-        result = await session.execute(doc_query)
-        documents = result.scalars().unique().all()
-
-        if not documents:
-            return []
-
-        doc_ids = [d.id for d in documents]
-        numbered = (
-            select(
-                Chunk.id.label("chunk_id"),
-                Chunk.document_id,
-                Chunk.content,
-                func.row_number()
-                .over(
-                    partition_by=Chunk.document_id,
-                    order_by=(Chunk.position, Chunk.id),
-                )
-                .label("rn"),
-            )
-            .where(Chunk.document_id.in_(doc_ids))
-            .subquery("numbered")
-        )
-
-        chunk_query = (
-            select(numbered.c.chunk_id, numbered.c.document_id, numbered.c.content)
-            .where(numbered.c.rn <= _RECENCY_MAX_CHUNKS_PER_DOC)
-            .order_by(numbered.c.document_id, numbered.c.rn)
-        )
-        chunk_result = await session.execute(chunk_query)
-        fetched_chunks = chunk_result.all()
-
-    doc_chunks: dict[int, list[dict[str, Any]]] = {d.id: [] for d in documents}
-    for row in fetched_chunks:
-        if row.document_id in doc_chunks:
-            doc_chunks[row.document_id].append(
-                {"chunk_id": row.chunk_id, "content": row.content}
-            )
-
-    results: list[dict[str, Any]] = []
-    for doc in documents:
-        chunks_list = doc_chunks.get(doc.id, [])
-        metadata = doc.document_metadata or {}
-        results.append(
-            {
-                "document_id": doc.id,
-                "content": "\n\n".join(
-                    c["content"] for c in chunks_list if c.get("content")
-                ),
-                "score": 0.0,
-                "chunks": chunks_list,
-                "matched_chunk_ids": [],
-                "document": {
-                    "id": doc.id,
-                    "title": doc.title,
-                    "document_type": (
-                        doc.document_type.value
-                        if getattr(doc, "document_type", None)
-                        else None
-                    ),
-                    "metadata": metadata,
-                    "folder_id": getattr(doc, "folder_id", None),
-                },
-                "source": (
-                    doc.document_type.value
-                    if getattr(doc, "document_type", None)
-                    else None
-                ),
-            }
-        )
-    _perf_log.info(
-        "[kb_priority.recent] db=%.3fs docs=%d space=%d",
-        time.perf_counter() - _t0,
-        len(results),
-        search_space_id,
-    )
-    return results
-
-
-async def search_knowledge_base(
-    *,
-    query: str,
-    search_space_id: int,
-    available_connectors: list[str] | None = None,
-    available_document_types: list[str] | None = None,
-    top_k: int = 10,
-    start_date: datetime | None = None,
-    end_date: datetime | None = None,
-) -> list[dict[str, Any]]:
-    """Run a single unified hybrid search against the knowledge base."""
-    if not query:
-        return []
-
-    # ``embed_texts`` serializes behind a global embedding lock and, for API
-    # models, makes a network round-trip — so this can stall while another
-    # turn is embedding. Timed separately from the DB search to tell the two
-    # apart when debugging slow time-to-first-token.
-    _t_embed = time.perf_counter()
-    [embedding] = await asyncio.to_thread(embed_texts, [query])
-    _embed_elapsed = time.perf_counter() - _t_embed
-
-    doc_types = _resolve_search_types(available_connectors, available_document_types)
-    retriever_top_k = min(top_k * 3, 30)
-
-    _t_search = time.perf_counter()
-    async with shielded_async_session() as session:
-        retriever = ChucksHybridSearchRetriever(session)
-        results = await retriever.hybrid_search(
-            query_text=query,
-            top_k=retriever_top_k,
-            search_space_id=search_space_id,
-            document_type=doc_types,
-            start_date=start_date,
-            end_date=end_date,
-            query_embedding=embedding.tolist(),
-        )
-    _search_elapsed = time.perf_counter() - _t_search
-
-    _perf_log.info(
-        "[kb_priority.search] embed=%.3fs hybrid_search=%.3fs results=%d space=%d query=%r",
-        _embed_elapsed,
-        _search_elapsed,
-        len(results),
-        search_space_id,
-        query[:80],
-    )
-    return results[:top_k]
-
-
-async def fetch_mentioned_documents(
-    *,
-    document_ids: list[int],
-    search_space_id: int,
-) -> list[dict[str, Any]]:
-    """Fetch explicitly mentioned documents."""
-    if not document_ids:
-        return []
-
-    _t0 = time.perf_counter()
-    async with shielded_async_session() as session:
-        doc_result = await session.execute(
-            select(Document).where(
-                Document.id.in_(document_ids),
-                Document.search_space_id == search_space_id,
-            )
-        )
-        docs = {doc.id: doc for doc in doc_result.scalars().all()}
-
-        if not docs:
-            return []
-
-        chunk_result = await session.execute(
-            select(Chunk.id, Chunk.content, Chunk.document_id)
-            .where(Chunk.document_id.in_(list(docs.keys())))
-            .order_by(Chunk.document_id, Chunk.position, Chunk.id)
-        )
-        chunks_by_doc: dict[int, list[dict[str, Any]]] = {doc_id: [] for doc_id in docs}
-        for row in chunk_result.all():
-            if row.document_id in chunks_by_doc:
-                chunks_by_doc[row.document_id].append(
-                    {"chunk_id": row.id, "content": row.content}
-                )
-
-    results: list[dict[str, Any]] = []
-    for doc_id in document_ids:
-        doc = docs.get(doc_id)
-        if doc is None:
-            continue
-        metadata = doc.document_metadata or {}
-        results.append(
-            {
-                "document_id": doc.id,
-                "content": "",
-                "score": 1.0,
-                "chunks": chunks_by_doc.get(doc.id, []),
-                "matched_chunk_ids": [],
-                "document": {
-                    "id": doc.id,
-                    "title": doc.title,
-                    "document_type": (
-                        doc.document_type.value
-                        if getattr(doc, "document_type", None)
-                        else None
-                    ),
-                    "metadata": metadata,
-                    "folder_id": getattr(doc, "folder_id", None),
-                },
-                "source": (
-                    doc.document_type.value
-                    if getattr(doc, "document_type", None)
-                    else None
-                ),
-                "_user_mentioned": True,
-            }
-        )
-    _perf_log.info(
-        "[kb_priority.mentioned] db=%.3fs requested=%d resolved=%d",
-        time.perf_counter() - _t0,
-        len(document_ids),
-        len(results),
-    )
-    return results
-
-
-def _render_priority_message(priority: list[dict[str, Any]]) -> SystemMessage:
-    """Render the priority list as a single ``<priority_documents>`` system message."""
-    if not priority:
-        body = "(no priority documents for this turn)"
-    else:
-        lines: list[str] = []
-        for entry in priority:
-            score = entry.get("score")
-            mentioned = entry.get("mentioned")
-            score_str = f"{score:.3f}" if isinstance(score, int | float) else "n/a"
-            mark = " [USER-MENTIONED]" if mentioned else ""
-            lines.append(f"- {entry.get('path', '')} (score={score_str}){mark}")
-        body = "\n".join(lines)
-    return SystemMessage(
-        content=(
-            "<priority_documents>\n"
-            "These documents are most relevant to the latest user message; "
-            "read them first. Matched sections are flagged inside each "
-            "document's <chunk_index>.\n"
-            f"{body}\n"
-            "</priority_documents>"
-        )
-    )
-
-
-class KnowledgePriorityMiddleware(AgentMiddleware):  # type: ignore[type-arg]
-    """Compute hybrid-search priority hints for the current turn."""
-
-    tools = ()
-    state_schema = SurfSenseFilesystemState
-
-    def __init__(
-        self,
-        *,
-        llm: BaseChatModel | None = None,
-        planner_llm: BaseChatModel | None = None,
-        search_space_id: int,
-        filesystem_mode: FilesystemMode = FilesystemMode.CLOUD,
-        available_connectors: list[str] | None = None,
-        available_document_types: list[str] | None = None,
-        top_k: int = 10,
-        mentioned_document_ids: list[int] | None = None,
-        inject_system_message: bool = True,  # For backwards compatibility
-        mentions_only: bool = False,
-    ) -> None:
-        self.llm = llm
-        # Cheap model for structured internal tasks (query rewrite, date
-        # extraction, recency classification) when one is configured; falls back
-        # to the chat LLM otherwise.
-        self.planner_llm = planner_llm or llm
-        self.search_space_id = search_space_id
-        self.filesystem_mode = filesystem_mode
-        self.available_connectors = available_connectors
-        self.available_document_types = available_document_types
-        self.top_k = top_k
-        self.mentioned_document_ids = mentioned_document_ids or []
-        self.inject_system_message = inject_system_message
-        # Lazy mode: skip the planner LLM + embedding + hybrid search and only
-        # surface explicit @-mentions. The agent retrieves topical KB content on
-        # demand via the ``search_knowledge_base`` tool instead.
-        self.mentions_only = mentions_only
-        # Compiled lazily and memoized to avoid the per-turn create_agent cost.
-        self._planner: Runnable | None = None
-        self._planner_compile_failed = False
-
-    def _build_kb_planner_runnable(self) -> Runnable | None:
-        """Lazily compile and memoize the kb-planner Runnable.
-
-        Returns ``None`` (and the caller falls back to ``planner_llm.ainvoke``)
-        when the flag is off, the LLM is missing, or ``create_agent`` raises.
-        Built without tools but with RetryAfterMiddleware so a transient
-        rate-limit on the planner call doesn't fail the whole turn.
-        """
-        if self._planner is not None or self._planner_compile_failed:
-            return self._planner
-        if self.planner_llm is None:
-            return None
-        flags = get_flags()
-        if not flags.enable_kb_planner_runnable or flags.disable_new_agent_stack:
-            return None
-
-        from app.agents.chat.shared.middleware.retry_after import RetryAfterMiddleware
-
-        try:
-            self._planner = create_agent(
-                self.planner_llm,
-                tools=[],
-                middleware=[RetryAfterMiddleware(max_retries=2)],
-            )
-        except Exception as exc:  # pragma: no cover - defensive
-            logger.warning(
-                "kb-planner Runnable compile failed; falling back to planner_llm.ainvoke: %s",
-                exc,
-            )
-            self._planner_compile_failed = True
-            self._planner = None
-        return self._planner
-
-    async def _plan_search_inputs(
-        self,
-        *,
-        messages: Sequence[BaseMessage],
-        user_text: str,
-    ) -> tuple[str, datetime | None, datetime | None, bool]:
-        if self.planner_llm is None:
-            return user_text, None, None, False
-
-        recent_conversation = _render_recent_conversation(
-            messages,
-            llm=self.planner_llm,
-            user_text=user_text,
-        )
-        prompt = _build_kb_planner_prompt(
-            recent_conversation=recent_conversation,
-            user_text=user_text,
-        )
-        loop = asyncio.get_running_loop()
-        t0 = loop.time()
-
-        # Both paths tag surfsense:internal so the planner's intermediate
-        # events stay suppressed from the UI.
-        planner = self._build_kb_planner_runnable()
-        try:
-            if planner is not None:
-                planner_state = await planner.ainvoke(
-                    {"messages": [HumanMessage(content=prompt)]},
-                    config={"tags": ["surfsense:internal"]},
-                )
-                response_messages = (
-                    planner_state.get("messages", [])
-                    if isinstance(planner_state, dict)
-                    else []
-                )
-                response = (
-                    response_messages[-1]
-                    if response_messages
-                    else AIMessage(content="")
-                )
-            else:
-                response = await self.planner_llm.ainvoke(
-                    [HumanMessage(content=prompt)],
-                    config={"tags": ["surfsense:internal"]},
-                )
-            plan = _parse_kb_search_plan_response(_extract_text_from_message(response))
-            optimized_query = (
-                re.sub(r"\s+", " ", plan.optimized_query).strip() or user_text
-            )
-            start_date, end_date = _normalize_optional_date_range(
-                plan.start_date,
-                plan.end_date,
-            )
-            is_recency = plan.is_recency_query
-            _perf_log.info(
-                "[kb_priority] planner in %.3fs query=%r optimized=%r "
-                "start=%s end=%s recency=%s",
-                loop.time() - t0,
-                user_text[:80],
-                optimized_query[:120],
-                start_date.isoformat() if start_date else None,
-                end_date.isoformat() if end_date else None,
-                is_recency,
-            )
-            return optimized_query, start_date, end_date, is_recency
-        except (json.JSONDecodeError, ValidationError, ValueError) as exc:
-            logger.warning(
-                "KB planner returned invalid output, using raw query: %s", exc
-            )
-        except Exception as exc:  # pragma: no cover - defensive fallback
-            logger.warning("KB planner failed, using raw query: %s", exc)
-
-        return user_text, None, None, False
-
-    def before_agent(  # type: ignore[override]
-        self,
-        state: AgentState,
-        runtime: Runtime[Any],
-    ) -> dict[str, Any] | None:
-        try:
-            loop = asyncio.get_running_loop()
-            if loop.is_running():
-                return None
-        except RuntimeError:
-            pass
-        return asyncio.run(self.abefore_agent(state, runtime))
-
-    async def abefore_agent(  # type: ignore[override]
-        self,
-        state: AgentState,
-        runtime: Runtime[Any],
-    ) -> dict[str, Any] | None:
-        if self.filesystem_mode != FilesystemMode.CLOUD:
-            return None
-
-        messages = state.get("messages") or []
-        if not messages:
-            return None
-
-        last_human: HumanMessage | None = None
-        for msg in reversed(messages):
-            if isinstance(msg, HumanMessage):
-                last_human = msg
-                break
-        if last_human is None:
-            return None
-        user_text = _extract_text_from_message(last_human).strip()
-        if not user_text:
-            return None
-
-        anon_doc = state.get("kb_anon_doc")
-        if anon_doc:
-            return self._anon_priority(state, anon_doc)
-
-        return await self._authenticated_priority(state, messages, user_text, runtime)
-
-    def _anon_priority(
-        self,
-        state: AgentState,
-        anon_doc: dict[str, Any],
-    ) -> dict[str, Any]:
-        path = str(anon_doc.get("path") or "")
-        title = str(anon_doc.get("title") or "uploaded_document")
-        priority = [
-            {
-                "path": path,
-                "score": 1.0,
-                "document_id": None,
-                "title": title,
-                "mentioned": True,
-            }
-        ]
-        update: dict[str, Any] = {
-            "kb_priority": priority,
-            "kb_matched_chunk_ids": {},
-        }
-        if self.inject_system_message:
-            new_messages = list(state.get("messages") or [])
-            insert_at = max(len(new_messages) - 1, 0)
-            new_messages.insert(insert_at, _render_priority_message(priority))
-            update["messages"] = new_messages
-        return update
-
-    async def _authenticated_priority(
-        self,
-        state: AgentState,
-        messages: Sequence[BaseMessage],
-        user_text: str,
-        runtime: Runtime[Any] | None = None,
-    ) -> dict[str, Any]:
-        t0 = asyncio.get_event_loop().time()
-
-        # Prefer per-turn mentions from runtime.context (lets a cached graph
-        # serve different turns); fall back to the constructor closure, draining
-        # it after one read so stale mentions can't replay.
-        #
-        # CRITICAL: test ``ctx_mentions is not None``, not truthiness — an empty
-        # list means "this turn has no mentions", not "use the closure".
-        mention_ids: list[int] = []
-        ctx = getattr(runtime, "context", None) if runtime is not None else None
-        ctx_mentions = getattr(ctx, "mentioned_document_ids", None) if ctx else None
-        if ctx_mentions is not None:
-            mention_ids = list(ctx_mentions)
-            if self.mentioned_document_ids:
-                self.mentioned_document_ids = []
-        elif self.mentioned_document_ids:
-            mention_ids = list(self.mentioned_document_ids)
-            self.mentioned_document_ids = []
-
-        # Folder mentions aren't embedded, so they skip hybrid search and are
-        # surfaced only as [USER-MENTIONED] entries. Cloud mode only.
-        folder_mention_ids: list[int] = []
-        if (
-            ctx is not None
-            and getattr(self, "filesystem_mode", FilesystemMode.CLOUD)
-            == FilesystemMode.CLOUD
-        ):
-            ctx_folders = getattr(ctx, "mentioned_folder_ids", None)
-            if ctx_folders:
-                folder_mention_ids = list(ctx_folders)
-
-        # Lazy mode: skip the planner LLM + embedding + hybrid search entirely.
-        # With no explicit mentions there is nothing cheap to surface, so we bail
-        # out early and let the agent decide to call ``search_knowledge_base``.
-        if self.mentions_only:
-            if not mention_ids and not folder_mention_ids:
-                return None
-            planned_query = user_text
-            start_date = end_date = None
-            is_recency = False
-            search_results: list[dict[str, Any]] = []
-            _search_phase_elapsed = 0.0
-        else:
-            (
-                planned_query,
-                start_date,
-                end_date,
-                is_recency,
-            ) = await self._plan_search_inputs(
-                messages=messages,
-                user_text=user_text,
-            )
-
-            _t_search_phase = time.perf_counter()
-            if is_recency:
-                doc_types = _resolve_search_types(
-                    self.available_connectors, self.available_document_types
-                )
-                search_results = await browse_recent_documents(
-                    search_space_id=self.search_space_id,
-                    document_type=doc_types,
-                    top_k=self.top_k,
-                    start_date=start_date,
-                    end_date=end_date,
-                )
-            else:
-                search_results = await search_knowledge_base(
-                    query=planned_query,
-                    search_space_id=self.search_space_id,
-                    available_connectors=self.available_connectors,
-                    available_document_types=self.available_document_types,
-                    top_k=self.top_k,
-                    start_date=start_date,
-                    end_date=end_date,
-                )
-            _search_phase_elapsed = time.perf_counter() - _t_search_phase
-
-        mentioned_results: list[dict[str, Any]] = []
-        if mention_ids:
-            mentioned_results = await fetch_mentioned_documents(
-                document_ids=mention_ids,
-                search_space_id=self.search_space_id,
-            )
-
-        seen_doc_ids: set[int] = set()
-        merged: list[dict[str, Any]] = []
-        for doc in mentioned_results:
-            doc_id = (doc.get("document") or {}).get("id")
-            if isinstance(doc_id, int):
-                seen_doc_ids.add(doc_id)
-            merged.append(doc)
-        for doc in search_results:
-            doc_id = (doc.get("document") or {}).get("id")
-            if isinstance(doc_id, int) and doc_id in seen_doc_ids:
-                continue
-            merged.append(doc)
-
-        _t_materialize = time.perf_counter()
-        priority, matched_chunk_ids = await self._materialize_priority(merged)
-
-        if folder_mention_ids:
-            folder_entries = await self._materialize_folder_priority(folder_mention_ids)
-            priority = folder_entries + priority
-        _materialize_elapsed = time.perf_counter() - _t_materialize
-
-        # ``recency=...`` reflects which retrieval path ran (recency browse vs
-        # hybrid search). The planner phase is logged separately by
-        # ``_plan_search_inputs``; here ``search_phase`` and ``materialize``
-        # break down the remaining DB-bound work so a slow turn can be
-        # attributed to planner / search / materialize at a glance.
-        _perf_log.info(
-            "[kb_priority] completed in %.3fs (search_phase=%.3fs materialize=%.3fs "
-            "recency=%s) query=%r priority=%d mentioned=%d folders=%d",
-            asyncio.get_event_loop().time() - t0,
-            _search_phase_elapsed,
-            _materialize_elapsed,
-            is_recency,
-            user_text[:80],
-            len(priority),
-            len(mentioned_results),
-            len(folder_mention_ids),
-        )
-
-        update: dict[str, Any] = {
-            "kb_priority": priority,
-            "kb_matched_chunk_ids": matched_chunk_ids,
-        }
-        if self.inject_system_message:
-            new_messages = list(messages)
-            insert_at = max(len(new_messages) - 1, 0)
-            new_messages.insert(insert_at, _render_priority_message(priority))
-            update["messages"] = new_messages
-        return update
-
-    async def _materialize_folder_priority(
-        self, folder_ids: list[int]
-    ) -> list[dict[str, Any]]:
-        """Resolve mentioned folder ids to canonical-path priority entries.
-
-        Flagged ``mentioned=True`` with ``score=None`` (folders aren't ranked;
-        the agent decides which children to read).
-        """
-        if not folder_ids:
-            return []
-        async with shielded_async_session() as session:
-            index: PathIndex = await build_path_index(session, self.search_space_id)
-            folder_rows = await session.execute(
-                select(Folder.id, Folder.name).where(
-                    Folder.search_space_id == self.search_space_id,
-                    Folder.id.in_(folder_ids),
-                )
-            )
-            folder_titles: dict[int, str] = {
-                row.id: row.name for row in folder_rows.all()
-            }
-
-        entries: list[dict[str, Any]] = []
-        seen: set[int] = set()
-        for folder_id in folder_ids:
-            if folder_id in seen:
-                continue
-            seen.add(folder_id)
-            base = index.folder_paths.get(folder_id)
-            if base is None:
-                logger.debug(
-                    "kb_priority: dropping folder id=%s (missing from path index)",
-                    folder_id,
-                )
-                continue
-            path = base if base.endswith("/") else f"{base}/"
-            entries.append(
-                {
-                    "path": path,
-                    "score": None,
-                    "document_id": None,
-                    "folder_id": folder_id,
-                    "title": folder_titles.get(folder_id, ""),
-                    "mentioned": True,
-                }
-            )
-        return entries
-
-    async def _materialize_priority(
-        self, merged: list[dict[str, Any]]
-    ) -> tuple[list[dict[str, Any]], dict[int, list[int]]]:
-        """Resolve canonical paths and matched chunk ids for the priority list."""
-        priority: list[dict[str, Any]] = []
-        matched_chunk_ids: dict[int, list[int]] = {}
-
-        if not merged:
-            return priority, matched_chunk_ids
-
-        _t0 = time.perf_counter()
-        async with shielded_async_session() as session:
-            index: PathIndex = await build_path_index(session, self.search_space_id)
-            doc_ids = [
-                (doc.get("document") or {}).get("id")
-                for doc in merged
-                if isinstance(doc, dict)
-            ]
-            doc_ids = [doc_id for doc_id in doc_ids if isinstance(doc_id, int)]
-            folder_by_doc_id: dict[int, int | None] = {}
-            if doc_ids:
-                folder_rows = await session.execute(
-                    select(Document.id, Document.folder_id).where(
-                        Document.search_space_id == self.search_space_id,
-                        Document.id.in_(doc_ids),
-                    )
-                )
-                folder_by_doc_id = {row.id: row.folder_id for row in folder_rows.all()}
-
-        for doc in merged:
-            doc_meta = doc.get("document") or {}
-            doc_id = doc_meta.get("id")
-            title = doc_meta.get("title") or "untitled"
-            folder_id = (
-                folder_by_doc_id.get(doc_id)
-                if isinstance(doc_id, int)
-                else doc_meta.get("folder_id")
-            )
-            path = doc_to_virtual_path(
-                doc_id=doc_id if isinstance(doc_id, int) else None,
-                title=str(title),
-                folder_id=folder_id if isinstance(folder_id, int) else None,
-                index=index,
-            )
-            priority.append(
-                {
-                    "path": path,
-                    "score": float(doc.get("score") or 0.0),
-                    "document_id": doc_id if isinstance(doc_id, int) else None,
-                    "title": str(title),
-                    "mentioned": bool(doc.get("_user_mentioned")),
-                }
-            )
-            if isinstance(doc_id, int):
-                chunk_ids = doc.get("matched_chunk_ids") or []
-                if chunk_ids:
-                    matched_chunk_ids[doc_id] = [
-                        int(cid) for cid in chunk_ids if isinstance(cid, int | str)
-                    ]
-        _perf_log.info(
-            "[kb_priority.materialize] db=%.3fs docs=%d",
-            time.perf_counter() - _t0,
-            len(merged),
-        )
-        return priority, matched_chunk_ids
-
-
-__all__ = [
-    "KnowledgePriorityMiddleware",
-    "browse_recent_documents",
-    "fetch_mentioned_documents",
-    "search_knowledge_base",
-]
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/retrieval/__init__.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/retrieval/__init__.py
new file mode 100644
index 000000000..7d68d2238
--- /dev/null
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/retrieval/__init__.py
@@ -0,0 +1,18 @@
+"""Knowledge-base retrieval: hybrid search rendered as citable evidence.
+
+Public surface is the service (``search_knowledge_base_context``) and its input
+value object (``SearchScope``); the rest are building blocks.
+"""
+
+from __future__ import annotations
+
+from .models import ChunkHit, DocumentHit, SearchScope
+from .service import build_context, search_knowledge_base_context
+
+__all__ = [
+    "ChunkHit",
+    "DocumentHit",
+    "SearchScope",
+    "build_context",
+    "search_knowledge_base_context",
+]
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/retrieval/adapter.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/retrieval/adapter.py
new file mode 100644
index 000000000..cf4263451
--- /dev/null
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/retrieval/adapter.py
@@ -0,0 +1,29 @@
+"""Turn retriever ``DocumentHit``s into renderable documents."""
+
+from __future__ import annotations
+
+from app.agents.chat.multi_agent_chat.shared.document_render import (
+    RenderableDocument,
+    RenderablePassage,
+    source_label,
+)
+
+from .models import DocumentHit
+
+
+def to_renderable_document(hit: DocumentHit) -> RenderableDocument:
+    """Build the ``RenderableDocument`` for ``hit``: one passage per matched chunk."""
+    passages = []
+    for matched in hit.chunks:
+        # The locator pairs the owning document's id with the chunk's own id.
+        locator = {"document_id": hit.document_id, "chunk_id": matched.chunk_id}
+        passages.append(RenderablePassage(content=matched.content, locator=locator))
+    # Title is copied as-is; the source label comes from the type and metadata.
+    return RenderableDocument(
+        title=hit.title,
+        source=source_label(hit.document_type, hit.metadata),
+        passages=passages,
+    )
+
+
+__all__ = ["to_renderable_document"]
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/retrieval/hybrid_search.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/retrieval/hybrid_search.py
new file mode 100644
index 000000000..cc200b3a6
--- /dev/null
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/retrieval/hybrid_search.py
@@ -0,0 +1,250 @@
+"""Hybrid (semantic + keyword) chunk search with reciprocal-rank fusion.
+
+Only matched chunks are citable, so the fused result already holds every passage
+shown — there is no second per-document fetch. Returns the top ``top_k``
+documents, each carrying its matched chunks in reading order.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import contextlib
+import time
+
+from sqlalchemy import func, select, text
+from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy.orm import joinedload
+
+from app.config import config
+from app.db import Chunk, Document, DocumentType
+from app.observability import metrics, otel
+from app.utils.perf import get_perf_logger
+
+from .models import ChunkHit, DocumentHit, SearchScope
+
+_RRF_K = 60
+_CANDIDATE_MULTIPLIER = 5  # fused-chunk pool size relative to top_k
+_MAX_PASSAGES_PER_DOC = 12
+_SURFACE = "chunks"
+
+
+async def search_chunks(
+    db_session: AsyncSession,
+    *,
+    search_space_id: int,
+    query: str,
+    scope: SearchScope,
+    top_k: int,
+    query_embedding: list[float] | None = None,  # embedded from ``query`` if omitted
+) -> list[DocumentHit]:
+    """Return the top ``top_k`` documents for ``query`` in scope, with their chunks.
+
+    Instrumented wrapper around :func:`_search`: opens a KB-search span, records
+    the duration metric even when the search raises, then logs a timing line.
+    """
+    started = time.perf_counter()
+    with otel.kb_search_span(
+        search_space_id=search_space_id,
+        query_chars=len(query),  # only the query length is passed, not its text
+        extra={"search.surface": _SURFACE, "search.mode": "hybrid"},
+    ) as span:
+        try:
+            documents = await _search(
+                db_session,
+                search_space_id=search_space_id,
+                query=query,
+                scope=scope,
+                top_k=top_k,
+                query_embedding=query_embedding,
+            )
+        finally:  # runs on failure too, so the duration is always recorded
+            elapsed_ms = (time.perf_counter() - started) * 1000
+            metrics.record_kb_search_duration(
+                elapsed_ms, search_space_id=search_space_id, surface=_SURFACE
+            )
+        span.set_attribute("result.count", len(documents))  # success path only
+        get_perf_logger().info(
+            "[chunk_search] hybrid in %.3fs docs=%d space=%d",
+            elapsed_ms / 1000,  # milliseconds -> seconds for the log line
+            len(documents),
+            search_space_id,
+        )
+        return documents
+
+
+async def _search(
+    db_session: AsyncSession,
+    *,
+    search_space_id: int,
+    query: str,
+    scope: SearchScope,
+    top_k: int,  # non-positive → ``[]`` without embedding or querying
+    query_embedding: list[float] | None,  # ``None`` → embedded here from ``query``
+) -> list[DocumentHit]:
+    """Fusion search itself: resolve scope, fuse the two legs, group by document."""
+    document_types = _resolve_document_types(scope.document_types)
+    if top_k <= 0 or document_types == []:  # nothing wanted / no type recognized
+        return []
+
+    if query_embedding is None:
+        query_embedding = await asyncio.to_thread(  # blocking embed, off the loop
+            config.embedding_model_instance.embed, query
+        )
+
+    conditions = _base_conditions(search_space_id, scope, document_types)
+    rows = await _fused_chunks(
+        db_session,
+        query=query,
+        query_embedding=query_embedding,
+        conditions=conditions,
+        candidate_pool=top_k * _CANDIDATE_MULTIPLIER,  # over-fetch before grouping
+    )
+    return _group_into_documents(rows, top_k=top_k)
+
+
+def _resolve_document_types(raw: tuple[str, ...] | None) -> list[DocumentType] | None:
+    """Map type names to ``DocumentType``; ``None`` = no filter, ``[]`` = none known."""
+    if not raw:
+        return None
+
+    members: list[DocumentType] = []
+    for type_name in raw:
+        # Unrecognized names are skipped rather than raised.
+        with contextlib.suppress(KeyError):
+            members.append(DocumentType[type_name])
+    return members
+
+
+def _base_conditions(
+    search_space_id: int,
+    scope: SearchScope,
+    document_types: list[DocumentType] | None,
+) -> list:
+    """Build the WHERE clauses applied to both the semantic and keyword legs."""
+    # A NULL state is coalesced to "ready"; documents being deleted are excluded.
+    state = func.coalesce(Document.status["state"].astext, "ready")
+    filters = [Document.search_space_id == search_space_id, state != "deleting"]
+    # Scope narrowing is optional: each clause is added only when it was supplied.
+    if document_types:
+        filters.append(Document.document_type.in_(document_types))
+    if scope.document_ids:
+        filters.append(Document.id.in_(scope.document_ids))
+    if scope.start_date is not None:
+        filters.append(Document.updated_at >= scope.start_date)
+    if scope.end_date is not None:
+        filters.append(Document.updated_at <= scope.end_date)
+    return filters
+
+
+async def _fused_chunks(
+    db_session: AsyncSession,
+    *,
+    query: str,
+    query_embedding: list[float],
+    conditions: list,  # WHERE clauses applied to both legs
+    candidate_pool: int,  # LIMIT for each leg and for the fused result
+):
+    """Run the semantic and keyword legs, fuse with RRF, return (Chunk, score) rows."""
+    tsvector = func.to_tsvector("english", Chunk.content)
+    tsquery = func.plainto_tsquery("english", query)
+
+    semantic = (
+        select(
+            Chunk.id,
+            func.rank()
+            .over(order_by=Chunk.embedding.op("<=>")(query_embedding))
+            .label("rank"),  # rank 1 = smallest ``<=>`` value against the query
+        )
+        .join(Document, Chunk.document_id == Document.id)
+        .where(*conditions)
+        .order_by(Chunk.embedding.op("<=>")(query_embedding))
+        .limit(candidate_pool)
+        .cte("semantic_search")
+    )
+
+    keyword = (
+        select(
+            Chunk.id,
+            func.rank()
+            .over(order_by=func.ts_rank_cd(tsvector, tsquery).desc())
+            .label("rank"),  # rank 1 = highest ``ts_rank_cd``
+        )
+        .join(Document, Chunk.document_id == Document.id)
+        .where(*conditions)
+        .where(tsvector.op("@@")(tsquery))  # keep only full-text matches
+        .order_by(func.ts_rank_cd(tsvector, tsquery).desc())
+        .limit(candidate_pool)
+        .cte("keyword_search")
+    )
+
+    fused = (
+        select(
+            Chunk,
+            (  # RRF: each leg adds 1 / (k + rank); a leg that missed adds 0
+                func.coalesce(1.0 / (_RRF_K + semantic.c.rank), 0.0)
+                + func.coalesce(1.0 / (_RRF_K + keyword.c.rank), 0.0)
+            ).label("score"),
+        )
+        .select_from(  # full outer join keeps chunks found by only one leg
+            semantic.outerjoin(keyword, semantic.c.id == keyword.c.id, full=True)
+        )
+        .join(Chunk, Chunk.id == func.coalesce(semantic.c.id, keyword.c.id))
+        .options(joinedload(Chunk.document))  # load each chunk's document too
+        .order_by(text("score DESC"))
+        .limit(candidate_pool)
+    )
+
+    result = await db_session.execute(fused)
+    return result.all()
+
+
+def _group_into_documents(rows, *, top_k: int) -> list[DocumentHit]:
+    """Bucket fused rows per document and build hits for the first ``top_k`` seen.
+
+    Documents keep the order in which their first row appears in ``rows``; that
+    first row also supplies the document-level score.
+    """
+    # Dicts preserve insertion order, so ``grouped`` doubles as the ordering.
+    grouped: dict[int, tuple[Document, float, list[ChunkHit]]] = {}
+    for chunk, score in rows:
+        owner = chunk.document
+        # setdefault keeps the entry (and score) created by the first row seen.
+        _, _, bucket = grouped.setdefault(owner.id, (owner, float(score), []))
+        bucket.append(
+            ChunkHit(
+                chunk_id=chunk.id,
+                content=chunk.content,
+                position=chunk.position,
+                score=float(score),
+            )
+        )
+
+    # Slice before building so only the kept documents get a ``DocumentHit``.
+    kept = list(grouped.items())[:top_k]
+    return [
+        DocumentHit(
+            document_id=document_id,
+            title=owner.title,
+            document_type=_type_value(owner),
+            metadata=owner.document_metadata or {},
+            score=first_score,
+            chunks=_reading_order(bucket),
+        )
+        for document_id, (owner, first_score, bucket) in kept
+    ]
+
+
+def _reading_order(chunks: list[ChunkHit]) -> list[ChunkHit]:
+    """Cap the chunks at the highest-scoring ones, then return them by position."""
+    by_relevance = sorted(chunks, key=lambda hit: hit.score, reverse=True)
+    kept = by_relevance[:_MAX_PASSAGES_PER_DOC]
+    kept.sort(key=lambda hit: hit.position)  # stable: position ties keep score order
+    return kept
+
+
+def _type_value(document: Document) -> str | None:
+    kind = getattr(document, "document_type", None)  # attribute may be absent
+    return None if kind is None else kind.value
+
+
+__all__ = ["search_chunks"]
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/retrieval/models.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/retrieval/models.py
new file mode 100644
index 000000000..4c4174a4f
--- /dev/null
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/retrieval/models.py
@@ -0,0 +1,47 @@
+"""Value objects for knowledge-base retrieval: the query scope and raw hits.
+
+``SearchScope`` is the optional filter a search runs under. ``DocumentHit`` /
+``ChunkHit`` are the retriever's typed output — matched chunks grouped by their
+document — which the adapter turns into renderable ``RenderableDocument``s.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from datetime import datetime
+from typing import Any
+
+
+@dataclass(frozen=True)
+class SearchScope:
+    """Filters narrowing a search; ``None``/empty means "whole knowledge base"."""
+
+    document_types: tuple[str, ...] | None = None  # ``DocumentType`` member names
+    document_ids: tuple[int, ...] | None = None  # restrict to these document ids
+    start_date: datetime | None = None  # inclusive lower bound on ``updated_at``
+    end_date: datetime | None = None  # inclusive upper bound on ``updated_at``
+
+
+@dataclass(frozen=True)
+class ChunkHit:
+    """One matched chunk, with the position that orders it within its document."""
+
+    chunk_id: int  # id of the matched chunk
+    content: str  # the chunk's text
+    position: int  # sort key used to put a document's chunks in reading order
+    score: float  # relevance score assigned to this chunk by the search
+
+
+@dataclass(frozen=True)
+class DocumentHit:
+    """A document and the chunks that matched the query, ordered by position."""
+
+    document_id: int  # id of the document the chunks belong to
+    title: str
+    document_type: str | None  # type value as a string; ``None`` when unknown
+    metadata: dict[str, Any]  # document metadata mapping (may be empty)
+    score: float  # document-level relevance score
+    chunks: list[ChunkHit] = field(default_factory=list)  # fresh list per instance
+
+
+__all__ = ["ChunkHit", "DocumentHit", "SearchScope"]
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/retrieval/reranking.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/retrieval/reranking.py
new file mode 100644
index 000000000..0e3387018
--- /dev/null
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/retrieval/reranking.py
@@ -0,0 +1,51 @@
+"""Reorder retrieved documents with the configured reranker (no-op if disabled).
+
+Ranking is by concatenated matched-chunk content; ``DocumentHit`` order is
+rewritten to follow the reranker's result.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from .models import DocumentHit
+
+if TYPE_CHECKING:
+    from app.services.reranker_service import RerankerService
+
+
+def rerank_hits(
+    query: str,  # text the reranker scores each document against
+    hits: list[DocumentHit],  # retriever output, in retrieval order
+    reranker: RerankerService | None,  # ``None`` disables reranking
+) -> list[DocumentHit]:
+    """Return ``hits`` in reranker order, or unchanged if reranking is off/unusable."""
+    if reranker is None or len(hits) < 2:
+        return hits
+
+    hit_by_id = {hit.document_id: hit for hit in hits}
+    ranked = reranker.rerank_documents(query, [_as_document(hit) for hit in hits])
+    reordered = [
+        found
+        for doc in ranked
+        if (found := hit_by_id.pop(doc.get("document_id"), None)) is not None
+    ]
+    # pop() spends each id once: duplicated, dropped or garbled ids all fall back.
+    return reordered if len(reordered) == len(hits) else hits
+
+
+def _as_document(hit: DocumentHit) -> dict[str, Any]:
+    """Flatten ``hit`` into the dict payload passed to ``rerank_documents``."""
+    # Matched chunks are joined by blank lines into one text for the reranker.
+    text = "\n\n".join(passage.content for passage in hit.chunks)
+    payload: dict[str, Any] = {"document_id": hit.document_id, "content": text}
+    payload["score"] = hit.score
+    payload["document"] = {
+        "id": hit.document_id,
+        "title": hit.title,
+        "document_type": hit.document_type,
+    }
+    return payload
+
+
+__all__ = ["rerank_hits"]
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/retrieval/service.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/retrieval/service.py
new file mode 100644
index 000000000..e9cfa18dd
--- /dev/null
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/retrieval/service.py
@@ -0,0 +1,66 @@
+"""Search the knowledge base and render it as model-facing ``<retrieved_context>``.
+
+The retrieval spine end to end: hybrid search → rerank → adapt → render, with
+each shown passage registered for ``[n]`` citation along the way.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
+from app.agents.chat.multi_agent_chat.shared.document_render import (
+    render_search_context,
+)
+
+from .adapter import to_renderable_document
+from .hybrid_search import search_chunks
+from .models import DocumentHit, SearchScope
+from .reranking import rerank_hits
+
+if TYPE_CHECKING:
+    from app.services.reranker_service import RerankerService
+
+_DEFAULT_TOP_K = 10
+
+
+async def search_knowledge_base_context(
+    db_session: AsyncSession,
+    *,
+    search_space_id: int,
+    query: str,
+    registry: CitationRegistry,
+    scope: SearchScope | None = None,
+    reranker: RerankerService | None = None,
+    top_k: int = _DEFAULT_TOP_K,
+) -> str | None:
+    """Search the knowledge base for ``query`` and render the hits via ``registry``.
+
+    Returns the rendered block; ``None`` means nothing matched, so skip the block.
+    """
+    found = await search_chunks(
+        db_session,
+        search_space_id=search_space_id,
+        query=query,
+        scope=SearchScope() if not scope else scope,  # no scope → unfiltered search
+        top_k=top_k,
+    )
+    return build_context(query, found, registry, reranker=reranker)
+
+
+def build_context(
+    query: str,
+    hits: list[DocumentHit],
+    registry: CitationRegistry,
+    *,
+    reranker: RerankerService | None = None,
+) -> str | None:
+    """Rerank ``hits``, adapt them to renderables, and render; no retrieval here."""
+    ordered_hits = rerank_hits(query, hits, reranker)
+    renderables = list(map(to_renderable_document, ordered_hits))
+    return render_search_context(renderables, registry)
+
+
+__all__ = ["build_context", "search_knowledge_base_context"]
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/filesystem_state.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/filesystem_state.py
index 41bed9d62..b00670615 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/filesystem_state.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/filesystem_state.py
@@ -13,9 +13,8 @@ extra fields needed to implement Postgres-backed virtual filesystem semantics:
 * ``dirty_paths`` — paths whose state file content differs from DB.
 * ``dirty_path_tool_calls`` — sidecar map ``path -> latest tool_call_id`` for
   dirty paths; used to bind the per-path snapshot to an action_id.
-* ``kb_priority`` — top-K priority hints rendered into a system message.
-* ``kb_matched_chunk_ids`` — internal hand-off for matched-chunk highlighting.
 * ``kb_anon_doc`` — Redis-loaded anonymous document (if any).
+* ``citation_registry`` — per-conversation ``[n]`` -> source map for citations.
 * ``tree_version`` — bumped by persistence; invalidates the tree render cache.
 * ``workspace_tree_text`` — pre-rendered ``<workspace_tree>`` body for the turn.
 
@@ -30,9 +29,11 @@ from typing import Annotated, Any, NotRequired
 from deepagents.middleware.filesystem import FilesystemState
 from typing_extensions import TypedDict
 
+from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
 from app.agents.chat.multi_agent_chat.shared.receipts.receipt import Receipt
 from app.agents.chat.multi_agent_chat.shared.state.reducers import (
     _add_unique_reducer,
+    _citation_registry_merge_reducer,
     _dict_merge_with_tombstones_reducer,
     _int_counter_merge_reducer,
     _list_append_reducer,
@@ -67,14 +68,6 @@ class PendingDelete(TypedDict, total=False):
     tool_call_id: str
 
 
-class KbPriorityEntry(TypedDict, total=False):
-    path: str
-    score: float
-    document_id: int | None
-    title: str
-    mentioned: bool
-
-
 class KbAnonDoc(TypedDict, total=False):
     """In-memory anonymous-session document loaded from Redis."""
 
@@ -159,15 +152,16 @@ class SurfSenseFilesystemState(FilesystemState):
     to the latest action_id (the one the user is most likely to revert).
     """
 
-    kb_priority: NotRequired[Annotated[list[KbPriorityEntry], _replace_reducer]]
-    """Top-K priority hints rendered as a system message before the user turn."""
-
-    kb_matched_chunk_ids: NotRequired[Annotated[dict[int, list[int]], _replace_reducer]]
-    """Internal: ``Document.id`` -> list of matched chunk IDs from hybrid search."""
-
     kb_anon_doc: NotRequired[Annotated[KbAnonDoc | None, _replace_reducer]]
     """Anonymous-session document loaded from Redis (read-only, no DB row)."""
 
+    citation_registry: NotRequired[
+        Annotated[CitationRegistry, _citation_registry_merge_reducer]
+    ]
+    """Per-conversation ``[n]`` -> source map; written by retrieval, read by the
+    normalizer. Merges (union, find-or-create) so parallel/subagent registrations
+    stay globally consistent instead of clobbering each other."""
+
     tree_version: NotRequired[Annotated[int, _replace_reducer]]
     """Monotonically increasing counter; bumped when commits change the KB tree."""
 
@@ -206,7 +200,6 @@ class SurfSenseFilesystemState(FilesystemState):
 
 __all__ = [
     "KbAnonDoc",
-    "KbPriorityEntry",
     "PendingDelete",
     "PendingMove",
     "SurfSenseFilesystemState",
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/reducers.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/reducers.py
index c7b7685f0..3a9cc67b1 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/reducers.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/reducers.py
@@ -2,7 +2,7 @@
 
 These reducers back the extra state fields used by the cloud-mode filesystem
 agent (`cwd`, `staged_dirs`, `pending_moves`, `dirty_paths`, `doc_id_by_path`,
-`kb_priority`, `kb_matched_chunk_ids`, `kb_anon_doc`, `tree_version`).
+`kb_anon_doc`, `tree_version`).
 
 Tools mutate these fields ONLY via `Command(update={...})` returns; the
 reducers are responsible for merging successive updates atomically and for
@@ -20,6 +20,8 @@ from __future__ import annotations
 
 from typing import Any, Final, TypeVar
 
+from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
+
 _CLEAR: Final[str] = "\x00__SURFSENSE_FILESYSTEM_CLEAR__\x00"
 """Reset sentinel; pass it inside a list/dict update to request a reset.
 
@@ -204,6 +206,41 @@ def _int_counter_merge_reducer(
     return base
 
 
+def _as_registry(value: Any) -> CitationRegistry | None:
+    """Normalize a raw state value to a ``CitationRegistry`` (or ``None``).
+
+    Updates may reach the reducer already serialized (``Command.update`` goes
+    through ``ormsgpack`` first), i.e. as a plain ``dict`` instead of a model.
+    """
+    if isinstance(value, CitationRegistry):
+        return value
+    if isinstance(value, dict):
+        # Serialized form: rebuild the model from its dict payload.
+        return CitationRegistry.model_validate(value)
+    # ``None`` and every other type mean "no registry".
+    return None
+
+
+def _citation_registry_merge_reducer(
+    left: Any,
+    right: Any,
+) -> CitationRegistry | None:
+    """Combine the current and incoming citation registries via ``merge``.
+
+    Both sides are coerced with :func:`_as_registry` first. When only one side
+    holds a registry it is returned as-is; when both do, the result is
+    ``left.merge(right)`` (see :meth:`CitationRegistry.merge`), so registrations
+    from forked branches are combined rather than one replacing the other.
+    """
+    # Either side may arrive as a model, a serialized dict, or ``None``.
+    incoming = _as_registry(right)
+    current = _as_registry(left)
+    if current is None or incoming is None:
+        # At most one side is usable: hand back whichever it is (maybe ``None``).
+        return current if incoming is None else incoming
+    return current.merge(incoming)
+
+
 def _initial_filesystem_state() -> dict[str, Any]:
     """Default empty values for SurfSense filesystem state fields.
 
@@ -221,8 +258,6 @@ def _initial_filesystem_state() -> dict[str, Any]:
         "doc_id_by_path": {},
         "dirty_paths": [],
         "dirty_path_tool_calls": {},
-        "kb_priority": [],
-        "kb_matched_chunk_ids": {},
         "kb_anon_doc": None,
         "tree_version": 0,
     }
@@ -231,6 +266,7 @@ def _initial_filesystem_state() -> dict[str, Any]:
 __all__ = [
     "_CLEAR",
     "_add_unique_reducer",
+    "_citation_registry_merge_reducer",
     "_dict_merge_with_tombstones_reducer",
     "_initial_filesystem_state",
     "_int_counter_merge_reducer",
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/deliverables/tools/knowledge_base.py b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/deliverables/tools/knowledge_base.py
deleted file mode 100644
index d89124990..000000000
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/deliverables/tools/knowledge_base.py
+++ /dev/null
@@ -1,762 +0,0 @@
-"""
-Knowledge base search tool for the SurfSense agent.
-
-This module provides:
-- Connector constants and normalization
-- Async knowledge base search across multiple connectors
-- Document formatting for LLM context
-"""
-
-import asyncio
-import contextlib
-import json
-import re
-import time
-from datetime import datetime
-from typing import Any
-
-from sqlalchemy.ext.asyncio import AsyncSession
-
-from app.db import NATIVE_TO_LEGACY_DOCTYPE, shielded_async_session
-from app.services.connector_service import ConnectorService
-from app.utils.perf import get_perf_logger
-
-# Connectors that call external live-search APIs. These are handled by the
-# ``web_search`` tool and must be excluded from knowledge-base searches.
-_LIVE_SEARCH_CONNECTORS: set[str] = {
-    "TAVILY_API",
-    "LINKUP_API",
-    "BAIDU_SEARCH_API",
-}
-
-# Patterns that indicate the query has no meaningful search signal.
-# plainto_tsquery('english', '*') produces an empty tsquery and an embedding
-# of '*' is random noise, so both keyword and semantic search degrade to
-# arbitrary ordering — large documents (many chunks) dominate by chance.
-_DEGENERATE_QUERY_RE = re.compile(
-    r"^[\s*?_.#@!\-/\\]+$"  # only wildcards, punctuation, whitespace
-)
-
-# Max chunks per document when doing a recency-based browse instead of
-# a real search.  We want breadth (many docs) over depth (many chunks).
-_BROWSE_MAX_CHUNKS_PER_DOC = 5
-
-
-def _is_degenerate_query(query: str) -> bool:
-    """Return True when the query carries no meaningful search signal.
-
-    Catches wildcard patterns (``*``, ``**``), empty / whitespace-only
-    strings, and single-character non-word tokens.  These queries cause
-    both keyword search (empty tsquery) and semantic search (meaningless
-    embedding) to return effectively random results.
-    """
-    stripped = query.strip()
-    if not stripped:
-        return True
-    return bool(_DEGENERATE_QUERY_RE.match(stripped))
-
-
-async def _browse_recent_documents(
-    search_space_id: int,
-    document_type: str | list[str] | None,
-    top_k: int,
-    start_date: datetime | None,
-    end_date: datetime | None,
-) -> list[dict[str, Any]]:
-    """Return the most-recent documents (recency-ordered, no search ranking).
-
-    Used as a fallback when the search query is degenerate (e.g. ``*``) and
-    semantic / keyword search would produce arbitrary results.  Returns
-    document-grouped dicts in the same shape as ``_combined_rrf_search``
-    so the rest of the pipeline works unchanged.
-    """
-    from sqlalchemy import select
-    from sqlalchemy.orm import joinedload
-
-    from app.db import Chunk, Document, DocumentType
-
-    perf = get_perf_logger()
-    t0 = time.perf_counter()
-
-    base_conditions = [Document.search_space_id == search_space_id]
-
-    if document_type is not None:
-        type_list = (
-            document_type if isinstance(document_type, list) else [document_type]
-        )
-        doc_type_enums = []
-        for dt in type_list:
-            if isinstance(dt, str):
-                with contextlib.suppress(KeyError):
-                    doc_type_enums.append(DocumentType[dt])
-            else:
-                doc_type_enums.append(dt)
-        if not doc_type_enums:
-            return []
-        if len(doc_type_enums) == 1:
-            base_conditions.append(Document.document_type == doc_type_enums[0])
-        else:
-            base_conditions.append(Document.document_type.in_(doc_type_enums))
-
-    if start_date is not None:
-        base_conditions.append(Document.updated_at >= start_date)
-    if end_date is not None:
-        base_conditions.append(Document.updated_at <= end_date)
-
-    async with shielded_async_session() as session:
-        doc_query = (
-            select(Document)
-            .options(joinedload(Document.search_space))
-            .where(*base_conditions)
-            .order_by(Document.updated_at.desc())
-            .limit(top_k)
-        )
-        result = await session.execute(doc_query)
-        documents = result.scalars().unique().all()
-
-        if not documents:
-            return []
-
-        doc_ids = [d.id for d in documents]
-
-        chunk_query = (
-            select(Chunk)
-            .where(Chunk.document_id.in_(doc_ids))
-            .order_by(Chunk.document_id, Chunk.position, Chunk.id)
-        )
-        chunk_result = await session.execute(chunk_query)
-        raw_chunks = chunk_result.scalars().all()
-
-    doc_chunk_counts: dict[int, int] = {}
-    doc_chunks: dict[int, list[dict]] = {d.id: [] for d in documents}
-    for chunk in raw_chunks:
-        did = chunk.document_id
-        count = doc_chunk_counts.get(did, 0)
-        if count < _BROWSE_MAX_CHUNKS_PER_DOC:
-            doc_chunks[did].append({"chunk_id": chunk.id, "content": chunk.content})
-            doc_chunk_counts[did] = count + 1
-
-    results: list[dict[str, Any]] = []
-    for doc in documents:
-        chunks_list = doc_chunks.get(doc.id, [])
-        results.append(
-            {
-                "document_id": doc.id,
-                "content": "\n\n".join(
-                    c["content"] for c in chunks_list if c.get("content")
-                ),
-                "score": 0.0,
-                "chunks": chunks_list,
-                "document": {
-                    "id": doc.id,
-                    "title": doc.title,
-                    "document_type": doc.document_type.value
-                    if getattr(doc, "document_type", None)
-                    else None,
-                    "metadata": doc.document_metadata or {},
-                },
-                "source": doc.document_type.value
-                if getattr(doc, "document_type", None)
-                else None,
-            }
-        )
-
-    perf.info(
-        "[kb_browse] recency browse in %.3fs docs=%d space=%d type=%s",
-        time.perf_counter() - t0,
-        len(results),
-        search_space_id,
-        document_type,
-    )
-    return results
-
-
-# =============================================================================
-# Connector Constants and Normalization
-# =============================================================================
-
-# Canonical connector values used internally by ConnectorService
-# Includes all document types and search source connectors
-_ALL_CONNECTORS: list[str] = [
-    "EXTENSION",
-    "FILE",
-    "SLACK_CONNECTOR",
-    "TEAMS_CONNECTOR",
-    "NOTION_CONNECTOR",
-    "YOUTUBE_VIDEO",
-    "GITHUB_CONNECTOR",
-    "ELASTICSEARCH_CONNECTOR",
-    "LINEAR_CONNECTOR",
-    "JIRA_CONNECTOR",
-    "CONFLUENCE_CONNECTOR",
-    "CLICKUP_CONNECTOR",
-    "GOOGLE_CALENDAR_CONNECTOR",
-    "GOOGLE_GMAIL_CONNECTOR",
-    "GOOGLE_DRIVE_FILE",
-    "DISCORD_CONNECTOR",
-    "AIRTABLE_CONNECTOR",
-    "LUMA_CONNECTOR",
-    "NOTE",
-    "BOOKSTACK_CONNECTOR",
-    "CRAWLED_URL",
-    "CIRCLEBACK",
-    "OBSIDIAN_CONNECTOR",
-    "ONEDRIVE_FILE",
-    "DROPBOX_FILE",
-]
-
-# Human-readable descriptions for each connector type
-# Used for generating dynamic docstrings and informing the LLM
-CONNECTOR_DESCRIPTIONS: dict[str, str] = {
-    "EXTENSION": "Web content saved via SurfSense browser extension (personal browsing history)",
-    "FILE": "User-uploaded documents (PDFs, Word, etc.) (personal files)",
-    "NOTE": "SurfSense Notes (notes created inside SurfSense)",
-    "SLACK_CONNECTOR": "Slack conversations and shared content (personal workspace communications)",
-    "TEAMS_CONNECTOR": "Microsoft Teams messages and conversations (personal Teams communications)",
-    "NOTION_CONNECTOR": "Notion workspace pages and databases (personal knowledge management)",
-    "YOUTUBE_VIDEO": "YouTube video transcripts and metadata (personally saved videos)",
-    "GITHUB_CONNECTOR": "GitHub repository content and issues (personal repositories and interactions)",
-    "ELASTICSEARCH_CONNECTOR": "Elasticsearch indexed documents and data (personal Elasticsearch instances)",
-    "LINEAR_CONNECTOR": "Linear project issues and discussions (personal project management)",
-    "JIRA_CONNECTOR": "Jira project issues, tickets, and comments (personal project tracking)",
-    "CONFLUENCE_CONNECTOR": "Confluence pages and comments (personal project documentation)",
-    "CLICKUP_CONNECTOR": "ClickUp tasks and project data (personal task management)",
-    "GOOGLE_CALENDAR_CONNECTOR": "Google Calendar events, meetings, and schedules (personal calendar)",
-    "GOOGLE_GMAIL_CONNECTOR": "Google Gmail emails and conversations (personal emails)",
-    "GOOGLE_DRIVE_FILE": "Google Drive files and documents (personal cloud storage)",
-    "DISCORD_CONNECTOR": "Discord server conversations and shared content (personal community)",
-    "AIRTABLE_CONNECTOR": "Airtable records, tables, and database content (personal data)",
-    "LUMA_CONNECTOR": "Luma events and meetings",
-    "WEBCRAWLER_CONNECTOR": "Webpages indexed by SurfSense (personally selected websites)",
-    "CRAWLED_URL": "Webpages indexed by SurfSense (personally selected websites)",
-    "BOOKSTACK_CONNECTOR": "BookStack pages (personal documentation)",
-    "CIRCLEBACK": "Circleback meeting notes, transcripts, and action items",
-    "OBSIDIAN_CONNECTOR": "Obsidian vault notes and markdown files (personal notes)",
-    "ONEDRIVE_FILE": "Microsoft OneDrive files and documents (personal cloud storage)",
-    "DROPBOX_FILE": "Dropbox files and documents (cloud storage)",
-}
-
-
-def _normalize_connectors(
-    connectors_to_search: list[str] | None,
-    available_connectors: list[str] | None = None,
-) -> list[str]:
-    """Normalize model-supplied connectors to canonical ConnectorService types.
-
-    Maps user-facing aliases (e.g. WEBCRAWLER_CONNECTOR), drops unknowns, and
-    constrains to ``available_connectors`` when given. Empty input defaults to
-    all available connectors (minus live-search ones).
-    """
-    valid_set = (
-        set(available_connectors) if available_connectors else set(_ALL_CONNECTORS)
-    )
-    valid_set -= _LIVE_SEARCH_CONNECTORS
-
-    if not connectors_to_search:
-        base = (
-            list(available_connectors)
-            if available_connectors
-            else list(_ALL_CONNECTORS)
-        )
-        return [c for c in base if c not in _LIVE_SEARCH_CONNECTORS]
-
-    normalized: list[str] = []
-    for raw in connectors_to_search:
-        c = (raw or "").strip().upper()
-        if not c:
-            continue
-        if c == "WEBCRAWLER_CONNECTOR":
-            c = "CRAWLED_URL"
-        normalized.append(c)
-
-    # De-dupe (order-preserving), keeping only known + available connectors.
-    seen: set[str] = set()
-    out: list[str] = []
-    for c in normalized:
-        if c in seen:
-            continue
-        if c not in _ALL_CONNECTORS:
-            continue
-        if c not in valid_set:
-            continue
-        seen.add(c)
-        out.append(c)
-
-    # Nothing matched: fall back to all available.
-    if not out:
-        base = (
-            list(available_connectors)
-            if available_connectors
-            else list(_ALL_CONNECTORS)
-        )
-        return [c for c in base if c not in _LIVE_SEARCH_CONNECTORS]
-    return out
-
-
-# =============================================================================
-# Document Formatting
-# =============================================================================
-
-
-# Fraction of the model's context window (in characters) that a single tool
-# result is allowed to occupy.  The remainder is reserved for system prompt,
-# conversation history, and model output.  With ~4 chars/token this gives a
-# tool result ≈ 25 % of the context budget in tokens.
-_TOOL_OUTPUT_CONTEXT_FRACTION = 0.25
-_CHARS_PER_TOKEN = 4
-
-# Hard-floor / ceiling so the budget is always sensible regardless of what
-# the model reports.
-_MIN_TOOL_OUTPUT_CHARS = 20_000  # ~5K tokens
-_MAX_TOOL_OUTPUT_CHARS = 200_000  # ~50K tokens
-_MAX_CHUNK_CHARS = 8_000
-
-# Rank-adaptive per-document budget allocation.
-# Top-ranked (most relevant) documents get a larger share of the budget so
-# we pack as much high-quality context as possible.
-#
-#   fraction(rank) = _TOP_DOC_BUDGET_FRACTION / (1 + rank * _RANK_DECAY)
-#
-# Examples (128K budget, 8K chunk cap):
-#   rank 0 → 40% → 6 chunks   |  rank 3 → 19% → 3 chunks
-#   rank 1 → 30% → 4 chunks   |  rank 10 → 10% → 3 chunks (floor)
-#   rank 2 → 24% → 3 chunks   |
-_TOP_DOC_BUDGET_FRACTION = 0.40
-_RANK_DECAY = 0.35
-_MIN_CHUNKS_PER_DOC = 3
-
-
-def _compute_tool_output_budget(max_input_tokens: int | None) -> int:
-    """Derive a character budget from the model's context window.
-
-    Uses ``litellm.get_model_info`` via the value already resolved by
-    ``ChatLiteLLMRouter`` / ``ChatLiteLLM`` and passed through the dependency
-    chain as ``max_input_tokens``.  Falls back to a conservative default when
-    the value is unavailable.
-    """
-    if max_input_tokens is None or max_input_tokens <= 0:
-        return _MIN_TOOL_OUTPUT_CHARS  # conservative fallback
-
-    budget = int(max_input_tokens * _CHARS_PER_TOKEN * _TOOL_OUTPUT_CONTEXT_FRACTION)
-    return max(_MIN_TOOL_OUTPUT_CHARS, min(budget, _MAX_TOOL_OUTPUT_CHARS))
-
-
-_INTERNAL_METADATA_KEYS: frozenset[str] = frozenset(
-    {
-        "message_id",
-        "thread_id",
-        "event_id",
-        "calendar_id",
-        "google_drive_file_id",
-        "onedrive_file_id",
-        "dropbox_file_id",
-        "page_id",
-        "issue_id",
-        "connector_id",
-    }
-)
-
-
-def format_documents_for_context(
-    documents: list[dict[str, Any]],
-    *,
-    max_chars: int = _MAX_TOOL_OUTPUT_CHARS,
-    max_chunk_chars: int = _MAX_CHUNK_CHARS,
-    max_chunks_per_doc: int = 0,
-) -> str:
-    """Format retrieved documents into an XML context string for the LLM.
-
-    Documents are emitted highest-relevance first until ``max_chars`` is hit.
-    ``max_chunks_per_doc=0`` auto-computes a rank-adaptive cap so top results get
-    more chunks and no single large document monopolizes the budget.
-    """
-    if not documents:
-        return ""
-
-    # Group chunks by document id, preserving chunk_id so [citation:123] works.
-    # ConnectorService returns document-grouped results ({document, chunks, source}).
-    grouped: dict[str, dict[str, Any]] = {}
-
-    for doc in documents:
-        document_info = (doc.get("document") or {}) if isinstance(doc, dict) else {}
-        metadata = (
-            (document_info.get("metadata") or {})
-            if isinstance(document_info, dict)
-            else {}
-        )
-        if not metadata and isinstance(doc, dict):
-            # Some result shapes may place metadata at the top level.
-            metadata = doc.get("metadata") or {}
-
-        source = (
-            (doc.get("source") if isinstance(doc, dict) else None)
-            or document_info.get("document_type")
-            or metadata.get("document_type")
-            or "UNKNOWN"
-        )
-
-        # Identity: prefer document_id, else type+title+url.
-        document_id_val = document_info.get("id")
-        title = (
-            document_info.get("title") or metadata.get("title") or "Untitled Document"
-        )
-        url = (
-            metadata.get("url")
-            or metadata.get("source")
-            or metadata.get("page_url")
-            or ""
-        )
-
-        doc_key = (
-            str(document_id_val)
-            if document_id_val is not None
-            else f"{source}::{title}::{url}"
-        )
-
-        if doc_key not in grouped:
-            grouped[doc_key] = {
-                "document_id": document_id_val
-                if document_id_val is not None
-                else doc_key,
-                "document_type": metadata.get("document_type") or source,
-                "title": title,
-                "url": url,
-                "metadata": metadata,
-                "chunks": [],
-            }
-
-        # Prefer document-grouped chunks when present.
-        chunks_list = doc.get("chunks") if isinstance(doc, dict) else None
-        if isinstance(chunks_list, list) and chunks_list:
-            for ch in chunks_list:
-                if not isinstance(ch, dict):
-                    continue
-                chunk_id = ch.get("chunk_id") or ch.get("id")
-                content = (ch.get("content") or "").strip()
-                if not content:
-                    continue
-                grouped[doc_key]["chunks"].append(
-                    {"chunk_id": chunk_id, "content": content}
-                )
-            continue
-
-        # Fallback: treat this as a flat chunk-like object
-        if not isinstance(doc, dict):
-            continue
-        chunk_id = doc.get("chunk_id") or doc.get("id")
-        content = (doc.get("content") or "").strip()
-        if not content:
-            continue
-        grouped[doc_key]["chunks"].append({"chunk_id": chunk_id, "content": content})
-
-    # Live search connectors whose results should be cited by URL rather than
-    # a numeric chunk_id (the numeric IDs are meaningless auto-incremented counters).
-    live_search_connectors = {
-        "TAVILY_API",
-        "LINKUP_API",
-        "BAIDU_SEARCH_API",
-    }
-
-    parts: list[str] = []
-    total_chars = 0
-    total_docs = len(grouped)
-
-    for doc_idx, g in enumerate(grouped.values()):
-        metadata_clean = {
-            k: v for k, v in g["metadata"].items() if k not in _INTERNAL_METADATA_KEYS
-        }
-        metadata_json = json.dumps(metadata_clean, ensure_ascii=False)
-        is_live_search = g["document_type"] in live_search_connectors
-
-        doc_lines: list[str] = [
-            "<document>",
-            "<document_metadata>",
-            f"  <document_id>{g['document_id']}</document_id>",
-            f"  <document_type>{g['document_type']}</document_type>",
-            f"  <title><![CDATA[{g['title']}]]></title>",
-            f"  <url><![CDATA[{g['url']}]]></url>",
-            f"  <metadata_json><![CDATA[{metadata_json}]]></metadata_json>",
-            "</document_metadata>",
-            "",
-            "<document_content>",
-        ]
-
-        # Rank-adaptive per-document chunk cap: top results get more chunks.
-        if max_chunks_per_doc > 0:
-            chunks_allowed = max_chunks_per_doc
-        else:
-            doc_fraction = _TOP_DOC_BUDGET_FRACTION / (1 + doc_idx * _RANK_DECAY)
-            max_doc_chars = int(max_chars * doc_fraction)
-            xml_overhead = 500
-            chunks_allowed = max(
-                (max_doc_chars - xml_overhead) // max(max_chunk_chars, 1),
-                _MIN_CHUNKS_PER_DOC,
-            )
-
-        chunks = g["chunks"]
-        if len(chunks) > chunks_allowed:
-            chunks = chunks[:chunks_allowed]
-
-        for ch in chunks:
-            ch_content = ch["content"]
-            if max_chunk_chars and len(ch_content) > max_chunk_chars:
-                ch_content = ch_content[:max_chunk_chars] + "\n...(truncated)"
-            ch_id = g["url"] if (is_live_search and g["url"]) else ch["chunk_id"]
-            if ch_id is None:
-                doc_lines.append(f"  <chunk><![CDATA[{ch_content}]]></chunk>")
-            else:
-                doc_lines.append(
-                    f"  <chunk id='{ch_id}'><![CDATA[{ch_content}]]></chunk>"
-                )
-
-        doc_lines.extend(["</document_content>", "</document>", ""])
-
-        doc_xml = "\n".join(doc_lines)
-        doc_len = len(doc_xml)
-
-        if total_chars + doc_len > max_chars:
-            remaining = total_docs - doc_idx
-            if doc_idx == 0:
-                parts.append(doc_xml)
-                total_chars += doc_len
-            parts.append(
-                f"<!-- Output truncated: {remaining} more document(s) omitted "
-                f"(budget {max_chars} chars). Refine your query or reduce top_k "
-                f"to retrieve different results. -->"
-            )
-            break
-
-        parts.append(doc_xml)
-        total_chars += doc_len
-
-    result = "\n".join(parts).strip()
-
-    # Hard safety net: if the result is still over budget (e.g. a single massive
-    # first document), forcibly truncate with a closing comment.
-    if len(result) > max_chars:
-        truncation_msg = "\n<!-- ...output forcibly truncated to fit context window -->"
-        result = result[: max_chars - len(truncation_msg)] + truncation_msg
-
-    return result
-
-
-# =============================================================================
-# Knowledge Base Search
-# =============================================================================
-
-
-async def search_knowledge_base_async(
-    query: str,
-    search_space_id: int,
-    db_session: AsyncSession,
-    connector_service: ConnectorService,
-    connectors_to_search: list[str] | None = None,
-    top_k: int = 10,
-    start_date: datetime | None = None,
-    end_date: datetime | None = None,
-    available_connectors: list[str] | None = None,
-    available_document_types: list[str] | None = None,
-    max_input_tokens: int | None = None,
-) -> str:
-    """Search the knowledge base across connectors and return formatted results.
-
-    ``available_document_types`` lets local connectors with no indexed data be
-    skipped (no embedding / DB round-trip), and ``max_input_tokens`` sizes the
-    output to the model's context window.
-    """
-    perf = get_perf_logger()
-    t0 = time.perf_counter()
-
-    deduplicated = await search_knowledge_base_raw_async(
-        query=query,
-        search_space_id=search_space_id,
-        db_session=db_session,
-        connector_service=connector_service,
-        connectors_to_search=connectors_to_search,
-        top_k=top_k,
-        start_date=start_date,
-        end_date=end_date,
-        available_connectors=available_connectors,
-        available_document_types=available_document_types,
-    )
-
-    if not deduplicated:
-        return "No documents found in the knowledge base. The search space has no indexed content yet."
-
-    # Use browse chunk cap for degenerate queries, otherwise adaptive chunking.
-    max_chunks_per_doc = (
-        _BROWSE_MAX_CHUNKS_PER_DOC if _is_degenerate_query(query) else 0
-    )
-    output_budget = _compute_tool_output_budget(max_input_tokens)
-    result = format_documents_for_context(
-        deduplicated,
-        max_chars=output_budget,
-        max_chunks_per_doc=max_chunks_per_doc,
-    )
-
-    if len(result) > output_budget:
-        perf.warning(
-            "[kb_search] output STILL exceeds budget after format (%d > %d), "
-            "hard truncation should have fired",
-            len(result),
-            output_budget,
-        )
-
-    perf.info(
-        "[kb_search] TOTAL in %.3fs total_docs=%d deduped=%d output_chars=%d "
-        "budget=%d max_input_tokens=%s space=%d",
-        time.perf_counter() - t0,
-        len(deduplicated),
-        len(deduplicated),
-        len(result),
-        output_budget,
-        max_input_tokens,
-        search_space_id,
-    )
-    return result
-
-
-async def search_knowledge_base_raw_async(
-    query: str,
-    search_space_id: int,
-    db_session: AsyncSession,
-    connector_service: ConnectorService,
-    connectors_to_search: list[str] | None = None,
-    top_k: int = 10,
-    start_date: datetime | None = None,
-    end_date: datetime | None = None,
-    available_connectors: list[str] | None = None,
-    available_document_types: list[str] | None = None,
-    query_embedding: list[float] | None = None,
-) -> list[dict[str, Any]]:
-    """Search knowledge base and return raw document dicts (no XML formatting)."""
-    perf = get_perf_logger()
-    t0 = time.perf_counter()
-    all_documents: list[dict[str, Any]] = []
-
-    # Preserve the public signature for compatibility even if values are unused.
-    _ = (db_session, connector_service)
-
-    from app.agents.chat.multi_agent_chat.shared.date_filters import resolve_date_range
-
-    resolved_start_date, resolved_end_date = resolve_date_range(
-        start_date=start_date,
-        end_date=end_date,
-    )
-
-    connectors = _normalize_connectors(connectors_to_search, available_connectors)
-
-    if available_document_types:
-        doc_types_set = set(available_document_types)
-        connectors = [
-            c
-            for c in connectors
-            if c in doc_types_set
-            or NATIVE_TO_LEGACY_DOCTYPE.get(c, "") in doc_types_set
-        ]
-
-    if not connectors:
-        return []
-
-    if _is_degenerate_query(query):
-        perf.info(
-            "[kb_search_raw] degenerate query %r detected - recency browse",
-            query,
-        )
-        browse_connectors = connectors if connectors else [None]  # type: ignore[list-item]
-        expanded_browse = []
-        for connector in browse_connectors:
-            if connector is not None and connector in NATIVE_TO_LEGACY_DOCTYPE:
-                expanded_browse.append([connector, NATIVE_TO_LEGACY_DOCTYPE[connector]])
-            else:
-                expanded_browse.append(connector)
-        browse_results = await asyncio.gather(
-            *[
-                _browse_recent_documents(
-                    search_space_id=search_space_id,
-                    document_type=connector,
-                    top_k=top_k,
-                    start_date=resolved_start_date,
-                    end_date=resolved_end_date,
-                )
-                for connector in expanded_browse
-            ]
-        )
-        for docs in browse_results:
-            all_documents.extend(docs)
-    else:
-        if query_embedding is None:
-            from app.config import config as app_config
-
-            query_embedding = app_config.embedding_model_instance.embed(query)
-
-        max_parallel_searches = 4
-        semaphore = asyncio.Semaphore(max_parallel_searches)
-
-        async def _search_one_connector(connector: str) -> list[dict[str, Any]]:
-            try:
-                async with semaphore, shielded_async_session() as isolated_session:
-                    svc = ConnectorService(isolated_session, search_space_id)
-                    return await svc._combined_rrf_search(
-                        query_text=query,
-                        search_space_id=search_space_id,
-                        document_type=connector,
-                        top_k=top_k,
-                        start_date=resolved_start_date,
-                        end_date=resolved_end_date,
-                        query_embedding=query_embedding,
-                    )
-            except Exception as exc:
-                perf.warning("[kb_search_raw] connector=%s FAILED: %s", connector, exc)
-                return []
-
-        connector_results = await asyncio.gather(
-            *[_search_one_connector(connector) for connector in connectors]
-        )
-        for docs in connector_results:
-            all_documents.extend(docs)
-
-    seen_doc_ids: set[Any] = set()
-    seen_content_hashes: set[int] = set()
-    deduplicated: list[dict[str, Any]] = []
-
-    def _content_fingerprint(document: dict[str, Any]) -> int | None:
-        chunks = document.get("chunks")
-        if isinstance(chunks, list):
-            chunk_texts = []
-            for chunk in chunks:
-                if not isinstance(chunk, dict):
-                    continue
-                chunk_content = (chunk.get("content") or "").strip()
-                if chunk_content:
-                    chunk_texts.append(chunk_content)
-            if chunk_texts:
-                return hash("||".join(chunk_texts))
-        flat_content = (document.get("content") or "").strip()
-        if flat_content:
-            return hash(flat_content)
-        return None
-
-    for doc in all_documents:
-        doc_id = (doc.get("document", {}) or {}).get("id")
-        if doc_id is not None:
-            if doc_id in seen_doc_ids:
-                continue
-            seen_doc_ids.add(doc_id)
-            deduplicated.append(doc)
-            continue
-        content_hash = _content_fingerprint(doc)
-        if content_hash is not None and content_hash in seen_content_hashes:
-            continue
-        if content_hash is not None:
-            seen_content_hashes.add(content_hash)
-        deduplicated.append(doc)
-
-    deduplicated.sort(key=lambda doc: doc.get("score", 0), reverse=True)
-    perf.info(
-        "[kb_search_raw] done in %.3fs total=%d deduped=%d",
-        time.perf_counter() - t0,
-        len(all_documents),
-        len(deduplicated),
-    )
-    return deduplicated
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/deliverables/tools/report.py b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/deliverables/tools/report.py
index ea831b891..c80a2a565 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/deliverables/tools/report.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/deliverables/tools/report.py
@@ -23,6 +23,45 @@ from app.services.llm_service import get_agent_llm
 
 logger = logging.getLogger(__name__)
 
+
+def _report_search_types(
+    available_connectors: list[str] | None,
+    available_document_types: list[str] | None,
+) -> tuple[str, ...] | None:
+    """Build the document-type scope for the shared KB search.
+
+    Returns the sorted, de-duplicated union of both inputs as a tuple, which
+    narrows the scope; ``None`` (both empty) means "search every indexed type".
+    """
+    types: set[str] = set()
+    if available_document_types:
+        types.update(available_document_types)
+    if available_connectors:
+        types.update(available_connectors)
+    return tuple(sorted(types)) or None
+
+
+def _render_kb_hits_for_report(hits: list[Any]) -> str:
+    """Render KB hits as plain titled source text for the report writer.
+
+    Citations are intentionally omitted from reports for now, so no ``[n]``
+    labels or chunk ids are emitted — just titled document content for grounding.
+    """
+    from app.agents.chat.multi_agent_chat.shared.document_render import source_label
+
+    blocks: list[str] = []
+    for hit in hits:
+        label = source_label(hit.document_type, hit.metadata)
+        header = f"{hit.title} ({label})" if label else hit.title
+        body = "\n\n".join(
+            chunk.content.strip() for chunk in hit.chunks if chunk.content.strip()
+        )
+        if not body:
+            continue
+        blocks.append(f"## {header}\n\n{body}")
+    return "\n\n".join(blocks)
+
+
 # ─── Shared Formatting Rules ────────────────────────────────────────────────
 # Reusable formatting instructions appended to section-level and review prompts.
 
@@ -788,31 +827,46 @@ def create_generate_report_tool(
                     f"{query_count} queries: {search_queries[:5]}"
                 )
                 try:
-                    from .knowledge_base import search_knowledge_base_async
+                    from app.agents.chat.multi_agent_chat.shared.retrieval.hybrid_search import (
+                        search_chunks,
+                    )
+                    from app.agents.chat.multi_agent_chat.shared.retrieval.models import (
+                        DocumentHit,
+                        SearchScope,
+                    )
+
+                    scope = SearchScope(
+                        document_types=_report_search_types(
+                            available_connectors, available_document_types
+                        )
+                    )
 
                     # Each query gets its own short-lived session.
-                    async def _run_single_query(q: str) -> str:
+                    async def _run_single_query(q: str) -> list[DocumentHit]:
                         async with shielded_async_session() as kb_session:
-                            kb_connector_svc = ConnectorService(
-                                kb_session, search_space_id
-                            )
-                            return await search_knowledge_base_async(
-                                query=q,
+                            return await search_chunks(
+                                kb_session,
                                 search_space_id=search_space_id,
-                                db_session=kb_session,
-                                connector_service=kb_connector_svc,
+                                query=q,
+                                scope=scope,
                                 top_k=10,
-                                available_connectors=available_connectors,
-                                available_document_types=available_document_types,
                             )
 
-                    kb_results = await asyncio.gather(
+                    hits_per_query = await asyncio.gather(
                         *[_run_single_query(q) for q in search_queries[:5]]
                     )
 
-                    kb_text_parts = [r for r in kb_results if r and r.strip()]
-                    if kb_text_parts:
-                        kb_combined = "\n\n---\n\n".join(kb_text_parts)
+                    seen_doc_ids: set[int] = set()
+                    merged_hits: list[DocumentHit] = []
+                    for hits in hits_per_query:
+                        for hit in hits:
+                            if hit.document_id in seen_doc_ids:
+                                continue
+                            seen_doc_ids.add(hit.document_id)
+                            merged_hits.append(hit)
+
+                    kb_combined = _render_kb_hits_for_report(merged_hits)
+                    if kb_combined.strip():
                         if effective_source.strip():
                             effective_source = (
                                 effective_source
@@ -822,20 +876,17 @@ def create_generate_report_tool(
                         else:
                             effective_source = kb_combined
 
-                        # Count docs found (rough: count <document> tags)
-                        doc_count = kb_combined.count("<document>")
+                        doc_count = len(merged_hits)
                         dispatch_custom_event(
                             "report_progress",
                             {
                                 "phase": "kb_search_done",
-                                "message": f"Found {doc_count} relevant documents"
-                                if doc_count
-                                else f"Found results from {len(kb_text_parts)} queries",
+                                "message": f"Found {doc_count} relevant documents",
                             },
                         )
                         logger.info(
                             f"[generate_report] KB search added ~{len(kb_combined)} chars "
-                            f"from {len(kb_text_parts)} queries"
+                            f"from {doc_count} documents"
                         )
                     else:
                         dispatch_custom_event(
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/description_readonly.md b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/description_readonly.md
index e989e3ee6..11dcc5d11 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/description_readonly.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/description_readonly.md
@@ -2,4 +2,4 @@ Read-only specialist for the user's workspace (documents and folders). Use to fi
 
 Pass your full question as one string. The specialist runs in isolation: it cannot see this thread, so include any path hints, filters, or constraints it needs.
 
-The specialist returns plain prose with absolute paths and `[citation:<chunk_id>]` markers when claims came from KB-indexed chunks. Preserve those markers verbatim if you forward the answer.
+The specialist returns plain prose with absolute paths and `[n]` citation labels when claims came from KB-indexed documents. Preserve those labels verbatim if you forward the answer.
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md
index c4e36fc73..04be2f321 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md
@@ -6,10 +6,9 @@ You are the SurfSense knowledge base specialist for the user's `/documents/` wor
 
 - If the supervisor already provided a precise path (e.g. `/documents/notes/2026-05-11.md`), use it directly — skip the lookup steps below.
 - Otherwise, most requests reference documents by description (`"my meeting notes from last week"`, `"the design doc"`). Resolve them yourself:
-  1. Consult `<priority_documents>` — it's a hint about top-K likely matches, not a directive. Skip when the ranked entries don't fit the task.
-  2. Walk `<workspace_tree>` for descriptive folder/filename matches.
-  3. Use the `glob` tool for filename patterns the tree didn't surface, and the `grep` tool when the description points at *content* rather than a name.
-  4. Only return `status=blocked` with `missing_fields=["path"]` when the description is genuinely ambiguous after a thorough lookup.
+  1. Walk `<workspace_tree>` for descriptive folder/filename matches.
+  2. Use the `glob` tool for filename patterns the tree didn't surface, and the `grep` tool when the description points at *content* rather than a name.
+  3. Only return `status=blocked` with `missing_fields=["path"]` when the description is genuinely ambiguous after a thorough lookup.
 
 For writes (where you choose the path yourself):
 
@@ -35,42 +34,31 @@ Map outcomes to your `status`:
 
 You construct the structured `evidence` fields from your own knowledge of what you called and what you observed — the tools do not return them. Never report values you did not actually see.
 
-## Chunk citations in your prose
+## Citations in your prose
 
-When `read_file` returns a KB-indexed document under `/documents/`, the response includes `<chunk id='…'>` blocks. Whenever a fact in your `action_summary` or `evidence.content_excerpt` came from a specific chunk, append `[citation:<chunk_id>]` to the sentence stating that fact, using the **exact** id from the `<chunk id='…'>` tag. The caller relays these markers to the end user verbatim, and the UI resolves each id by exact match against the database, so a wrong id silently breaks the citation.
+When `read_file` returns a KB-indexed document under `/documents/`, it comes back as a `<document … view="full">` block whose passages are each prefixed with a bracketed numeric label, such as `[3]` or `[7]`. That `[n]` is the citation label. Whenever a fact in your `action_summary` or `evidence.content_excerpt` came from a specific passage, append that passage's `[n]` to the sentence stating the fact, copying the label **exactly** as shown. The caller relays these labels verbatim and the server resolves each one, so a wrong number silently breaks the citation.
 
-### Where chunk ids live in `read_file` output
+### Where the labels live in `read_file` output
 
-A KB document's XML has three numeric attributes — only **one** is a citation source:
+A KB document reads back like this — only the bracketed `[n]` is a citation label:
 
 ```
-<document>
-<document_metadata>
-  <document_id>42</document_id>          ← NOT a citation. Parent doc id; ignore for citations.
-  ...
-</document_metadata>
-<chunk_index>
-  <entry chunk_id="128" lines="14-22"/>  ← Index hint; the same id also appears below.
-  <entry chunk_id="129" lines="23-30" matched="true"/>
-</chunk_index>
-<document_content>
-  <chunk id='128'><![CDATA[…]]></chunk>  ← This is the citation source.
-  <chunk id='129'><![CDATA[…]]></chunk>
-</document_content>
+<document title="Q2 Roadmap" source="File" view="full">
+  [3] First milestone is …
+  [4] Second milestone is …
 </document>
 ```
 
 ### Rules
 
-- Use the **exact** id from a `<chunk id='…'>` tag whose content you actually quoted or paraphrased. Copy digit-for-digit; do **not** retype from memory.
-- Before emitting `[citation:N]`, confirm the literal substring `<chunk id='N'>` (or its index twin `chunk_id="N"`) appears in the tool result you are summarising this turn. If you can't see it, omit the citation.
-- Never cite `<document_id>` — that's the parent doc, not a chunk.
-- Never invent, normalise, shorten, or guess at adjacent ids. If unsure between two candidates, omit rather than pick.
+- Use the **exact** `[n]` shown next to the passage you actually quoted or paraphrased. Copy it digit-for-digit; do **not** retype from memory or renumber.
+- Before emitting an `[n]`, confirm that bracketed label appears in the `read_file` output you are summarising this turn. If you can't see it, omit the citation.
+- Labels are **not** sequential by position — a passage may be `[7]` while the one above it is `[3]` (numbering is shared across the whole conversation). Copy what you see; never guess an adjacent number.
+- Write the bare label `[n]` only — no `[citation:…]` wrapper, no markdown links, no parentheses, no footnote numbers.
+- Several passages behind one point → each in its own brackets with nothing between: `[3][4]`. Never `[3, 4]` and never a range like `[3-4]`.
 - Prefer **fewer accurate citations** over many speculative ones.
-- Multiple chunks supporting the same point → comma-separated and copied individually: `[citation:128], [citation:129]`.
-- Plain square brackets only — no markdown links, no parentheses, no footnote numbers.
-- Tool results without `<chunk id='…'>` (write/edit/move confirmations, `ls` / `glob` / `grep` listings, error strings) carry no chunk id and need none.
-- Populate `evidence.chunk_ids` with **only** ids you actually emitted in `[citation:…]` markers — same set, same digits.
+- Tool results without `[n]` labels (write/edit/move confirmations, `ls` / `glob` / `grep` listings, error strings) carry no label and need none.
+- Populate `evidence.citations` with **only** the labels you actually emitted in your prose — the same set, the same numbers.
 
 ## Examples
 
@@ -89,7 +77,7 @@ A KB document's XML has three numeric attributes — only **one** is a citation
       "path": "/documents/meetings/2026-05-11-meeting.md",
       "matched_candidates": null,
       "content_excerpt": null,
-      "chunk_ids": null
+      "citations": null
     },
     "next_step": null,
     "missing_fields": null,
@@ -100,7 +88,7 @@ A KB document's XML has three numeric attributes — only **one** is a citation
 **Example 2 — edit by inference:**
 
 - *Supervisor task:* `"Add a bullet about the new feature flag to my Q2 roadmap"`
-- *You:* search for the roadmap doc — check `<priority_documents>` and `<workspace_tree>` first; if neither surfaces it, widen with the `glob` tool (try filename patterns the user's language suggests) or the `grep` tool (search by content). Suppose `<priority_documents>` hits `/documents/planning/q2-roadmap.md` → `read_file("/documents/planning/q2-roadmap.md")` → `edit_file("/documents/planning/q2-roadmap.md", old, new)` → success.
+- *You:* search for the roadmap doc — check `<workspace_tree>` first; if it doesn't surface the doc, widen with the `glob` tool (try filename patterns the user's language suggests) or the `grep` tool (search by content). Suppose the tree hits `/documents/planning/q2-roadmap.md` → `read_file("/documents/planning/q2-roadmap.md")` → `edit_file("/documents/planning/q2-roadmap.md", old, new)` → success.
 - *Output:* `status=success`, evidence includes path and the inserted snippet.
 
 **Example 3 — blocked, multiple candidates:**
@@ -121,7 +109,7 @@ A KB document's XML has three numeric attributes — only **one** is a citation
         { "id": "/documents/design/auth-rework.md", "label": "Auth Rework" }
       ],
       "content_excerpt": null,
-      "chunk_ids": null
+      "citations": null
     },
     "next_step": "Ask the user which design doc to update.",
     "missing_fields": ["path"],
@@ -142,7 +130,7 @@ Return **only** one JSON object (no markdown or prose outside it):
     "path": string | null,
     "matched_candidates": [ { "id": string, "label": string } ] | null,
     "content_excerpt": string | null,
-    "chunk_ids": string[] | null
+    "citations": number[] | null
   },
   "next_step": string | null,
   "missing_fields": string[] | null,
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md
index 25dafa3df..e0f368bb2 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md
@@ -9,8 +9,7 @@ You are the SurfSense workspace specialist for the user's local folders.
   1. If you do not know which mounts exist, call `ls('/')` first.
   2. Walk likely folders with the `ls` and `list_tree` tools.
   3. Use the `glob` tool for filename patterns; use the `grep` tool when the description points at *content* rather than a name.
-  4. `<priority_documents>` lists top-K cloud-ingested docs, not local files — consult it only when the task spans both worlds (e.g. drafting a local note from a Notion source). Skip otherwise.
-  5. Only return `status=blocked` with `missing_fields=["path"]` when the description is genuinely ambiguous after a thorough lookup.
+  4. Only return `status=blocked` with `missing_fields=["path"]` when the description is genuinely ambiguous after a thorough lookup.
 
 For writes (where you choose the path yourself):
 
@@ -33,11 +32,11 @@ Map outcomes to your `status`:
 - Any other `"Error: …"` → `status=error` and relay the tool's message verbatim as `next_step`.
 - HITL rejection → `status=blocked` with `next_step="User declined this filesystem action. Do not retry."`.
 
-You construct the structured `evidence` fields from your own knowledge of what you called and what you observed — the tools do not return them. Never report values you did not actually see. (`chunk_ids` is always `null` in desktop mode — see "Chunk citations in your prose" below.)
+You construct the structured `evidence` fields from your own knowledge of what you called and what you observed — the tools do not return them. Never report values you did not actually see. (`citations` is always `null` in desktop mode — see "Citations in your prose" below.)
 
-## Chunk citations in your prose
+## Citations in your prose
 
-In desktop mode your filesystem tools read local files only, and local-file tool results do **not** carry `<chunk id='…'>` tags. Do not emit `[citation:…]` markers in `action_summary` or `evidence.content_excerpt`, and leave `evidence.chunk_ids` `null` — the absolute path is the only reference for local-file work.
+In desktop mode your filesystem tools read only local files, which are not KB-indexed and carry no `[n]` citation labels. Do not emit `[n]` or `[citation:…]` markers in `action_summary` or `evidence.content_excerpt`, and leave `evidence.citations` `null` — the absolute path is the only reference for local-file work.
 
 ## Examples
 
@@ -56,7 +55,7 @@ In desktop mode your filesystem tools read local files only, and local-file tool
       "path": "/notes/meetings/2026-05-11-meeting.md",
       "matched_candidates": null,
       "content_excerpt": null,
-      "chunk_ids": null
+      "citations": null
     },
     "next_step": null,
     "missing_fields": null,
@@ -88,7 +87,7 @@ In desktop mode your filesystem tools read local files only, and local-file tool
         { "id": "/projects/web/design/auth-rework.md", "label": "Auth Rework" }
       ],
       "content_excerpt": null,
-      "chunk_ids": null
+      "citations": null
     },
     "next_step": "Ask the user which design doc to update.",
     "missing_fields": ["path"],
@@ -109,7 +108,7 @@ Return **only** one JSON object (no markdown or prose outside it):
     "path": string | null,
     "matched_candidates": [ { "id": string, "label": string } ] | null,
     "content_excerpt": string | null,
-    "chunk_ids": string[] | null
+    "citations": number[] | null
   },
   "next_step": string | null,
   "missing_fields": string[] | null,
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md
index c7813e71d..10dd0c763 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md
@@ -6,9 +6,8 @@ You answer workspace questions for another agent. The end user does **not** see
 
 The caller's question often references documents by description (`"my meeting notes from last week"`, `"the design doc"`). Resolve them yourself:
 
-1. Consult `<priority_documents>` — a hint about top-K likely matches, not a directive. Skip when the ranked entries don't fit.
-2. Walk `<workspace_tree>` for descriptive folder/filename matches.
-3. Use `glob` for filename patterns the tree didn't surface, and `grep` when the description points at *content* rather than a name.
+1. Walk `<workspace_tree>` for descriptive folder/filename matches.
+2. Use `glob` for filename patterns the tree didn't surface, and `grep` when the description points at *content* rather than a name.
 
 If a precise path was already given, use it directly — skip the lookup.
 
@@ -28,41 +27,30 @@ Reply in plain prose:
 - If the workspace does not contain the requested information, say so explicitly. Do not fabricate paths or content.
 - If the question is genuinely ambiguous after a thorough lookup, list the candidates with their paths and stop.
 
-## Chunk citations
+## Citations
 
-When the evidence for a claim came from a `read_file` response that included `<chunk id='…'>` blocks (i.e. a KB-indexed document under `/documents/`), append `[citation:<chunk_id>]` to the sentence stating that claim. The caller passes these markers through to the end user verbatim, and the UI resolves each id by exact match against the database, so a wrong id silently breaks the citation.
+When the evidence for a claim came from a `read_file` response for a KB-indexed document under `/documents/`, the document reads back as a `<document … view="full">` block whose passages are each prefixed with a bracketed label — `[1]`, `[2]`, `[3]`. That `[n]` is the citation label. Append the relevant `[n]` to the sentence stating the claim, copying it **exactly** as shown. The caller passes these labels through verbatim and the server resolves each one, so a wrong number silently breaks the citation.
 
-### Where chunk ids live in `read_file` output
+### Where the labels live in `read_file` output
 
-A KB document's XML has three numeric attributes — only **one** is a citation source:
+A KB document reads back like this — only the bracketed `[n]` is a citation label:
 
 ```
-<document>
-<document_metadata>
-  <document_id>42</document_id>          ← NOT a citation. Parent doc id; ignore for citations.
-  ...
-</document_metadata>
-<chunk_index>
-  <entry chunk_id="128" lines="14-22"/>  ← Index hint; the same id also appears below.
-  <entry chunk_id="129" lines="23-30" matched="true"/>
-</chunk_index>
-<document_content>
-  <chunk id='128'><![CDATA[…]]></chunk>  ← This is the citation source.
-  <chunk id='129'><![CDATA[…]]></chunk>
-</document_content>
+<document title="Q2 Roadmap" source="File" view="full">
+  [3] First milestone is …
+  [4] Second milestone is …
 </document>
 ```
 
 ### Rules
 
-- Use the **exact** id from a `<chunk id='…'>` tag whose content you actually quoted or paraphrased. Copy digit-for-digit; do **not** retype from memory.
-- Before emitting `[citation:N]`, confirm the literal substring `<chunk id='N'>` (or its index twin `chunk_id="N"`) appears in the tool result you are summarising this turn. If you can't see it, omit the citation.
-- Never cite `<document_id>` — that's the parent doc, not a chunk.
-- Never invent, normalise, shorten, or guess at adjacent ids. If unsure between two candidates, omit rather than pick.
-- Prefer **fewer accurate citations** over many speculative ones. One correct `[citation:128]` is more useful than a string of wrong ids.
-- Multiple chunks supporting the same point → comma-separated and copied individually: `[citation:128], [citation:129]`.
-- Plain square brackets only — no markdown links, no parentheses, no footnote numbers.
-- If a claim came from a tool result that did **not** carry a chunk id (`ls`, `glob`, `grep` listings, error strings, or files without `<chunk id='…'>`), skip the citation.
-- The absolute path under `/documents/` is always required; chunk citations are additive, they do not replace the path reference.
+- Use the **exact** `[n]` shown next to the passage you actually quoted or paraphrased. Copy it digit-for-digit; do **not** retype from memory or renumber.
+- Before emitting an `[n]`, confirm that bracketed label appears in the `read_file` output you are summarising this turn. If you can't see it, omit the citation.
+- Labels are **not** sequential by position — a passage may be `[7]` while the one above it is `[3]` (numbering is shared across the whole conversation). Copy what you see; never guess an adjacent number.
+- Prefer **fewer accurate citations** over many speculative ones. One correct `[3]` is more useful than a string of wrong numbers.
+- Several passages behind one point → each in its own brackets with nothing between: `[3][4]`. Never `[3, 4]` and never a range like `[3-4]`.
+- Write the bare label `[n]` only — no `[citation:…]` wrapper, no markdown links, no parentheses, no footnote numbers.
+- If a claim came from a tool result that did **not** carry `[n]` labels (`ls`, `glob`, `grep` listings, error strings), skip the citation.
+- The absolute path under `/documents/` is always required; `[n]` labels are additive, they do not replace the path reference.
 
-Example: `The Q2 roadmap lists three milestones (/documents/planning/q2-roadmap.md) [citation:128], [citation:129].`
+Example: `The Q2 roadmap lists three milestones (/documents/planning/q2-roadmap.md) [3][4].`
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_desktop.md b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_desktop.md
index 2ea711e44..6e11aea4f 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_desktop.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_desktop.md
@@ -9,7 +9,6 @@ The caller's question often references files by description (`"my meeting notes
 1. If you do not know which mounts exist, call `ls('/')` first.
 2. Walk likely folders with the `ls` and `list_tree` tools.
 3. Use `glob` for filename patterns; use `grep` when the description points at *content* rather than a name.
-4. `<priority_documents>` lists top-K cloud-ingested docs, not local files — consult it only when the task spans both worlds (e.g. drafting a local note from a Notion source). Skip otherwise.
 
 If a precise path was already given, use it directly — skip the lookup.
 
@@ -29,6 +28,6 @@ Reply in plain prose:
 - If the workspace does not contain the requested information, say so explicitly. Do not fabricate paths or content.
 - If the question is genuinely ambiguous after a thorough lookup, list the candidates with their paths and stop.
 
-## Chunk citations
+## Citations
 
-In desktop mode your filesystem tools read local files only, and local-file `read_file` responses do **not** carry `<chunk id='…'>` tags. Cite each claim with the absolute local path; do not emit `[citation:…]` markers — your caller has nothing to resolve them against.
+In desktop mode your filesystem tools read only local files, which are not KB-indexed and carry no `[n]` citation labels. Cite each claim with the absolute local path; do not emit `[n]` or `[citation:…]` markers — your caller has nothing to resolve them against.
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/research/agent.py b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/research/agent.py
index 9a694872b..e3c0ab9ae 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/research/agent.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/research/agent.py
@@ -7,6 +7,9 @@ from typing import Any
 from langchain_core.language_models import BaseChatModel
 from langchain_core.tools import BaseTool
 
+from app.agents.chat.multi_agent_chat.shared.middleware.citation_state import (
+    build_citation_state_mw,
+)
 from app.agents.chat.multi_agent_chat.subagents.shared.md_file_reader import (
     read_md_file,
 )
@@ -31,6 +34,12 @@ def build_subagent(
         or "Handles research tasks for this workspace."
     )
     system_prompt = read_md_file(__package__, "system_prompt").strip()
+    # web_search registers WEB_RESULT citations via Command(update=...); the
+    # citation-state middleware declares the channel so those [n] merge back up.
+    middleware_with_citations = {
+        **(middleware_stack or {}),
+        "citation_state": build_citation_state_mw(),
+    }
     return pack_subagent(
         name=NAME,
         description=description,
@@ -39,5 +48,5 @@ def build_subagent(
         ruleset=RULESET,
         dependencies=dependencies,
         model=model,
-        middleware_stack=middleware_stack,
+        middleware_stack=middleware_with_citations,
     )
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/research/system_prompt.md b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/research/system_prompt.md
index 1b9ccaefa..3d90a4352 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/research/system_prompt.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/research/system_prompt.md
@@ -17,6 +17,16 @@ Gather and synthesize evidence using SurfSense research tools with clear citatio
 - Never fabricate facts, citations, URLs, or quote text.
 </tool_policy>
 
+<citations>
+`web_search` returns a `<web_results>` block whose results are each prefixed with a bracketed label — `[1]`, `[2]`, `[3]`. That `[n]` is the citation label. When a finding came from a specific result, append its `[n]` to that finding, copying the label **exactly** as shown. The caller relays these labels verbatim and the server resolves each one, so a wrong number silently breaks the citation.
+
+- Use the exact `[n]` shown next to the result you actually used; never renumber, guess, or invent a label.
+- Before emitting an `[n]`, confirm that bracketed label appears in the `web_search` output this turn. If you can't see it, omit it.
+- Write the bare label `[n]` only — no `[citation:…]` wrapper, no markdown links.
+- Several results behind one finding → each in its own brackets with nothing between: `[1][2]`.
+- `scrape_webpage` returns raw page text with no `[n]` labels; a fact drawn only from a scrape carries no citation (report the URL in `evidence.sources` instead).
+</citations>
+
 <out_of_scope>
 - Do not execute connector mutations (email/calendar/docs/chat writes) or deliverable generation.
 </out_of_scope>
@@ -47,6 +57,6 @@ Return **only** one JSON object (no markdown/prose):
 }
 <include snippet="output_contract_base"/>
 Route-specific rules:
-- `evidence.findings`: max 10 entries, each a single sentence stating one distinct fact. Do not paste raw paragraphs, scraped pages, or quote blocks.
-- `evidence.sources`: max 10 URLs, one per finding when applicable. List each URL once.
+- `evidence.findings`: max 10 entries, each a single sentence stating one distinct fact. Append the supporting `[n]` to each finding drawn from a `web_search` result. Do not paste raw paragraphs, scraped pages, or quote blocks.
+- `evidence.sources`: max 10 URLs, one per finding when applicable. List each URL once. (Citations travel as `[n]`; `sources` is for transparency and for scrape-only facts that carry no `[n]`.)
 </output_contract>
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/research/tools/__init__.py b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/research/tools/__init__.py
index 7234942b6..0c99bf222 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/research/tools/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/research/tools/__init__.py
@@ -1,7 +1,8 @@
-"""Research-stage tools: web search and scrape."""
+"""Research-stage tools: web search (shared) and scrape."""
+
+from app.agents.chat.shared.tools.web_search import create_web_search_tool
 
 from .scrape_webpage import create_scrape_webpage_tool
-from .web_search import create_web_search_tool
 
 __all__ = [
     "create_scrape_webpage_tool",
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/research/tools/index.py b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/research/tools/index.py
index 1e823fafa..5fc2b5699 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/research/tools/index.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/research/tools/index.py
@@ -7,9 +7,9 @@ from typing import Any
 from langchain_core.tools import BaseTool
 
 from app.agents.chat.multi_agent_chat.shared.permissions import Ruleset
+from app.agents.chat.shared.tools.web_search import create_web_search_tool
 
 from .scrape_webpage import create_scrape_webpage_tool
-from .web_search import create_web_search_tool
 
 NAME = "research"
 
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/research/tools/web_search.py b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/research/tools/web_search.py
deleted file mode 100644
index 2fe6bd378..000000000
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/research/tools/web_search.py
+++ /dev/null
@@ -1,241 +0,0 @@
-"""Real-time web search: SearXNG plus configured live-search connectors (Tavily, Linkup, Baidu, etc.)."""
-
-import asyncio
-import json
-import time
-from typing import Any
-
-from langchain_core.tools import StructuredTool
-from pydantic import BaseModel, Field
-
-from app.db import shielded_async_session
-from app.services.connector_service import ConnectorService
-from app.utils.perf import get_perf_logger
-
-_LIVE_SEARCH_CONNECTORS: set[str] = {
-    "TAVILY_API",
-    "LINKUP_API",
-    "BAIDU_SEARCH_API",
-}
-
-_LIVE_CONNECTOR_SPECS: dict[str, tuple[str, bool, bool, dict[str, Any]]] = {
-    "TAVILY_API": ("search_tavily", False, True, {}),
-    "LINKUP_API": ("search_linkup", False, False, {"mode": "standard"}),
-    "BAIDU_SEARCH_API": ("search_baidu", False, True, {}),
-}
-
-_CONNECTOR_LABELS: dict[str, str] = {
-    "TAVILY_API": "Tavily",
-    "LINKUP_API": "Linkup",
-    "BAIDU_SEARCH_API": "Baidu",
-}
-
-
-class WebSearchInput(BaseModel):
-    """Input schema for the web_search tool."""
-
-    query: str = Field(
-        description="The search query to look up on the web. Use specific, descriptive terms.",
-    )
-    top_k: int = Field(
-        default=10,
-        description="Number of results to retrieve (default: 10, max: 50).",
-    )
-
-
-def _format_web_results(
-    documents: list[dict[str, Any]],
-    *,
-    max_chars: int = 50_000,
-) -> str:
-    """Format web search results into XML suitable for the LLM context."""
-    if not documents:
-        return "No web search results found."
-
-    parts: list[str] = []
-    total_chars = 0
-
-    for doc in documents:
-        doc_info = doc.get("document") or {}
-        metadata = doc_info.get("metadata") or {}
-        title = doc_info.get("title") or "Web Result"
-        url = metadata.get("url") or ""
-        content = (doc.get("content") or "").strip()
-        source = metadata.get("document_type") or doc.get("source") or "WEB_SEARCH"
-        if not content:
-            continue
-
-        metadata_json = json.dumps(metadata, ensure_ascii=False)
-        doc_xml = "\n".join(
-            [
-                "<document>",
-                "<document_metadata>",
-                f"  <document_type>{source}</document_type>",
-                f"  <title><![CDATA[{title}]]></title>",
-                f"  <url><![CDATA[{url}]]></url>",
-                f"  <metadata_json><![CDATA[{metadata_json}]]></metadata_json>",
-                "</document_metadata>",
-                "<document_content>",
-                f"  <chunk id='{url}'><![CDATA[{content}]]></chunk>",
-                "</document_content>",
-                "</document>",
-                "",
-            ]
-        )
-
-        if total_chars + len(doc_xml) > max_chars:
-            parts.append("<!-- Output truncated to fit context window -->")
-            break
-
-        parts.append(doc_xml)
-        total_chars += len(doc_xml)
-
-    return "\n".join(parts).strip() or "No web search results found."
-
-
-async def _search_live_connector(
-    connector: str,
-    query: str,
-    search_space_id: int,
-    top_k: int,
-    semaphore: asyncio.Semaphore,
-) -> list[dict[str, Any]]:
-    """Dispatch a single live-search connector (Tavily / Linkup / Baidu)."""
-    perf = get_perf_logger()
-    spec = _LIVE_CONNECTOR_SPECS.get(connector)
-    if spec is None:
-        return []
-
-    method_name, _includes_date_range, includes_top_k, extra_kwargs = spec
-    kwargs: dict[str, Any] = {
-        "user_query": query,
-        "search_space_id": search_space_id,
-        **extra_kwargs,
-    }
-    if includes_top_k:
-        kwargs["top_k"] = top_k
-
-    try:
-        t0 = time.perf_counter()
-        async with semaphore, shielded_async_session() as session:
-            svc = ConnectorService(session, search_space_id)
-            _, chunks = await getattr(svc, method_name)(**kwargs)
-            perf.info(
-                "[web_search] connector=%s results=%d in %.3fs",
-                connector,
-                len(chunks),
-                time.perf_counter() - t0,
-            )
-            return chunks
-    except Exception as e:
-        perf.warning("[web_search] connector=%s FAILED: %s", connector, e)
-        return []
-
-
-def create_web_search_tool(
-    search_space_id: int | None = None,
-    available_connectors: list[str] | None = None,
-) -> StructuredTool:
-    """Factory for the ``web_search`` tool.
-
-    Dispatches in parallel to the platform SearXNG instance and any
-    user-configured live-search connectors (Tavily, Linkup, Baidu).
-    """
-    active_live_connectors: list[str] = []
-    if available_connectors:
-        active_live_connectors = [
-            c for c in available_connectors if c in _LIVE_SEARCH_CONNECTORS
-        ]
-
-    engine_names = ["SearXNG (platform default)"]
-    engine_names.extend(_CONNECTOR_LABELS.get(c, c) for c in active_live_connectors)
-    engines_summary = ", ".join(engine_names)
-
-    description = (
-        "Search the web for real-time information. "
-        "Use this for current events, news, prices, weather, public facts, or any "
-        "question that requires up-to-date information from the internet.\n\n"
-        f"Active search engines: {engines_summary}.\n"
-        "All configured engines are queried in parallel and results are merged."
-    )
-
-    _search_space_id = search_space_id
-    _active_live = active_live_connectors
-
-    async def _web_search_impl(query: str, top_k: int = 10) -> str:
-        from app.services import web_search_service
-
-        perf = get_perf_logger()
-        t0 = time.perf_counter()
-        clamped_top_k = min(max(1, top_k), 50)
-
-        semaphore = asyncio.Semaphore(4)
-        tasks: list[asyncio.Task[list[dict[str, Any]]]] = []
-
-        if web_search_service.is_available():
-
-            async def _searxng() -> list[dict[str, Any]]:
-                async with semaphore:
-                    _result_obj, docs = await web_search_service.search(
-                        query=query,
-                        top_k=clamped_top_k,
-                    )
-                    return docs
-
-            tasks.append(asyncio.ensure_future(_searxng()))
-
-        if _search_space_id is not None:
-            for connector in _active_live:
-                tasks.append(
-                    asyncio.ensure_future(
-                        _search_live_connector(
-                            connector=connector,
-                            query=query,
-                            search_space_id=_search_space_id,
-                            top_k=clamped_top_k,
-                            semaphore=semaphore,
-                        )
-                    )
-                )
-
-        if not tasks:
-            return "Web search is not available — no search engines are configured."
-
-        results_lists = await asyncio.gather(*tasks, return_exceptions=True)
-
-        all_documents: list[dict[str, Any]] = []
-        for result in results_lists:
-            if isinstance(result, BaseException):
-                perf.warning("[web_search] a search engine failed: %s", result)
-                continue
-            all_documents.extend(result)
-
-        seen_urls: set[str] = set()
-        deduplicated: list[dict[str, Any]] = []
-        for doc in all_documents:
-            url = ((doc.get("document") or {}).get("metadata") or {}).get("url", "")
-            if url and url in seen_urls:
-                continue
-            if url:
-                seen_urls.add(url)
-            deduplicated.append(doc)
-
-        formatted = _format_web_results(deduplicated)
-
-        perf.info(
-            "[web_search] query=%r engines=%d results=%d deduped=%d chars=%d in %.3fs",
-            query[:60],
-            len(tasks),
-            len(all_documents),
-            len(deduplicated),
-            len(formatted),
-            time.perf_counter() - t0,
-        )
-        return formatted
-
-    return StructuredTool(
-        name="web_search",
-        description=description,
-        coroutine=_web_search_impl,
-        args_schema=WebSearchInput,
-    )
diff --git a/surfsense_backend/app/agents/chat/runtime/mention_resolver.py b/surfsense_backend/app/agents/chat/runtime/mention_resolver.py
index a47ed8f36..4f2f47b24 100644
--- a/surfsense_backend/app/agents/chat/runtime/mention_resolver.py
+++ b/surfsense_backend/app/agents/chat/runtime/mention_resolver.py
@@ -74,8 +74,9 @@ class ResolvedMentionSet:
     ``@Project``).
 
     ``mentioned_document_ids`` is an ordered, deduped list consumed by
-    the priority middleware downstream — see
-    ``KnowledgePriorityMiddleware._compute_priority_paths``.
+    the on-demand ``search_knowledge_base`` tool downstream (via
+    ``referenced_document_ids``) to pin @-mentioned docs into the
+    retrieval scope.
     """
 
     mentions: list[ResolvedMention] = field(default_factory=list)
@@ -113,8 +114,8 @@ async def resolve_mentions(
 
     * Legacy clients that haven't migrated to the unified chip list
       still send the id arrays — we treat the union as authoritative.
-    * The id arrays are the canonical input to
-      ``KnowledgePriorityMiddleware`` (via ``SurfSenseContextSchema``);
+    * The id arrays are the canonical input to the retrieval scope
+      (via ``SurfSenseContextSchema`` → ``referenced_document_ids``);
       returning the deduped, validated lists lets the route forward
       them unchanged.
 
diff --git a/surfsense_backend/app/agents/chat/runtime/path_resolver.py b/surfsense_backend/app/agents/chat/runtime/path_resolver.py
index 861f48ee7..84282b63b 100644
--- a/surfsense_backend/app/agents/chat/runtime/path_resolver.py
+++ b/surfsense_backend/app/agents/chat/runtime/path_resolver.py
@@ -4,7 +4,6 @@ This module is the single source of truth for mapping ``Document`` rows to
 virtual paths under ``/documents/`` and back. It is used by:
 
 * :class:`KnowledgeTreeMiddleware` (rendering the workspace tree)
-* :class:`KnowledgePriorityMiddleware` (computing priority paths)
 * :class:`KBPostgresBackend` (``als_info`` / ``aread`` / move operations)
 * :class:`KnowledgeBasePersistenceMiddleware` (resolving moves and creates)
 
diff --git a/surfsense_backend/app/agents/chat/runtime/references/__init__.py b/surfsense_backend/app/agents/chat/runtime/references/__init__.py
new file mode 100644
index 000000000..62530fd71
--- /dev/null
+++ b/surfsense_backend/app/agents/chat/runtime/references/__init__.py
@@ -0,0 +1,95 @@
+"""Resolved ``@``-references and their pointer block.
+
+References are scope, not content: they tell the model what the user pointed
+at this turn so it can retrieve from those sources with tools.
+"""
+
+from __future__ import annotations
+
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.agents.chat.runtime.path_resolver import build_path_index
+from app.schemas.new_chat import MentionedDocumentInfo
+
+from .chat import resolve_chat_references
+from .connectors import resolve_connector_references
+from .documents import referenced_document_ids, resolve_document_references
+from .folders import resolve_folder_references
+from .models import (
+    ChatReference,
+    ConnectorReference,
+    DocumentReference,
+    FolderReference,
+    Reference,
+    ReferenceKind,
+)
+from .reference_pointers import render_reference_pointers
+
+
+async def resolve_references(
+    session: AsyncSession,
+    *,
+    search_space_id: int,
+    requesting_user_id: str | None,
+    current_chat_id: int,
+    document_ids: list[int] | None = None,
+    folder_ids: list[int] | None = None,
+    connector_ids: list[int] | None = None,
+    connector_chips: list[MentionedDocumentInfo] | None = None,
+    thread_ids: list[int] | None = None,
+) -> list[Reference]:
+    """Gather every ``@``-reference of the turn into a single pointer list.
+
+    Kinds appear as documents, folders, connectors, then chats; a kind with no
+    ids is skipped. Documents and folders reuse one path index per call.
+    """
+    pointers: list[Reference] = []
+    wants_paths = bool(document_ids or folder_ids)
+    if wants_paths:
+        path_index = await build_path_index(session, search_space_id)
+        if document_ids:
+            pointers += await resolve_document_references(
+                session,
+                search_space_id=search_space_id,
+                document_ids=document_ids,
+                index=path_index,
+            )
+        if folder_ids:
+            pointers += await resolve_folder_references(
+                session,
+                search_space_id=search_space_id,
+                folder_ids=folder_ids,
+                index=path_index,
+            )
+
+    if connector_ids:
+        pointers += await resolve_connector_references(
+            session,
+            search_space_id=search_space_id,
+            connector_ids=connector_ids,
+            chips=connector_chips,
+        )
+
+    if thread_ids:
+        pointers += await resolve_chat_references(
+            session,
+            search_space_id=search_space_id,
+            requesting_user_id=requesting_user_id,
+            current_chat_id=current_chat_id,
+            thread_ids=thread_ids,
+        )
+
+    return pointers
+
+
+__all__ = [
+    "ChatReference",
+    "ConnectorReference",
+    "DocumentReference",
+    "FolderReference",
+    "Reference",
+    "ReferenceKind",
+    "referenced_document_ids",
+    "render_reference_pointers",
+    "resolve_references",
+]
diff --git a/surfsense_backend/app/agents/chat/runtime/references/chat/__init__.py b/surfsense_backend/app/agents/chat/runtime/references/chat/__init__.py
new file mode 100644
index 000000000..841f2291a
--- /dev/null
+++ b/surfsense_backend/app/agents/chat/runtime/references/chat/__init__.py
@@ -0,0 +1,7 @@
+"""Resolve ``@chat`` mentions into pointers, access-checked, titles only."""
+
+from __future__ import annotations
+
+from .resolver import resolve_chat_references
+
+__all__ = ["resolve_chat_references"]
diff --git a/surfsense_backend/app/agents/chat/runtime/references/chat/access.py b/surfsense_backend/app/agents/chat/runtime/references/chat/access.py
new file mode 100644
index 000000000..1f7614b06
--- /dev/null
+++ b/surfsense_backend/app/agents/chat/runtime/references/chat/access.py
@@ -0,0 +1,79 @@
+"""Access-checked lookup of chat threads the requester may read.
+
+The single place chat visibility is enforced: a thread is readable when it is
+shared with the search space, the requester created it, or it is a legacy
+null-creator thread and the requester owns the search space. Anything else is
+dropped (fail-closed).
+"""
+
+from __future__ import annotations
+
+import logging
+from uuid import UUID
+
+from sqlalchemy import or_, select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.db import ChatVisibility, NewChatThread, SearchSpace
+
+logger = logging.getLogger(__name__)
+
+
+def _visibility_predicate(user_uuid: UUID | None, *, include_legacy: bool):
+    """OR together the clauses under which the requester may read a thread."""
+    shared = NewChatThread.visibility == ChatVisibility.SEARCH_SPACE
+    own = [] if user_uuid is None else [NewChatThread.created_by_id == user_uuid]
+    legacy = [NewChatThread.created_by_id.is_(None)] if include_legacy else []
+    # Shared-with-space is always allowed; the creator clause and the
+    # null-creator clause are only added when their flag/argument is set.
+    return or_(shared, *own, *legacy)
+
+
+async def accessible_threads(
+    session: AsyncSession,
+    *,
+    search_space_id: int,
+    requesting_user_id: str | None,
+    thread_ids: list[int],
+    exclude_thread_id: int | None = None,
+) -> list[NewChatThread]:
+    """Return the requested threads of this space that the requester may read.
+
+    Ids are de-duplicated keeping first-seen order, and ``exclude_thread_id``
+    (the active chat) is dropped so a chat never references itself. Ids that
+    are inaccessible or belong to another space are silently omitted.
+    """
+    requested = [tid for tid in dict.fromkeys(thread_ids) if tid != exclude_thread_id]
+    if not requested:
+        return []
+    # An unparseable user id is logged and leaves ``user_uuid`` as ``None``.
+    user_uuid: UUID | None = None
+    if requesting_user_id:
+        try:
+            user_uuid = UUID(requesting_user_id)
+        except (TypeError, ValueError):
+            logger.warning(
+                "accessible_threads: invalid user_id=%r; restricting to shared",
+                requesting_user_id,
+            )
+
+    # Legacy null-creator threads are readable only by the search-space owner.
+    include_legacy = False
+    if user_uuid is not None:
+        owner_id = await session.scalar(
+            select(SearchSpace.user_id).where(SearchSpace.id == search_space_id)
+        )
+        include_legacy = owner_id == user_uuid
+    # One query filters by requested id, search space and visibility together.
+    rows = await session.execute(
+        select(NewChatThread).where(
+            NewChatThread.id.in_(requested),
+            NewChatThread.search_space_id == search_space_id,
+            _visibility_predicate(user_uuid, include_legacy=include_legacy),
+        )
+    )
+    threads_by_id = {row.id: row for row in rows.scalars().all()}
+    return [threads_by_id[tid] for tid in requested if tid in threads_by_id]
+
+
+__all__ = ["accessible_threads"]
diff --git a/surfsense_backend/app/agents/chat/runtime/references/chat/resolver.py b/surfsense_backend/app/agents/chat/runtime/references/chat/resolver.py
new file mode 100644
index 000000000..4e267dff3
--- /dev/null
+++ b/surfsense_backend/app/agents/chat/runtime/references/chat/resolver.py
@@ -0,0 +1,41 @@
+"""Resolve ``@chat`` mentions into pointer references.
+
+Chats are not KB-indexed, so a chat reference is a pointer only; its turns are
+read on demand via the chat read tool, not injected here. Only the title is
+needed, so this takes the cheap access-checked path and never loads transcripts.
+"""
+
+from __future__ import annotations
+
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from ..models import ChatReference
+from .access import accessible_threads
+
+
+async def resolve_chat_references(
+    session: AsyncSession,
+    *,
+    search_space_id: int,
+    requesting_user_id: str | None,
+    current_chat_id: int,
+    thread_ids: list[int],
+) -> list[ChatReference]:
+    """Turn ``@chat`` thread ids into access-checked, title-only pointers."""
+    if not thread_ids:
+        return []
+
+    readable = await accessible_threads(
+        session,
+        search_space_id=search_space_id,
+        requesting_user_id=requesting_user_id,
+        thread_ids=thread_ids,
+        exclude_thread_id=current_chat_id,
+    )
+    pointers: list[ChatReference] = []
+    for chat in readable:
+        pointers.append(ChatReference(chat.id, str(chat.title or "Untitled chat")))
+    return pointers
+
+
+__all__ = ["resolve_chat_references"]
diff --git a/surfsense_backend/app/agents/chat/runtime/references/connectors.py b/surfsense_backend/app/agents/chat/runtime/references/connectors.py
new file mode 100644
index 000000000..8d5f36133
--- /dev/null
+++ b/surfsense_backend/app/agents/chat/runtime/references/connectors.py
@@ -0,0 +1,83 @@
+"""Resolve ``@connector`` account mentions into references for the pointer block."""
+
+from __future__ import annotations
+
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.db import SearchSourceConnector
+from app.schemas.new_chat import MentionedDocumentInfo
+
+from .models import ConnectorReference
+
+
+def connector_pointer_fields(
+    *,
+    account_name: str | None,
+    connector_type: str | None,
+    fallback_name: str | None,
+) -> tuple[str, str | None]:
+    """Choose the display label and provider shown on a connector pointer.
+
+    Label: first truthy of ``account_name``, ``fallback_name``, ``"account"``.
+    Provider: ``connector_type``, with any falsy value normalised to ``None``.
+    """
+    provider = connector_type if connector_type else None
+    return (account_name or fallback_name or "account"), provider
+
+
+async def resolve_connector_references(
+    session: AsyncSession,
+    *,
+    search_space_id: int,
+    connector_ids: list[int],
+    chips: list[MentionedDocumentInfo] | None = None,
+) -> list[ConnectorReference]:
+    """Turn ``@connector`` ids into pointers, dropping ids outside the space.
+
+    The query only proves the connector belongs to this search space. Display
+    fields prefer the user's chip and fall back to the stored name and type.
+    """
+    if not connector_ids:
+        return []
+
+    in_space = select(
+        SearchSourceConnector.id,
+        SearchSourceConnector.name,
+        SearchSourceConnector.connector_type,
+    ).where(
+        SearchSourceConnector.search_space_id == search_space_id,
+        SearchSourceConnector.id.in_(connector_ids),
+    )
+    result = await session.execute(in_space)
+    stored_by_id = {stored.id: stored for stored in result.all()}
+
+    # Only chips of kind "connector" can supply display fields here.
+    connector_chips = [chip for chip in (chips or []) if chip.kind == "connector"]
+    chip_by_id = {chip.id: chip for chip in connector_chips}
+
+    pointers: list[ConnectorReference] = []
+    for connector_id in dict.fromkeys(connector_ids):
+        if connector_id not in stored_by_id:
+            # Not in this search space (or unknown): no pointer.
+            continue
+        stored = stored_by_id[connector_id]
+        chip = chip_by_id.get(connector_id)
+        # ``connector_type`` may be an enum; use its ``value`` when it has one.
+        stored_type = getattr(stored.connector_type, "value", stored.connector_type)
+        # The chip's type wins; the stored type is only the fallback.
+        provider_hint = chip.connector_type if chip else None
+        if not provider_hint:
+            provider_hint = str(stored_type) if stored_type else None
+        label, provider = connector_pointer_fields(
+            account_name=chip.account_name if chip else None,
+            connector_type=provider_hint,
+            fallback_name=str(stored.name or ""),
+        )
+        pointers.append(
+            ConnectorReference(entity_id=connector_id, label=label, provider=provider)
+        )
+    return pointers
+
+
+__all__ = ["connector_pointer_fields", "resolve_connector_references"]
diff --git a/surfsense_backend/app/agents/chat/runtime/references/documents/__init__.py b/surfsense_backend/app/agents/chat/runtime/references/documents/__init__.py
new file mode 100644
index 000000000..4250ee119
--- /dev/null
+++ b/surfsense_backend/app/agents/chat/runtime/references/documents/__init__.py
@@ -0,0 +1,13 @@
+"""Resolve ``@document`` references.
+
+Two concerns, one subject: ``resolver`` turns document ids into pointer
+references for the model, ``referenced`` turns ``@document`` / ``@folder``
+mentions into the document ids a retrieval is confined to.
+"""
+
+from __future__ import annotations
+
+from .referenced import referenced_document_ids
+from .resolver import resolve_document_references
+
+__all__ = ["referenced_document_ids", "resolve_document_references"]
diff --git a/surfsense_backend/app/agents/chat/runtime/references/documents/referenced.py b/surfsense_backend/app/agents/chat/runtime/references/documents/referenced.py
new file mode 100644
index 000000000..4e05fd324
--- /dev/null
+++ b/surfsense_backend/app/agents/chat/runtime/references/documents/referenced.py
@@ -0,0 +1,39 @@
+"""Resolve ``@document`` / ``@folder`` mentions to the documents they point at.
+
+Reference resolution, not retrieval: this answers "which knowledge-base
+documents did the user point at this turn?". ``@document`` ids pass through;
+``@folder`` ids expand to the documents directly inside each folder within this
+search space (direct children only, not nested subfolders). The caller turns the
+returned ids into a retrieval ``SearchScope``.
+"""
+
+from __future__ import annotations
+
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.db import Document
+
+
+async def referenced_document_ids(
+    session: AsyncSession,
+    *,
+    search_space_id: int,
+    document_ids: list[int] | None = None,
+    folder_ids: list[int] | None = None,
+) -> tuple[int, ...]:
+    """Return the de-duplicated, ascending ids of every referenced document."""
+    pointed_at: set[int] = set(document_ids) if document_ids else set()
+    if not folder_ids:
+        return tuple(sorted(pointed_at))
+
+    # A folder mention contributes the documents filed directly under it.
+    in_folders = select(Document.id).where(
+        Document.search_space_id == search_space_id,
+        Document.folder_id.in_(list(folder_ids)),
+    )
+    pointed_at |= set((await session.execute(in_folders)).scalars().all())
+    return tuple(sorted(pointed_at))
+
+
+__all__ = ["referenced_document_ids"]
diff --git a/surfsense_backend/app/agents/chat/runtime/references/documents/resolver.py b/surfsense_backend/app/agents/chat/runtime/references/documents/resolver.py
new file mode 100644
index 000000000..72a459eb9
--- /dev/null
+++ b/surfsense_backend/app/agents/chat/runtime/references/documents/resolver.py
@@ -0,0 +1,58 @@
+"""Resolve ``@document`` ids into references for the pointer block."""
+
+from __future__ import annotations
+
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.agents.chat.runtime.path_resolver import PathIndex, doc_to_virtual_path
+from app.db import Document
+
+from ..models import DocumentReference
+
+
+async def resolve_document_references(
+    session: AsyncSession,
+    *,
+    search_space_id: int,
+    document_ids: list[int],
+    index: PathIndex,
+) -> list[DocumentReference]:
+    """Turn document ids into pointers, keeping the order they were given in.
+
+    Fail-closed and best-effort: an id that is not found inside
+    ``search_space_id`` (deleted or foreign) yields no reference at all.
+    """
+    if not document_ids:
+        return []
+
+    # Scoping the lookup to the search space is what drops foreign ids.
+    in_space = select(Document).where(
+        Document.search_space_id == search_space_id,
+        Document.id.in_(document_ids),
+    )
+    result = await session.execute(in_space)
+    found = {doc.id: doc for doc in result.scalars().all()}
+
+    pointers: list[DocumentReference] = []
+    # dict.fromkeys keeps first-seen order while removing repeated ids.
+    for requested_id in dict.fromkeys(document_ids):
+        if requested_id not in found:
+            # Deleted or foreign id: no pointer is produced.
+            continue
+        doc = found[requested_id]
+        # One title string serves as the label and as the path resolver input.
+        title = str(doc.title or "untitled")
+        virtual_path = doc_to_virtual_path(
+            doc_id=doc.id,
+            title=title,
+            folder_id=doc.folder_id,
+            index=index,
+        )
+        pointers.append(
+            DocumentReference(entity_id=doc.id, label=title, path=virtual_path)
+        )
+    return pointers
+
+
+__all__ = ["resolve_document_references"]
diff --git a/surfsense_backend/app/agents/chat/runtime/references/folders.py b/surfsense_backend/app/agents/chat/runtime/references/folders.py
new file mode 100644
index 000000000..df0ec457b
--- /dev/null
+++ b/surfsense_backend/app/agents/chat/runtime/references/folders.py
@@ -0,0 +1,54 @@
+"""Resolve ``@folder`` ids into references for the pointer block."""
+
+from __future__ import annotations
+
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.agents.chat.runtime.path_resolver import DOCUMENTS_ROOT, PathIndex
+from app.db import Folder
+
+from .models import FolderReference
+
+
+def folder_pointer_path(folder_id: int, folder_paths: dict[int, str]) -> str:
+    """Folder's virtual path with a guaranteed trailing ``/`` (root if unknown)."""
+    path = folder_paths[folder_id] if folder_id in folder_paths else DOCUMENTS_ROOT
+    return path.removesuffix("/") + "/"
+
+
+async def resolve_folder_references(
+    session: AsyncSession,
+    *,
+    search_space_id: int,
+    folder_ids: list[int],
+    index: PathIndex,
+) -> list[FolderReference]:
+    """Turn folder ids into pointers in request order, skipping unknown ids."""
+    if not folder_ids:
+        return []
+
+    in_space = select(Folder).where(
+        Folder.search_space_id == search_space_id,
+        Folder.id.in_(folder_ids),
+    )
+    result = await session.execute(in_space)
+    found = {folder.id: folder for folder in result.scalars().all()}
+
+    # Walk the request order (first occurrence wins) and keep only known ids.
+    known = [found[fid] for fid in dict.fromkeys(folder_ids) if fid in found]
+    pointers: list[FolderReference] = []
+    for folder in known:
+        display_name = str(folder.name or "untitled")
+        directory = folder_pointer_path(folder.id, index.folder_paths)
+        pointers.append(
+            FolderReference(
+                entity_id=folder.id,
+                label=display_name,
+                path=directory,
+            )
+        )
+    return pointers
+
+
+__all__ = ["folder_pointer_path", "resolve_folder_references"]
diff --git a/surfsense_backend/app/agents/chat/runtime/references/models.py b/surfsense_backend/app/agents/chat/runtime/references/models.py
new file mode 100644
index 000000000..8ae151772
--- /dev/null
+++ b/surfsense_backend/app/agents/chat/runtime/references/models.py
@@ -0,0 +1,73 @@
+"""Data shapes for resolved ``@``-references.
+
+One type per kind so each carries exactly the fields it needs: documents and
+folders have a path, connectors have a provider, chats have neither. ``kind`` is
+a class-level discriminator used by the renderer and scope builder.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from enum import Enum
+from typing import ClassVar
+
+
+class ReferenceKind(str, Enum):
+    """Kind of ``@``-reference; each member's value is the label the model sees."""
+
+    DOCUMENT = "document"
+    FOLDER = "folder"
+    CONNECTOR = "connector"
+    CHAT = "chat"
+
+
+@dataclass(frozen=True)
+class _Reference:
+    """Frozen base holding the id and display label every reference kind carries."""
+
+    entity_id: int
+    label: str
+
+
+@dataclass(frozen=True)
+class DocumentReference(_Reference):
+    """A referenced document; ``path`` is the virtual path it is reachable by."""
+
+    path: str
+    kind: ClassVar[ReferenceKind] = ReferenceKind.DOCUMENT
+
+
+@dataclass(frozen=True)
+class FolderReference(_Reference):
+    """A referenced folder; ``path`` is the virtual path it is reachable by."""
+
+    path: str
+    kind: ClassVar[ReferenceKind] = ReferenceKind.FOLDER
+
+
+@dataclass(frozen=True)
+class ConnectorReference(_Reference):
+    """A referenced connector account; optional ``provider`` is its type label."""
+
+    provider: str | None = None
+    kind: ClassVar[ReferenceKind] = ReferenceKind.CONNECTOR
+
+
+@dataclass(frozen=True)
+class ChatReference(_Reference):
+    """A referenced chat thread: id and label only; turns are read on demand."""
+
+    kind: ClassVar[ReferenceKind] = ReferenceKind.CHAT
+
+
+Reference = DocumentReference | FolderReference | ConnectorReference | ChatReference
+
+
+__all__ = [
+    "ChatReference",
+    "ConnectorReference",
+    "DocumentReference",
+    "FolderReference",
+    "Reference",
+    "ReferenceKind",
+]
diff --git a/surfsense_backend/app/agents/chat/runtime/references/reference_pointers.py b/surfsense_backend/app/agents/chat/runtime/references/reference_pointers.py
new file mode 100644
index 000000000..894d844b1
--- /dev/null
+++ b/surfsense_backend/app/agents/chat/runtime/references/reference_pointers.py
@@ -0,0 +1,66 @@
+"""Render resolved references into a ``<referenced_this_turn>`` pointer block.
+
+Pointers, not content: each line names what the user referenced and how to
+reach it (a path, a connector handle, a title) so the model knows what to
+retrieve from. Actual content is pulled later via tools, never injected here.
+"""
+
+from __future__ import annotations
+
+from .models import (
+    ChatReference,
+    ConnectorReference,
+    DocumentReference,
+    FolderReference,
+    Reference,
+)
+
+_HEADER = (
+    "The user pointed at these with @ this turn. They are scope, not content "
+    "— when the question is about them, retrieve from them before answering."
+)
+
+
+def render_reference_pointers(references: list[Reference]) -> str | None:
+    """Build the ``<referenced_this_turn>`` block, one pointer line per reference.
+
+    An empty ``references`` list yields ``None`` (not an empty block), letting
+    the caller omit the block altogether.
+    """
+    if not references:
+        return None
+
+    # Header and pointers sit between the tags, one pointer per line.
+    opening = "<referenced_this_turn>\n" + f"{_HEADER}\n"
+    closing = "\n</referenced_this_turn>"
+    pointer_lines = [_render_pointer(reference) for reference in references]
+    body = "\n".join(pointer_lines)
+
+    return opening + body + closing
+
+
+def _render_pointer(reference: Reference) -> str:
+    """Emit the ``- {kind} {id} — {handle}`` line; the handle depends on kind."""
+    kind, entity_id = reference.kind.value, reference.entity_id
+    return f"- {kind} {entity_id} — " + _handle(reference)
+
+
+def _handle(reference: Reference) -> str:
+    """Per-kind handle (path, provider or title); ``TypeError`` on unknown kinds."""
+    label = _clean(reference.label)
+    if isinstance(reference, (DocumentReference, FolderReference)):
+        return f'"{label}" ({reference.path})'
+    if isinstance(reference, ConnectorReference):
+        provider = _clean(reference.provider) if reference.provider else ""
+        return f"{provider} ({label})" if provider else label
+    if isinstance(reference, ChatReference):
+        return f'"{label}"'
+    raise TypeError(f"unsupported reference type: {type(reference).__name__}")
+
+
+def _clean(text: str) -> str:
+    """Collapse whitespace runs, newlines included, so the pointer stays one line."""
+    return " ".join(text.split())
+
+
+__all__ = ["render_reference_pointers"]
diff --git a/surfsense_backend/app/agents/chat/shared/context.py b/surfsense_backend/app/agents/chat/shared/context.py
index 50b761f5b..b543eb6b6 100644
--- a/surfsense_backend/app/agents/chat/shared/context.py
+++ b/surfsense_backend/app/agents/chat/shared/context.py
@@ -11,9 +11,9 @@ MUST live on this context object instead of being captured into a
 middleware ``__init__`` closure. Middlewares read fields back via
 ``runtime.context.<field>``; tools read them via ``runtime.context``.
 
-This object is read inside both ``KnowledgePriorityMiddleware`` (for
-``mentioned_document_ids``) and any future middleware that needs
-per-request state without invalidating the compiled-agent cache.
+This object is read by the ``search_knowledge_base`` tool (for
+``mentioned_document_ids``) and any middleware that needs per-request
+state without invalidating the compiled-agent cache.
 """
 
 from __future__ import annotations
@@ -43,13 +43,12 @@ class SurfSenseContextSchema:
     Phase 1.5 fields:
         search_space_id: Search space the request is scoped to.
         mentioned_document_ids: KB documents the user @-mentioned this turn.
-            Read by ``KnowledgePriorityMiddleware`` to seed its priority
-            list. Stays out of the compiled-agent cache key — that's the
-            whole point of putting it here.
+            Read by the ``search_knowledge_base`` tool to pin these docs
+            into the retrieval scope. Stays out of the compiled-agent cache
+            key — that's the whole point of putting it here.
         mentioned_folder_ids: KB folders the user @-mentioned this turn
-            (cloud filesystem mode). Surfaced as ``[USER-MENTIONED]``
-            entries in ``<priority_documents>`` so the agent prioritises
-            walking those folders with ``ls`` / ``find_documents``.
+            (cloud filesystem mode). Pinned into the ``search_knowledge_base``
+            retrieval scope so matches from those folders are prioritised.
         file_operation_contract: One-shot file operation contract for the
             upcoming turn (reserved; not currently populated).
         turn_id / request_id: Correlation IDs surfaced by the streaming
diff --git a/surfsense_backend/app/agents/chat/shared/middleware/compaction.py b/surfsense_backend/app/agents/chat/shared/middleware/compaction.py
index f91af6a70..907d2f27b 100644
--- a/surfsense_backend/app/agents/chat/shared/middleware/compaction.py
+++ b/surfsense_backend/app/agents/chat/shared/middleware/compaction.py
@@ -4,7 +4,7 @@ Extends ``SummarizationMiddleware`` with three SurfSense behaviors:
 
 1. A structured summary template (:data:`SURFSENSE_SUMMARY_PROMPT`) instead of
    the base freeform prompt.
-2. Protected SystemMessages (injected hints like ``<priority_documents>``) are
+2. Protected SystemMessages (injected hints like ``<workspace_tree>``) are
    kept verbatim instead of being summarized away.
 3. ``content=None`` is sanitized before ``get_buffer_string`` (some providers
    stream tool-only AIMessages with ``None`` content, which would crash it).
@@ -77,7 +77,6 @@ Respond ONLY with the structured summary. Do not include any text before or afte
 # compaction step happens *before* re-injection in some paths, so we
 # must preserve them verbatim across the cutoff.
 PROTECTED_SYSTEM_PREFIXES: tuple[str, ...] = (
-    "<priority_documents>",  # KnowledgePriorityMiddleware
     "<workspace_tree>",  # KnowledgeTreeMiddleware
     "<file_operation_contract>",  # reserved file-operation contract prefix
     "<user_memory>",  # MemoryInjectionMiddleware
diff --git a/surfsense_backend/app/agents/chat/shared/tools/web_search.py b/surfsense_backend/app/agents/chat/shared/tools/web_search.py
index c67db541c..424225b30 100644
--- a/surfsense_backend/app/agents/chat/shared/tools/web_search.py
+++ b/surfsense_backend/app/agents/chat/shared/tools/web_search.py
@@ -4,20 +4,40 @@ Web search tool for the SurfSense agent.
 Provides a unified tool for real-time web searches that dispatches to all
 configured search engines: the platform SearXNG instance (always available)
 plus any user-configured live-search connectors (Tavily, Linkup, Baidu).
+
+Each result is registered into the conversation citation registry as a
+``WEB_RESULT`` and rendered with a server-assigned ``[n]`` label, so the model
+cites the web exactly like the knowledge base — one ``[n]`` spine, no special
+web citation form.
 """
 
-import asyncio
-import json
-import time
-from typing import Any
+from __future__ import annotations
 
-from langchain_core.tools import StructuredTool
-from pydantic import BaseModel, Field
+import asyncio
+import time
+from typing import TYPE_CHECKING, Annotated, Any
+from urllib.parse import urlparse
+
+from langchain.tools import ToolRuntime
+from langchain_core.messages import ToolMessage
+from langchain_core.tools import BaseTool, StructuredTool
+from langgraph.types import Command
 
 from app.db import shielded_async_session
 from app.services.connector_service import ConnectorService
 from app.utils.perf import get_perf_logger
 
+if TYPE_CHECKING:
+    from app.agents.chat.multi_agent_chat.shared.document_render import (
+        RenderableDocument,
+    )
+
+# NOTE: imports from ``app.agents.chat.multi_agent_chat`` are done lazily inside
+# the functions below. This module lives under ``app.agents.chat.shared`` but is
+# imported during the ``multi_agent_chat`` package's own init cascade (via the
+# research subagent); importing that package at module load would re-enter a
+# partially-initialized module. Lazy imports break that cycle.
+
 _LIVE_SEARCH_CONNECTORS: set[str] = {
     "TAVILY_API",
     "LINKUP_API",
@@ -37,28 +57,29 @@ _CONNECTOR_LABELS: dict[str, str] = {
 }
 
 
-class WebSearchInput(BaseModel):
-    """Input schema for the web_search tool."""
-
-    query: str = Field(
-        description="The search query to look up on the web. Use specific, descriptive terms.",
-    )
-    top_k: int = Field(
-        default=10,
-        description="Number of results to retrieve (default: 10, max: 50).",
-    )
+def _web_source_label(url: str) -> str:
+    """Label for the ``<document source=…>`` attr: ``Web · <domain>`` or ``Web``."""
+    host = "" if not url else urlparse(url).netloc.removeprefix("www.")
+    return "Web" if not host else f"Web · {host}"
 
 
-def _format_web_results(
+def _to_renderable_web_documents(
     documents: list[dict[str, Any]],
     *,
     max_chars: int = 50_000,
-) -> str:
-    """Format web search results into XML suitable for the LLM context."""
-    if not documents:
-        return "No web search results found."
+) -> list[RenderableDocument]:
+    """Map raw web results to renderable documents, one passage (the snippet) each.
 
-    parts: list[str] = []
+    A result with no URL is skipped: ``url`` is the citation locator, so without
+    it the result cannot be registered or resolved.
+    """
+    from app.agents.chat.multi_agent_chat.shared.citations import CitationSourceType
+    from app.agents.chat.multi_agent_chat.shared.document_render import (
+        RenderableDocument,
+        RenderablePassage,
+    )
+
+    renderables: list[RenderableDocument] = []
     total_chars = 0
 
     for doc in documents:
@@ -67,36 +88,28 @@ def _format_web_results(
         title = doc_info.get("title") or "Web Result"
         url = metadata.get("url") or ""
         content = (doc.get("content") or "").strip()
-        source = metadata.get("document_type") or doc.get("source") or "WEB_SEARCH"
-        if not content:
+        if not content or not url:
             continue
 
-        metadata_json = json.dumps(metadata, ensure_ascii=False)
-        doc_xml = "\n".join(
-            [
-                "<document>",
-                "<document_metadata>",
-                f"  <document_type>{source}</document_type>",
-                f"  <title><![CDATA[{title}]]></title>",
-                f"  <url><![CDATA[{url}]]></url>",
-                f"  <metadata_json><![CDATA[{metadata_json}]]></metadata_json>",
-                "</document_metadata>",
-                "<document_content>",
-                f"  <chunk id='{url}'><![CDATA[{content}]]></chunk>",
-                "</document_content>",
-                "</document>",
-                "",
-            ]
-        )
-
-        if total_chars + len(doc_xml) > max_chars:
-            parts.append("<!-- Output truncated to fit context window -->")
+        total_chars += len(content)
+        if total_chars > max_chars:
             break
 
-        parts.append(doc_xml)
-        total_chars += len(doc_xml)
+        renderables.append(
+            RenderableDocument(
+                title=title,
+                source=_web_source_label(url),
+                passages=[
+                    RenderablePassage(
+                        content=content,
+                        locator={"url": url},
+                        source_type=CitationSourceType.WEB_RESULT,
+                    )
+                ],
+            )
+        )
 
-    return "\n".join(parts).strip() or "No web search results found."
+    return renderables
 
 
 async def _search_live_connector(
@@ -141,7 +154,7 @@ async def _search_live_connector(
 def create_web_search_tool(
     search_space_id: int | None = None,
     available_connectors: list[str] | None = None,
-) -> StructuredTool:
+) -> BaseTool:
     """Factory for the ``web_search`` tool.
 
     Dispatches in parallel to the platform SearXNG instance and any
@@ -168,7 +181,17 @@ def create_web_search_tool(
     _search_space_id = search_space_id
     _active_live = active_live_connectors
 
-    async def _web_search_impl(query: str, top_k: int = 10) -> str:
+    async def _web_search_impl(
+        query: Annotated[
+            str,
+            "The search query to look up on the web. Use specific, descriptive terms.",
+        ],
+        runtime: ToolRuntime,
+        top_k: Annotated[
+            int,
+            "Number of results to retrieve (default: 10, max: 50).",
+        ] = 10,
+    ) -> Command | str:
         from app.services import web_search_service
 
         perf = get_perf_logger()
@@ -226,22 +249,39 @@ def create_web_search_tool(
                 seen_urls.add(url)
             deduplicated.append(doc)
 
-        formatted = _format_web_results(deduplicated)
+        from app.agents.chat.multi_agent_chat.shared.citations import load_registry
+        from app.agents.chat.multi_agent_chat.shared.document_render import (
+            render_web_results,
+        )
+
+        registry = load_registry(getattr(runtime, "state", None))
+        renderables = _to_renderable_web_documents(deduplicated)
+        rendered = render_web_results(renderables, registry)
 
         perf.info(
-            "[web_search] query=%r engines=%d results=%d deduped=%d chars=%d in %.3fs",
+            "[web_search] query=%r engines=%d results=%d deduped=%d renderable=%d in %.3fs",
             query[:60],
             len(tasks),
             len(all_documents),
             len(deduplicated),
-            len(formatted),
+            len(renderables),
             time.perf_counter() - t0,
         )
-        return formatted
 
-    return StructuredTool(
+        if rendered is None:
+            return "No web search results found."
+
+        return Command(
+            update={
+                "messages": [
+                    ToolMessage(content=rendered, tool_call_id=runtime.tool_call_id)
+                ],
+                "citation_registry": registry,
+            }
+        )
+
+    return StructuredTool.from_function(
         name="web_search",
         description=description,
         coroutine=_web_search_impl,
-        args_schema=WebSearchInput,
     )
diff --git a/surfsense_backend/app/automations/actions/builtin/agent_task/invoke.py b/surfsense_backend/app/automations/actions/builtin/agent_task/invoke.py
index b2f441961..e1ba32ce9 100644
--- a/surfsense_backend/app/automations/actions/builtin/agent_task/invoke.py
+++ b/surfsense_backend/app/automations/actions/builtin/agent_task/invoke.py
@@ -78,7 +78,7 @@ async def _resolve_mention_context(
     Automation always runs in cloud filesystem mode, so we mirror the chat
     ``new_chat`` flow: substitute ``@title`` tokens with canonical
     ``/documents/...`` paths, prepend a ``<mentioned_connectors>`` block, and
-    build a ``SurfSenseContextSchema`` that ``KnowledgePriorityMiddleware``
+    build a ``SurfSenseContextSchema`` that the ``search_knowledge_base`` tool
     reads via ``runtime.context``. Returns ``(query, None)`` unchanged when
     there are no mentions.
     """
@@ -210,7 +210,7 @@ async def run_agent_task(
             runtime_context.turn_id = turn_id
 
         # The compiled graph declares ``context_schema=SurfSenseContextSchema``;
-        # mentions only reach ``KnowledgePriorityMiddleware`` via ``context=``.
+        # mentions only reach the ``search_knowledge_base`` tool via ``context=``.
         invoke_kwargs: dict[str, Any] = {"config": config}
         if runtime_context is not None:
             invoke_kwargs["context"] = runtime_context
diff --git a/surfsense_backend/app/prompts/default_system_instructions.py b/surfsense_backend/app/prompts/default_system_instructions.py
deleted file mode 100644
index b968fc1f0..000000000
--- a/surfsense_backend/app/prompts/default_system_instructions.py
+++ /dev/null
@@ -1,135 +0,0 @@
-"""
-Thin compatibility wrapper around :mod:`app.prompts.system_prompt_composer.composer`.
-
-The composer split the previous monolithic prompt string into a fragment
-tree under ``prompts/`` plus a model-family dispatch step (see the
-composer module docstring for credits). This module preserves the public
-function surface (``build_surfsense_system_prompt`` /
-``build_configurable_system_prompt`` /
-``get_default_system_instructions`` / ``SURFSENSE_SYSTEM_PROMPT``) so
-that existing call sites — the multi-agent chat factory, anonymous chat
-routes, and the configurable-prompt admin path — keep working without churn.
-
-For new call sites prefer importing ``compose_system_prompt`` directly
-from :mod:`app.prompts.system_prompt_composer.composer`.
-"""
-
-from __future__ import annotations
-
-from datetime import UTC, datetime
-
-from app.db import ChatVisibility
-
-from .system_prompt_composer.composer import (
-    _read_fragment,
-    compose_system_prompt,
-    detect_provider_variant,
-)
-
-# Optional routing fragments under ``prompts/routing/`` (see composer).
-_DEFAULT_CONNECTOR_ROUTING: tuple[str, ...] = ("linear", "slack")
-
-# Public re-exports for backwards compatibility (some legacy code reads the
-# raw default-instructions text directly).
-SURFSENSE_SYSTEM_INSTRUCTIONS_TEMPLATE = (
-    "<system_instruction>\nDefault SurfSense agent system instructions are now\n"
-    "composed from prompts/base/*.md. See compose_system_prompt() for details.\n"
-    "</system_instruction>"
-)
-
-# Citation block re-exposed for legacy importers that referenced this constant
-# directly. The composer is the canonical source; this is a frozen snapshot
-# loaded at module-init time.
-SURFSENSE_CITATION_INSTRUCTIONS = _read_fragment("base/citations_on.md")
-SURFSENSE_NO_CITATION_INSTRUCTIONS = _read_fragment("base/citations_off.md")
-
-
-def build_surfsense_system_prompt(
-    today: datetime | None = None,
-    thread_visibility: ChatVisibility | None = None,
-    enabled_tool_names: set[str] | None = None,
-    disabled_tool_names: set[str] | None = None,
-    mcp_connector_tools: dict[str, list[str]] | None = None,
-    *,
-    model_name: str | None = None,
-) -> str:
-    """Build the default SurfSense system prompt (citations on, defaults).
-
-    See :func:`app.prompts.system_prompt_composer.composer.compose_system_prompt`
-    for full parameter docs.
-    """
-    return compose_system_prompt(
-        today=today,
-        thread_visibility=thread_visibility,
-        enabled_tool_names=enabled_tool_names,
-        disabled_tool_names=disabled_tool_names,
-        mcp_connector_tools=mcp_connector_tools,
-        citations_enabled=True,
-        model_name=model_name,
-        connector_routing=_DEFAULT_CONNECTOR_ROUTING,
-    )
-
-
-def build_configurable_system_prompt(
-    custom_system_instructions: str | None = None,
-    use_default_system_instructions: bool = True,
-    citations_enabled: bool = True,
-    today: datetime | None = None,
-    thread_visibility: ChatVisibility | None = None,
-    enabled_tool_names: set[str] | None = None,
-    disabled_tool_names: set[str] | None = None,
-    mcp_connector_tools: dict[str, list[str]] | None = None,
-    *,
-    model_name: str | None = None,
-) -> str:
-    """Build a configurable SurfSense system prompt.
-
-    See :func:`app.prompts.system_prompt_composer.composer.compose_system_prompt`
-    for full parameter docs.
-    """
-    return compose_system_prompt(
-        today=today,
-        thread_visibility=thread_visibility,
-        enabled_tool_names=enabled_tool_names,
-        disabled_tool_names=disabled_tool_names,
-        mcp_connector_tools=mcp_connector_tools,
-        custom_system_instructions=custom_system_instructions,
-        use_default_system_instructions=use_default_system_instructions,
-        citations_enabled=citations_enabled,
-        model_name=model_name,
-        connector_routing=_DEFAULT_CONNECTOR_ROUTING,
-    )
-
-
-def get_default_system_instructions() -> str:
-    """Return the default ``<system_instruction>`` block (no tools / citations).
-
-    Useful for populating the UI when editing custom system instructions.
-    The output reflects the current fragment tree, not a baked-in constant.
-    """
-    resolved_today = datetime.now(UTC).date().isoformat()
-    from .system_prompt_composer.composer import (
-        _build_system_instructions,  # local import
-    )
-
-    return _build_system_instructions(
-        visibility=ChatVisibility.PRIVATE,
-        resolved_today=resolved_today,
-    ).strip()
-
-
-# Backwards compatibility — some modules import the constant directly.
-SURFSENSE_SYSTEM_PROMPT = build_surfsense_system_prompt()
-
-
-__all__ = [
-    "SURFSENSE_CITATION_INSTRUCTIONS",
-    "SURFSENSE_NO_CITATION_INSTRUCTIONS",
-    "SURFSENSE_SYSTEM_INSTRUCTIONS_TEMPLATE",
-    "SURFSENSE_SYSTEM_PROMPT",
-    "build_configurable_system_prompt",
-    "build_surfsense_system_prompt",
-    "compose_system_prompt",
-    "detect_provider_variant",
-    "get_default_system_instructions",
-]
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/__init__.py b/surfsense_backend/app/prompts/system_prompt_composer/__init__.py
deleted file mode 100644
index c91bb8a0b..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-"""SurfSense agent prompt fragments.
-
-The prompt is composed at runtime by :mod:`composer` from the markdown
-fragments under ``base/``, ``providers/``, ``tools/``, ``examples/``, and
-``routing/``. ``system_prompt.py`` is now a thin wrapper that delegates
-to :func:`composer.compose_system_prompt`.
-"""
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/base/__init__.py b/surfsense_backend/app/prompts/system_prompt_composer/base/__init__.py
deleted file mode 100644
index 8b1378917..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/base/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/base/agent_private.md b/surfsense_backend/app/prompts/system_prompt_composer/base/agent_private.md
deleted file mode 100644
index 88554ad4e..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/base/agent_private.md
+++ /dev/null
@@ -1,7 +0,0 @@
-You are SurfSense, a reasoning and acting AI agent designed to answer user questions using the user's personal knowledge base.
-
-Today's date (UTC): {resolved_today}
-
-When writing mathematical formulas or equations, ALWAYS use LaTeX notation. NEVER use backtick code spans or Unicode symbols for math.
-
-NEVER expose internal tool parameter names, backend IDs, or implementation details to the user. Always use natural, user-friendly language instead.
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/base/agent_team.md b/surfsense_backend/app/prompts/system_prompt_composer/base/agent_team.md
deleted file mode 100644
index 5fd56ae1b..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/base/agent_team.md
+++ /dev/null
@@ -1,9 +0,0 @@
-You are SurfSense, a reasoning and acting AI agent designed to answer questions in this team space using the team's shared knowledge base.
-
-In this team thread, each message is prefixed with **[DisplayName of the author]**. Use this to attribute and reference the author of anything in the discussion (who asked a question, made a suggestion, or contributed an idea) and to cite who said what in your answers.
-
-Today's date (UTC): {resolved_today}
-
-When writing mathematical formulas or equations, ALWAYS use LaTeX notation. NEVER use backtick code spans or Unicode symbols for math.
-
-NEVER expose internal tool parameter names, backend IDs, or implementation details to the user. Always use natural, user-friendly language instead.
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/base/citations_off.md b/surfsense_backend/app/prompts/system_prompt_composer/base/citations_off.md
deleted file mode 100644
index 8288886e9..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/base/citations_off.md
+++ /dev/null
@@ -1,16 +0,0 @@
-<citation_instructions>
-IMPORTANT: Citations are DISABLED for this configuration.
-
-DO NOT include any citations in your responses. Specifically:
-1. Do NOT use the [citation:chunk_id] format anywhere in your response.
-2. Do NOT reference document IDs, chunk IDs, or source IDs.
-3. Simply provide the information naturally without any citation markers.
-4. Write your response as if you're having a normal conversation, incorporating the information from your knowledge seamlessly.
-
-When answering questions based on documents from the knowledge base:
-- Present the information directly and confidently
-- Do not mention that information comes from specific documents or chunks
-- Integrate facts naturally into your response without attribution markers
-
-Your goal is to provide helpful, informative answers in a clean, readable format without any citation notation.
-</citation_instructions>
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/base/citations_on.md b/surfsense_backend/app/prompts/system_prompt_composer/base/citations_on.md
deleted file mode 100644
index 3562ce66e..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/base/citations_on.md
+++ /dev/null
@@ -1,89 +0,0 @@
-<citation_instructions>
-CRITICAL CITATION REQUIREMENTS:
-
-1. For EVERY piece of information you include from the documents, add a citation in the format [citation:chunk_id] where chunk_id is the exact value from the `<chunk id='...'>` tag inside `<document_content>`.
-2. Make sure ALL factual statements from the documents have proper citations.
-3. If multiple chunks support the same point, include all relevant citations [citation:chunk_id1], [citation:chunk_id2].
-4. You MUST use the exact chunk_id values from the `<chunk id='...'>` attributes. Do not create your own citation numbers.
-5. Every citation MUST be in the format [citation:chunk_id] where chunk_id is the exact chunk id value.
-6. Never modify or change the chunk_id - always use the original values exactly as provided in the chunk tags.
-7. Do not return citations as clickable links.
-8. Never format citations as markdown links like "([citation:5](https://example.com))". Always use plain square brackets only.
-9. Citations must ONLY appear as [citation:chunk_id] or [citation:chunk_id1], [citation:chunk_id2] format - never with parentheses, hyperlinks, or other formatting.
-10. Never make up chunk IDs. Only use chunk_id values that are explicitly provided in the `<chunk id='...'>` tags.
-11. If you are unsure about a chunk_id, do not include a citation rather than guessing or making one up.
-
-<document_structure_example>
-The documents you receive are structured like this:
-
-**Knowledge base documents (numeric chunk IDs):**
-<document>
-<document_metadata>
-  <document_id>42</document_id>
-  <document_type>GITHUB_CONNECTOR</document_type>
-  <title><![CDATA[Some repo / file / issue title]]></title>
-  <url><![CDATA[https://example.com]]></url>
-  <metadata_json><![CDATA[{{"any":"other metadata"}}]]></metadata_json>
-</document_metadata>
-
-<document_content>
-  <chunk id='123'><![CDATA[First chunk text...]]></chunk>
-  <chunk id='124'><![CDATA[Second chunk text...]]></chunk>
-</document_content>
-</document>
-
-**Web search results (URL chunk IDs):**
-<document>
-<document_metadata>
-  <document_type>WEB_SEARCH</document_type>
-  <title><![CDATA[Some web search result]]></title>
-  <url><![CDATA[https://example.com/article]]></url>
-</document_metadata>
-
-<document_content>
-  <chunk id='https://example.com/article'><![CDATA[Content from web search...]]></chunk>
-</document_content>
-</document>
-
-IMPORTANT: You MUST cite using the EXACT chunk ids from the `<chunk id='...'>` tags.
-- For knowledge base documents, chunk ids are numeric (e.g. 123, 124) or prefixed (e.g. doc-45).
-- For live web search results, chunk ids are URLs (e.g. https://example.com/article).
-Do NOT cite document_id. Always use the chunk id.
-</document_structure_example>
-
-<citation_format>
-- Every fact from the documents must have a citation in the format [citation:chunk_id] where chunk_id is the EXACT id value from a `<chunk id='...'>` tag
-- Citations should appear at the end of the sentence containing the information they support
-- Multiple citations should be separated by commas: [citation:chunk_id1], [citation:chunk_id2], [citation:chunk_id3]
-- No need to return references section. Just citations in answer.
-- NEVER create your own citation format - use the exact chunk_id values from the documents in the [citation:chunk_id] format
-- NEVER format citations as clickable links or as markdown links like "([citation:5](https://example.com))". Always use plain square brackets only
-- NEVER make up chunk IDs if you are unsure about the chunk_id. It is better to omit the citation than to guess
-- Copy the EXACT chunk id from the XML - if it says `<chunk id='5'>`, use [citation:5]
-- If the chunk id is a URL like `<chunk id='https://example.com/page'>`, use [citation:https://example.com/page]
-</citation_format>
-
-<citation_examples>
-CORRECT citation formats:
-- [citation:5] (numeric chunk ID from knowledge base)
-- [citation:https://example.com/article] (URL chunk ID from web search results)
-- [citation:chunk_id1], [citation:chunk_id2], [citation:chunk_id3] (multiple citations)
-
-INCORRECT citation formats (DO NOT use):
-- Using parentheses and markdown links: ([citation:5](https://github.com/MODSetter/SurfSense))
-- Using parentheses around brackets: ([citation:5])
-- Using hyperlinked text: [link to source 5](https://example.com)
-- Using footnote style: ... library¹
-- Making up source IDs when source_id is unknown
-- Using old IEEE format: [1], [2], [3]
-- Using source types instead of IDs: [citation:GITHUB_CONNECTOR] instead of [citation:5]
-</citation_examples>
-
-<citation_output_example>
-Based on your GitHub repositories and video content, Python's asyncio library provides tools for writing concurrent code using the async/await syntax [citation:5]. It's particularly useful for I/O-bound and high-level structured network code [citation:5].
-
-According to web search results, the key advantage of asyncio is that it can improve performance by allowing other code to run while waiting for I/O operations to complete [citation:https://docs.python.org/3/library/asyncio.html]. This makes it excellent for scenarios like web scraping, API calls, database operations, or any situation where your program spends time waiting for external resources.
-
-However, from your video learning, it's important to note that asyncio is not suitable for CPU-bound tasks as it runs on a single thread [citation:12]. For computationally intensive work, you'd want to use multiprocessing instead.
-</citation_output_example>
-</citation_instructions>
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/base/kb_only_policy_private.md b/surfsense_backend/app/prompts/system_prompt_composer/base/kb_only_policy_private.md
deleted file mode 100644
index 073b75fa5..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/base/kb_only_policy_private.md
+++ /dev/null
@@ -1,15 +0,0 @@
-<knowledge_base_only_policy>
-CRITICAL RULE — KNOWLEDGE BASE FIRST, NEVER DEFAULT TO GENERAL KNOWLEDGE:
-- You MUST answer questions ONLY using information retrieved from the user's knowledge base, web search results, scraped webpages, or other tool outputs.
-- You MUST NOT answer factual or informational questions from your own training data or general knowledge unless the user explicitly grants permission.
-- If the knowledge base search returns no relevant results AND no other tool provides the answer, you MUST:
-  1. Inform the user that you could not find relevant information in their knowledge base.
-  2. Ask the user: "Would you like me to answer from my general knowledge instead?"
-  3. ONLY provide a general-knowledge answer AFTER the user explicitly says yes.
-- This policy does NOT apply to:
-  * Casual conversation, greetings, or meta-questions about SurfSense itself (e.g., "what can you do?"). For "how do I use SurfSense" / product-documentation questions, point the user to https://www.surfsense.com/docs.
-  * Formatting, summarization, or analysis of content already present in the conversation
-  * Following user instructions that are clearly task-oriented (e.g., "rewrite this in bullet points")
-  * Tool-usage actions like generating reports, podcasts, images, or scraping webpages
-  * Queries about services that have direct tools (Linear, ClickUp, Jira, Slack, Airtable) — see <tool_routing> below
-</knowledge_base_only_policy>
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/base/kb_only_policy_team.md b/surfsense_backend/app/prompts/system_prompt_composer/base/kb_only_policy_team.md
deleted file mode 100644
index 1a43ed490..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/base/kb_only_policy_team.md
+++ /dev/null
@@ -1,15 +0,0 @@
-<knowledge_base_only_policy>
-CRITICAL RULE — KNOWLEDGE BASE FIRST, NEVER DEFAULT TO GENERAL KNOWLEDGE:
-- You MUST answer questions ONLY using information retrieved from the team's shared knowledge base, web search results, scraped webpages, or other tool outputs.
-- You MUST NOT answer factual or informational questions from your own training data or general knowledge unless a team member explicitly grants permission.
-- If the knowledge base search returns no relevant results AND no other tool provides the answer, you MUST:
-  1. Inform the team that you could not find relevant information in the shared knowledge base.
-  2. Ask: "Would you like me to answer from my general knowledge instead?"
-  3. ONLY provide a general-knowledge answer AFTER a team member explicitly says yes.
-- This policy does NOT apply to:
-  * Casual conversation, greetings, or meta-questions about SurfSense itself (e.g., "what can you do?"). For "how do I use SurfSense" / product-documentation questions, point the user to https://www.surfsense.com/docs.
-  * Formatting, summarization, or analysis of content already present in the conversation
-  * Following user instructions that are clearly task-oriented (e.g., "rewrite this in bullet points")
-  * Tool-usage actions like generating reports, podcasts, images, or scraping webpages
-  * Queries about services that have direct tools (Linear, ClickUp, Jira, Slack, Airtable) — see <tool_routing> below
-</knowledge_base_only_policy>
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/base/memory_protocol_private.md b/surfsense_backend/app/prompts/system_prompt_composer/base/memory_protocol_private.md
deleted file mode 100644
index 22fed418a..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/base/memory_protocol_private.md
+++ /dev/null
@@ -1,12 +0,0 @@
-<memory_protocol>
-IMPORTANT — After understanding each user message, ALWAYS check: does this message
-reveal durable facts about the user (role, interests, preferences, projects,
-background, or standing instructions)? If yes, you MUST call update_memory
-alongside your normal response — do not defer this to a later turn.
-
-Memory is stored as a heading-based markdown document. New entries should be
-under `##` headings such as `## Facts`, `## Preferences`, or `## Instructions`
-with bullets like `- YYYY-MM-DD: text`. If existing memory contains legacy
-`(YYYY-MM-DD) [fact|pref|instr]` markers, preserve the information but write
-new saves in the heading-based format.
-</memory_protocol>
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/base/memory_protocol_team.md b/surfsense_backend/app/prompts/system_prompt_composer/base/memory_protocol_team.md
deleted file mode 100644
index 38ec798c0..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/base/memory_protocol_team.md
+++ /dev/null
@@ -1,14 +0,0 @@
-<memory_protocol>
-IMPORTANT — After understanding each user message, ALWAYS check: does this message
-reveal durable facts about the team (decisions, conventions, architecture, processes,
-or key facts)? If yes, you MUST call update_memory alongside your normal response —
-do not defer this to a later turn.
-
-Team memory is stored as a heading-based markdown document. New entries should
-be under `##` headings such as `## Product Decisions`,
-`## Engineering Conventions`, `## Project Facts`, or `## Open Questions` with
-bullets like `- YYYY-MM-DD: text`. If existing memory contains legacy
-`(YYYY-MM-DD) [fact]` markers, preserve the information but write new saves in
-the heading-based format. Do not create personal headings such as
-`## Preferences` or `## Instructions`.
-</memory_protocol>
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/base/parameter_resolution.md b/surfsense_backend/app/prompts/system_prompt_composer/base/parameter_resolution.md
deleted file mode 100644
index 77be4d87c..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/base/parameter_resolution.md
+++ /dev/null
@@ -1,39 +0,0 @@
-<parameter_resolution>
-Some service tools require identifiers or context you do not have (account IDs,
-workspace names, channel IDs, project keys, etc.). NEVER ask the user for raw
-IDs or technical identifiers — they cannot memorise them.
-
-Instead, follow this discovery pattern:
-1. Call a listing/discovery tool to find available options.
-2. ONE result → use it silently, no question to the user.
-3. MULTIPLE results → present the options by their display names and let the
-   user choose. Never show raw UUIDs — always use friendly names.
-
-Discovery tools by level:
-- Which account/workspace? → get_connected_accounts("<service>")
-- Which Jira site (cloudId)? → getAccessibleAtlassianResources
-- Which Jira project?  → getVisibleJiraProjects (after resolving cloudId)
-- Which Jira issue type? → getJiraProjectIssueTypesMetadata (after resolving project)
-- Which channel?  → slack_search_channels
-- Which base?     → list_bases
-- Which table?    → list_tables_for_base (after resolving baseId)
-- Which task?     → clickup_search
-- Which issue?    → list_issues (Linear) or searchJiraIssuesUsingJql (Jira)
-
-For Jira specifically: ALWAYS call getAccessibleAtlassianResources first to
-obtain the cloudId, then pass it to other Jira tools. When creating an issue,
-chain: getAccessibleAtlassianResources → getVisibleJiraProjects → createJiraIssue.
-If there is only one option at each step, use it silently. If multiple, present
-friendly names.
-
-Chain discovery when needed — e.g. for Airtable records: list_bases → pick
-base → list_tables_for_base → pick table → list_records_for_table.
-
-MULTI-ACCOUNT TOOL NAMING: When the user has multiple accounts connected for
-the same service, tool names are prefixed to avoid collisions — e.g.
-linear_25_list_issues and linear_30_list_issues instead of two list_issues.
-Each prefixed tool's description starts with [Account: <display_name>] so you
-know which account it targets. Use get_connected_accounts("<service>") to see
-the full list of accounts with their connector IDs and display names.
-When only one account is connected, tools have their normal unprefixed names.
-</parameter_resolution>
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/base/tool_routing_private.md b/surfsense_backend/app/prompts/system_prompt_composer/base/tool_routing_private.md
deleted file mode 100644
index 9121de879..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/base/tool_routing_private.md
+++ /dev/null
@@ -1,24 +0,0 @@
-<tool_routing>
-CRITICAL — You have direct tools for these services: Linear, ClickUp, Jira, Slack, Airtable.
-Their data is NEVER in the knowledge base. You MUST call their tools immediately — never
-say "I don't see it in the knowledge base" or ask the user if they want you to check.
-Ignore any knowledge base results for these services.
-
-When to use which tool:
-- Linear (issues, teams, users, projects when MCP exposes them) → hosted Linear MCP read tools (e.g. `list_issues`, `get_issue`, `list_teams`, `list_users`, …) and `save_issue` for create/update; native SurfSense Linear issue tools when present. For **multi-step Linear-only** work (several reads, structured evidence), delegate with the `task` tool to subagent **`linear_specialist`** instead of mixing unrelated tools.
-- ClickUp (tasks) → clickup_search, clickup_get_task
-- Jira (issues) → getAccessibleAtlassianResources (cloudId discovery), getVisibleJiraProjects (project discovery), getJiraProjectIssueTypesMetadata (issue type discovery), searchJiraIssuesUsingJql, createJiraIssue, editJiraIssue
-- Slack (messages, channels) → `slack_search_channels`, `slack_read_channel`, `slack_read_thread`, and other `slack_*` tools when connected. For **multi-step Slack-only** work, delegate with `task` to **`slack_specialist`**.
-- Airtable (bases, tables, records) → list_bases, list_tables_for_base, list_records_for_table
-- Knowledge base content (Notion, GitHub, files, notes) → automatically searched
-- Real-time public web data → call web_search
-- Reading a specific webpage → call scrape_webpage
-- SurfSense product / how-to questions (setup, configuration, connectors, feature behavior) → point the user to the documentation: https://www.surfsense.com/docs
-
-**`task` subagents (when to delegate):**
-- **`linear_specialist`** — Linear-only investigations and tool use.
-- **`slack_specialist`** — Slack-only investigations and tool use.
-- **`connector_negotiator`** — **Cross-connector** chains (e.g. data from Slack then action in Linear).
-- **`explore`** — Read-only KB + web research with citations.
-- **`report_writer`** — Single `generate_report` deliverable.
-</tool_routing>
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/base/tool_routing_team.md b/surfsense_backend/app/prompts/system_prompt_composer/base/tool_routing_team.md
deleted file mode 100644
index c5383be77..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/base/tool_routing_team.md
+++ /dev/null
@@ -1,24 +0,0 @@
-<tool_routing>
-CRITICAL — You have direct tools for these services: Linear, ClickUp, Jira, Slack, Airtable.
-Their data is NEVER in the knowledge base. You MUST call their tools immediately — never
-say "I don't see it in the knowledge base" or ask if they want you to check.
-Ignore any knowledge base results for these services.
-
-When to use which tool:
-- Linear (issues, teams, users, projects when MCP exposes them) → hosted Linear MCP read tools (e.g. `list_issues`, `get_issue`, `list_teams`, `list_users`, …) and `save_issue` for create/update; native SurfSense Linear issue tools when present. For **multi-step Linear-only** work (several reads, structured evidence), delegate with the `task` tool to subagent **`linear_specialist`** instead of mixing unrelated tools.
-- ClickUp (tasks) → clickup_search, clickup_get_task
-- Jira (issues) → getAccessibleAtlassianResources (cloudId discovery), getVisibleJiraProjects (project discovery), getJiraProjectIssueTypesMetadata (issue type discovery), searchJiraIssuesUsingJql, createJiraIssue, editJiraIssue
-- Slack (messages, channels) → `slack_search_channels`, `slack_read_channel`, `slack_read_thread`, and other `slack_*` tools when connected. For **multi-step Slack-only** work, delegate with `task` to **`slack_specialist`**.
-- Airtable (bases, tables, records) → list_bases, list_tables_for_base, list_records_for_table
-- Knowledge base content (Notion, GitHub, files, notes) → automatically searched
-- Real-time public web data → call web_search
-- Reading a specific webpage → call scrape_webpage
-- SurfSense product / how-to questions (setup, configuration, connectors, feature behavior) → point the user to the documentation: https://www.surfsense.com/docs
-
-**`task` subagents (when to delegate):**
-- **`linear_specialist`** — Linear-only investigations and tool use.
-- **`slack_specialist`** — Slack-only investigations and tool use.
-- **`connector_negotiator`** — **Cross-connector** chains (e.g. data from Slack then action in Linear).
-- **`explore`** — Read-only KB + web research with citations.
-- **`report_writer`** — Single `generate_report` deliverable.
-</tool_routing>
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/composer.py b/surfsense_backend/app/prompts/system_prompt_composer/composer.py
deleted file mode 100644
index c639d4aa0..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/composer.py
+++ /dev/null
@@ -1,403 +0,0 @@
-"""
-Prompt composer for the SurfSense ``new_chat`` agent.
-
-This module assembles the agent's system prompt from the markdown fragments
-under :mod:`app.prompts.system_prompt_composer`. It replaces the monolithic
-``system_prompt.py`` with a clean, fragment-based composition:
-
-::
-
-    prompts/
-      base/                  # agent identity, KB policy, tool routing, …
-      providers/             # provider-specific tweaks (anthropic, gpt5, …)
-      tools/                 # one ``<name>.md`` per tool
-      examples/              # one ``<name>.md`` per tool with call examples
-      routing/               # connector-specific routing notes (linear, slack, …)
-
-The model-family dispatch step (see :func:`detect_provider_variant`)
-mirrors OpenCode's ``packages/opencode/src/session/system.ts`` — different
-model families respond best to differently-styled prompts (Claude likes
-XML/narrative, GPT-5 wants channel-aware pragmatic, Codex needs
-terse/file:line, Gemini wants formal numbered steps, etc.). LangChain's
-``dynamic_prompt`` helper supports per-call prompt swaps but ships no
-out-of-the-box family classifier, so we keep our own.
-
-Backwards compatibility
-=======================
-
-``system_prompt.py`` re-exports :func:`compose_system_prompt` and wraps it
-in functions with the same signatures as the legacy
-``build_surfsense_system_prompt`` / ``build_configurable_system_prompt`` so
-existing call sites do not change.
-"""
-
-from __future__ import annotations
-
-import re
-from collections.abc import Iterable
-from datetime import UTC, datetime
-from importlib import resources
-
-from app.db import ChatVisibility
-
-# -----------------------------------------------------------------------------
-# Provider variant detection
-# -----------------------------------------------------------------------------
-
-# String literal alias for the supported provider-specific prompt variants.
-# When adding a new variant, also drop a matching ``providers/<variant>.md``
-# file in this package and (if appropriate) extend the regex matchers below.
-#
-# Stylistic clusters: each variant is a focused style nudge, NOT a full
-# system prompt — the main prompt is already assembled from base/ +
-# tools/ + routing/. The clustering itself (which models map to which
-# style) follows OpenCode's ``system.ts`` family table; see the module
-# docstring for credits.
-ProviderVariant = str
-# Known values:
-#   "anthropic"        — Claude family (XML-friendly, narrative todos)
-#   "openai_reasoning" — GPT-5 / o-series (channel-aware pragmatic)
-#   "openai_classic"   — GPT-4 family (autonomous persistence)
-#   "openai_codex"     — gpt-*-codex (code-purist, terse, file:line refs)
-#   "google"           — Gemini (formal, <3-line, numbered workflow)
-#   "kimi"             — Moonshot Kimi-K* (action-bias, parallel tools)
-#   "grok"             — xAI Grok (extreme-terse, one-word ok)
-#   "deepseek"         — DeepSeek V3 / R1 (terse, R1-aware reasoning)
-#   "default"          — fallback, no provider-specific block emitted
-
-# IMPORTANT: order of evaluation matters in :func:`detect_provider_variant`.
-# More specific patterns must come first (e.g. ``codex`` before
-# ``openai_reasoning`` because codex model ids contain ``gpt``).
-
-_OPENAI_CODEX_RE = re.compile(
-    r"\b(gpt-codex|codex-mini|gpt-[\d.]+-codex)\b", re.IGNORECASE
-)
-_OPENAI_REASONING_RE = re.compile(r"\b(gpt-5|o\d|o-)", re.IGNORECASE)
-_OPENAI_CLASSIC_RE = re.compile(r"\bgpt-4", re.IGNORECASE)
-_ANTHROPIC_RE = re.compile(r"\bclaude\b", re.IGNORECASE)
-_GOOGLE_RE = re.compile(r"\bgemini\b", re.IGNORECASE)
-_KIMI_RE = re.compile(r"\b(kimi[-\d.]*|moonshot)\b", re.IGNORECASE)
-_GROK_RE = re.compile(r"\bgrok\b", re.IGNORECASE)
-_DEEPSEEK_RE = re.compile(r"\bdeepseek\b", re.IGNORECASE)
-
-
-def detect_provider_variant(model_name: str | None) -> ProviderVariant:
-    """Pick a provider-specific prompt variant from a model id string.
-
-    Heuristic match on the model id; returns ``"default"`` when nothing
-    matches so the composer can fall back to the empty placeholder file.
-
-    Order is significant: more-specific patterns are tried first so
-    ``gpt-5-codex`` routes to ``"openai_codex"`` rather than
-    ``"openai_reasoning"`` — same dispatch order as OpenCode's
-    ``packages/opencode/src/session/system.ts``.
-    """
-    if not model_name:
-        return "default"
-    name = model_name.strip()
-    if _OPENAI_CODEX_RE.search(name):
-        return "openai_codex"
-    if _OPENAI_REASONING_RE.search(name):
-        return "openai_reasoning"
-    if _OPENAI_CLASSIC_RE.search(name):
-        return "openai_classic"
-    if _ANTHROPIC_RE.search(name):
-        return "anthropic"
-    if _GOOGLE_RE.search(name):
-        return "google"
-    if _KIMI_RE.search(name):
-        return "kimi"
-    if _GROK_RE.search(name):
-        return "grok"
-    if _DEEPSEEK_RE.search(name):
-        return "deepseek"
-    return "default"
-
-
-# -----------------------------------------------------------------------------
-# Fragment loading
-# -----------------------------------------------------------------------------
-
-
-_PROMPTS_PACKAGE = "app.prompts.system_prompt_composer"
-
-
-def _read_fragment(subpath: str) -> str:
-    """Read a fragment file from the ``prompts/`` resource tree.
-
-    Returns the raw contents stripped of any single trailing newline so
-    composition can append explicit separators without compounding blank
-    lines. Missing files return an empty string so optional fragments
-    (e.g. provider hints) act as no-ops.
-    """
-    parts = subpath.split("/")
-    try:
-        ref = resources.files(_PROMPTS_PACKAGE).joinpath(*parts)
-        if not ref.is_file():
-            return ""
-        text = ref.read_text(encoding="utf-8")
-    except (FileNotFoundError, ModuleNotFoundError):
-        return ""
-    if text.endswith("\n"):
-        text = text[:-1]
-    return text
-
-
-# -----------------------------------------------------------------------------
-# Tool ordering + memory variant resolution
-# -----------------------------------------------------------------------------
-
-
-# Ordered for reading flow: fundamentals first, then artifact generators,
-# then memory at the end (mirrors the legacy ``_ALL_TOOL_NAMES_ORDERED``).
-ALL_TOOL_NAMES_ORDERED: tuple[str, ...] = (
-    "web_search",
-    "generate_podcast",
-    "generate_video_presentation",
-    "generate_report",
-    "generate_resume",
-    "generate_image",
-    "scrape_webpage",
-    "update_memory",
-)
-
-
-_MEMORY_VARIANT_TOOLS: frozenset[str] = frozenset({"update_memory"})
-
-
-def _tool_fragment_path(tool_name: str, variant: str) -> str:
-    """Resolve a tool's instruction fragment path.
-
-    Tools listed in :data:`_MEMORY_VARIANT_TOOLS` switch on the conversation
-    visibility and load ``tools/<name>_<variant>.md``; everything else
-    falls back to ``tools/<name>.md``.
-    """
-    if tool_name in _MEMORY_VARIANT_TOOLS:
-        return f"tools/{tool_name}_{variant}.md"
-    return f"tools/{tool_name}.md"
-
-
-def _example_fragment_path(tool_name: str, variant: str) -> str:
-    if tool_name in _MEMORY_VARIANT_TOOLS:
-        return f"examples/{tool_name}_{variant}.md"
-    return f"examples/{tool_name}.md"
-
-
-def _format_tool_label(tool_name: str) -> str:
-    return tool_name.replace("_", " ").title()
-
-
-# -----------------------------------------------------------------------------
-# Section builders
-# -----------------------------------------------------------------------------
-
-
-def _build_system_instructions(
-    *,
-    visibility: ChatVisibility,
-    resolved_today: str,
-) -> str:
-    """Reconstruct the legacy ``<system_instruction>`` block from fragments."""
-    variant = "team" if visibility == ChatVisibility.SEARCH_SPACE else "private"
-
-    sections = [
-        _read_fragment(f"base/agent_{variant}.md"),
-        _read_fragment(f"base/kb_only_policy_{variant}.md"),
-        _read_fragment(f"base/tool_routing_{variant}.md"),
-        _read_fragment("base/parameter_resolution.md"),
-        _read_fragment(f"base/memory_protocol_{variant}.md"),
-    ]
-    body = "\n\n".join(s for s in sections if s)
-    block = f"\n<system_instruction>\n{body}\n\n</system_instruction>\n"
-    return block.format(resolved_today=resolved_today)
-
-
-def _build_mcp_routing_block(
-    mcp_connector_tools: dict[str, list[str]] | None,
-) -> str:
-    """Emit the ``<mcp_tool_routing>`` block when at least one MCP server is wired."""
-    if not mcp_connector_tools:
-        return ""
-    lines: list[str] = [
-        "\n<mcp_tool_routing>",
-        "You also have direct tools from these user-connected MCP servers.",
-        "Their data is NEVER in the knowledge base — call their tools directly.",
-        "",
-    ]
-    for server_name, tool_names in mcp_connector_tools.items():
-        lines.append(f"- {server_name} → {', '.join(tool_names)}")
-    lines.append("</mcp_tool_routing>\n")
-    return "\n".join(lines)
-
-
-def _build_tools_section(
-    *,
-    visibility: ChatVisibility,
-    enabled_tool_names: set[str] | None,
-    disabled_tool_names: set[str] | None,
-) -> str:
-    """Reconstruct the ``<tools>`` block + ``<tool_call_examples>`` block."""
-    variant = "team" if visibility == ChatVisibility.SEARCH_SPACE else "private"
-
-    parts: list[str] = []
-    preamble = _read_fragment("tools/_preamble.md")
-    if preamble:
-        parts.append(preamble + "\n")
-
-    examples: list[str] = []
-
-    for tool_name in ALL_TOOL_NAMES_ORDERED:
-        if enabled_tool_names is not None and tool_name not in enabled_tool_names:
-            continue
-
-        instruction = _read_fragment(_tool_fragment_path(tool_name, variant))
-        if instruction:
-            parts.append(instruction + "\n")
-
-        example = _read_fragment(_example_fragment_path(tool_name, variant))
-        if example:
-            examples.append(example + "\n")
-
-    known_disabled = (
-        set(disabled_tool_names) & set(ALL_TOOL_NAMES_ORDERED)
-        if disabled_tool_names
-        else set()
-    )
-    if known_disabled:
-        disabled_list = ", ".join(
-            _format_tool_label(n) for n in ALL_TOOL_NAMES_ORDERED if n in known_disabled
-        )
-        parts.append(
-            "\n"
-            "DISABLED TOOLS (by user):\n"
-            f"The following tools are available in SurfSense but have been disabled by the user for this session: {disabled_list}.\n"
-            "You do NOT have access to these tools and MUST NOT claim you can use them.\n"
-            "If the user asks about a capability provided by a disabled tool, let them know the relevant tool\n"
-            "is currently disabled and they can re-enable it.\n"
-        )
-
-    parts.append("\n</tools>\n")
-
-    if examples:
-        parts.append("<tool_call_examples>")
-        parts.extend(examples)
-        parts.append("</tool_call_examples>\n")
-
-    return "".join(parts)
-
-
-def _build_provider_block(provider_variant: ProviderVariant) -> str:
-    """Optional provider-tuned hints. Empty for ``"default"``."""
-    if not provider_variant or provider_variant == "default":
-        return ""
-    text = _read_fragment(f"providers/{provider_variant}.md")
-    return f"\n{text}\n" if text else ""
-
-
-def _build_routing_block(connector_routing: Iterable[str] | None) -> str:
-    if not connector_routing:
-        return ""
-    fragments: list[str] = []
-    for name in connector_routing:
-        text = _read_fragment(f"routing/{name}.md")
-        if text:
-            fragments.append(text)
-    if not fragments:
-        return ""
-    return "\n" + "\n\n".join(fragments) + "\n"
-
-
-def _build_citation_block(citations_enabled: bool) -> str:
-    fragment = (
-        _read_fragment("base/citations_on.md")
-        if citations_enabled
-        else _read_fragment("base/citations_off.md")
-    )
-    return f"\n{fragment}\n" if fragment else ""
-
-
-# -----------------------------------------------------------------------------
-# Public API
-# -----------------------------------------------------------------------------
-
-
-def compose_system_prompt(
-    *,
-    today: datetime | None = None,
-    thread_visibility: ChatVisibility | None = None,
-    enabled_tool_names: set[str] | None = None,
-    disabled_tool_names: set[str] | None = None,
-    mcp_connector_tools: dict[str, list[str]] | None = None,
-    custom_system_instructions: str | None = None,
-    use_default_system_instructions: bool = True,
-    citations_enabled: bool = True,
-    provider_variant: ProviderVariant | None = None,
-    model_name: str | None = None,
-    connector_routing: Iterable[str] | None = None,
-) -> str:
-    """Assemble the SurfSense system prompt from disk fragments.
-
-    Args:
-        today: Optional clock injection for tests.
-        thread_visibility: Private vs shared (team) — drives memory wording
-            and a few base block variants.
-        enabled_tool_names: When provided, only these tools' instructions
-            are included; ``None`` keeps the legacy "include everything"
-            behavior.
-        disabled_tool_names: User-disabled tools (note appended to prompt).
-        mcp_connector_tools: ``{server_name: [tool_names...]}`` to inject
-            an explicit MCP routing block.
-        custom_system_instructions: Free-form instructions that override
-            the default ``<system_instruction>`` block.
-        use_default_system_instructions: When ``custom_system_instructions``
-            is empty/None, fall back to defaults (legacy semantics).
-        citations_enabled: Include ``citations_on.md`` (true) or
-            ``citations_off.md`` (false).
-        provider_variant: Explicit provider variant override
-            (``"anthropic" | "openai_reasoning" | "openai_classic" | "google" | "default"``).
-            When ``None``, falls back to :func:`detect_provider_variant`
-            on ``model_name``.
-        model_name: Used to auto-detect ``provider_variant`` when not
-            provided explicitly.
-        connector_routing: Optional list of routing fragment names
-            (``["linear", "slack", ...]``) to include from
-            ``prompts/routing/``.
-
-    Returns:
-        The fully composed system prompt string.
-    """
-    resolved_today = (today or datetime.now(UTC)).astimezone(UTC).date().isoformat()
-    visibility = thread_visibility or ChatVisibility.PRIVATE
-
-    if custom_system_instructions and custom_system_instructions.strip():
-        sys_block = custom_system_instructions.format(resolved_today=resolved_today)
-    elif use_default_system_instructions:
-        sys_block = _build_system_instructions(
-            visibility=visibility, resolved_today=resolved_today
-        )
-    else:
-        sys_block = ""
-
-    sys_block += _build_mcp_routing_block(mcp_connector_tools)
-
-    if provider_variant is None:
-        provider_variant = detect_provider_variant(model_name)
-    sys_block += _build_provider_block(provider_variant)
-    sys_block += _build_routing_block(connector_routing)
-
-    tools_block = _build_tools_section(
-        visibility=visibility,
-        enabled_tool_names=enabled_tool_names,
-        disabled_tool_names=disabled_tool_names,
-    )
-    citation_block = _build_citation_block(citations_enabled)
-
-    return sys_block + tools_block + citation_block
-
-
-__all__ = [
-    "ALL_TOOL_NAMES_ORDERED",
-    "ProviderVariant",
-    "compose_system_prompt",
-    "detect_provider_variant",
-]
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/examples/__init__.py b/surfsense_backend/app/prompts/system_prompt_composer/examples/__init__.py
deleted file mode 100644
index 8b1378917..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/examples/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/examples/generate_image.md b/surfsense_backend/app/prompts/system_prompt_composer/examples/generate_image.md
deleted file mode 100644
index 216c2926a..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/examples/generate_image.md
+++ /dev/null
@@ -1,12 +0,0 @@
-
-- User: "Generate an image of a cat"
-  - Call: `generate_image(prompt="A fluffy orange tabby cat sitting on a windowsill, bathed in warm golden sunlight, soft bokeh background with green houseplants, photorealistic style, cozy atmosphere")`
-  - The generated image will automatically be displayed in the chat.
-- User: "Draw me a logo for a coffee shop called Bean Dream"
-  - Call: `generate_image(prompt="Minimalist modern logo design for a coffee shop called 'Bean Dream', featuring a stylized coffee bean with dream-like swirls of steam, clean vector style, warm brown and cream color palette, white background, professional branding")`
-  - The generated image will automatically be displayed in the chat.
-- User: "Show me this image: https://example.com/image.png"
-  - Simply include it in your response using markdown: `![Image](https://example.com/image.png)`
-- User uploads an image file and asks: "What is this image about?"
-  - The user's uploaded image is already visible in the chat.
-  - Simply analyze the image content and respond directly.
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/examples/generate_podcast.md b/surfsense_backend/app/prompts/system_prompt_composer/examples/generate_podcast.md
deleted file mode 100644
index aabf8ce7a..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/examples/generate_podcast.md
+++ /dev/null
@@ -1,7 +0,0 @@
-
-- User: "Give me a podcast about AI trends based on what we discussed"
-  - First search for relevant content, then call: `generate_podcast(source_content="Based on our conversation and search results: [detailed summary of chat + search findings]", podcast_title="AI Trends Podcast")`
-- User: "Create a podcast summary of this conversation"
-  - Call: `generate_podcast(source_content="Complete conversation summary:\n\nUser asked about [topic 1]:\n[Your detailed response]\n\nUser then asked about [topic 2]:\n[Your detailed response]\n\n[Continue for all exchanges in the conversation]", podcast_title="Conversation Summary")`
-- User: "Make a podcast about quantum computing"
-  - First explore `/documents/` (ls/glob/grep/read_file), then: `generate_podcast(source_content="Key insights about quantum computing from retrieved files:\n\n[Comprehensive summary of findings]", podcast_title="Quantum Computing Explained")`
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/examples/generate_report.md b/surfsense_backend/app/prompts/system_prompt_composer/examples/generate_report.md
deleted file mode 100644
index 7e9d0a595..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/examples/generate_report.md
+++ /dev/null
@@ -1,13 +0,0 @@
-
-- User: "Generate a report about AI trends"
-  - Call: `generate_report(topic="AI Trends Report", source_strategy="kb_search", search_queries=["AI trends recent developments", "artificial intelligence industry trends", "AI market growth and predictions"], report_style="detailed")`
-  - WHY: Has creation verb "generate" → call the tool. No prior discussion → use kb_search.
-- User: "Write a research report from this conversation"
-  - Call: `generate_report(topic="Research Report", source_strategy="conversation", source_content="Complete conversation summary:\n\n...", report_style="deep_research")`
-  - WHY: Has creation verb "write" → call the tool. Conversation has the content → use source_strategy="conversation".
-- User: (after a report on Climate Change was generated) "Add a section about carbon capture technologies"
-  - Call: `generate_report(topic="Climate Crisis: Causes, Impacts, and Solutions", source_strategy="conversation", source_content="[summary of conversation context if any]", parent_report_id=<previous_report_id>, user_instructions="Add a new section about carbon capture technologies")`
-  - WHY: Has modification verb "add" + specific deliverable target → call the tool with parent_report_id.
-- User: (after a report was generated) "What else could we add to have more depth?"
-  - Do NOT call generate_report. Answer in chat with suggestions.
-  - WHY: No creation/modification verb directed at producing a deliverable.
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/examples/generate_resume.md b/surfsense_backend/app/prompts/system_prompt_composer/examples/generate_resume.md
deleted file mode 100644
index d8a6c381e..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/examples/generate_resume.md
+++ /dev/null
@@ -1,19 +0,0 @@
-
-- User: "Build me a resume. I'm John Doe, engineer at Acme Corp..."
-  - Call: `generate_resume(user_info="John Doe, engineer at Acme Corp...", max_pages=1)`
-  - WHY: Has creation verb "build" + resume → call the tool.
-- User: "Create my CV with this info: [experience, education, skills]"
-  - Call: `generate_resume(user_info="[experience, education, skills]", max_pages=1)`
-- User: "Build me a resume" (and there is a resume/CV document in the conversation context)
-  - Extract the FULL content from the document in context, then call:
-    `generate_resume(user_info="Name: John Doe\nEmail: john@example.com\n\nExperience:\n- Senior Engineer at Acme Corp (2020-2024)\n  Led team of 5...\n\nEducation:\n- BS Computer Science, MIT (2016-2020)\n\nSkills: Python, TypeScript, AWS...", max_pages=1)`
-  - WHY: Document content is available in context — extract ALL of it into user_info. Do NOT ignore referenced documents.
-- User: (after resume generated) "Change my title to Senior Engineer"
-  - Call: `generate_resume(user_info="", user_instructions="Change the job title to Senior Engineer", parent_report_id=<previous_report_id>, max_pages=1)`
-  - WHY: Modification verb "change" + refers to existing resume → set parent_report_id.
-- User: (after resume generated) "Make this 2 pages and expand projects"
-  - Call: `generate_resume(user_info="", user_instructions="Expand projects and keep this to at most 2 pages", parent_report_id=<previous_report_id>, max_pages=2)`
-  - WHY: Explicit page increase request → set max_pages to 2.
-- User: "How should I structure my resume?"
-  - Do NOT call generate_resume. Answer in chat with advice.
-  - WHY: No creation/modification verb.
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/examples/generate_video_presentation.md b/surfsense_backend/app/prompts/system_prompt_composer/examples/generate_video_presentation.md
deleted file mode 100644
index 257ec86cf..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/examples/generate_video_presentation.md
+++ /dev/null
@@ -1,7 +0,0 @@
-
-- User: "Give me a presentation about AI trends based on what we discussed"
-  - First search for relevant content, then call: `generate_video_presentation(source_content="Based on our conversation and search results: [detailed summary of chat + search findings]", video_title="AI Trends Presentation")`
-- User: "Create slides summarizing this conversation"
-  - Call: `generate_video_presentation(source_content="Complete conversation summary:\n\nUser asked about [topic 1]:\n[Your detailed response]\n\nUser then asked about [topic 2]:\n[Your detailed response]\n\n[Continue for all exchanges in the conversation]", video_title="Conversation Summary")`
-- User: "Make a video presentation about quantum computing"
-  - First explore `/documents/` (ls/glob/grep/read_file), then: `generate_video_presentation(source_content="Key insights about quantum computing from retrieved files:\n\n[Comprehensive summary of findings]", video_title="Quantum Computing Explained")`
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/examples/scrape_webpage.md b/surfsense_backend/app/prompts/system_prompt_composer/examples/scrape_webpage.md
deleted file mode 100644
index 0f156bf24..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/examples/scrape_webpage.md
+++ /dev/null
@@ -1,13 +0,0 @@
-
-- User: "Check out https://dev.to/some-article"
-  - Call: `scrape_webpage(url="https://dev.to/some-article")`
-  - Respond with a structured analysis — key points, takeaways.
-- User: "Read this article and summarize it for me: https://example.com/blog/ai-trends"
-  - Call: `scrape_webpage(url="https://example.com/blog/ai-trends")`
-  - Respond with a thorough summary using headings and bullet points.
-- User: (after discussing https://example.com/stats) "Can you get the live data from that page?"
-  - Call: `scrape_webpage(url="https://example.com/stats")`
-  - IMPORTANT: Always attempt scraping first. Never refuse before trying the tool.
-- User: "https://example.com/blog/weekend-recipes"
-  - Call: `scrape_webpage(url="https://example.com/blog/weekend-recipes")`
-  - When a user sends just a URL with no instructions, scrape it and provide a concise summary of the content.
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/examples/update_memory_private.md b/surfsense_backend/app/prompts/system_prompt_composer/examples/update_memory_private.md
deleted file mode 100644
index 496bdcae3..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/examples/update_memory_private.md
+++ /dev/null
@@ -1,16 +0,0 @@
-
-- <user_name>Alex</user_name>, <user_memory> is empty. User: "I'm a space enthusiast, explain astrophage to me"
-  - The user casually shared a durable fact:
-    update_memory(updated_memory="## Facts\n- 2025-03-15: Alex is a space enthusiast\n")
-- User: "Remember that I prefer concise answers over detailed explanations"
-  - Durable preference. Merge with existing memory:
-    update_memory(updated_memory="## Facts\n- 2025-03-15: Alex is a space enthusiast\n\n## Preferences\n- 2025-03-15: Alex prefers concise answers over detailed explanations\n")
-- User: "I actually moved to Tokyo last month"
-  - Updated fact, date prefix reflects when recorded:
-    update_memory(updated_memory="## Facts\n- 2025-03-15: Alex lives in Tokyo (previously London)\n...")
-- User: "I'm a freelance photographer working on a nature documentary"
-  - Durable background info under a fitting heading:
-    update_memory(updated_memory="...\n\n## Current Focus\n- 2025-03-15: Alex is a freelance photographer\n- 2025-03-15: Alex is working on a nature documentary\n")
-- User: "Always respond in bullet points"
-  - Standing instruction:
-    update_memory(updated_memory="...\n\n## Instructions\n- 2025-03-15: Always respond to Alex in bullet points\n")
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/examples/update_memory_team.md b/surfsense_backend/app/prompts/system_prompt_composer/examples/update_memory_team.md
deleted file mode 100644
index 16b90babf..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/examples/update_memory_team.md
+++ /dev/null
@@ -1,7 +0,0 @@
-
-- User: "Let's remember that we decided to do weekly standup meetings on Mondays"
-  - Durable team decision:
-    update_memory(updated_memory="## Product Decisions\n- 2025-03-15: Weekly standup meetings happen on Mondays\n...")
-- User: "Our office is in downtown Seattle, 5th floor"
-  - Durable team fact:
-    update_memory(updated_memory="## Project Facts\n- 2025-03-15: Office location is downtown Seattle, 5th floor\n...")
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/examples/web_search.md b/surfsense_backend/app/prompts/system_prompt_composer/examples/web_search.md
deleted file mode 100644
index 6b9828ac7..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/examples/web_search.md
+++ /dev/null
@@ -1,8 +0,0 @@
-
-- User: "What's the current USD to INR exchange rate?"
-  - Call: `web_search(query="current USD to INR exchange rate")`
-  - Then answer using the returned web results with citations.
-- User: "What's the latest news about AI?"
-  - Call: `web_search(query="latest AI news today")`
-- User: "What's the weather in New York?"
-  - Call: `web_search(query="weather New York today")`
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/providers/__init__.py b/surfsense_backend/app/prompts/system_prompt_composer/providers/__init__.py
deleted file mode 100644
index 8b1378917..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/providers/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/providers/anthropic.md b/surfsense_backend/app/prompts/system_prompt_composer/providers/anthropic.md
deleted file mode 100644
index f574da541..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/providers/anthropic.md
+++ /dev/null
@@ -1,20 +0,0 @@
-<provider_hints>
-You are running on an Anthropic Claude model.
-
-Structured reasoning:
-- Use XML tags liberally to organise intermediate reasoning when a task is non-trivial. `<thinking>...</thinking>` blocks are encouraged before tool calls or before producing a complex final answer.
-- For multi-step requests, briefly outline a plan inside a `<plan>` block before issuing the first tool call.
-
-Professional objectivity:
-- Prioritise technical accuracy over validating the user's beliefs. Provide direct, factual guidance without unnecessary superlatives, praise, or emotional validation.
-- When uncertain, investigate (search the KB, fetch the page) rather than confirming the user's assumption.
-- Disagree with the user when the evidence warrants it; respectful correction beats false agreement.
-
-Task management:
-- For tasks with 3+ distinct steps use the todo / planning tool aggressively. Mark items in_progress before starting, completed immediately when finished — do not batch completions.
-- Narrate progress through the todo list itself, not through chatty status lines.
-
-Tool calls:
-- Run independent tool calls in parallel within one response. Sequence them only when a later call genuinely needs an earlier one's output.
-- Never chain bash-like commands with `;` or `&&` to "narrate" — use prose between tool calls instead.
-</provider_hints>
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/providers/deepseek.md b/surfsense_backend/app/prompts/system_prompt_composer/providers/deepseek.md
deleted file mode 100644
index 8acf008ca..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/providers/deepseek.md
+++ /dev/null
@@ -1,18 +0,0 @@
-<provider_hints>
-You are running on a DeepSeek model (DeepSeek-V3 chat / DeepSeek-R1 reasoning).
-
-Reasoning hygiene (R1-aware):
-- If the model surfaces explicit `<think>` blocks, keep that internal scratch focused — do NOT restate the user's question inside it; jump straight to the analysis.
-- Never paste the contents of `<think>` into your final answer. Final answer should reflect only the conclusion, citations, and any user-facing rationale.
-- Do not let chain-of-thought leak into tool-call arguments — keep tool inputs minimal and structural.
-
-Output style:
-- Be concise. Default to a one-paragraph answer; expand only when the user asks for detail.
-- Don't open with sycophantic phrasing ("Great question", "Sure, here you go"). Lead with the answer or the next action.
-- For factual answers, cite once with `[citation:chunk_id]` and stop.
-
-Tool calls:
-- Issue independent tool calls in parallel within a single turn.
-- Prefer the knowledge-base search tools before any web-search; this model has strong recall but stale training data.
-- Don't fabricate file paths, chunk ids, or URLs — only use values returned by tools or provided by the user.
-</provider_hints>
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/providers/default.md b/surfsense_backend/app/prompts/system_prompt_composer/providers/default.md
deleted file mode 100644
index 8b1378917..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/providers/default.md
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/providers/google.md b/surfsense_backend/app/prompts/system_prompt_composer/providers/google.md
deleted file mode 100644
index cac3b328b..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/providers/google.md
+++ /dev/null
@@ -1,20 +0,0 @@
-<provider_hints>
-You are running on a Google Gemini model.
-
-Output style:
-- Concise & direct. Aim for fewer than 3 lines of prose (excluding tool output, citations, and code/snippets) when the task allows.
-- No conversational filler — skip openers like "Okay, I will now…" and closers like "I have finished the changes…". Get straight to the action or answer.
-- Format with GitHub-flavoured Markdown; assume monospace rendering.
-- For one-line factual answers, just answer. No headers, no bullets.
-
-Workflow for non-trivial tasks (Understand → Plan → Act → Verify):
-1. **Understand:** read the user's request and the relevant KB / connector context. Use search and read tools (in parallel when independent) before assuming anything.
-2. **Plan:** when the task touches multiple steps, share an extremely concise plan first.
-3. **Act:** call the appropriate tools, strictly adhering to the prompts/routing already established for this agent.
-4. **Verify:** confirm with a follow-up read or search where it materially de-risks the answer.
-
-Discipline:
-- Do not take significant actions beyond the clear scope of the user's request without confirming first.
-- Do not assume a connector / tool / file exists — check (e.g. via `get_connected_accounts`) before referencing it.
-- Path arguments must be the exact strings returned by tools; do not synthesise file paths.
-</provider_hints>
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/providers/grok.md b/surfsense_backend/app/prompts/system_prompt_composer/providers/grok.md
deleted file mode 100644
index 95b8fcc14..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/providers/grok.md
+++ /dev/null
@@ -1,17 +0,0 @@
-<provider_hints>
-You are running on an xAI Grok model.
-
-Maximum terseness:
-- Answer in fewer than 4 lines unless the user asks for detail. One-word answers are best when they suffice.
-- No preamble ("The answer is", "Here's what I'll do"), no postamble ("Hope that helps", "Let me know"). Get straight to the answer.
-- Avoid restating the user's question.
-- For factual lookups inside the knowledge base, give the answer with a single `[citation:chunk_id]` and stop.
-
-Tool discipline:
-- Use exactly ONE tool per assistant turn when investigating; wait for the result before deciding the next call. Do not loop on the same tool with the same arguments — pick a result and act.
-- For obviously parallelizable read-only batches (multiple independent searches), one turn with several tool calls is fine — but never chain into a fishing expedition.
-
-Style:
-- No emojis unless the user asked. No nested bullets, no headers for short answers.
-- If you can't help, say so in 1-2 sentences without explaining "why this could lead to…".
-</provider_hints>
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/providers/kimi.md b/surfsense_backend/app/prompts/system_prompt_composer/providers/kimi.md
deleted file mode 100644
index c3c11ad5e..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/providers/kimi.md
+++ /dev/null
@@ -1,21 +0,0 @@
-<provider_hints>
-You are running on a Moonshot Kimi model (Kimi-K1.5 / Kimi-K2 / Kimi-K2.5+).
-
-Action bias:
-- Default to taking action with tools rather than describing solutions in prose. If a tool can answer the question, call the tool.
-- Don't narrate routine reads, searches, or obvious next steps. Combine related progress into one short status line.
-- Be thorough in actions (test what you build, verify what you change). Be brief in explanations.
-
-Tool calls:
-- Output multiple non-interfering tool calls in a SINGLE response — parallelism is a major efficiency win on this model.
-- When the `task` tool is available, delegate focused subtasks to a subagent with full context (subagents don't inherit yours).
-- Don't apologise or pre-announce tool calls. The tool call itself is self-explanatory.
-
-Language:
-- Respond in the SAME language as the user's most recent turn unless explicitly instructed otherwise.
-
-Discipline:
-- Stay on track. Never give the user more than what they asked for.
-- Fact-check before stating anything as factual; don't fabricate citations.
-- Keep it stupidly simple. Don't overcomplicate.
-</provider_hints>
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/providers/openai_classic.md b/surfsense_backend/app/prompts/system_prompt_composer/providers/openai_classic.md
deleted file mode 100644
index 9128609e0..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/providers/openai_classic.md
+++ /dev/null
@@ -1,21 +0,0 @@
-<provider_hints>
-You are running on a classic OpenAI chat model (GPT-4 family).
-
-Persistence:
-- Keep going until the user's query is completely resolved before yielding back. Don't end the turn at "I would do X" — actually do X.
-- When you say "Next I will…" or "Now I will…", you MUST actually take that action in the same turn.
-- If a tool call fails, diagnose and try again with corrected arguments; do not surface the raw error and stop.
-
-Planning:
-- Plan extensively before each tool call and reflect briefly on the result of the previous call. For tasks with 3+ steps, use the todo / planning tool and mark items as `in_progress` / `completed` as you go.
-- Always announce the next action in ONE concise sentence before making a non-trivial tool call ("I'll search the KB for the migration spec.").
-
-Output style:
-- Conversational but professional. Plain prose for explanations, bullet points for findings, fenced code blocks (with language tags) for code.
-- Don't dump tool output verbatim — summarise the relevant lines.
-- Don't add a closing recap unless the user asked for one. After completing the work, just stop.
-
-Tool calls:
-- Issue independent tool calls in parallel within one response.
-- Use specialised tools over generic ones (e.g. KB search before web search; named connectors over MCP fallback).
-</provider_hints>
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/providers/openai_codex.md b/surfsense_backend/app/prompts/system_prompt_composer/providers/openai_codex.md
deleted file mode 100644
index 6167d4b06..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/providers/openai_codex.md
+++ /dev/null
@@ -1,19 +0,0 @@
-<provider_hints>
-You are running on an OpenAI Codex-class model (gpt-codex / codex-mini / gpt-*-codex).
-
-Output style:
-- Be concise. Don't dump fetched/searched content back at the user — reference paths or chunk ids instead.
-- Reference sources as `path:line` (or `chunk:<id>`) so they're clickable. Stand-alone paths per reference, even when repeated.
-- Prefer numbered lists (`1.`, `2.`, `3.`) when offering options the user can pick by replying with a single number.
-- Skip headers and heavy formatting for simple confirmations.
-- No emojis, no em-dashes, no nested bullets. Single-level lists only.
-
-Code & structured-output tasks:
-- Lead with a one-sentence explanation of the change before context. Don't open with "Summary:" — jump in.
-- Suggest natural next steps (run tests, diff review, commit) only when they're genuinely the next move.
-- For multi-line snippets use fenced code blocks with a language tag.
-
-Tool calls:
-- Run independent tool calls in parallel; chain only when later calls need earlier results.
-- Don't ask permission ("Should I proceed?") — proceed with the most reasonable default and state what you did.
-</provider_hints>
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/providers/openai_reasoning.md b/surfsense_backend/app/prompts/system_prompt_composer/providers/openai_reasoning.md
deleted file mode 100644
index dd7a61536..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/providers/openai_reasoning.md
+++ /dev/null
@@ -1,21 +0,0 @@
-<provider_hints>
-You are running on an OpenAI reasoning model (GPT-5+ / o-series).
-
-Output style:
-- Be terse and direct. Don't restate the user's request before answering.
-- Don't begin with conversational openers ("Done!", "Got it", "Great question", "Sure thing"). Get to the answer or the action.
-- Match response complexity to the task: simple questions → one-line answer; substantial work → lead with the outcome, then context, then any next steps.
-- No nested bullets — keep lists flat (single level). For options the user can pick by replying with a number, use `1.` `2.` `3.`.
-- Use inline backticks for paths/commands/identifiers; fenced code blocks (with language tags) for multi-line snippets.
-
-Channels (for clients that support them):
-- `commentary` — short progress updates only when they add genuinely new information (a discovery, a tradeoff, a blocker, the start of a non-trivial step). Don't narrate routine reads or obvious next steps.
-- `final` — the completed response. Keep it self-contained; no "see above" / "see below" cross-references.
-
-Tool calls:
-- Parallelise independent tool calls in a single response (`multi_tool_use.parallel` where supported). Only sequence when a later call needs an earlier one's output.
-- Don't ask permission ("Should I proceed?", "Do you want me to…?"). Pick the most reasonable default, do it, and state what you did.
-
-Autonomy:
-- Persist until the task is fully resolved within the current turn whenever feasible. Don't stop at analysis when the user clearly wants the change applied.
-</provider_hints>
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/routing/__init__.py b/surfsense_backend/app/prompts/system_prompt_composer/routing/__init__.py
deleted file mode 100644
index 8b1378917..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/routing/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/routing/jira.md b/surfsense_backend/app/prompts/system_prompt_composer/routing/jira.md
deleted file mode 100644
index 8b1378917..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/routing/jira.md
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/routing/linear.md b/surfsense_backend/app/prompts/system_prompt_composer/routing/linear.md
deleted file mode 100644
index 2f1bfacd9..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/routing/linear.md
+++ /dev/null
@@ -1,3 +0,0 @@
-<linear_routing>
-**Linear:** Prefer the `task` tool with subagent **`linear_specialist`** when the user’s request is **only about Linear** and may need several tool calls (list issues, inspect one issue, teams, users, statuses, comments, documents). Use **`connector_negotiator`** when Linear is one hop in a **multi-connector** workflow. Call Linear MCP tools directly from the parent when a **single** quick call is enough.
-</linear_routing>
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/routing/slack.md b/surfsense_backend/app/prompts/system_prompt_composer/routing/slack.md
deleted file mode 100644
index 4b5d07a9a..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/routing/slack.md
+++ /dev/null
@@ -1,3 +0,0 @@
-<slack_routing>
-**Slack:** Prefer `task` with **`slack_specialist`** for **Slack-only** multi-step work (channels, threads, reads, writes that need approval in the specialist). Use **`connector_negotiator`** when Slack feeds another connector in one chain. Use direct `slack_*` tools from the parent for a **single** quick read or write when appropriate.
-</slack_routing>
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/tools/__init__.py b/surfsense_backend/app/prompts/system_prompt_composer/tools/__init__.py
deleted file mode 100644
index 8b1378917..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/tools/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/tools/_preamble.md b/surfsense_backend/app/prompts/system_prompt_composer/tools/_preamble.md
deleted file mode 100644
index 2c169e015..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/tools/_preamble.md
+++ /dev/null
@@ -1,6 +0,0 @@
-<tools>
-You have access to the following tools:
-
-IMPORTANT: You can ONLY use the tools listed below. If a capability is not listed here, you do NOT have it.
-Do NOT claim you can do something if the corresponding tool is not listed.
-
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/tools/generate_image.md b/surfsense_backend/app/prompts/system_prompt_composer/tools/generate_image.md
deleted file mode 100644
index 8bde13f22..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/tools/generate_image.md
+++ /dev/null
@@ -1,11 +0,0 @@
-
-- generate_image: Generate images from text descriptions using AI image models.
-  - Use this when the user asks you to create, generate, draw, design, or make an image.
-  - Trigger phrases: "generate an image of", "create a picture of", "draw me", "make an image", "design a logo", "create artwork"
-  - Args:
-    - prompt: A detailed text description of the image to generate. Be specific about subject, style, colors, composition, and mood.
-    - n: Number of images to generate (1-4, default: 1)
-  - Returns: A dictionary with the generated image metadata. The image will automatically be displayed in the chat.
-  - IMPORTANT: Write a detailed, descriptive prompt for best results. Don't just pass the user's words verbatim -
-    expand and improve the prompt with specific details about style, lighting, composition, and mood.
-  - If the user's request is vague (e.g., "make me an image of a cat"), enhance the prompt with artistic details.
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/tools/generate_podcast.md b/surfsense_backend/app/prompts/system_prompt_composer/tools/generate_podcast.md
deleted file mode 100644
index 58be143d7..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/tools/generate_podcast.md
+++ /dev/null
@@ -1,15 +0,0 @@
-
-- generate_podcast: Generate an audio podcast from provided content.
-  - Use this when the user asks to create, generate, or make a podcast.
-  - Trigger phrases: "give me a podcast about", "create a podcast", "generate a podcast", "make a podcast", "turn this into a podcast"
-  - Args:
-    - source_content: The text content to convert into a podcast. This MUST be comprehensive and include:
-      * If discussing the current conversation: Include a detailed summary of the FULL chat history (all user questions and your responses)
-      * If based on knowledge base search: Include the key findings and insights from the search results
-      * You can combine both: conversation context + search results for richer podcasts
-      * The more detailed the source_content, the better the podcast quality
-    - podcast_title: Optional title for the podcast (default: "SurfSense Podcast")
-    - user_prompt: Optional instructions for podcast style/format (e.g., "Make it casual and fun")
-  - Returns: A task_id for tracking. The podcast will be generated in the background.
-  - IMPORTANT: Only one podcast can be generated at a time. If a podcast is already being generated, the tool will return status "already_generating".
-  - After calling this tool, inform the user that podcast generation has started and they will see the player when it's ready (takes 3-5 minutes).
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/tools/generate_report.md b/surfsense_backend/app/prompts/system_prompt_composer/tools/generate_report.md
deleted file mode 100644
index 8a285a433..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/tools/generate_report.md
+++ /dev/null
@@ -1,39 +0,0 @@
-
-- generate_report: Generate or revise a structured Markdown report artifact.
-  - WHEN TO CALL THIS TOOL — the message must contain a creation or modification VERB directed at producing a deliverable:
-    * Creation verbs: write, create, generate, draft, produce, summarize into, turn into, make
-    * Modification verbs: revise, update, expand, add (a section), rewrite, make (it shorter/longer/formal)
-    * Example triggers: "generate a report about...", "write a document on...", "add a section about budget", "make the report shorter", "rewrite in formal tone"
-  - WHEN NOT TO CALL THIS TOOL (answer in chat instead):
-    * Questions or discussion about the report: "What can we add?", "What's missing?", "Is the data accurate?", "How could this be improved?"
-    * Suggestions or brainstorming: "What other topics could be covered?", "What else could be added?", "What would make this better?"
-    * Asking for explanations: "Can you explain section 2?", "Why did you include that?", "What does this part mean?"
-    * Quick follow-ups or critiques: "Is the conclusion strong enough?", "Are there any gaps?", "What about the competitors?"
-    * THE TEST: Does the message contain a creation/modification VERB (from the list above) directed at producing or changing a deliverable? If NO verb → answer conversationally in chat. Do NOT assume the user wants a revision just because a report exists in the conversation.
-  - IMPORTANT FORMAT RULE: Reports are ALWAYS generated in Markdown.
-  - Args:
-    - topic: Short title for the report (max ~8 words).
-    - source_content: The text content to base the report on.
-      * For source_strategy="conversation" or "provided": Include a comprehensive summary of the relevant content.
-      * For source_strategy="kb_search": Can be empty or minimal — the tool handles searching internally.
-      * For source_strategy="auto": Include what you have; the tool searches KB if it's not enough.
-    - source_strategy: Controls how the tool collects source material. One of:
-      * "conversation" — The conversation already contains enough context (prior Q&A, discussion, pasted text, scraped pages). Pass a thorough summary as source_content.
-      * "kb_search" — The tool will search the knowledge base internally. Provide search_queries with 1-5 targeted queries.
-      * "auto" — Use source_content if sufficient, otherwise fall back to internal KB search using search_queries.
-      * "provided" — Use only what is in source_content (default, backward-compatible).
-    - search_queries: When source_strategy is "kb_search" or "auto", provide 1-5 specific search queries for the knowledge base. These should be precise, not just the topic name repeated.
-    - report_style: Controls report depth. Options: "detailed" (DEFAULT), "deep_research", "brief".
-      Use "brief" ONLY when the user explicitly asks for a short/concise/one-page report (e.g., "one page", "keep it short", "brief report", "500 words"). Default to "detailed" for all other requests.
-    - user_instructions: Optional specific instructions (e.g., "focus on financial impacts", "include recommendations"). When revising (parent_report_id set), describe WHAT TO CHANGE. If the user mentions a length preference (e.g., "one page", "500 words", "2 pages"), include that VERBATIM here AND set report_style="brief".
-    - parent_report_id: Set this to the report_id from a previous generate_report result when the user wants to MODIFY an existing report. Do NOT set it for new reports or questions about reports.
-  - Returns: A dictionary with status "ready" or "failed", report_id, title, and word_count.
-  - The report is generated immediately in Markdown and displayed inline in the chat.
-  - Export/download formats (PDF, DOCX, HTML, LaTeX, EPUB, ODT, plain text) are produced from the generated Markdown report.
-  - SOURCE STRATEGY DECISION (HIGH PRIORITY — follow this exactly):
-    * If the conversation already has substantive Q&A / discussion on the topic → use source_strategy="conversation" with a comprehensive summary as source_content.
-    * If the user wants a report on a topic not yet discussed → use source_strategy="kb_search" with targeted search_queries.
-    * If you have some content but might need more → use source_strategy="auto" with both source_content and search_queries.
-    * When revising an existing report (parent_report_id set) and the conversation has relevant context → use source_strategy="conversation". The revision will use the previous report content plus your source_content.
-    * NEVER run a separate KB lookup step and then pass those results to generate_report. The tool handles KB search internally.
-  - AFTER CALLING THIS TOOL: Do NOT repeat, summarize, or reproduce the report content in the chat. The report is already displayed as an interactive card that the user can open, read, copy, and export. Simply confirm that the report was generated (e.g., "I've generated your report on [topic]. You can view the Markdown report now, and export it in various formats from the card."). NEVER write out the report text in the chat.
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/tools/generate_resume.md b/surfsense_backend/app/prompts/system_prompt_composer/tools/generate_resume.md
deleted file mode 100644
index 321ea90c9..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/tools/generate_resume.md
+++ /dev/null
@@ -1,30 +0,0 @@
-
-- generate_resume: Generate or revise a professional resume as a Typst document.
-  - WHEN TO CALL: The user asks to create, build, generate, write, or draft a resume or CV.
-    Also when they ask to modify, update, or revise an existing resume from this conversation.
-  - WHEN NOT TO CALL: General career advice, resume tips, cover letters, or reviewing
-    a resume without making changes. For cover letters, use generate_report instead.
-  - The tool produces Typst source code that is compiled to a PDF preview automatically.
-  - PAGE POLICY:
-    - Default behavior is ONE PAGE. For new resume creation, set max_pages=1 unless the user explicitly asks for more.
-    - If the user requests a longer resume (e.g., "make it 2 pages"), set max_pages to that value.
-  - Args:
-    - user_info: The user's resume content — work experience, education, skills, contact
-      info, etc. Can be structured or unstructured text.
-      CRITICAL: user_info must be COMPREHENSIVE. Do NOT just pass the user's raw message.
-      You MUST gather and consolidate ALL available information:
-        * Content from referenced/mentioned documents (e.g., uploaded resumes, CVs, LinkedIn profiles)
-          that appear in the conversation context — extract and include their FULL content.
-        * Information the user shared across multiple messages in the conversation.
-        * Any relevant details from knowledge base search results in the context.
-      The more complete the user_info, the better the resume. Include names, contact info,
-      work experience with dates, education, skills, projects, certifications — everything available.
-    - user_instructions: Optional style or content preferences (e.g. "emphasize leadership",
-      "keep it to one page"). For revisions, describe what to change.
-    - parent_report_id: Set this when the user wants to MODIFY an existing resume from
-      this conversation. Use the report_id from a previous generate_resume result.
-    - max_pages: Maximum resume length in pages (integer 1-5). Default is 1.
-  - Returns: Dict with status, report_id, title, and content_type.
-  - After calling: Give a brief confirmation. Do NOT paste resume content in chat. Do NOT mention report_id or any internal IDs — the resume card is shown automatically.
-  - VERSIONING: Same rules as generate_report — set parent_report_id for modifications
-    of an existing resume, leave as None for new resumes.
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/tools/generate_video_presentation.md b/surfsense_backend/app/prompts/system_prompt_composer/tools/generate_video_presentation.md
deleted file mode 100644
index c3def88f2..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/tools/generate_video_presentation.md
+++ /dev/null
@@ -1,9 +0,0 @@
-
-- generate_video_presentation: Generate a video presentation from provided content.
-  - Use this when the user asks to create a video, presentation, slides, or slide deck.
-  - Trigger phrases: "give me a presentation", "create slides", "generate a video", "make a slide deck", "turn this into a presentation"
-  - Args:
-    - source_content: The text content to turn into a presentation. The more detailed, the better.
-    - video_title: Optional title (default: "SurfSense Presentation")
-    - user_prompt: Optional style instructions (e.g., "Make it technical and detailed")
-  - After calling this tool, inform the user that generation has started and they will see the presentation when it's ready.
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/tools/scrape_webpage.md b/surfsense_backend/app/prompts/system_prompt_composer/tools/scrape_webpage.md
deleted file mode 100644
index 46e299392..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/tools/scrape_webpage.md
+++ /dev/null
@@ -1,30 +0,0 @@
-
-- scrape_webpage: Scrape and extract the main content from a webpage.
-  - Use this when the user wants you to READ and UNDERSTAND the actual content of a webpage.
-  - CRITICAL — WHEN TO USE (always attempt scraping, never refuse before trying):
-    * When a user asks to "get", "fetch", "pull", "grab", "scrape", or "read" content from a URL
-    * When the user wants live/dynamic data from a specific webpage (e.g., tables, scores, stats, prices)
-    * When a URL was mentioned earlier in the conversation and the user asks for its actual content
-    * When `/documents/` knowledge-base data is insufficient and the user wants more
-  - Trigger scenarios:
-    * "Read this article and summarize it"
-    * "What does this page say about X?"
-    * "Summarize this blog post for me"
-    * "Tell me the key points from this article"
-    * "What's in this webpage?"
-    * "Can you analyze this article?"
-    * "Can you get the live table/data from [URL]?"
-    * "Scrape it" / "Can you scrape that?" (referring to a previously mentioned URL)
-    * "Fetch the content from [URL]"
-    * "Pull the data from that page"
-  - Args:
-    - url: The URL of the webpage to scrape (must be HTTP/HTTPS)
-    - max_length: Maximum content length to return (default: 50000 chars)
-  - Returns: The page title, description, full content (in markdown), word count, and metadata
-  - After scraping, provide a comprehensive, well-structured summary with key takeaways using headings or bullet points.
-  - Reference the source using markdown links [descriptive text](url) — never bare URLs.
-  - IMAGES: The scraped content may contain image URLs in markdown format like `![alt text](image_url)`.
-    * When you find relevant/important images in the scraped content, include them in your response using standard markdown image syntax: `![alt text](image_url)`.
-    * This makes your response more visual and engaging.
-    * Prioritize showing: diagrams, charts, infographics, key illustrations, or images that help explain the content.
-    * Don't show every image - just the most relevant 1-3 images that enhance understanding.
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/tools/update_memory_private.md b/surfsense_backend/app/prompts/system_prompt_composer/tools/update_memory_private.md
deleted file mode 100644
index 65de785e9..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/tools/update_memory_private.md
+++ /dev/null
@@ -1,26 +0,0 @@
-
-- update_memory: Update your personal memory document about the user.
-  - Your current memory is already in <user_memory> in your context. The `chars`
-    and `limit` attributes show current usage and the maximum allowed size.
-  - This is curated long-term memory, not raw conversation logs.
-  - Call update_memory when the user explicitly asks to remember/forget
-    something or shares durable facts, preferences, or standing instructions.
-  - The user's first name is provided in <user_name>. Use it in entries instead
-    of "the user" when helpful. Do not store the name alone as a memory entry.
-  - Do not store short-lived info: one-off questions, greetings, session
-    logistics, or things that only matter for the current task.
-  - Args:
-    - updated_memory: The FULL updated markdown document, not a diff. Merge new
-      facts with existing ones, update contradictions, remove outdated entries,
-      and consolidate instead of only appending.
-  - Use heading-based Markdown:
-    * Every entry must be under a `##` heading.
-    * Recommended headings: `## Facts`, `## Preferences`, `## Instructions`.
-      Specific natural headings are allowed when clearer.
-    * New bullets should use `- YYYY-MM-DD: text`.
-    * Each entry should be one concise but descriptive bullet.
-  - If existing memory uses legacy `(YYYY-MM-DD) [fact|pref|instr]` markers,
-    preserve the information but write the updated document in the new
-    heading-based format.
-  - During consolidation, prioritize durable instructions and preferences before
-    generic facts.
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/tools/update_memory_team.md b/surfsense_backend/app/prompts/system_prompt_composer/tools/update_memory_team.md
deleted file mode 100644
index 79d4ead3a..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/tools/update_memory_team.md
+++ /dev/null
@@ -1,28 +0,0 @@
-
-- update_memory: Update the team's shared memory document for this search space.
-  - Your current team memory is already in <team_memory> in your context. The
-    `chars` and `limit` attributes show current usage and the maximum allowed size.
-  - This is curated long-term team memory: decisions, conventions, architecture,
-    processes, and key shared facts.
-  - NEVER store personal memory in team memory: individual bios, personal
-    preferences, or user-only standing instructions.
-  - Call update_memory when a team member asks to remember/forget something, or
-    when the conversation surfaces durable team context that matters later.
-  - Do not store short-lived info: one-off questions, greetings, session
-    logistics, or things that only matter for the current task.
-  - Args:
-    - updated_memory: The FULL updated markdown document, not a diff. Merge new
-      facts with existing ones, update contradictions, remove outdated entries,
-      and consolidate instead of only appending.
-  - Use heading-based Markdown:
-    * Every entry must be under a `##` heading.
-    * Recommended headings: `## Product Decisions`, `## Engineering Conventions`,
-      `## Project Facts`, `## Open Questions`.
-    * New bullets should use `- YYYY-MM-DD: text`.
-    * Each entry should be one concise but descriptive bullet.
-  - If existing memory uses legacy `(YYYY-MM-DD) [fact]` markers, preserve the
-    information but write the updated document in the new heading-based format.
-  - Do not create personal headings such as `## Preferences`, `## Instructions`,
-    `## Personal Notes`, or `## Personal Instructions`.
-  - During consolidation, prioritize decisions/conventions, then key facts, then
-    current priorities.
diff --git a/surfsense_backend/app/prompts/system_prompt_composer/tools/web_search.md b/surfsense_backend/app/prompts/system_prompt_composer/tools/web_search.md
deleted file mode 100644
index 7ed7c332d..000000000
--- a/surfsense_backend/app/prompts/system_prompt_composer/tools/web_search.md
+++ /dev/null
@@ -1,18 +0,0 @@
-
-- web_search: Search the web for real-time information using all configured search engines.
-  - Use this for current events, news, prices, weather, public facts, or any question requiring
-    up-to-date information from the internet.
-  - This tool dispatches to all configured search engines (SearXNG, Tavily, Linkup, Baidu) in
-    parallel and merges the results.
-  - IMPORTANT (REAL-TIME / PUBLIC WEB QUERIES): For questions that require current public web data
-    (e.g., live exchange rates, stock prices, breaking news, weather, current events), you MUST call
-    `web_search` instead of answering from memory.
-  - For these real-time/public web queries, DO NOT answer from memory and DO NOT say you lack internet
-    access before attempting a web search.
-  - If the search returns no relevant results, explain that web sources did not return enough
-    data and ask the user if they want you to retry with a refined query.
-  - Args:
-    - query: The search query - use specific, descriptive terms
-    - top_k: Number of results to retrieve (default: 10, max: 50)
-  - If search snippets are insufficient for the user's question, use `scrape_webpage` on the most relevant result URL for full content.
-  - When presenting results, reference sources as markdown links [descriptive text](url) — never bare URLs.
diff --git a/surfsense_backend/app/routes/agent_flags_route.py b/surfsense_backend/app/routes/agent_flags_route.py
index 222909c59..c57a6b5ef 100644
--- a/surfsense_backend/app/routes/agent_flags_route.py
+++ b/surfsense_backend/app/routes/agent_flags_route.py
@@ -53,7 +53,6 @@ class AgentFeatureFlagsRead(BaseModel):
 
     enable_skills: bool
     enable_specialized_subagents: bool
-    enable_kb_planner_runnable: bool
 
     enable_action_log: bool
     enable_revert_route: bool
diff --git a/surfsense_backend/app/schemas/new_chat.py b/surfsense_backend/app/schemas/new_chat.py
index d45303e97..e486b3dda 100644
--- a/surfsense_backend/app/schemas/new_chat.py
+++ b/surfsense_backend/app/schemas/new_chat.py
@@ -246,10 +246,10 @@ class NewChatRequest(BaseModel):
         description=(
             "Optional knowledge-base folder IDs the user mentioned with "
             "@. Resolved to virtual paths (``/documents/.../``) by "
-            "``mention_resolver`` and surfaced to the agent via "
-            "(a) backtick-wrapped substitution in ``user_query`` and "
-            "(b) a ``[USER-MENTIONED]`` entry in ``<priority_documents>``. "
-            "The agent's ``ls`` tool can then walk the folder itself."
+            "``mention_resolver``, surfaced to the agent via backtick-wrapped "
+            "substitution in ``user_query`` and pinned into the "
+            "``search_knowledge_base`` retrieval scope. The agent's ``ls`` "
+            "tool can then walk the folder itself."
         ),
     )
     mentioned_documents: list[MentionedDocumentInfo] | None = Field(
diff --git a/surfsense_backend/app/tasks/chat/streaming/agent/event_loop.py b/surfsense_backend/app/tasks/chat/streaming/agent/event_loop.py
index 939cd9b17..5ffe46280 100644
--- a/surfsense_backend/app/tasks/chat/streaming/agent/event_loop.py
+++ b/surfsense_backend/app/tasks/chat/streaming/agent/event_loop.py
@@ -81,6 +81,7 @@ async def stream_agent_events(
     result.final_message_parts = final_assistant_parts_from_messages(
         state_values.get("messages")
     )
+    result.citation_registry = state_values.get("citation_registry")
 
     # Safety net: if astream_events was cancelled before
     # KnowledgeBasePersistenceMiddleware.aafter_agent ran, any staged work
diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/runtime_context.py b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/runtime_context.py
index 195a16b1e..5ef2b8ad1 100644
--- a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/runtime_context.py
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/runtime_context.py
@@ -22,7 +22,8 @@ def build_new_chat_runtime_context(
     request_id: str | None,
     turn_id: str,
 ) -> SurfSenseContextSchema:
-    """``mentioned_document_ids`` is consumed by ``KnowledgePriorityMiddleware``.
+    """``mentioned_document_ids`` is consumed by the ``search_knowledge_base``
+    tool (via ``referenced_document_ids``) to pin mentioned docs into scope.
 
     ``accepted_folder_ids`` (post-resolve) wins over the raw
     ``mentioned_folder_ids`` from the request: the resolver drops chips that
diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/shared/assistant_finalize.py b/surfsense_backend/app/tasks/chat/streaming/flows/shared/assistant_finalize.py
index 3f767c60b..c59c2dcda 100644
--- a/surfsense_backend/app/tasks/chat/streaming/flows/shared/assistant_finalize.py
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/shared/assistant_finalize.py
@@ -22,8 +22,12 @@ Never raises (best-effort, logs only).
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any
 
+from app.agents.chat.multi_agent_chat.shared.citations import (
+    CitationRegistry,
+    normalize_citations,
+)
 from app.tasks.chat.streaming.shared.stream_result import StreamResult
 from app.utils.perf import get_perf_logger
 
@@ -33,6 +37,35 @@ if TYPE_CHECKING:
 _perf_log = get_perf_logger()
 
 
+def _as_registry(raw: Any) -> CitationRegistry | None:
+    """Coerce the captured state value into a registry, tolerating a serialized dict."""
+    if isinstance(raw, CitationRegistry):
+        return raw
+    if isinstance(raw, dict):
+        try:
+            return CitationRegistry.model_validate(raw)
+        except Exception:
+            return None
+    return None
+
+
+def _resolve_citations(
+    content_payload: list[dict[str, Any]], raw_registry: Any
+) -> list[dict[str, Any]]:
+    """Rewrite ``[n]`` -> ``[citation:<payload>]`` in each text part before persisting.
+
+    No-op when the registry is missing, invalid, or empty; ``web_search``'s existing
+    ``[citation:url]`` markers pass through untouched (the regex matches bare ``[n]``).
+    """
+    registry = _as_registry(raw_registry)
+    if registry is None or not registry.by_n:
+        return content_payload
+    for part in content_payload:
+        if part.get("type") == "text" and isinstance(part.get("text"), str):
+            part["text"] = normalize_citations(part["text"], registry)
+    return content_payload
+
+
 async def finalize_assistant_message(
     *,
     stream_result: StreamResult | None,
@@ -79,6 +112,9 @@ async def finalize_assistant_message(
         content_payload,
         stream_result.final_message_parts,
     )
+    content_payload = _resolve_citations(
+        content_payload, stream_result.citation_registry
+    )
 
     if builder_stats is not None:
         _perf_log.info(
diff --git a/surfsense_backend/app/tasks/chat/streaming/shared/stream_result.py b/surfsense_backend/app/tasks/chat/streaming/shared/stream_result.py
index 5e164070a..96fc75708 100644
--- a/surfsense_backend/app/tasks/chat/streaming/shared/stream_result.py
+++ b/surfsense_backend/app/tasks/chat/streaming/shared/stream_result.py
@@ -39,3 +39,7 @@ class StreamResult:
     # state. Used after streaming completes as a provider-agnostic persistence
     # backfill when no text chunks reached the live stream.
     final_message_parts: list[dict[str, Any]] = field(default_factory=list)
+    # Per-conversation citation registry captured from the final LangGraph state
+    # (a ``CitationRegistry`` or its serialized dict). Read at finalize to rewrite
+    # the model's ``[n]`` ordinals into ``[citation:<payload>]`` markers.
+    citation_registry: Any | None = field(default=None, repr=False)
diff --git a/surfsense_backend/tests/integration/agents/multi_agent_chat/main_agent/tools/test_search_knowledge_base.py b/surfsense_backend/tests/integration/agents/multi_agent_chat/main_agent/tools/test_search_knowledge_base.py
new file mode 100644
index 000000000..b25e8eeeb
--- /dev/null
+++ b/surfsense_backend/tests/integration/agents/multi_agent_chat/main_agent/tools/test_search_knowledge_base.py
@@ -0,0 +1,237 @@
+"""Behavior tests for the ``search_knowledge_base`` main-agent tool.
+
+These exercise the tool through its public contract: seed a real document,
+invoke the tool, and assert on the ``Command`` it returns — the rendered
+``<retrieved_context>`` carries ``[n]`` labels and the citation registry handed
+back on state is populated. The tool's own DB session is redirected to the test
+session, and the embedding leg is pinned so the search is deterministic without
+a live model.
+"""
+
+from __future__ import annotations
+
+import contextlib
+import uuid
+from types import SimpleNamespace
+
+import pytest
+from langchain_core.messages import ToolMessage
+from langgraph.types import Command
+
+from app.agents.chat.multi_agent_chat.main_agent.tools import search_knowledge_base
+from app.agents.chat.multi_agent_chat.main_agent.tools.search_knowledge_base import (
+    create_search_knowledge_base_tool,
+)
+from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
+from app.config import config
+from app.db import Chunk, Document, DocumentType, Folder
+
+pytestmark = pytest.mark.integration
+
+_DIM = config.embedding_model_instance.dimension
+
+
+def _axis(index: int) -> list[float]:
+    vector = [0.0] * _DIM
+    vector[index] = 1.0
+    return vector
+
+
+async def _add_document(
+    db_session,
+    *,
+    search_space_id: int,
+    title: str,
+    text: str,
+    folder_id: int | None = None,
+):
+    document = Document(
+        title=title,
+        document_type=DocumentType.FILE,
+        content=text,
+        content_hash=uuid.uuid4().hex,
+        search_space_id=search_space_id,
+        folder_id=folder_id,
+        status={"state": "ready"},
+    )
+    db_session.add(document)
+    await db_session.flush()
+    db_session.add(
+        Chunk(content=text, document_id=document.id, position=0, embedding=_axis(0))
+    )
+    await db_session.flush()
+    return document
+
+
+async def _add_folder(db_session, *, search_space_id: int, name: str = "Folder"):
+    folder = Folder(name=name, position="0", search_space_id=search_space_id)
+    db_session.add(folder)
+    await db_session.flush()
+    return folder
+
+
+@pytest.fixture
+def _tool_uses_test_session(db_session, monkeypatch):
+    """Redirect the tool's ``shielded_async_session`` to the test transaction."""
+
+    @contextlib.asynccontextmanager
+    async def _session():
+        yield db_session
+
+    monkeypatch.setattr(search_knowledge_base, "shielded_async_session", _session)
+
+
+@pytest.fixture
+def _pinned_embedding(monkeypatch):
+    monkeypatch.setattr(
+        config.embedding_model_instance, "embed", lambda _query: _axis(0)
+    )
+
+
+async def _invoke(tool, query: str, state: dict | None = None, context=None):
+    runtime = SimpleNamespace(
+        state=state or {}, tool_call_id="call-1", context=context
+    )
+    return await tool.coroutine(query, runtime)
+
+
+def _mentions(*, document_ids=(), folder_ids=()):
+    return SimpleNamespace(
+        mentioned_document_ids=list(document_ids),
+        mentioned_folder_ids=list(folder_ids),
+    )
+
+
+async def test_tool_returns_retrieved_context_with_numbered_passages(
+    db_session, db_search_space, _tool_uses_test_session, _pinned_embedding
+):
+    await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        title="Asyncio Guide",
+        text="The asyncio library enables concurrency.",
+    )
+    tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)
+
+    result = await _invoke(tool, "asyncio")
+
+    assert isinstance(result, Command)
+    message = result.update["messages"][0]
+    assert isinstance(message, ToolMessage)
+    assert "<retrieved_context>" in message.content
+    assert "[1]" in message.content
+
+
+async def test_tool_populates_citation_registry_on_state(
+    db_session, db_search_space, _tool_uses_test_session, _pinned_embedding
+):
+    await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        title="Asyncio Guide",
+        text="The asyncio library enables concurrency.",
+    )
+    tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)
+
+    result = await _invoke(tool, "asyncio")
+
+    registry = result.update["citation_registry"]
+    assert isinstance(registry, CitationRegistry)
+    assert registry.by_n  # at least one passage was registered as [n]
+
+
+async def test_tool_reuses_existing_registry_numbering(
+    db_session, db_search_space, _tool_uses_test_session, _pinned_embedding
+):
+    await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        title="Asyncio Guide",
+        text="The asyncio library enables concurrency.",
+    )
+    tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)
+
+    first = await _invoke(tool, "asyncio")
+    carried = first.update["citation_registry"]
+    second = await _invoke(tool, "asyncio", state={"citation_registry": carried})
+
+    # Same passage searched twice keeps a single [n] (find-or-create).
+    assert len(second.update["citation_registry"].by_n) == 1
+
+
+async def test_tool_reports_no_matches_without_touching_state(
+    db_session, db_search_space, _tool_uses_test_session, _pinned_embedding
+):
+    tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)
+
+    result = await _invoke(tool, "nonexistent-term-zzz")
+
+    assert isinstance(result, str)
+    assert "No knowledge-base matches" in result
+
+
+async def test_tool_rejects_empty_query(
+    db_search_space, _tool_uses_test_session, _pinned_embedding
+):
+    tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)
+
+    result = await _invoke(tool, "   ")
+
+    assert isinstance(result, str)
+    assert "non-empty" in result
+
+
+async def test_document_mention_confines_search_to_pinned_doc(
+    db_session, db_search_space, _tool_uses_test_session, _pinned_embedding
+):
+    pinned = await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        title="Pinned",
+        text="asyncio appears in the pinned doc.",
+    )
+    await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        title="Other",
+        text="asyncio appears in the other doc.",
+    )
+    tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)
+
+    result = await _invoke(
+        tool, "asyncio", context=_mentions(document_ids=[pinned.id])
+    )
+
+    # Search is confined to the pinned doc: only its content is rendered.
+    content = result.update["messages"][0].content
+    assert "Pinned" in content
+    assert "Other" not in content
+
+
+async def test_folder_mention_confines_search_to_folder_documents(
+    db_session, db_search_space, _tool_uses_test_session, _pinned_embedding
+):
+    folder = await _add_folder(db_session, search_space_id=db_search_space.id)
+    await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        title="Inside",
+        text="asyncio appears inside the folder.",
+        folder_id=folder.id,
+    )
+    await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        title="Outside",
+        text="asyncio appears outside the folder.",
+    )
+    tool = create_search_knowledge_base_tool(search_space_id=db_search_space.id)
+
+    result = await _invoke(
+        tool, "asyncio", context=_mentions(folder_ids=[folder.id])
+    )
+
+    # Search is confined to the folder's document: only its content is rendered.
+    content = result.update["messages"][0].content
+    assert "Inside" in content
+    assert "Outside" not in content
diff --git a/surfsense_backend/tests/integration/agents/multi_agent_chat/shared/retrieval/test_hybrid_search.py b/surfsense_backend/tests/integration/agents/multi_agent_chat/shared/retrieval/test_hybrid_search.py
new file mode 100644
index 000000000..f7ba86a67
--- /dev/null
+++ b/surfsense_backend/tests/integration/agents/multi_agent_chat/shared/retrieval/test_hybrid_search.py
@@ -0,0 +1,236 @@
+"""Behavior tests for the hybrid chunk retriever against a real Postgres.
+
+These exercise ``search_chunks`` through its public surface only: seed real
+documents/chunks, run a search, and assert on the returned ``DocumentHit``s —
+never on SQL shape or internal ranking math. ``query_embedding`` is supplied
+directly (a public parameter) so the semantic leg is deterministic instead of
+depending on a live embedding model.
+"""
+
+from __future__ import annotations
+
+import uuid
+
+import pytest
+
+from app.agents.chat.multi_agent_chat.shared.retrieval.hybrid_search import (
+    search_chunks,
+)
+from app.agents.chat.multi_agent_chat.shared.retrieval.models import SearchScope
+from app.config import config
+from app.db import Chunk, Document, DocumentType, SearchSpace
+
+pytestmark = pytest.mark.integration
+
+_DIM = config.embedding_model_instance.dimension
+
+
+def _axis(index: int) -> list[float]:
+    """A unit vector pointing along one axis — orthogonal axes are dissimilar."""
+    vector = [0.0] * _DIM
+    vector[index] = 1.0
+    return vector
+
+
+async def _add_document(
+    db_session,
+    *,
+    search_space_id: int,
+    title: str = "Doc",
+    document_type: DocumentType = DocumentType.FILE,
+    state: str = "ready",
+    chunks: list[tuple[str, int, list[float]]],
+) -> Document:
+    """Persist one document and its chunks; ``chunks`` is (content, position, embedding)."""
+    document = Document(
+        title=title,
+        document_type=document_type,
+        content="\n".join(content for content, _, _ in chunks),
+        content_hash=uuid.uuid4().hex,
+        search_space_id=search_space_id,
+        status={"state": state},
+    )
+    db_session.add(document)
+    await db_session.flush()
+    for content, position, embedding in chunks:
+        db_session.add(
+            Chunk(
+                content=content,
+                document_id=document.id,
+                position=position,
+                embedding=embedding,
+            )
+        )
+    await db_session.flush()
+    return document
+
+
+async def test_keyword_relevant_document_is_retrieved(db_session, db_search_space):
+    document = await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        title="Asyncio Guide",
+        chunks=[("The asyncio library enables concurrency.", 0, _axis(0))],
+    )
+
+    results = await search_chunks(
+        db_session,
+        search_space_id=db_search_space.id,
+        query="asyncio",
+        scope=SearchScope(),
+        top_k=5,
+        query_embedding=_axis(99),
+    )
+
+    assert document.id in {hit.document_id for hit in results}
+
+
+async def test_semantically_closest_document_ranks_first(db_session, db_search_space):
+    aligned = await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        title="Background Work",
+        chunks=[("Parallel execution of background work.", 0, _axis(0))],
+    )
+    await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        title="Dessert",
+        chunks=[("Recipes for chocolate cake.", 0, _axis(1))],
+    )
+
+    results = await search_chunks(
+        db_session,
+        search_space_id=db_search_space.id,
+        query="asynchronous coroutines",
+        scope=SearchScope(),
+        top_k=5,
+        query_embedding=_axis(0),
+    )
+
+    assert results[0].document_id == aligned.id
+
+
+async def test_results_stay_within_the_search_space(db_session, db_search_space):
+    other_space = SearchSpace(name="Other Space", user_id=db_search_space.user_id)
+    db_session.add(other_space)
+    await db_session.flush()
+
+    mine = await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        chunks=[("Shared keyword asyncio here.", 0, _axis(0))],
+    )
+    foreign = await _add_document(
+        db_session,
+        search_space_id=other_space.id,
+        chunks=[("Shared keyword asyncio here.", 0, _axis(0))],
+    )
+
+    results = await search_chunks(
+        db_session,
+        search_space_id=db_search_space.id,
+        query="asyncio",
+        scope=SearchScope(),
+        top_k=5,
+        query_embedding=_axis(0),
+    )
+
+    found = {hit.document_id for hit in results}
+    assert mine.id in found and foreign.id not in found
+
+
+async def test_document_ids_scope_pins_results(db_session, db_search_space):
+    pinned = await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        chunks=[("asyncio appears in the pinned doc.", 0, _axis(0))],
+    )
+    await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        chunks=[("asyncio appears in the other doc too.", 0, _axis(0))],
+    )
+
+    results = await search_chunks(
+        db_session,
+        search_space_id=db_search_space.id,
+        query="asyncio",
+        scope=SearchScope(document_ids=(pinned.id,)),
+        top_k=5,
+        query_embedding=_axis(0),
+    )
+
+    assert {hit.document_id for hit in results} == {pinned.id}
+
+
+async def test_deleting_documents_are_excluded(db_session, db_search_space):
+    ready = await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        chunks=[("asyncio in a ready document.", 0, _axis(0))],
+    )
+    deleting = await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        state="deleting",
+        chunks=[("asyncio in a deleting document.", 0, _axis(0))],
+    )
+
+    results = await search_chunks(
+        db_session,
+        search_space_id=db_search_space.id,
+        query="asyncio",
+        scope=SearchScope(),
+        top_k=5,
+        query_embedding=_axis(0),
+    )
+
+    found = {hit.document_id for hit in results}
+    assert ready.id in found and deleting.id not in found
+
+
+async def test_matched_chunks_are_ordered_for_reading(db_session, db_search_space):
+    # Insert out of order, and give the later-position chunk the stronger
+    # semantic score, so reading order differs from both insertion and score.
+    document = await _add_document(
+        db_session,
+        search_space_id=db_search_space.id,
+        chunks=[
+            ("asyncio paragraph two.", 1, _axis(0)),
+            ("asyncio paragraph one.", 0, _axis(50)),
+        ],
+    )
+
+    results = await search_chunks(
+        db_session,
+        search_space_id=db_search_space.id,
+        query="asyncio",
+        scope=SearchScope(),
+        top_k=5,
+        query_embedding=_axis(0),
+    )
+
+    hit = next(hit for hit in results if hit.document_id == document.id)
+    assert [chunk.position for chunk in hit.chunks] == [0, 1]
+
+
+async def test_top_k_caps_the_number_of_documents(db_session, db_search_space):
+    for index in range(3):
+        await _add_document(
+            db_session,
+            search_space_id=db_search_space.id,
+            title=f"Doc {index}",
+            chunks=[(f"asyncio mentioned in doc {index}.", 0, _axis(index))],
+        )
+
+    results = await search_chunks(
+        db_session,
+        search_space_id=db_search_space.id,
+        query="asyncio",
+        scope=SearchScope(),
+        top_k=2,
+        query_embedding=_axis(0),
+    )
+
+    assert len(results) == 2
diff --git a/surfsense_backend/tests/integration/google_unification/conftest.py b/surfsense_backend/tests/integration/google_unification/conftest.py
index 390442fdd..151ee98e3 100644
--- a/surfsense_backend/tests/integration/google_unification/conftest.py
+++ b/surfsense_backend/tests/integration/google_unification/conftest.py
@@ -3,7 +3,6 @@
 from __future__ import annotations
 
 import uuid
-from contextlib import asynccontextmanager
 from datetime import UTC, datetime
 from unittest.mock import MagicMock
 
@@ -227,23 +226,6 @@ def patched_embed(monkeypatch):
     return mock
 
 
-@pytest.fixture
-def patched_shielded_session(async_engine, monkeypatch):
-    """Replace ``shielded_async_session`` in the knowledge_base module
-    with one that yields sessions from the test engine."""
-    test_maker = async_sessionmaker(async_engine, expire_on_commit=False)
-
-    @asynccontextmanager
-    async def _test_shielded():
-        async with test_maker() as session:
-            yield session
-
-    monkeypatch.setattr(
-        "app.agents.chat.multi_agent_chat.subagents.builtins.deliverables.tools.knowledge_base.shielded_async_session",
-        _test_shielded,
-    )
-
-
 # ---------------------------------------------------------------------------
 # Indexer test helpers
 # ---------------------------------------------------------------------------
diff --git a/surfsense_backend/tests/integration/google_unification/test_browse_includes_legacy_docs.py b/surfsense_backend/tests/integration/google_unification/test_browse_includes_legacy_docs.py
deleted file mode 100644
index f0d5c6c6c..000000000
--- a/surfsense_backend/tests/integration/google_unification/test_browse_includes_legacy_docs.py
+++ /dev/null
@@ -1,46 +0,0 @@
-"""Integration test: _browse_recent_documents returns docs of multiple types.
-
-Exercises the browse path (degenerate-query fallback) with a real PostgreSQL
-database.  Verifies that passing a list of document types correctly returns
-documents of all listed types -- the same ``.in_()`` SQL path used by hybrid
-search but through the browse/recency-ordered code path.
-"""
-
-from __future__ import annotations
-
-import pytest
-
-pytestmark = pytest.mark.integration
-
-
-async def test_browse_recent_documents_with_list_type_returns_both(
-    committed_google_data, patched_shielded_session
-):
-    """_browse_recent_documents returns docs of all types when given a list."""
-    from app.agents.chat.multi_agent_chat.subagents.builtins.deliverables.tools.knowledge_base import (
-        _browse_recent_documents,
-    )
-
-    space_id = committed_google_data["search_space_id"]
-
-    results = await _browse_recent_documents(
-        search_space_id=space_id,
-        document_type=["GOOGLE_DRIVE_FILE", "COMPOSIO_GOOGLE_DRIVE_CONNECTOR"],
-        top_k=10,
-        start_date=None,
-        end_date=None,
-    )
-
-    returned_types = set()
-    for doc in results:
-        doc_info = doc.get("document", {})
-        dtype = doc_info.get("document_type")
-        if dtype:
-            returned_types.add(dtype)
-
-    assert "GOOGLE_DRIVE_FILE" in returned_types, (
-        "Native Drive docs should appear in browse results"
-    )
-    assert "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" in returned_types, (
-        "Legacy Composio Drive docs should appear in browse results"
-    )
diff --git a/surfsense_backend/tests/integration/retriever/test_knowledge_search_date_filters.py b/surfsense_backend/tests/integration/retriever/test_knowledge_search_date_filters.py
deleted file mode 100644
index ce076b147..000000000
--- a/surfsense_backend/tests/integration/retriever/test_knowledge_search_date_filters.py
+++ /dev/null
@@ -1,61 +0,0 @@
-"""Integration smoke tests for KB search query/date scoping."""
-
-from __future__ import annotations
-
-from contextlib import asynccontextmanager
-from datetime import UTC, datetime, timedelta
-
-import numpy as np
-import pytest
-
-from app.agents.chat.multi_agent_chat.shared.middleware import knowledge_search as ks
-from app.agents.chat.multi_agent_chat.shared.middleware.knowledge_search import (
-    search_knowledge_base,
-)
-
-from .conftest import DUMMY_EMBEDDING
-
-pytestmark = pytest.mark.integration
-
-
-async def test_search_knowledge_base_applies_date_filters(
-    db_session,
-    seed_date_filtered_docs,
-    monkeypatch,
-):
-    """Date filters should remove older matching documents from scoped KB results."""
-
-    @asynccontextmanager
-    async def fake_shielded_async_session():
-        yield db_session
-
-    monkeypatch.setattr(ks, "shielded_async_session", fake_shielded_async_session)
-    monkeypatch.setattr(
-        ks, "embed_texts", lambda texts: [np.array(DUMMY_EMBEDDING) for _ in texts]
-    )
-
-    space_id = seed_date_filtered_docs["search_space"].id
-    recent_cutoff = datetime.now(UTC) - timedelta(days=30)
-
-    unfiltered_results = await search_knowledge_base(
-        query="ocv meeting decisions",
-        search_space_id=space_id,
-        available_document_types=["FILE"],
-        top_k=10,
-    )
-    filtered_results = await search_knowledge_base(
-        query="ocv meeting decisions",
-        search_space_id=space_id,
-        available_document_types=["FILE"],
-        top_k=10,
-        start_date=recent_cutoff,
-        end_date=datetime.now(UTC),
-    )
-
-    unfiltered_ids = {result["document"]["id"] for result in unfiltered_results}
-    filtered_ids = {result["document"]["id"] for result in filtered_results}
-
-    assert seed_date_filtered_docs["recent_doc"].id in unfiltered_ids
-    assert seed_date_filtered_docs["old_doc"].id in unfiltered_ids
-    assert seed_date_filtered_docs["recent_doc"].id in filtered_ids
-    assert seed_date_filtered_docs["old_doc"].id not in filtered_ids
diff --git a/surfsense_backend/tests/unit/agents/chat/runtime/references/test_connectors.py b/surfsense_backend/tests/unit/agents/chat/runtime/references/test_connectors.py
new file mode 100644
index 000000000..56e938812
--- /dev/null
+++ b/surfsense_backend/tests/unit/agents/chat/runtime/references/test_connectors.py
@@ -0,0 +1,41 @@
+"""Tests for connector pointer field selection."""
+
+from __future__ import annotations
+
+import pytest
+
+from app.agents.chat.runtime.references.connectors import connector_pointer_fields
+
+pytestmark = pytest.mark.unit
+
+
+def test_prefers_chip_account_and_type() -> None:
+    label, provider = connector_pointer_fields(
+        account_name="work@acme.com",
+        connector_type="Gmail",
+        fallback_name="My Gmail",
+    )
+
+    assert (label, provider) == ("work@acme.com", "Gmail")
+
+
+def test_falls_back_to_stored_name_when_account_missing() -> None:
+    label, provider = connector_pointer_fields(
+        account_name=None,
+        connector_type="Slack",
+        fallback_name="Acme Slack",
+    )
+
+    assert label == "Acme Slack"
+    assert provider == "Slack"
+
+
+def test_provider_is_none_when_unknown() -> None:
+    label, provider = connector_pointer_fields(
+        account_name="a@b.com",
+        connector_type=None,
+        fallback_name=None,
+    )
+
+    assert label == "a@b.com"
+    assert provider is None
diff --git a/surfsense_backend/tests/unit/agents/chat/runtime/references/test_folders.py b/surfsense_backend/tests/unit/agents/chat/runtime/references/test_folders.py
new file mode 100644
index 000000000..856bcb172
--- /dev/null
+++ b/surfsense_backend/tests/unit/agents/chat/runtime/references/test_folders.py
@@ -0,0 +1,21 @@
+"""Tests for folder pointer-path shaping."""
+
+from __future__ import annotations
+
+import pytest
+
+from app.agents.chat.runtime.references.folders import folder_pointer_path
+
+pytestmark = pytest.mark.unit
+
+
+def test_adds_trailing_slash_so_path_reads_as_directory() -> None:
+    assert folder_pointer_path(7, {7: "/documents/Specs"}) == "/documents/Specs/"
+
+
+def test_keeps_existing_trailing_slash() -> None:
+    assert folder_pointer_path(7, {7: "/documents/Specs/"}) == "/documents/Specs/"
+
+
+def test_unknown_folder_falls_back_to_documents_root() -> None:
+    assert folder_pointer_path(99, {}) == "/documents/"
diff --git a/surfsense_backend/tests/unit/agents/chat/runtime/references/test_reference_pointers.py b/surfsense_backend/tests/unit/agents/chat/runtime/references/test_reference_pointers.py
new file mode 100644
index 000000000..4ac23b616
--- /dev/null
+++ b/surfsense_backend/tests/unit/agents/chat/runtime/references/test_reference_pointers.py
@@ -0,0 +1,93 @@
+"""Tests for reference pointer rendering."""
+
+from __future__ import annotations
+
+import pytest
+
+from app.agents.chat.runtime.references import (
+    ChatReference,
+    ConnectorReference,
+    DocumentReference,
+    FolderReference,
+    render_reference_pointers,
+)
+
+pytestmark = pytest.mark.unit
+
+
+def test_returns_none_when_no_references() -> None:
+    assert render_reference_pointers([]) is None  # nothing to render, so no block
+
+
+def test_wraps_block_and_keeps_reference_order() -> None:
+    block = render_reference_pointers(
+        [
+            DocumentReference(entity_id=42, label="Q3 Notes", path="/documents/q3.xml"),
+            ChatReference(entity_id=5, label="Pricing"),
+        ]
+    )
+    # Order is checked by comparing where "document 42" and "chat 5" appear.
+    assert block is not None
+    assert block.startswith("<referenced_this_turn>")
+    assert block.endswith("</referenced_this_turn>")
+    assert block.index("document 42") < block.index("chat 5")
+
+
+def test_document_with_path_shows_title_and_path() -> None:
+    block = render_reference_pointers(
+        [
+            DocumentReference(
+                entity_id=42,
+                label="Q3 Launch Notes",
+                path="/documents/Launch/Q3.xml",
+            )
+        ]
+    )
+    # Expected line shape: - document <id> — "<label>" (<path>)
+    assert block is not None
+    assert '- document 42 — "Q3 Launch Notes" (/documents/Launch/Q3.xml)' in block
+
+
+def test_folder_with_path_renders_with_folder_kind() -> None:
+    block = render_reference_pointers(
+        [FolderReference(entity_id=7, label="Specs", path="/documents/Specs/")]
+    )
+    # Expected line shape: - folder <id> — "<label>" (<path>)
+    assert block is not None
+    assert '- folder 7 — "Specs" (/documents/Specs/)' in block
+
+
+def test_connector_shows_provider_and_account() -> None:
+    block = render_reference_pointers(
+        [ConnectorReference(entity_id=12, label="work@acme.com", provider="Gmail")]
+    )
+    # With a provider the line reads: - connector <id> — <provider> (<label>)
+    assert block is not None
+    assert "- connector 12 — Gmail (work@acme.com)" in block
+
+
+def test_connector_without_provider_falls_back_to_label() -> None:
+    block = render_reference_pointers(
+        [ConnectorReference(entity_id=12, label="work@acme.com")]
+    )
+    # Without a provider the unquoted label follows the dash directly.
+    assert block is not None
+    assert "- connector 12 — work@acme.com" in block
+
+
+def test_chat_shows_quoted_title() -> None:
+    block = render_reference_pointers(
+        [ChatReference(entity_id=5, label="Pricing debate")]
+    )
+    # A chat line quotes its label: - chat <id> — "<label>"
+    assert block is not None
+    assert '- chat 5 — "Pricing debate"' in block
+
+
+def test_label_whitespace_is_collapsed_to_one_line() -> None:
+    block = render_reference_pointers(
+        [DocumentReference(entity_id=1, label="line one\nline two", path="/d.xml")]
+    )
+    # The newline inside the label comes out as a single space.
+    assert block is not None
+    assert '- document 1 — "line one line two"' in block
diff --git a/surfsense_backend/tests/unit/agents/chat/shared/tools/test_web_search.py b/surfsense_backend/tests/unit/agents/chat/shared/tools/test_web_search.py
new file mode 100644
index 000000000..7137bfdfc
--- /dev/null
+++ b/surfsense_backend/tests/unit/agents/chat/shared/tools/test_web_search.py
@@ -0,0 +1,93 @@
+"""Tests for the shared ``web_search`` tool's citable-result adaptation.
+
+The tool's network path (SearXNG + live connectors) is out of scope here; these
+cover the pure mapping from raw web results to renderable, citable documents and
+the end-to-end registration of ``WEB_RESULT`` ``[n]`` labels.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from app.agents.chat.multi_agent_chat.shared.citations import (
+    CitationRegistry,
+    CitationSourceType,
+)
+from app.agents.chat.multi_agent_chat.shared.document_render import render_web_results
+from app.agents.chat.shared.tools.web_search import (
+    _to_renderable_web_documents,
+    _web_source_label,
+)
+
+pytestmark = pytest.mark.unit
+
+
+def _raw_result(url: str, title: str, content: str) -> dict:
+    return {
+        "document": {"title": title, "metadata": {"url": url}},
+        "content": content,  # top-level key, outside "document"
+    }
+
+
+def test_web_source_label_strips_scheme_and_www() -> None:
+    assert _web_source_label("https://www.example.com/path") == "Web · example.com"
+    assert _web_source_label("http://news.site.org/a/b") == "Web · news.site.org"
+    assert _web_source_label("") == "Web"  # empty URL leaves the bare prefix
+
+
+def test_adapter_maps_each_result_to_one_web_passage() -> None:
+    docs = _to_renderable_web_documents(
+        [
+            _raw_result("https://a.com/x", "Alpha", "alpha body"),
+            _raw_result("https://b.com/y", "Beta", "beta body"),
+        ]
+    )
+    # Titles keep input order; every passage is typed WEB_RESULT.
+    assert [d.title for d in docs] == ["Alpha", "Beta"]
+    passages = [p for d in docs for p in d.passages]
+    assert all(p.source_type is CitationSourceType.WEB_RESULT for p in passages)
+    assert passages[0].locator == {"url": "https://a.com/x"}
+    assert passages[0].content == "alpha body"
+
+
+def test_adapter_skips_results_without_url_or_content() -> None:
+    docs = _to_renderable_web_documents(
+        [
+            _raw_result("", "No URL", "has content"),
+            _raw_result("https://c.com/z", "Empty", "   "),
+            _raw_result("https://d.com/w", "Good", "real content"),
+        ]
+    )
+    # Only the result with both a URL and non-blank content survives.
+    assert [d.title for d in docs] == ["Good"]
+
+
+def test_adapter_truncates_on_char_budget() -> None:
+    big = "x" * 30
+    docs = _to_renderable_web_documents(
+        [
+            _raw_result("https://a.com", "A", big),
+            _raw_result("https://b.com", "B", big),
+            _raw_result("https://c.com", "C", big),
+        ],
+        max_chars=50,
+    )
+    # Bodies are 30 chars each against max_chars=50: the first fits (30), the
+    # second would cross 50, and per the expected list nothing after "A" is kept.
+    assert [d.title for d in docs] == ["A"]
+
+
+def test_end_to_end_registers_web_results_for_citation() -> None:
+    registry = CitationRegistry()
+    docs = _to_renderable_web_documents(
+        [_raw_result("https://example.com/a", "Example", "the answer is 42")]
+    )
+
+    block = render_web_results(docs, registry)
+    # The [1] shown in the block must resolve to a WEB_RESULT entry with the URL.
+    assert block is not None
+    assert "[1] the answer is 42" in block
+    entry = registry.resolve(1)
+    assert entry is not None
+    assert entry.source_type is CitationSourceType.WEB_RESULT
+    assert entry.locator == {"url": "https://example.com/a"}
diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/citations/test_markers.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/citations/test_markers.py
new file mode 100644
index 000000000..53cf058a8
--- /dev/null
+++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/citations/test_markers.py
@@ -0,0 +1,49 @@
+"""Tests for citation-entry → frontend payload mapping."""
+
+from __future__ import annotations
+
+import pytest
+
+from app.agents.chat.multi_agent_chat.shared.citations.markers import (
+    to_frontend_payload,
+)
+from app.agents.chat.multi_agent_chat.shared.citations.models import (
+    CitationEntry,
+    CitationSourceType,
+)
+
+pytestmark = pytest.mark.unit
+
+
+def _entry(source_type: CitationSourceType, locator: dict) -> CitationEntry:
+    return CitationEntry(n=1, source_type=source_type, locator=locator)  # n fixed at 1
+
+
+def test_kb_chunk_maps_to_chunk_id() -> None:
+    entry = _entry(CitationSourceType.KB_CHUNK, {"chunk_id": 42, "document_id": 7})
+    # The payload is the chunk id as a string; document_id does not appear in it.
+    assert to_frontend_payload(entry) == "42"
+
+
+def test_anon_chunk_keeps_negative_id() -> None:
+    entry = _entry(CitationSourceType.ANON_CHUNK, {"chunk_id": -3})
+    # The minus sign is kept in the string payload.
+    assert to_frontend_payload(entry) == "-3"
+
+
+def test_web_result_maps_to_url() -> None:
+    entry = _entry(CitationSourceType.WEB_RESULT, {"url": "https://example.com/a"})
+    # For a web result the payload is the URL string itself.
+    assert to_frontend_payload(entry) == "https://example.com/a"
+
+
+def test_not_yet_renderable_kind_is_dropped() -> None:
+    entry = _entry(CitationSourceType.CHAT_TURN, {"thread_id": 1, "turn": 2})
+    # CHAT_TURN stands in for a kind that yields no payload (None).
+    assert to_frontend_payload(entry) is None
+
+
+def test_missing_locator_field_is_dropped() -> None:
+    entry = _entry(CitationSourceType.KB_CHUNK, {})
+    # A KB_CHUNK entry whose locator has no chunk_id yields None.
+    assert to_frontend_payload(entry) is None
diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/citations/test_normalizer.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/citations/test_normalizer.py
new file mode 100644
index 000000000..dddd240df
--- /dev/null
+++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/citations/test_normalizer.py
@@ -0,0 +1,113 @@
+"""Tests for rewriting model ``[n]`` ordinals into frontend citation markers."""
+
+from __future__ import annotations
+
+import pytest
+
+from app.agents.chat.multi_agent_chat.shared.citations.models import CitationSourceType
+from app.agents.chat.multi_agent_chat.shared.citations.normalizer import (
+    normalize_citations,
+)
+from app.agents.chat.multi_agent_chat.shared.citations.registry import CitationRegistry
+
+pytestmark = pytest.mark.unit
+
+
+def _registry_with_chunks(*chunk_ids: int) -> CitationRegistry:
+    registry = CitationRegistry()
+    for chunk_id in chunk_ids:
+        registry.register(CitationSourceType.KB_CHUNK, {"chunk_id": chunk_id})
+    return registry  # chunks were registered in argument order
+
+
+def test_single_ordinal_is_rewritten() -> None:
+    registry = _registry_with_chunks(42)
+    # [1] is the only registered ordinal and becomes [citation:<chunk id>].
+    assert normalize_citations("We shipped it [1].", registry) == (
+        "We shipped it [citation:42]."
+    )
+
+
+def test_adjacent_brackets_are_each_rewritten() -> None:
+    registry = _registry_with_chunks(42, 7)
+    # Back-to-back markers are rewritten one by one, with nothing put between.
+    assert normalize_citations("Both agree [1][2].", registry) == (
+        "Both agree [citation:42][citation:7]."
+    )
+
+
+def test_comma_separated_brackets_are_each_rewritten() -> None:
+    registry = _registry_with_chunks(42, 7)
+    # The ", " between the two markers is left as written.
+    assert normalize_citations("Both agree [1], [2].", registry) == (
+        "Both agree [citation:42], [citation:7]."
+    )
+
+
+def test_unknown_ordinal_is_dropped() -> None:
+    registry = _registry_with_chunks(42)
+    # [9] was never registered: it is removed, leaving both surrounding spaces.
+    assert normalize_citations("Maybe [9] is real.", registry) == "Maybe  is real."
+
+
+def test_unknown_ordinal_among_known_is_dropped() -> None:
+    registry = _registry_with_chunks(42)
+    # Only the unresolved [9] disappears; the resolved [1] is still rewritten.
+    assert normalize_citations("See [1][9].", registry) == "See [citation:42]."
+
+
+def test_web_result_rewrites_to_url() -> None:
+    registry = CitationRegistry()
+    registry.register(CitationSourceType.WEB_RESULT, {"url": "https://example.com"})
+    # A web result's marker carries its URL instead of a chunk id.
+    assert normalize_citations("Per the docs [1].", registry) == (
+        "Per the docs [citation:https://example.com]."
+    )
+
+
+def test_word_glued_citation_is_rewritten() -> None:
+    # The model frequently writes citations glued to the preceding word
+    # (``docs[1]``); these must still resolve to a marker, not leak as raw text.
+    registry = _registry_with_chunks(42)
+    # No space is inserted: the marker stays attached to "docs".
+    assert normalize_citations("verifying against docs[1].", registry) == (
+        "verifying against docs[citation:42]."
+    )
+
+
+def test_word_glued_unknown_ordinal_drops() -> None:
+    # A glued ordinal that doesn't resolve drops harmlessly (no broken marker,
+    # no raw ``[n]`` leak) rather than being preserved as array-index syntax.
+    registry = _registry_with_chunks(42)
+    # "notes[9]" becomes "notes"; the spaces around the word are untouched.
+    assert normalize_citations("see notes[9] later", registry) == "see notes later"
+
+
+def test_array_index_inside_code_is_left_alone() -> None:
+    # Genuine array/index syntax is protected by the code-region carve-out.
+    registry = _registry_with_chunks(42)
+    # The whole sentence, backticked span included, comes back unchanged.
+    assert normalize_citations("Read `arr[1]` carefully.", registry) == (
+        "Read `arr[1]` carefully."
+    )
+
+
+def test_ordinals_inside_inline_code_are_untouched() -> None:
+    registry = _registry_with_chunks(42)
+    # Same [1] twice: untouched inside backticks, rewritten outside them.
+    assert normalize_citations("Use `list[1]` here [1].", registry) == (
+        "Use `list[1]` here [citation:42]."
+    )
+
+
+def test_ordinals_inside_fenced_code_are_untouched() -> None:
+    registry = _registry_with_chunks(42)
+    text = "Before [1].\n```\nx = a[1]\n```\nAfter [1]."
+    # Only the two [1] outside the ``` fence are rewritten.
+    assert normalize_citations(text, registry) == (
+        "Before [citation:42].\n```\nx = a[1]\n```\nAfter [citation:42]."
+    )
+
+
+def test_empty_text_is_returned_unchanged() -> None:
+    assert normalize_citations("", _registry_with_chunks(42)) == ""  # "" in, "" out
diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/citations/test_registry.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/citations/test_registry.py
new file mode 100644
index 000000000..ba2d7cc59
--- /dev/null
+++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/citations/test_registry.py
@@ -0,0 +1,179 @@
+"""Unit tests for the citation registry spine."""
+
+from __future__ import annotations
+
+import pytest
+
+from app.agents.chat.multi_agent_chat.shared.citations import (
+    CitationRegistry,
+    CitationSourceType,
+    make_key,
+)
+
+# Same module-level marker as the sibling test modules, so `-m unit` selects it.
+pytestmark = pytest.mark.unit
+
+
+def test_register_assigns_monotonic_labels() -> None:
+    registry = CitationRegistry()
+
+    first = registry.register(
+        CitationSourceType.KB_CHUNK, {"document_id": 42, "chunk_id": 880}
+    )
+    second = registry.register(
+        CitationSourceType.KB_CHUNK, {"document_id": 42, "chunk_id": 881}
+    )
+    # Labels start at 1; next_n ends one past the last label handed out.
+    assert (first, second) == (1, 2)
+    assert registry.next_n == 3
+
+
+def test_register_is_find_or_create_for_same_unit() -> None:
+    registry = CitationRegistry()
+    locator = {"document_id": 42, "chunk_id": 880}
+
+    first = registry.register(CitationSourceType.KB_CHUNK, locator)
+    again = registry.register(CitationSourceType.KB_CHUNK, locator)
+    # The second call returns the existing label and adds no entry.
+    assert first == again == 1
+    assert len(registry.by_n) == 1
+    assert registry.next_n == 2
+
+
+def test_dedup_is_insensitive_to_locator_key_order() -> None:
+    registry = CitationRegistry()
+
+    first = registry.register(
+        CitationSourceType.KB_CHUNK, {"document_id": 42, "chunk_id": 880}
+    )
+    reordered = registry.register(
+        CitationSourceType.KB_CHUNK, {"chunk_id": 880, "document_id": 42}
+    )
+    # Same keys and values in a different order count as the same source.
+    assert first == reordered
+
+
+def test_same_locator_values_across_types_do_not_collide() -> None:
+    registry = CitationRegistry()
+
+    chunk = registry.register(CitationSourceType.KB_CHUNK, {"id": 7})
+    chat = registry.register(CitationSourceType.CHAT_TURN, {"id": 7})
+    # Identical locators under different source types get different labels.
+    assert chunk != chat
+
+
+def test_resolve_returns_entry_with_locator_and_display() -> None:
+    registry = CitationRegistry()
+    n = registry.register(
+        CitationSourceType.WEB_RESULT,
+        {"url": "https://example.com"},
+        {"title": "Example"},
+    )
+    # The third positional argument comes back as entry.display.
+    entry = registry.resolve(n)
+
+    assert entry is not None
+    assert entry.n == n
+    assert entry.source_type is CitationSourceType.WEB_RESULT
+    assert entry.locator == {"url": "https://example.com"}
+    assert entry.display == {"title": "Example"}
+
+
+def test_resolve_unknown_label_returns_none() -> None:
+    registry = CitationRegistry()
+    # Nothing was registered, so label 999 cannot resolve.
+    assert registry.resolve(999) is None
+
+
+def test_registry_round_trips_through_serialization() -> None:
+    registry = CitationRegistry()
+    registry.register(
+        CitationSourceType.KB_CHUNK,
+        {"document_id": 42, "chunk_id": 880},
+        {"title": "Q3 Launch Notes"},
+    )
+    # Round trip: model_dump() output is fed back through model_validate().
+    restored = CitationRegistry.model_validate(registry.model_dump())
+
+    entry = restored.resolve(1)
+    assert entry is not None
+    assert entry.source_type is CitationSourceType.KB_CHUNK
+    assert restored.next_n == registry.next_n
+
+
+def test_make_key_is_stable_and_type_prefixed() -> None:
+    key_a = make_key(CitationSourceType.KB_CHUNK, {"document_id": 42, "chunk_id": 880})
+    key_b = make_key(CitationSourceType.KB_CHUNK, {"chunk_id": 880, "document_id": 42})
+    # Locator key order does not change the key, which starts with "kb_chunk|".
+    assert key_a == key_b
+    assert key_a.startswith("kb_chunk|")
+
+
+def _kb(registry: CitationRegistry, chunk_id: int) -> int:
+    return registry.register(
+        CitationSourceType.KB_CHUNK, {"document_id": 1, "chunk_id": chunk_id}
+    )  # document_id is always 1; only chunk_id varies
+
+
+def test_merge_unions_disjoint_registries_preserving_labels() -> None:
+    left = CitationRegistry()
+    _kb(left, 10)  # [1]
+    _kb(left, 11)  # [2]
+
+    # A branch that forked from `left`, then registered its own chunk at [3].
+    right = left.model_copy(deep=True)
+    third = _kb(right, 12)  # [3]
+    assert third == 3
+
+    merged = left.merge(right)
+    # Every label keeps its source, and next_n moves past the highest label.
+    assert merged.resolve(1).locator["chunk_id"] == 10
+    assert merged.resolve(2).locator["chunk_id"] == 11
+    assert merged.resolve(3).locator["chunk_id"] == 12
+    assert merged.next_n == 4
+
+
+def test_merge_keeps_one_label_for_a_shared_source() -> None:
+    left = CitationRegistry()
+    _kb(left, 10)  # [1]
+    right = CitationRegistry()
+    _kb(right, 10)  # also [1], same source
+
+    merged = left.merge(right)
+    # Both sides hold chunk 10 as [1]; the merge keeps a single entry.
+    assert len(merged.by_n) == 1
+    assert merged.resolve(1).locator["chunk_id"] == 10
+    assert merged.next_n == 2
+
+
+def test_merge_remints_on_collision_without_losing_sources() -> None:
+    # Two branches forked from the same base [1], each minting a *different*
+    # source at [2]. Merge must keep both sources, re-minting one.
+    base = CitationRegistry()
+    _kb(base, 10)  # [1]
+
+    left = base.model_copy(deep=True)
+    _kb(left, 11)  # [2] -> chunk 11
+
+    right = base.model_copy(deep=True)
+    _kb(right, 12)  # [2] -> chunk 12 (collision)
+
+    merged = left.merge(right)
+    # Nothing is lost: all three chunk ids are present after the merge.
+    chunk_ids = {entry.locator["chunk_id"] for entry in merged.by_n.values()}
+    assert chunk_ids == {10, 11, 12}
+    assert merged.resolve(2).locator["chunk_id"] == 11  # left wins the slot
+    assert merged.resolve(3).locator["chunk_id"] == 12  # right re-minted
+    assert merged.next_n == 4
+
+
+def test_merge_does_not_mutate_inputs() -> None:
+    left = CitationRegistry()
+    _kb(left, 10)
+    right = CitationRegistry()
+    _kb(right, 11)
+
+    left.merge(right)
+    # Each input still has only its own single entry under label 1.
+    assert list(left.by_n) == [1]
+    assert list(right.by_n) == [1]
diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/document_render/test_document.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/document_render/test_document.py
new file mode 100644
index 000000000..6c4cb7c25
--- /dev/null
+++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/document_render/test_document.py
@@ -0,0 +1,152 @@
+"""Tests for the shared ``render_document`` (one ``<document>`` block)."""
+
+from __future__ import annotations
+
+import pytest
+
+from app.agents.chat.multi_agent_chat.shared.citations import (
+    CitationRegistry,
+    CitationSourceType,
+)
+from app.agents.chat.multi_agent_chat.shared.document_render import (
+    RenderableDocument,
+    RenderablePassage,
+    render_document,
+)
+
+pytestmark = pytest.mark.unit
+
+
+def _document(
+    document_id: int,
+    title: str,
+    chunk_ids: list[int],
+    *,
+    source: str | None = None,
+) -> RenderableDocument:
+    return RenderableDocument(
+        title=title,
+        source=source,
+        passages=[
+            RenderablePassage(
+                content=f"text {cid}",  # body text names its chunk id
+                locator={"document_id": document_id, "chunk_id": cid},
+            )
+            for cid in chunk_ids
+        ],
+    )
+
+
+def test_returns_none_when_no_passages() -> None:
+    registry = CitationRegistry()
+    # A document whose passage list is empty renders as None.
+    assert (
+        render_document(_document(1, "Empty", []), view="excerpt", registry=registry)
+        is None
+    )
+
+
+def test_excerpt_open_and_close_tags() -> None:
+    registry = CitationRegistry()
+
+    block = render_document(
+        _document(1, "Q3 Launch Notes", [880], source="Slack · #launch"),
+        view="excerpt",
+        registry=registry,
+    )
+    # Attribute order in the opening tag: title, source, view.
+    assert block is not None
+    assert block.startswith(
+        '<document title="Q3 Launch Notes" source="Slack · #launch" view="excerpt">'
+    )
+    assert block.endswith("</document>")
+
+
+def test_full_view_renders_view_attribute() -> None:
+    registry = CitationRegistry()
+
+    block = render_document(_document(1, "Doc", [880]), view="full", registry=registry)
+    # The view argument is echoed as the tag's view attribute.
+    assert block is not None
+    assert '<document title="Doc" view="full">' in block
+
+
+def test_source_attribute_omitted_when_absent() -> None:
+    registry = CitationRegistry()
+
+    block = render_document(
+        _document(1, "Plain", [1]), view="excerpt", registry=registry
+    )
+    # The tag goes straight from title to view, with no source attribute.
+    assert block is not None
+    assert block.startswith('<document title="Plain" view="excerpt">')
+
+
+def test_registers_passages_with_chunk_locators() -> None:
+    registry = CitationRegistry()
+
+    render_document(
+        _document(1, "Doc", [880], source="Slack"),
+        view="excerpt",
+        registry=registry,
+    )
+    # Rendering registered the passage as [1] with its locator and display data.
+    entry = registry.resolve(1)
+    assert entry is not None
+    assert entry.source_type is CitationSourceType.KB_CHUNK
+    assert entry.locator == {"document_id": 1, "chunk_id": 880}
+    assert entry.display == {"title": "Doc", "source": "Slack"}
+
+
+def test_passages_get_monotonic_labels() -> None:
+    registry = CitationRegistry()
+
+    block = render_document(
+        _document(1, "Doc", [880, 881]), view="excerpt", registry=registry
+    )
+    # Passage lines are indented two spaces and labeled in passage order.
+    assert block is not None
+    assert "  [1] text 880" in block
+    assert "  [2] text 881" in block
+
+
+def test_multiline_passage_indents_under_label() -> None:
+    registry = CitationRegistry()
+    document = RenderableDocument(
+        title="Doc",
+        passages=[
+            RenderablePassage(
+                content="line one\nline two",
+                locator={"document_id": 1, "chunk_id": 5},
+            )
+        ],
+    )
+
+    block = render_document(document, view="excerpt", registry=registry)
+    # The second line is indented six spaces, matching the width of "  [1] ".
+    assert block is not None
+    assert "  [1] line one\n      line two" in block
+
+
+def test_attribute_values_are_escaped() -> None:
+    registry = CitationRegistry()
+
+    block = render_document(
+        _document(1, 'A & B <c> "d"', [1], source="x & y"),
+        view="excerpt",
+        registry=registry,
+    )
+    # &, <, > and " become entities inside the title and source attributes.
+    assert block is not None
+    assert 'title="A &amp; B &lt;c&gt; &quot;d&quot;"' in block
+    assert 'source="x &amp; y"' in block
+
+
+def test_same_passage_reuses_label_across_calls() -> None:
+    registry = CitationRegistry()
+    document = _document(1, "Doc", [880])
+
+    render_document(document, view="excerpt", registry=registry)
+    render_document(document, view="full", registry=registry)
+    # Two renders of the same passage hand out one label, so next_n is 2.
+    assert registry.next_n == 2
diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/document_render/test_search_context.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/document_render/test_search_context.py
new file mode 100644
index 000000000..6b22d81a7
--- /dev/null
+++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/document_render/test_search_context.py
@@ -0,0 +1,94 @@
+"""Tests for the ``<retrieved_context>`` wrapper around excerpt documents."""
+
+from __future__ import annotations
+
+import pytest
+
+from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
+from app.agents.chat.multi_agent_chat.shared.document_render import (
+    RenderableDocument,
+    RenderablePassage,
+    render_search_context,
+)
+
+pytestmark = pytest.mark.unit
+
+
+def _document(
+    document_id: int,
+    title: str,
+    chunk_ids: list[int],
+    *,
+    source: str | None = None,
+) -> RenderableDocument:
+    return RenderableDocument(
+        title=title,
+        source=source,
+        passages=[
+            RenderablePassage(
+                content=f"text {cid}",  # body text names its chunk id
+                locator={"document_id": document_id, "chunk_id": cid},
+            )
+            for cid in chunk_ids
+        ],
+    )
+
+
+def test_returns_none_when_nothing_to_show() -> None:
+    registry = CitationRegistry()
+    # Both an empty list and a document without passages give None.
+    assert render_search_context([], registry) is None
+    assert render_search_context([_document(1, "Empty", [])], registry) is None
+
+
+def test_assigns_monotonic_labels_across_documents() -> None:
+    registry = CitationRegistry()
+
+    block = render_search_context(
+        [
+            _document(1, "Q3 Launch Notes", [880, 881], source="Slack"),
+            _document(2, "Timeline", [12], source="Notion"),
+        ],
+        registry,
+    )
+    # Numbering continues from the first document into the second.
+    assert block is not None
+    assert "[1] text 880" in block
+    assert "[2] text 881" in block
+    assert "[3] text 12" in block
+
+
+def test_wraps_in_retrieved_context_and_teaches_excerpt_and_citation() -> None:
+    registry = CitationRegistry()
+
+    block = render_search_context([_document(1, "Doc", [1])], registry)
+    # Besides the wrapper tags, the block must contain the two guidance phrases.
+    assert block is not None
+    assert block.startswith("<retrieved_context>")
+    assert block.endswith("</retrieved_context>")
+    assert "excerpt view" in block
+    assert "Cite a chunk with its [n]." in block
+
+
+def test_documents_render_as_excerpt_blocks() -> None:
+    registry = CitationRegistry()
+
+    block = render_search_context(
+        [_document(1, "Q3", [1], source="Slack · #launch")], registry
+    )
+    # Inside the wrapper the document is a view="excerpt" <document> block.
+    assert block is not None
+    assert '<document title="Q3" source="Slack · #launch" view="excerpt">' in block
+    assert "</document>" in block
+
+
+def test_same_passage_reuses_label_across_calls() -> None:
+    registry = CitationRegistry()
+    document = _document(1, "Doc", [880])
+
+    render_search_context([document], registry)
+    block = render_search_context([document], registry)
+    # The second render shows the same [1], and no second label is handed out.
+    assert block is not None
+    assert "[1] text 880" in block
+    assert registry.next_n == 2
diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/document_render/test_source_label.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/document_render/test_source_label.py
new file mode 100644
index 000000000..ee492269f
--- /dev/null
+++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/document_render/test_source_label.py
@@ -0,0 +1,35 @@
+"""Tests for building a document's source label."""
+
+from __future__ import annotations
+
+import pytest
+
+from app.agents.chat.multi_agent_chat.shared.document_render import source_label
+
+pytestmark = pytest.mark.unit
+
+
+def test_known_type_uses_friendly_name() -> None:
+    assert source_label("SLACK_CONNECTOR", {}) == "Slack"  # empty metadata: no URL
+
+
+def test_unmapped_type_is_prettified() -> None:
+    assert source_label("GOOGLE_DRIVE_FILE", {}) == "Google Drive"  # no "File" suffix
+
+
+def test_url_host_is_appended_and_www_stripped() -> None:
+    label = source_label("CRAWLED_URL", {"url": "https://www.docs.python.org/3/"})
+    # Type label and host are joined with " · "; the leading "www." is gone.
+    assert label == "Web · docs.python.org"
+
+
+def test_host_only_when_type_unknown() -> None:  # a None type adds no prefix
+    assert source_label(None, {"url": "https://example.com/a"}) == "example.com"
+
+
+def test_returns_none_when_nothing_known() -> None:
+    assert source_label(None, {}) is None  # no type and no URL
+
+
+def test_non_http_url_is_ignored() -> None:
+    assert source_label("FILE", {"url": "/local/path"}) == "File"  # no host appended
diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/document_render/test_web_results.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/document_render/test_web_results.py
new file mode 100644
index 000000000..75cf0e1fb
--- /dev/null
+++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/document_render/test_web_results.py
@@ -0,0 +1,82 @@
+"""Tests for the ``<web_results>`` wrapper around web-result excerpt documents."""
+
+from __future__ import annotations
+
+import pytest
+
+from app.agents.chat.multi_agent_chat.shared.citations import (
+    CitationRegistry,
+    CitationSourceType,
+)
+from app.agents.chat.multi_agent_chat.shared.document_render import (
+    RenderableDocument,
+    RenderablePassage,
+    render_web_results,
+)
+
+pytestmark = pytest.mark.unit
+
+
+def _web_doc(url: str, title: str, content: str) -> RenderableDocument:
+    return RenderableDocument(
+        title=title,
+        source=f"Web · {url.split('//', 1)[-1].split('/', 1)[0]}",
+        passages=[
+            RenderablePassage(
+                content=content,
+                locator={"url": url},  # full URL; source above keeps only the host part
+                source_type=CitationSourceType.WEB_RESULT,
+            )
+        ],
+    )
+
+
+def test_returns_none_when_nothing_to_show() -> None:
+    registry = CitationRegistry()
+    # An empty document list gives None.
+    assert render_web_results([], registry) is None
+
+
+def test_wraps_in_web_results_container() -> None:
+    registry = CitationRegistry()
+
+    block = render_web_results(
+        [_web_doc("https://example.com/a", "Example", "the answer is 42")],
+        registry,
+    )
+    # The wrapper holds the citing hint and the result as an excerpt <document>.
+    assert block is not None
+    assert block.startswith("<web_results>")
+    assert block.endswith("</web_results>")
+    assert "cite a result with its [n]" in block
+    assert '<document title="Example" source="Web · example.com" view="excerpt">' in block
+    assert "[1] the answer is 42" in block
+
+
+def test_registers_each_result_as_web_result_with_url_locator() -> None:
+    registry = CitationRegistry()
+
+    render_web_results(
+        [
+            _web_doc("https://a.com/x", "A", "alpha"),
+            _web_doc("https://b.com/y", "B", "beta"),
+        ],
+        registry,
+    )
+    # Results are labeled in list order: [1] is a.com, [2] is b.com.
+    first = registry.resolve(1)
+    second = registry.resolve(2)
+    assert first is not None and second is not None
+    assert first.source_type is CitationSourceType.WEB_RESULT
+    assert first.locator == {"url": "https://a.com/x"}
+    assert second.locator == {"url": "https://b.com/y"}
+
+
+def test_same_url_reuses_label_across_calls() -> None:
+    registry = CitationRegistry()
+    doc = _web_doc("https://example.com/a", "Example", "stable fact")
+
+    render_web_results([doc], registry)
+    render_web_results([doc], registry)
+    # The second render reused label 1, so next_n is still 2.
+    assert registry.next_n == 2
diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_adapter.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_adapter.py
new file mode 100644
index 000000000..fd700dd1d
--- /dev/null
+++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_adapter.py
@@ -0,0 +1,51 @@
+"""Tests for mapping a DocumentHit to a renderable document."""
+
+from __future__ import annotations
+
+import pytest
+
+from app.agents.chat.multi_agent_chat.shared.retrieval.adapter import (
+    to_renderable_document,
+)
+from app.agents.chat.multi_agent_chat.shared.retrieval.models import (
+    ChunkHit,
+    DocumentHit,
+)
+
+pytestmark = pytest.mark.unit
+
+
+def test_maps_identity_source_and_passages() -> None:
+    hit = DocumentHit(
+        document_id=42,
+        title="Q3 Launch Notes",
+        document_type="SLACK_CONNECTOR",
+        metadata={},
+        score=0.9,
+        chunks=[
+            ChunkHit(chunk_id=880, content="a", position=4, score=0.9),
+            ChunkHit(chunk_id=881, content="b", position=7, score=0.5),
+        ],
+    )
+
+    document = to_renderable_document(hit)
+    # Title is copied, source reads "Slack", and chunks become passages in order.
+    assert document.title == "Q3 Launch Notes"
+    assert document.source == "Slack"
+    assert [
+        (p.locator["chunk_id"], p.content) for p in document.passages
+    ] == [(880, "a"), (881, "b")]
+    assert all(p.locator["document_id"] == 42 for p in document.passages)
+
+
+def test_document_with_no_chunks_maps_to_no_passages() -> None:
+    hit = DocumentHit(
+        document_id=1,
+        title="Empty",
+        document_type=None,
+        metadata={},
+        score=0.0,
+        chunks=[],
+    )
+    # An empty chunk list maps to an empty passage list.
+    assert to_renderable_document(hit).passages == []
diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_service.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_service.py
new file mode 100644
index 000000000..bd44f5dc2
--- /dev/null
+++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/shared/retrieval/test_service.py
@@ -0,0 +1,65 @@
+"""Tests for the build_context pipeline (rerank → adapt → render)."""
+
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+
+from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
+from app.agents.chat.multi_agent_chat.shared.retrieval.models import (
+    ChunkHit,
+    DocumentHit,
+)
+from app.agents.chat.multi_agent_chat.shared.retrieval.service import build_context
+
+pytestmark = pytest.mark.unit
+
+
+def _hit(document_id: int, chunk_id: int) -> DocumentHit:
+    return DocumentHit(  # fixture builder: a FILE hit that carries exactly one chunk
+        document_id=document_id,
+        title=f"Doc {document_id}",
+        document_type="FILE",
+        metadata={},
+        score=1.0 / document_id,  # reciprocal of the id, so the id must be non-zero
+        chunks=[ChunkHit(chunk_id=chunk_id, content=f"text {chunk_id}", position=0, score=1.0)],
+    )
+
+
+def test_no_hits_renders_nothing() -> None:
+    assert build_context("q", [], CitationRegistry()) is None  # no hits, so no block
+
+
+def test_renders_block_and_registers_labels_in_order() -> None:
+    """Hits are labeled in input order, both in the text and in the registry."""
+    citations = CitationRegistry()
+    rendered = build_context("q", [_hit(1, 880), _hit(2, 12)], citations)
+
+    assert rendered is not None
+    for label, doc_id, chunk_id in ((1, 1, 880), (2, 2, 12)):
+        assert f"[{label}] text {chunk_id}" in rendered
+        expected = {"document_id": doc_id, "chunk_id": chunk_id}
+        assert citations.resolve(label).locator == expected
+
+
+class _ReverseReranker:
+    """Test double for a reranker: ignores the query and flips the ordering."""
+
+    def rerank_documents(
+        self, query_text: str, documents: list[dict[str, Any]]
+    ) -> list[dict[str, Any]]:
+        return [*reversed(documents)]
+
+
+def test_reranker_reorders_documents_before_labeling() -> None:
+    """Labels follow the reranked order, not the order the hits were passed in."""
+    citations = CitationRegistry()
+    hits = [_hit(1, 880), _hit(2, 12)]
+
+    rendered = build_context("q", hits, citations, reranker=_ReverseReranker())
+
+    assert rendered is not None
+    # The reranker flips the pair, so doc 2 is rendered first and owns [1].
+    assert citations.resolve(1).locator == {"document_id": 2, "chunk_id": 12}
+    assert citations.resolve(2).locator == {"document_id": 1, "chunk_id": 880}
diff --git a/surfsense_backend/tests/unit/agents/new_chat/prompts/test_composer.py b/surfsense_backend/tests/unit/agents/new_chat/prompts/test_composer.py
deleted file mode 100644
index 4f0369e12..000000000
--- a/surfsense_backend/tests/unit/agents/new_chat/prompts/test_composer.py
+++ /dev/null
@@ -1,295 +0,0 @@
-"""Tests for the prompt fragment composer."""
-
-from __future__ import annotations
-
-from datetime import UTC, datetime
-
-import pytest
-
-from app.db import ChatVisibility
-from app.prompts.system_prompt_composer.composer import (
-    ALL_TOOL_NAMES_ORDERED,
-    compose_system_prompt,
-    detect_provider_variant,
-)
-
-pytestmark = pytest.mark.unit
-
-
-@pytest.fixture
-def fixed_today() -> datetime:
-    return datetime(2025, 6, 1, 12, 0, tzinfo=UTC)
-
-
-class TestProviderVariantDetection:
-    @pytest.mark.parametrize(
-        "model_name,expected",
-        [
-            # GPT-4 family routes to "classic" (autonomous-persistence style)
-            ("openai:gpt-4o-mini", "openai_classic"),
-            ("openai:gpt-4-turbo", "openai_classic"),
-            # GPT-5 / o-series route to "reasoning" (channel-aware pragmatic)
-            ("openai:gpt-5", "openai_reasoning"),
-            ("openai:o1-preview", "openai_reasoning"),
-            ("openai:o3-mini", "openai_reasoning"),
-            # Codex family beats reasoning (more specific). Mirrors OpenCode
-            # ``system.ts`` — ``gpt-*-codex`` gets the code-purist prompt.
-            ("openai:gpt-5-codex", "openai_codex"),
-            ("openai:gpt-codex", "openai_codex"),
-            ("openai:codex-mini", "openai_codex"),
-            # Anthropic + Google
-            ("anthropic:claude-3-5-sonnet", "anthropic"),
-            ("anthropic/claude-opus-4", "anthropic"),
-            ("google:gemini-2.0-flash", "google"),
-            ("vertex:gemini-1.5-pro", "google"),
-            # Newly-covered families
-            ("moonshot:kimi-k2", "kimi"),
-            ("openrouter:moonshot/kimi-k2.5", "kimi"),
-            ("xai:grok-2", "grok"),
-            ("openrouter:x-ai/grok-3", "grok"),
-            ("openai:deepseek-v3", "deepseek"),
-            ("deepseek:deepseek-r1", "deepseek"),
-            # Unknown families fall back to default (no provider block emitted)
-            ("groq:mixtral-8x7b", "default"),
-            ("together:llama-3.1-70b", "default"),
-            (None, "default"),
-            ("", "default"),
-        ],
-    )
-    def test_detection(self, model_name: str | None, expected: str) -> None:
-        assert detect_provider_variant(model_name) == expected
-
-    def test_codex_takes_precedence_over_reasoning(self) -> None:
-        """Regression guard: ``gpt-5-codex`` must NOT match the generic
-        ``gpt-5`` reasoning regex first. Codex is the more specialised
-        prompt and mirrors OpenCode's dispatch order.
-        """
-        from app.prompts.system_prompt_composer.composer import detect_provider_variant
-
-        assert detect_provider_variant("openai:gpt-5-codex") == "openai_codex"
-        assert detect_provider_variant("openai:gpt-5") == "openai_reasoning"
-
-
-class TestCompose:
-    def test_default_prompt_has_required_blocks(self, fixed_today: datetime) -> None:
-        prompt = compose_system_prompt(today=fixed_today)
-        # System instruction wrapper
-        assert "<system_instruction>" in prompt
-        assert "</system_instruction>" in prompt
-        # Date interpolated
-        assert "2025-06-01" in prompt
-        # Core policy blocks present
-        assert "<knowledge_base_only_policy>" in prompt
-        assert "<tool_routing>" in prompt
-        assert "<parameter_resolution>" in prompt
-        assert "<memory_protocol>" in prompt
-        # Tools
-        assert "<tools>" in prompt
-        assert "</tools>" in prompt
-        # Citations on by default
-        assert "<citation_instructions>" in prompt
-        assert "[citation:chunk_id]" in prompt
-
-    def test_team_visibility_uses_team_variants(self, fixed_today: datetime) -> None:
-        prompt = compose_system_prompt(
-            today=fixed_today,
-            thread_visibility=ChatVisibility.SEARCH_SPACE,
-        )
-        # Team-specific phrasing in the agent block
-        assert "team space" in prompt
-        # Memory protocol mentions team
-        assert "team" in prompt
-        # Should NOT mention the user-only memory phrasing
-        assert "personal knowledge base" not in prompt
-
-    def test_private_visibility_uses_private_variants(
-        self, fixed_today: datetime
-    ) -> None:
-        prompt = compose_system_prompt(
-            today=fixed_today,
-            thread_visibility=ChatVisibility.PRIVATE,
-        )
-        assert "personal knowledge base" in prompt
-        # Should NOT mention the team-specific phrasing about prefixed authors
-        assert "[DisplayName of the author]" not in prompt
-
-    def test_citations_disabled_swaps_block(self, fixed_today: datetime) -> None:
-        prompt_on = compose_system_prompt(today=fixed_today, citations_enabled=True)
-        prompt_off = compose_system_prompt(today=fixed_today, citations_enabled=False)
-        assert "Citations are DISABLED" in prompt_off
-        assert "Citations are DISABLED" not in prompt_on
-        assert "[citation:chunk_id]" in prompt_on
-
-    def test_enabled_tool_filter_only_includes_listed_tools(
-        self, fixed_today: datetime
-    ) -> None:
-        prompt = compose_system_prompt(
-            today=fixed_today,
-            enabled_tool_names={"web_search", "scrape_webpage"},
-        )
-        assert "web_search:" in prompt or "- web_search:" in prompt
-        assert "scrape_webpage:" in prompt or "- scrape_webpage:" in prompt
-        # Excluded tools should NOT appear in tool listing
-        assert "generate_podcast:" not in prompt
-        assert "generate_image:" not in prompt
-
-    def test_disabled_tool_note_is_appended(self, fixed_today: datetime) -> None:
-        prompt = compose_system_prompt(
-            today=fixed_today,
-            enabled_tool_names={"web_search"},
-            disabled_tool_names={"generate_image", "generate_podcast"},
-        )
-        assert "DISABLED TOOLS (by user):" in prompt
-        assert "Generate Image" in prompt
-        assert "Generate Podcast" in prompt
-
-    def test_mcp_routing_block_emits_when_provided(self, fixed_today: datetime) -> None:
-        prompt = compose_system_prompt(
-            today=fixed_today,
-            mcp_connector_tools={"My GitLab": ["gitlab_search", "gitlab_create_mr"]},
-        )
-        assert "<mcp_tool_routing>" in prompt
-        assert "My GitLab" in prompt
-        assert "gitlab_search" in prompt
-
-    def test_mcp_routing_block_absent_when_no_servers(
-        self, fixed_today: datetime
-    ) -> None:
-        prompt = compose_system_prompt(today=fixed_today, mcp_connector_tools={})
-        assert "<mcp_tool_routing>" not in prompt
-
-    def test_provider_block_renders_when_anthropic(self, fixed_today: datetime) -> None:
-        prompt = compose_system_prompt(
-            today=fixed_today, model_name="anthropic:claude-3-5-sonnet"
-        )
-        assert "<provider_hints>" in prompt
-        assert "Anthropic" in prompt or "Claude" in prompt
-
-    def test_provider_block_absent_for_default(self, fixed_today: datetime) -> None:
-        prompt = compose_system_prompt(today=fixed_today, model_name="custom:foo")
-        assert "<provider_hints>" not in prompt
-
-    @pytest.mark.parametrize(
-        "model_name,expected_marker",
-        [
-            # Each marker is a unique-ish phrase from the corresponding fragment.
-            # If a fragment is renamed/rewritten such that the marker is gone,
-            # update both the fragment and this test deliberately.
-            ("openai:gpt-5-codex", "Codex-class"),
-            ("openai:gpt-5", "OpenAI reasoning model"),
-            ("openai:gpt-4o", "classic OpenAI chat model"),
-            ("anthropic:claude-3-5-sonnet", "Anthropic Claude"),
-            ("google:gemini-2.0-flash", "Google Gemini"),
-            ("moonshot:kimi-k2", "Moonshot Kimi"),
-            ("xai:grok-2", "xAI Grok"),
-            ("deepseek:deepseek-r1", "DeepSeek"),
-        ],
-    )
-    def test_each_known_variant_renders_with_its_marker(
-        self,
-        fixed_today: datetime,
-        model_name: str,
-        expected_marker: str,
-    ) -> None:
-        """Every supported variant must produce a ``<provider_hints>`` block
-        containing its identifying marker. This pins the dispatch + the
-        on-disk fragments together so a missing/renamed file is caught
-        immediately.
-        """
-        prompt = compose_system_prompt(today=fixed_today, model_name=model_name)
-        assert "<provider_hints>" in prompt, (
-            f"variant for {model_name!r} did not emit a provider_hints block; "
-            "the corresponding providers/<variant>.md may be missing"
-        )
-        assert expected_marker in prompt, (
-            f"variant for {model_name!r} emitted hints but lacked the "
-            f"expected marker {expected_marker!r} — the fragment may have "
-            "drifted from the dispatch table"
-        )
-
-    def test_provider_blocks_are_byte_stable_across_calls(
-        self, fixed_today: datetime
-    ) -> None:
-        """Cache-stability guard: same model id → byte-identical prompt."""
-        a = compose_system_prompt(today=fixed_today, model_name="moonshot:kimi-k2")
-        b = compose_system_prompt(today=fixed_today, model_name="moonshot:kimi-k2")
-        assert a == b
-
-    def test_custom_system_instructions_override_default(
-        self, fixed_today: datetime
-    ) -> None:
-        custom = "You are a custom assistant. Today is {resolved_today}."
-        prompt = compose_system_prompt(
-            today=fixed_today, custom_system_instructions=custom
-        )
-        assert "You are a custom assistant. Today is 2025-06-01." in prompt
-        # Default block should NOT be present
-        assert "<knowledge_base_only_policy>" not in prompt
-
-    def test_provider_hints_render_with_custom_system_instructions(
-        self, fixed_today: datetime
-    ) -> None:
-        """Regression guard for the always-append decision: provider hints
-        append AFTER a custom system prompt.
-
-        Provider hints are stylistic nudges (parallel tool-call rules,
-        formatting guidance, etc.) that help the model regardless of
-        what the system instructions say. Suppressing them when a
-        custom prompt is set would partially defeat the per-family
-        prompt machinery.
-        """
-        prompt = compose_system_prompt(
-            today=fixed_today,
-            custom_system_instructions="You are a custom assistant.",
-            model_name="anthropic/claude-3-5-sonnet",
-        )
-        assert "You are a custom assistant." in prompt
-        assert "<provider_hints>" in prompt
-        # The custom prompt must come BEFORE the provider hints so the
-        # user's framing isn't drowned out by the stylistic nudges.
-        assert prompt.index("You are a custom assistant.") < prompt.index(
-            "<provider_hints>"
-        )
-
-    def test_use_default_false_with_no_custom_yields_no_system_block(
-        self, fixed_today: datetime
-    ) -> None:
-        prompt = compose_system_prompt(
-            today=fixed_today,
-            use_default_system_instructions=False,
-        )
-        # No system_instruction wrapper but tools/citations still emitted
-        assert "<system_instruction>" not in prompt
-        assert "<tools>" in prompt
-
-    def test_all_known_tools_have_fragments(self) -> None:
-        # Soft assertion: verify that every tool in the canonical order
-        # produces non-empty content for at least one variant.
-        for tool in ALL_TOOL_NAMES_ORDERED:
-            prompt = compose_system_prompt(
-                today=datetime(2025, 1, 1, tzinfo=UTC),
-                enabled_tool_names={tool},
-            )
-            assert tool in prompt, f"tool {tool!r} missing from composed prompt"
-
-
-class TestStableOrderingForCacheStability:
-    """Regression guard: prompt cache hit-rate depends on byte-stable prefix."""
-
-    def test_composition_is_deterministic_given_same_inputs(
-        self, fixed_today: datetime
-    ) -> None:
-        a = compose_system_prompt(
-            today=fixed_today,
-            enabled_tool_names={"web_search", "scrape_webpage"},
-            mcp_connector_tools={"X": ["x_a", "x_b"]},
-        )
-        b = compose_system_prompt(
-            today=fixed_today,
-            enabled_tool_names={
-                "scrape_webpage",
-                "web_search",
-            },  # set order shouldn't matter
-            mcp_connector_tools={"X": ["x_a", "x_b"]},
-        )
-        assert a == b
diff --git a/surfsense_backend/tests/unit/agents/new_chat/test_compaction.py b/surfsense_backend/tests/unit/agents/new_chat/test_compaction.py
index 2ac462959..9db13ea8a 100644
--- a/surfsense_backend/tests/unit/agents/new_chat/test_compaction.py
+++ b/surfsense_backend/tests/unit/agents/new_chat/test_compaction.py
@@ -38,7 +38,7 @@ class TestIsProtectedSystemMessage:
         )
 
     def test_tolerates_leading_whitespace(self) -> None:
-        msg = SystemMessage(content="   \n<priority_documents>\n...")
+        msg = SystemMessage(content="   \n<workspace_tree>\n...")
         assert _is_protected_system_message(msg) is True
 
 
@@ -89,7 +89,7 @@ class TestPartitionMessages:
 
     def test_protected_system_message_preserved_even_in_summarize_half(self) -> None:
         partitioner = self._build_partitioner()
-        protected = SystemMessage(content="<priority_documents>\n...")
+        protected = SystemMessage(content="<workspace_tree>\n...")
         msgs = [
             HumanMessage(content="old human"),
             AIMessage(content="old ai"),
diff --git a/surfsense_backend/tests/unit/agents/new_chat/test_feature_flags.py b/surfsense_backend/tests/unit/agents/new_chat/test_feature_flags.py
index e715a80c6..627dcb99c 100644
--- a/surfsense_backend/tests/unit/agents/new_chat/test_feature_flags.py
+++ b/surfsense_backend/tests/unit/agents/new_chat/test_feature_flags.py
@@ -28,7 +28,6 @@ def _clear_all(monkeypatch: pytest.MonkeyPatch) -> None:
         "SURFSENSE_ENABLE_LLM_TOOL_SELECTOR",
         "SURFSENSE_ENABLE_SKILLS",
         "SURFSENSE_ENABLE_SPECIALIZED_SUBAGENTS",
-        "SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE",
         "SURFSENSE_ENABLE_ACTION_LOG",
         "SURFSENSE_ENABLE_REVERT_ROUTE",
         "SURFSENSE_ENABLE_PLUGIN_LOADER",
@@ -57,7 +56,6 @@ def test_defaults_match_shipped_agent_stack(monkeypatch: pytest.MonkeyPatch) ->
     assert flags.enable_llm_tool_selector is False
     assert flags.enable_skills is True
     assert flags.enable_specialized_subagents is True
-    assert flags.enable_kb_planner_runnable is True
     assert flags.enable_action_log is True
     assert flags.enable_revert_route is True
     assert flags.enable_plugin_loader is False
@@ -122,7 +120,6 @@ def test_each_flag_can_be_set_independently(monkeypatch: pytest.MonkeyPatch) ->
         "enable_llm_tool_selector": "SURFSENSE_ENABLE_LLM_TOOL_SELECTOR",
         "enable_skills": "SURFSENSE_ENABLE_SKILLS",
         "enable_specialized_subagents": "SURFSENSE_ENABLE_SPECIALIZED_SUBAGENTS",
-        "enable_kb_planner_runnable": "SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE",
         "enable_action_log": "SURFSENSE_ENABLE_ACTION_LOG",
         "enable_revert_route": "SURFSENSE_ENABLE_REVERT_ROUTE",
         "enable_plugin_loader": "SURFSENSE_ENABLE_PLUGIN_LOADER",
diff --git a/surfsense_backend/tests/unit/agents/new_chat/test_mention_resolver.py b/surfsense_backend/tests/unit/agents/new_chat/test_mention_resolver.py
index 4130c9d4e..6aebee093 100644
--- a/surfsense_backend/tests/unit/agents/new_chat/test_mention_resolver.py
+++ b/surfsense_backend/tests/unit/agents/new_chat/test_mention_resolver.py
@@ -90,8 +90,8 @@ class TestSubstituteInText:
 
 class TestResolveMentions:
     """``resolve_mentions`` resolves chip ids → virtual paths and emits
-    a ``ResolvedMentionSet`` whose id partitions feed
-    ``KnowledgePriorityMiddleware``."""
+    a ``ResolvedMentionSet`` whose id partitions feed the
+    ``search_knowledge_base`` retrieval scope."""
 
     @pytest.mark.asyncio
     async def test_returns_empty_when_no_mentions(self):
diff --git a/surfsense_backend/tests/unit/agents/new_chat/test_state_reducers.py b/surfsense_backend/tests/unit/agents/new_chat/test_state_reducers.py
index 637a10704..f5d322781 100644
--- a/surfsense_backend/tests/unit/agents/new_chat/test_state_reducers.py
+++ b/surfsense_backend/tests/unit/agents/new_chat/test_state_reducers.py
@@ -4,9 +4,14 @@ from __future__ import annotations
 
 import pytest
 
+from app.agents.chat.multi_agent_chat.shared.citations import (
+    CitationRegistry,
+    CitationSourceType,
+)
 from app.agents.chat.multi_agent_chat.shared.state.reducers import (
     _CLEAR,
     _add_unique_reducer,
+    _citation_registry_merge_reducer,
     _dict_merge_with_tombstones_reducer,
     _initial_filesystem_state,
     _list_append_reducer,
@@ -93,6 +98,57 @@ class TestDictMergeWithTombstones:
         }
 
 
+def _kb_registry(chunk_id: int) -> CitationRegistry:
+    """Return a fresh registry holding one KB chunk of document 1."""
+    locator = {"document_id": 1, "chunk_id": chunk_id}
+    seeded = CitationRegistry()
+    seeded.register(CitationSourceType.KB_CHUNK, locator)
+    return seeded
+
+
+class TestCitationRegistryMergeReducer:
+    """Merging registries: ``None`` passthrough, union, and dict coercion."""
+
+    def test_none_left_returns_right(self):
+        incoming = _kb_registry(10)
+        assert _citation_registry_merge_reducer(None, incoming) is incoming
+
+    def test_none_right_returns_left(self):
+        existing = _kb_registry(10)
+        assert _citation_registry_merge_reducer(existing, None) is existing
+
+    def test_both_none_returns_none(self):
+        assert _citation_registry_merge_reducer(None, None) is None
+
+    def test_unions_two_registries(self):
+        existing, incoming = _kb_registry(10), _kb_registry(11)
+        merged = _citation_registry_merge_reducer(existing, incoming)
+
+        seen = {entry.locator["chunk_id"] for entry in merged.by_n.values()}
+        assert seen == {10, 11}
+
+    def test_coerces_serialized_dict_update(self):
+        # Command.update is serialized via ormsgpack by the checkpointer before
+        # the reducer runs, so the incoming update can arrive as a plain dict.
+        existing = _kb_registry(10)
+        incoming = _kb_registry(11).model_dump()
+
+        merged = _citation_registry_merge_reducer(existing, incoming)
+
+        seen = {entry.locator["chunk_id"] for entry in merged.by_n.values()}
+        assert seen == {10, 11}
+
+    def test_coerces_both_sides_from_dict(self):
+        existing = _kb_registry(10).model_dump()
+        incoming = _kb_registry(11).model_dump()
+
+        merged = _citation_registry_merge_reducer(existing, incoming)
+
+        assert isinstance(merged, CitationRegistry)
+        seen = {entry.locator["chunk_id"] for entry in merged.by_n.values()}
+        assert seen == {10, 11}
+
+
 class TestInitialFilesystemState:
     def test_default_shape(self):
         state = _initial_filesystem_state()
@@ -105,8 +161,6 @@ class TestInitialFilesystemState:
         assert state["doc_id_by_path"] == {}
         assert state["dirty_paths"] == []
         assert state["dirty_path_tool_calls"] == {}
-        assert state["kb_priority"] == []
-        assert state["kb_matched_chunk_ids"] == {}
         assert state["kb_anon_doc"] is None
         assert state["tree_version"] == 0
 
diff --git a/surfsense_backend/tests/unit/middleware/test_kb_postgres_read.py b/surfsense_backend/tests/unit/middleware/test_kb_postgres_read.py
new file mode 100644
index 000000000..8117a6bdb
--- /dev/null
+++ b/surfsense_backend/tests/unit/middleware/test_kb_postgres_read.py
@@ -0,0 +1,124 @@
+"""Unit tests for the KB read path: full-view render + anonymous-doc loading.
+
+DB-backed loads are exercised by the integration suite; here we lock the pure
+pieces — ``render_full_document`` and the anonymous-upload branch of
+``aload_document`` — which need no database.
+"""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+
+import pytest
+
+from app.agents.chat.multi_agent_chat.shared.citations import (
+    CitationRegistry,
+    CitationSourceType,
+)
+from app.agents.chat.multi_agent_chat.shared.document_render import (
+    RenderableDocument,
+    RenderablePassage,
+)
+from app.agents.chat.multi_agent_chat.shared.middleware.filesystem.backends.kb_postgres import (
+    KBPostgresBackend,
+    render_full_document,
+)
+
+pytestmark = pytest.mark.unit
+
+
+def _backend(state: dict) -> KBPostgresBackend:  # runtime is stubbed with only .state
+    return KBPostgresBackend(search_space_id=1, runtime=SimpleNamespace(state=state))
+
+
+def test_render_full_document_uses_full_view_and_registers() -> None:
+    """A full read emits the ``view="full"`` header and registers each passage."""
+    locator = {"document_id": 7, "chunk_id": 880}
+    passage = RenderablePassage(content="push to March 10", locator=locator)
+    notes = RenderableDocument(
+        title="Launch Notes",
+        source="Slack",
+        passages=[passage],
+    )
+    citations = CitationRegistry()
+
+    rendered = render_full_document(notes, citations)
+
+    header = '<document title="Launch Notes" source="Slack" view="full">'
+    assert header in rendered
+    assert "[1] push to March 10" in rendered
+
+    registered = citations.resolve(1)
+    assert registered is not None
+    assert registered.locator == {"document_id": 7, "chunk_id": 880}
+
+
+def test_render_full_document_reuses_search_label() -> None:
+    """A chunk that search already labeled keeps that [n] when read in full."""
+    citations = CitationRegistry()
+    search_label = citations.register(
+        CitationSourceType.KB_CHUNK,
+        {"document_id": 7, "chunk_id": 880},
+        {"title": "Launch Notes", "source": "Slack"},
+    )
+    unseen = RenderablePassage(
+        content="new chunk",
+        locator={"document_id": 7, "chunk_id": 881},
+    )
+    labeled = RenderablePassage(
+        content="push to March 10",
+        locator={"document_id": 7, "chunk_id": 880},
+    )
+    notes = RenderableDocument(
+        title="Launch Notes",
+        source="Slack",
+        passages=[unseen, labeled],
+    )
+
+    rendered = render_full_document(notes, citations)
+
+    assert f"[{search_label}] push to March 10" in rendered
+    # The chunk that search never registered is expected under label [2].
+    assert "[2] new chunk" in rendered
+
+
+def test_render_full_document_empty_falls_back_to_notice() -> None:
+    """A document with no passages renders as the fixed placeholder notice."""
+    passageless = RenderableDocument(title="Empty", passages=[])
+
+    rendered = render_full_document(passageless, CitationRegistry())
+
+    assert rendered == "(This document has no readable content.)"
+
+
+@pytest.mark.asyncio
+async def test_aload_document_anonymous_upload() -> None:
+    """The anonymous upload held in state loads as ANON_CHUNK passages, no DB id."""
+    anon_doc = {
+        "path": "/anon_upload.md",
+        "title": "Quarterly Report",
+        "chunks": [
+            {"chunk_id": -1, "content": "revenue grew"},
+            {"chunk_id": -2, "content": "costs fell"},
+        ],
+    }
+    backend = _backend({"kb_anon_doc": anon_doc})
+
+    loaded = await backend.aload_document("/anon_upload.md")
+
+    assert loaded is not None
+    document, doc_id = loaded
+    assert doc_id is None
+    assert document.title == "Quarterly Report"
+    assert [p.locator["chunk_id"] for p in document.passages] == [-1, -2]
+    # Every passage points at document_id -1 and is typed as an anonymous chunk.
+    assert all(p.locator["document_id"] == -1 for p in document.passages)
+    assert all(
+        p.source_type is CitationSourceType.ANON_CHUNK for p in document.passages
+    )
+
+
+@pytest.mark.asyncio
+async def test_aload_document_unknown_path_returns_none() -> None:
+    backend = _backend({})  # empty state: no anonymous upload is registered
+    assert await backend.aload_document("/not/under/documents.md") is None
diff --git a/surfsense_backend/tests/unit/middleware/test_knowledge_search.py b/surfsense_backend/tests/unit/middleware/test_knowledge_search.py
deleted file mode 100644
index 027738fba..000000000
--- a/surfsense_backend/tests/unit/middleware/test_knowledge_search.py
+++ /dev/null
@@ -1,689 +0,0 @@
-"""Unit tests for knowledge_search middleware helpers."""
-
-import json
-
-import pytest
-from langchain_core.messages import AIMessage, HumanMessage
-
-from app.agents.chat.multi_agent_chat.shared.middleware import knowledge_search as ks
-from app.agents.chat.multi_agent_chat.shared.middleware.filesystem.backends.document_xml import (
-    build_document_xml as _build_document_xml,
-)
-from app.agents.chat.multi_agent_chat.shared.middleware.knowledge_search import (
-    KBSearchPlan,
-    KnowledgePriorityMiddleware,
-    _normalize_optional_date_range,
-    _parse_kb_search_plan_response,
-    _render_recent_conversation,
-    _resolve_search_types,
-)
-
-pytestmark = pytest.mark.unit
-
-
-# ── _resolve_search_types ──────────────────────────────────────────────
-
-
-class TestResolveSearchTypes:
-    def test_returns_none_when_no_inputs(self):
-        assert _resolve_search_types(None, None) is None
-
-    def test_returns_none_when_both_empty(self):
-        assert _resolve_search_types([], []) is None
-
-    def test_includes_legacy_type_for_google_gmail(self):
-        result = _resolve_search_types(["GOOGLE_GMAIL_CONNECTOR"], None)
-        assert "GOOGLE_GMAIL_CONNECTOR" in result
-        assert "COMPOSIO_GMAIL_CONNECTOR" in result
-
-    def test_includes_legacy_type_for_google_drive(self):
-        result = _resolve_search_types(None, ["GOOGLE_DRIVE_FILE"])
-        assert "GOOGLE_DRIVE_FILE" in result
-        assert "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" in result
-
-    def test_includes_legacy_type_for_google_calendar(self):
-        result = _resolve_search_types(["GOOGLE_CALENDAR_CONNECTOR"], None)
-        assert "GOOGLE_CALENDAR_CONNECTOR" in result
-        assert "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR" in result
-
-    def test_no_legacy_expansion_for_unrelated_types(self):
-        result = _resolve_search_types(["FILE", "NOTE"], None)
-        assert set(result) == {"FILE", "NOTE"}
-
-    def test_combines_connectors_and_document_types(self):
-        result = _resolve_search_types(["FILE"], ["NOTE", "CRAWLED_URL"])
-        assert {"FILE", "NOTE", "CRAWLED_URL"}.issubset(set(result))
-
-    def test_deduplicates(self):
-        result = _resolve_search_types(["FILE", "FILE"], ["FILE"])
-        assert result.count("FILE") == 1
-
-
-# ── _build_document_xml ────────────────────────────────────────────────
-
-
-class TestBuildDocumentXml:
-    @pytest.fixture
-    def sample_document(self):
-        return {
-            "document_id": 42,
-            "document": {
-                "id": 42,
-                "document_type": "FILE",
-                "title": "Test Doc",
-                "metadata": {"url": "https://example.com"},
-            },
-            "chunks": [
-                {"chunk_id": 101, "content": "First chunk content"},
-                {"chunk_id": 102, "content": "Second chunk content"},
-                {"chunk_id": 103, "content": "Third chunk content"},
-            ],
-        }
-
-    def test_contains_document_metadata(self, sample_document):
-        xml = _build_document_xml(sample_document)
-        assert "<document_id>42</document_id>" in xml
-        assert "<document_type>FILE</document_type>" in xml
-        assert "Test Doc" in xml
-
-    def test_contains_chunk_index(self, sample_document):
-        xml = _build_document_xml(sample_document)
-        assert "<chunk_index>" in xml
-        assert "</chunk_index>" in xml
-        assert 'chunk_id="101"' in xml
-        assert 'chunk_id="102"' in xml
-        assert 'chunk_id="103"' in xml
-
-    def test_matched_chunks_flagged_in_index(self, sample_document):
-        xml = _build_document_xml(sample_document, matched_chunk_ids={101, 103})
-        lines = xml.split("\n")
-        for line in lines:
-            if 'chunk_id="101"' in line:
-                assert 'matched="true"' in line
-            if 'chunk_id="102"' in line:
-                assert 'matched="true"' not in line
-            if 'chunk_id="103"' in line:
-                assert 'matched="true"' in line
-
-    def test_chunk_content_in_document_content_section(self, sample_document):
-        xml = _build_document_xml(sample_document)
-        assert "<document_content>" in xml
-        assert "First chunk content" in xml
-        assert "Second chunk content" in xml
-        assert "Third chunk content" in xml
-
-    def test_line_numbers_in_chunk_index_are_accurate(self, sample_document):
-        """Verify that the line ranges in chunk_index actually point to the right content."""
-        xml = _build_document_xml(sample_document, matched_chunk_ids={101})
-        xml_lines = xml.split("\n")
-
-        for line in xml_lines:
-            if 'chunk_id="101"' in line and "lines=" in line:
-                import re
-
-                m = re.search(r'lines="(\d+)-(\d+)"', line)
-                assert m, f"No lines= attribute found in: {line}"
-                start, _end = int(m.group(1)), int(m.group(2))
-                target_line = xml_lines[start - 1]
-                assert "101" in target_line
-                assert "First chunk content" in target_line
-                break
-        else:
-            pytest.fail("chunk_id=101 entry not found in chunk_index")
-
-    def test_splits_into_lines_correctly(self, sample_document):
-        """Each chunk occupies exactly one line (no embedded newlines)."""
-        xml = _build_document_xml(sample_document)
-        lines = xml.split("\n")
-        chunk_lines = [
-            line for line in lines if "<![CDATA[" in line and "<chunk" in line
-        ]
-        assert len(chunk_lines) == 3
-
-
-# ── planner parsing / date normalization ───────────────────────────────
-
-
-class TestPlannerHelpers:
-    def test_parse_kb_search_plan_response_accepts_plain_json(self):
-        plan = _parse_kb_search_plan_response(
-            json.dumps(
-                {
-                    "optimized_query": "ocv meeting decisions summary",
-                    "start_date": "2026-03-01",
-                    "end_date": "2026-03-31",
-                }
-            )
-        )
-        assert plan.optimized_query == "ocv meeting decisions summary"
-        assert plan.start_date == "2026-03-01"
-        assert plan.end_date == "2026-03-31"
-
-    def test_parse_kb_search_plan_response_accepts_fenced_json(self):
-        plan = _parse_kb_search_plan_response(
-            """```json
-            {"optimized_query":"deel founders guide","start_date":null,"end_date":null}
-            ```"""
-        )
-        assert plan.optimized_query == "deel founders guide"
-        assert plan.start_date is None
-        assert plan.end_date is None
-
-    def test_normalize_optional_date_range_returns_none_when_absent(self):
-        start_date, end_date = _normalize_optional_date_range(None, None)
-        assert start_date is None
-        assert end_date is None
-
-    def test_normalize_optional_date_range_resolves_single_bound(self):
-        start_date, end_date = _normalize_optional_date_range("2026-03-01", None)
-        assert start_date is not None
-        assert end_date is not None
-        assert start_date.date().isoformat() == "2026-03-01"
-        assert end_date >= start_date
-
-
-class FakeLLM:
-    def __init__(self, response_text: str):
-        self.response_text = response_text
-        self.calls: list[dict] = []
-
-    async def ainvoke(self, messages, config=None):
-        self.calls.append({"messages": messages, "config": config})
-        return AIMessage(content=self.response_text)
-
-
-class FakeBudgetLLM:
-    def __init__(self, *, max_input_tokens: int):
-        self._max_input_tokens_value = max_input_tokens
-
-    def _get_max_input_tokens(self) -> int:
-        return self._max_input_tokens_value
-
-    def _count_tokens(self, messages) -> int:
-        # Deterministic, simple proxy for tests: count characters as tokens.
-        return sum(len(msg.get("content", "")) for msg in messages)
-
-
-class TestKnowledgePriorityMiddlewarePlanner:
-    @pytest.fixture(autouse=True)
-    def _disable_planner_runnable(self, monkeypatch):
-        # ``FakeLLM`` is a duck-typed mock; ``create_agent`` (used when the
-        # planner Runnable path is enabled) calls ``.bind()`` on the LLM,
-        # which the mock does not implement. Pin the flag off so the
-        # planner falls through to the legacy ``self.llm.ainvoke`` path
-        # these tests assert against (``llm.calls[0]["config"]``).
-        monkeypatch.setenv("SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE", "false")
-
-    def test_render_recent_conversation_prefers_latest_messages_under_budget(self):
-        messages = [
-            HumanMessage(content="old user context " * 40),
-            AIMessage(content="old assistant answer " * 35),
-            HumanMessage(content="recent user context " * 20),
-            AIMessage(content="recent assistant answer " * 18),
-            HumanMessage(content="latest question"),
-        ]
-
-        rendered = _render_recent_conversation(
-            messages,
-            llm=FakeBudgetLLM(max_input_tokens=900),
-            user_text="latest question",
-        )
-
-        assert "recent user context" in rendered
-        assert "recent assistant answer" in rendered
-        assert "latest question" not in rendered
-        assert rendered.index("recent user context") < rendered.index(
-            "recent assistant answer"
-        )
-
-    def test_render_recent_conversation_falls_back_to_legacy_without_budgeting(self):
-        messages = [
-            HumanMessage(content="message one"),
-            AIMessage(content="message two"),
-            HumanMessage(content="latest question"),
-        ]
-
-        rendered = _render_recent_conversation(
-            messages,
-            llm=None,
-            user_text="latest question",
-        )
-
-        assert "user: message one" in rendered
-        assert "assistant: message two" in rendered
-        assert "latest question" not in rendered
-
-    async def test_middleware_uses_optimized_query_and_dates(self, monkeypatch):
-        captured: dict = {}
-
-        async def fake_search_knowledge_base(**kwargs):
-            captured.update(kwargs)
-            return []
-
-        monkeypatch.setattr(
-            ks,
-            "search_knowledge_base",
-            fake_search_knowledge_base,
-        )
-
-        llm = FakeLLM(
-            json.dumps(
-                {
-                    "optimized_query": "ocv meeting decisions action items",
-                    "start_date": "2026-03-01",
-                    "end_date": "2026-03-31",
-                }
-            )
-        )
-        middleware = KnowledgePriorityMiddleware(llm=llm, search_space_id=37)
-
-        result = await middleware.abefore_agent(
-            {
-                "messages": [
-                    HumanMessage(content="what happened in our OCV meeting last month?")
-                ]
-            },
-            runtime=None,
-        )
-
-        assert result is not None
-        assert captured["query"] == "ocv meeting decisions action items"
-        assert captured["start_date"] is not None
-        assert captured["end_date"] is not None
-        assert captured["start_date"].date().isoformat() == "2026-03-01"
-        assert captured["end_date"].date().isoformat() == "2026-03-31"
-        assert llm.calls[0]["config"] == {"tags": ["surfsense:internal"]}
-
-    async def test_middleware_falls_back_when_planner_returns_invalid_json(
-        self,
-        monkeypatch,
-    ):
-        captured: dict = {}
-
-        async def fake_search_knowledge_base(**kwargs):
-            captured.update(kwargs)
-            return []
-
-        monkeypatch.setattr(
-            ks,
-            "search_knowledge_base",
-            fake_search_knowledge_base,
-        )
-
-        middleware = KnowledgePriorityMiddleware(
-            llm=FakeLLM("not json"),
-            search_space_id=37,
-        )
-
-        await middleware.abefore_agent(
-            {"messages": [HumanMessage(content="summarize founders guide by deel")]},
-            runtime=None,
-        )
-
-        assert captured["query"] == "summarize founders guide by deel"
-        assert captured["start_date"] is None
-        assert captured["end_date"] is None
-
-    async def test_middleware_passes_none_dates_when_planner_returns_nulls(
-        self,
-        monkeypatch,
-    ):
-        captured: dict = {}
-
-        async def fake_search_knowledge_base(**kwargs):
-            captured.update(kwargs)
-            return []
-
-        monkeypatch.setattr(
-            ks,
-            "search_knowledge_base",
-            fake_search_knowledge_base,
-        )
-
-        middleware = KnowledgePriorityMiddleware(
-            llm=FakeLLM(
-                json.dumps(
-                    {
-                        "optimized_query": "deel founders guide summary",
-                        "start_date": None,
-                        "end_date": None,
-                    }
-                )
-            ),
-            search_space_id=37,
-        )
-
-        await middleware.abefore_agent(
-            {"messages": [HumanMessage(content="summarize founders guide by deel")]},
-            runtime=None,
-        )
-
-        assert captured["query"] == "deel founders guide summary"
-        assert captured["start_date"] is None
-        assert captured["end_date"] is None
-
-    async def test_middleware_routes_to_recency_browse_when_flagged(
-        self,
-        monkeypatch,
-    ):
-        """When the planner sets is_recency_query=true, browse_recent_documents
-        is called instead of search_knowledge_base."""
-        browse_captured: dict = {}
-        search_called = False
-
-        async def fake_browse_recent_documents(**kwargs):
-            browse_captured.update(kwargs)
-            return []
-
-        async def fake_search_knowledge_base(**kwargs):
-            nonlocal search_called
-            search_called = True
-            return []
-
-        monkeypatch.setattr(
-            ks,
-            "browse_recent_documents",
-            fake_browse_recent_documents,
-        )
-        monkeypatch.setattr(
-            ks,
-            "search_knowledge_base",
-            fake_search_knowledge_base,
-        )
-
-        llm = FakeLLM(
-            json.dumps(
-                {
-                    "optimized_query": "latest uploaded file",
-                    "start_date": None,
-                    "end_date": None,
-                    "is_recency_query": True,
-                }
-            )
-        )
-        middleware = KnowledgePriorityMiddleware(llm=llm, search_space_id=42)
-
-        result = await middleware.abefore_agent(
-            {"messages": [HumanMessage(content="what's my latest file?")]},
-            runtime=None,
-        )
-
-        assert result is not None
-        assert browse_captured["search_space_id"] == 42
-        assert not search_called
-
-    async def test_middleware_uses_hybrid_search_when_not_recency(
-        self,
-        monkeypatch,
-    ):
-        """When is_recency_query is false (default), hybrid search is used."""
-        search_captured: dict = {}
-        browse_called = False
-
-        async def fake_browse_recent_documents(**kwargs):
-            nonlocal browse_called
-            browse_called = True
-            return []
-
-        async def fake_search_knowledge_base(**kwargs):
-            search_captured.update(kwargs)
-            return []
-
-        monkeypatch.setattr(
-            ks,
-            "browse_recent_documents",
-            fake_browse_recent_documents,
-        )
-        monkeypatch.setattr(
-            ks,
-            "search_knowledge_base",
-            fake_search_knowledge_base,
-        )
-
-        llm = FakeLLM(
-            json.dumps(
-                {
-                    "optimized_query": "quarterly revenue report analysis",
-                    "start_date": None,
-                    "end_date": None,
-                    "is_recency_query": False,
-                }
-            )
-        )
-        middleware = KnowledgePriorityMiddleware(llm=llm, search_space_id=42)
-
-        await middleware.abefore_agent(
-            {"messages": [HumanMessage(content="find the quarterly revenue report")]},
-            runtime=None,
-        )
-
-        assert search_captured["query"] == "quarterly revenue report analysis"
-        assert not browse_called
-
-
-# ── KBSearchPlan schema ────────────────────────────────────────────────
-
-
-class TestKBSearchPlanSchema:
-    def test_is_recency_query_defaults_to_false(self):
-        plan = KBSearchPlan(optimized_query="test query")
-        assert plan.is_recency_query is False
-
-    def test_is_recency_query_parses_true(self):
-        plan = _parse_kb_search_plan_response(
-            json.dumps(
-                {
-                    "optimized_query": "latest uploaded file",
-                    "start_date": None,
-                    "end_date": None,
-                    "is_recency_query": True,
-                }
-            )
-        )
-        assert plan.is_recency_query is True
-        assert plan.optimized_query == "latest uploaded file"
-
-    def test_missing_is_recency_query_defaults_to_false(self):
-        plan = _parse_kb_search_plan_response(
-            json.dumps(
-                {
-                    "optimized_query": "meeting notes",
-                    "start_date": None,
-                    "end_date": None,
-                }
-            )
-        )
-        assert plan.is_recency_query is False
-
-
-# ── mentioned_document_ids cross-turn drain ────────────────────────────
-
-
-class TestKnowledgePriorityMentionDrain:
-    """Regression tests for the cross-turn ``mentioned_document_ids`` drain.
-
-    The compiled-agent cache reuses a single :class:`KnowledgePriorityMiddleware`
-    instance across turns of the same thread. ``mentioned_document_ids``
-    can therefore enter the middleware via two paths:
-
-    1. The constructor closure (``__init__(mentioned_document_ids=...)``) —
-       seeded by the cache-miss build on turn 1.
-    2. ``runtime.context.mentioned_document_ids`` — supplied freshly per
-       turn by the streaming task.
-
-    Without the drain fix, an empty ``runtime.context.mentioned_document_ids``
-    on turn 2 would fall through to the closure (because ``[]`` is falsy in
-    Python) and replay turn 1's mentions. This class pins down the
-    correct behaviour: the runtime path is authoritative even when empty,
-    and the closure is drained the first time the runtime path fires so
-    no later turn can ever resurrect stale state.
-    """
-
-    @staticmethod
-    def _make_runtime(mention_ids: list[int]):
-        """Minimal runtime stub exposing only ``runtime.context.mentioned_document_ids``."""
-        from types import SimpleNamespace
-
-        return SimpleNamespace(
-            context=SimpleNamespace(mentioned_document_ids=mention_ids),
-        )
-
-    @staticmethod
-    def _planner_llm() -> "FakeLLM":
-        # Planner returns a stable, non-recency plan so we always land in
-        # the hybrid-search branch (where ``fetch_mentioned_documents`` is
-        # invoked alongside the main search).
-        return FakeLLM(
-            json.dumps(
-                {
-                    "optimized_query": "follow up question",
-                    "start_date": None,
-                    "end_date": None,
-                    "is_recency_query": False,
-                }
-            )
-        )
-
-    async def test_runtime_context_overrides_closure_and_drains_it(self, monkeypatch):
-        """Turn 1 with mentions in BOTH closure and runtime context: the
-        runtime path wins AND the closure is drained so a future turn
-        cannot replay it.
-        """
-        fetched_ids: list[list[int]] = []
-
-        async def fake_fetch_mentioned_documents(*, document_ids, search_space_id):
-            fetched_ids.append(list(document_ids))
-            return []
-
-        async def fake_search_knowledge_base(**_kwargs):
-            return []
-
-        monkeypatch.setattr(
-            ks,
-            "fetch_mentioned_documents",
-            fake_fetch_mentioned_documents,
-        )
-        monkeypatch.setattr(
-            ks,
-            "search_knowledge_base",
-            fake_search_knowledge_base,
-        )
-
-        middleware = KnowledgePriorityMiddleware(
-            llm=self._planner_llm(),
-            search_space_id=42,
-            mentioned_document_ids=[1, 2, 3],
-        )
-
-        await middleware.abefore_agent(
-            {"messages": [HumanMessage(content="what is in those docs?")]},
-            runtime=self._make_runtime([1, 2, 3]),
-        )
-
-        assert fetched_ids == [[1, 2, 3]], (
-            "runtime.context mentions must be the source of truth on turn 1"
-        )
-        assert middleware.mentioned_document_ids == [], (
-            "closure must be drained the first time the runtime path fires "
-            "so no later turn can replay stale mentions"
-        )
-
-    async def test_empty_runtime_context_does_not_replay_closure_mentions(
-        self, monkeypatch
-    ):
-        """Regression: turn 2 with NO mentions must not surface turn 1's
-        mentions from the constructor closure.
-
-        Before the fix, ``if ctx_mentions:`` treated an empty list as
-        absent and fell through to ``elif self.mentioned_document_ids:``,
-        replaying turn 1's mentions. This test pins down the corrected
-        behaviour.
-        """
-        fetched_ids: list[list[int]] = []
-
-        async def fake_fetch_mentioned_documents(*, document_ids, search_space_id):
-            fetched_ids.append(list(document_ids))
-            return []
-
-        async def fake_search_knowledge_base(**_kwargs):
-            return []
-
-        monkeypatch.setattr(
-            ks,
-            "fetch_mentioned_documents",
-            fake_fetch_mentioned_documents,
-        )
-        monkeypatch.setattr(
-            ks,
-            "search_knowledge_base",
-            fake_search_knowledge_base,
-        )
-
-        # Simulate a cached middleware instance whose closure was seeded
-        # by a previous turn's cache-miss build (mentions=[1,2,3]).
-        middleware = KnowledgePriorityMiddleware(
-            llm=self._planner_llm(),
-            search_space_id=42,
-            mentioned_document_ids=[1, 2, 3],
-        )
-
-        # Turn 2: streaming task supplies an EMPTY mention list (no
-        # mentions on this follow-up turn).
-        await middleware.abefore_agent(
-            {"messages": [HumanMessage(content="what about the next steps?")]},
-            runtime=self._make_runtime([]),
-        )
-
-        assert fetched_ids == [], (
-            "fetch_mentioned_documents must NOT be called when the runtime "
-            "context says there are no mentions for this turn"
-        )
-
-    async def test_legacy_path_fires_only_when_runtime_context_absent(
-        self, monkeypatch
-    ):
-        """Backward-compat: if a caller doesn't supply runtime.context (old
-        non-streaming code path), the closure-injected mentions are still
-        honoured exactly once and then drained.
-        """
-        fetched_ids: list[list[int]] = []
-
-        async def fake_fetch_mentioned_documents(*, document_ids, search_space_id):
-            fetched_ids.append(list(document_ids))
-            return []
-
-        async def fake_search_knowledge_base(**_kwargs):
-            return []
-
-        monkeypatch.setattr(
-            ks,
-            "fetch_mentioned_documents",
-            fake_fetch_mentioned_documents,
-        )
-        monkeypatch.setattr(
-            ks,
-            "search_knowledge_base",
-            fake_search_knowledge_base,
-        )
-
-        middleware = KnowledgePriorityMiddleware(
-            llm=self._planner_llm(),
-            search_space_id=42,
-            mentioned_document_ids=[7, 8],
-        )
-
-        # First call: no runtime → legacy path uses the closure.
-        await middleware.abefore_agent(
-            {"messages": [HumanMessage(content="initial question")]},
-            runtime=None,
-        )
-        # Second call: still no runtime — closure already drained, so no replay.
-        await middleware.abefore_agent(
-            {"messages": [HumanMessage(content="follow up")]},
-            runtime=None,
-        )
-
-        assert fetched_ids == [[7, 8]], (
-            "legacy path must honour the closure exactly once and then drain it"
-        )
-        assert middleware.mentioned_document_ids == []
diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/flows/shared/test_assistant_finalize_citations.py b/surfsense_backend/tests/unit/tasks/chat/streaming/flows/shared/test_assistant_finalize_citations.py
new file mode 100644
index 000000000..437cbc528
--- /dev/null
+++ b/surfsense_backend/tests/unit/tasks/chat/streaming/flows/shared/test_assistant_finalize_citations.py
@@ -0,0 +1,85 @@
+"""Behavior tests for finalize-time citation resolution.
+
+The finalize step is the single server-side seam that turns the model's bare
+``[n]`` ordinals into renderer-ready ``[citation:<payload>]`` markers, using the
+registry captured from the run's final state. These tests pin that contract:
+known ordinals resolve, unknown ones drop, foreign markers survive, and a
+serialized (dict) registry is accepted just like a live one.
+"""
+
+from __future__ import annotations
+
+from app.agents.chat.multi_agent_chat.shared.citations import (
+    CitationRegistry,
+    CitationSourceType,
+)
+from app.tasks.chat.streaming.flows.shared.assistant_finalize import _resolve_citations
+
+
+def _registry_with_chunk(chunk_id: int = 42) -> CitationRegistry:
+    """Build a registry holding one KB chunk entry for document 1."""
+    locator = {"document_id": 1, "chunk_id": chunk_id}
+    seeded = CitationRegistry()
+    seeded.register(CitationSourceType.KB_CHUNK, locator)
+    return seeded
+
+
+def _text(value: str) -> list[dict]:
+    return [{"type": "text", "text": value}]  # single text part wrapping ``value``
+
+
+def test_known_ordinal_resolves_to_chunk_marker():
+    """A registered ``[1]`` is rewritten to that chunk's citation marker."""
+    parts = _text("Launch is March 10 [1].")
+    resolved = _resolve_citations(parts, _registry_with_chunk(42))
+    first_text = resolved[0]["text"]
+    assert first_text == "Launch is March 10 [citation:42]."
+
+
+def test_unknown_ordinal_is_dropped():
+    """An ordinal absent from the registry is removed from the text."""
+    parts = _text("Unsupported claim [9].")
+    resolved = _resolve_citations(parts, _registry_with_chunk(42))
+    first_text = resolved[0]["text"]
+    assert first_text == "Unsupported claim ."
+
+
+def test_foreign_citation_marker_is_preserved():
+    """An existing ``[citation:<url>]`` marker passes through unchanged."""
+    original = "From the web [citation:https://example.com]."
+    parts = _text(original)
+    resolved = _resolve_citations(parts, _registry_with_chunk(42))
+
+    assert resolved[0]["text"] == original
+
+
+def test_serialized_registry_is_accepted():
+    """The ``model_dump()`` form of a registry resolves like a live one."""
+    dumped = _registry_with_chunk(7).model_dump()
+    resolved = _resolve_citations(_text("See [1]."), dumped)
+    first_text = resolved[0]["text"]
+    assert first_text == "See [citation:7]."
+
+
+def test_empty_registry_leaves_text_untouched():
+    """With nothing registered, even ``[1]`` stays in the text as written."""
+    resolved = _resolve_citations(_text("No sources here [1]."), CitationRegistry())
+    assert resolved[0]["text"] == "No sources here [1]."
+
+
+def test_missing_registry_is_a_noop():
+    """Passing ``None`` as the registry leaves the text unchanged."""
+    resolved = _resolve_citations(_text("Nothing to resolve [1]."), None)
+    assert resolved[0]["text"] == "Nothing to resolve [1]."
+
+
+def test_non_text_parts_are_left_alone():
+    """Only the text part is rewritten; the tool-call args keep ``[1]``."""
+    args = {"q": "[1]"}
+    tool_call = {"type": "tool_call", "name": "search_knowledge_base", "args": args}
+    text_part = {"type": "text", "text": "Result [1]."}
+
+    resolved = _resolve_citations([tool_call, text_part], _registry_with_chunk(5))
+
+    assert resolved[0]["args"]["q"] == "[1]"
+    assert resolved[1]["text"] == "Result [citation:5]."
diff --git a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/AgentStatusContent.tsx b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/AgentStatusContent.tsx
index fd7be1a23..bc31dffed 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/AgentStatusContent.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/AgentStatusContent.tsx
@@ -125,12 +125,6 @@ const FLAG_GROUPS: FlagGroup[] = [
 				description: "Spin up explore / report_writer / connector_negotiator subagents.",
 				envVar: "SURFSENSE_ENABLE_SPECIALIZED_SUBAGENTS",
 			},
-			{
-				key: "enable_kb_planner_runnable",
-				label: "KB planner runnable",
-				description: "Compile a private planner sub-agent for KB search.",
-				envVar: "SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE",
-			},
 		],
 	},
 	{
diff --git a/surfsense_web/lib/apis/agent-flags-api.service.ts b/surfsense_web/lib/apis/agent-flags-api.service.ts
index 534810c0e..5895d9924 100644
--- a/surfsense_web/lib/apis/agent-flags-api.service.ts
+++ b/surfsense_web/lib/apis/agent-flags-api.service.ts
@@ -19,7 +19,6 @@ const AgentFeatureFlagsSchema = z.object({
 
 	enable_skills: z.boolean(),
 	enable_specialized_subagents: z.boolean(),
-	enable_kb_planner_runnable: z.boolean(),
 
 	enable_action_log: z.boolean(),
 	enable_revert_route: z.boolean(),