Merge pull request #1539 from CREDO23/improve-chat-agent-context-and-citations

[FEAT] Unified [n] citation registry for KB + web, pull-based retrieval
2026-06-26 21:39:43 +02:00 · 2026-06-25 13:34:52 -07:00 · 2026-06-25 13:34:52 -07:00 · 94fdb8a113
commit 94fdb8a113
parent 5a6ea29610 232cc937c5
160 changed files with 4097 additions and 5238 deletions
--- a/surfsense_backend/.env.example
+++ b/surfsense_backend/.env.example
@@ -433,14 +433,6 @@ LANGSMITH_PROJECT=surfsense
 # Skills + subagents
 # SURFSENSE_ENABLE_SKILLS=false
 # SURFSENSE_ENABLE_SPECIALIZED_SUBAGENTS=false
-# SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE=false
-
-# KB retrieval mode (default OFF = lazy). When OFF, the main agent retrieves
-# KB content on demand via the `search_knowledge_base` tool and skips the
-# expensive per-turn pre-injection (planner LLM + embed + hybrid search,
-# ~2.3s); explicit @-mentions are still surfaced cheaply. Set to true to
-# restore the original eager `<priority_documents>` pre-injection.
-# SURFSENSE_ENABLE_KB_PRIORITY_PREINJECTION=false

 # Snapshot / revert
 # SURFSENSE_ENABLE_ACTION_LOG=false
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/anonymous_document/middleware.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/anonymous_document/middleware.py
@@ -6,8 +6,6 @@ read-only). This middleware loads it once on the first turn into

 * :class:`KnowledgeTreeMiddleware` can render the synthetic ``/documents``
  view without touching the DB.
-* :class:`KnowledgePriorityMiddleware` skips hybrid search and emits a
-  degenerate priority list.
 * :class:`KBPostgresBackend` (``als_info`` / ``aread`` / ``_load_file_data``)
  recognises the synthetic path.

--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/knowledge_priority.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/knowledge_priority.py
@@ -1,42 +0,0 @@
-"""KB priority planner: <priority_documents> injection."""
-
-from __future__ import annotations
-
-from langchain_core.language_models import BaseChatModel
-
-from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
-from app.agents.chat.multi_agent_chat.shared.middleware.knowledge_search import (
-    KnowledgePriorityMiddleware,
-)
-from app.services.llm_service import get_planner_llm
-
-
-def build_knowledge_priority_mw(
-    *,
-    llm: BaseChatModel,
-    search_space_id: int,
-    filesystem_mode: FilesystemMode,
-    available_connectors: list[str] | None,
-    available_document_types: list[str] | None,
-    mentioned_document_ids: list[int] | None,
-    preinjection_enabled: bool = True,
-) -> KnowledgePriorityMiddleware:
-    """Build the KB priority middleware.
-
-    When ``preinjection_enabled`` is False (the lazy default), the middleware
-    runs in mentions-only mode: it skips the expensive planner LLM + embedding
-    + hybrid search and only surfaces explicit @-mentions. The main agent is
-    expected to pull relevant KB content on demand via the
-    ``search_knowledge_base`` tool instead.
-    """
-    return KnowledgePriorityMiddleware(
-        llm=llm,
-        planner_llm=get_planner_llm(),
-        search_space_id=search_space_id,
-        filesystem_mode=filesystem_mode,
-        available_connectors=available_connectors,
-        available_document_types=available_document_types,
-        mentioned_document_ids=mentioned_document_ids,
-        inject_system_message=False,
-        mentions_only=not preinjection_enabled,
-    )
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/stack.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/stack.py
@@ -1,10 +1,11 @@
 """Main-agent middleware list assembly: one line per slot.

 The main agent is a pure router — filesystem reads/writes are owned by the
-``knowledge_base`` subagent and delegated via the ``task`` tool. The stack
-here only renders KB context (workspace tree + priority docs), projects it
-into system messages, and commits any subagent-side staged writes at end of
-turn (cloud mode).
+``knowledge_base`` subagent and delegated via the ``task`` tool. Knowledge-base
+retrieval is pull-based: the ``search_knowledge_base`` tool runs the hybrid
+search on demand and renders ``<retrieved_context>`` with ``[n]`` citation
+labels. The stack here computes the workspace tree, commits any subagent-side
+staged writes at end of turn (cloud mode), and wires the supporting middleware.
 """

 from __future__ import annotations
@@ -33,9 +34,6 @@ from app.agents.chat.multi_agent_chat.shared.middleware.anthropic_cache import (
 from app.agents.chat.multi_agent_chat.shared.middleware.compaction import (
    build_compaction_mw,
 )
-from app.agents.chat.multi_agent_chat.shared.middleware.kb_context_projection import (
-    build_kb_context_projection_mw,
-)
 from app.agents.chat.multi_agent_chat.shared.middleware.patch_tool_calls import (
    build_patch_tool_calls_mw,
 )
@@ -84,7 +82,6 @@ from .context_editing import build_context_editing_mw
 from .dedup_hitl import build_dedup_hitl_mw
 from .doom_loop import build_doom_loop_mw
 from .kb_persistence import build_kb_persistence_mw
-from .knowledge_priority import build_knowledge_priority_mw
 from .knowledge_tree import build_knowledge_tree_mw
 from .noop_injection import build_noop_injection_mw
 from .otel_span import build_otel_mw
@@ -237,16 +234,6 @@ def build_main_agent_deepagent_middleware(
            search_space_id=search_space_id,
            llm=llm,
        ),
-        build_knowledge_priority_mw(
-            llm=llm,
-            search_space_id=search_space_id,
-            filesystem_mode=filesystem_mode,
-            available_connectors=available_connectors,
-            available_document_types=available_document_types,
-            mentioned_document_ids=mentioned_document_ids,
-            preinjection_enabled=flags.enable_kb_priority_preinjection,
-        ),
-        build_kb_context_projection_mw(),
        build_kb_persistence_mw(
            filesystem_mode=filesystem_mode,
            search_space_id=search_space_id,
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/skills/builtin/kb-research/SKILL.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/skills/builtin/kb-research/SKILL.md
@@ -15,7 +15,7 @@ allowed-tools: scrape_webpage, read_file, ls_tree, grep, web_search
 1. Decompose the user's question into 2-4 specific, citation-worthy sub-questions.
 2. For each sub-question, run **one** targeted KB search (focused on terms the user would have written, not synonyms). Open the most relevant 2-3 documents fully via `read_file` if their excerpts are too short.
 3. Use `grep` to find supporting passages in long files instead of re-reading them end to end.
-4. Cite every claim with `[citation:chunk_id]` exactly as the chunk tag specifies.
+4. Cite every claim with the `[n]` label shown on the passage you used (search results and `read_file` output both carry them); never write a chunk id, URL, or title yourself.

 ## What good output looks like
 - Short paragraphs with inline citations.
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/citations/off.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/citations/off.md
@@ -1,12 +1,13 @@
 <citations>
 Citation markers are **disabled** in this configuration.

-Do NOT include `[citation:…]` markers anywhere, even if tool descriptions or
+Do NOT include `[n]` citation labels or `[citation:…]` markers anywhere, even if
+tool output (`<retrieved_context>`, `<web_results>`), tool descriptions, or
 examples reference them. Ignore citation-format reminders elsewhere in this
 prompt when they conflict with this block.

 1. Answer in plain prose. Optional markdown links to public URLs when
   sources are URLs.
 2. Do not expose raw chunk ids, document ids, or internal ids to the user.
-3. Present KB or docs facts naturally without attribution markers.
+3. Present KB, web, or docs facts naturally without attribution markers.
 </citations>
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md
@@ -1,42 +1,16 @@
 <citations>
-Citations reach the answer through two channels. Use whichever applies — and
-never invent ids you didn't see. Citation ids are resolved by exact-match
-lookup; a wrong id silently breaks the link, so when in doubt, omit.
+Cite with one token: the bracket label `[n]`. Every citable result —
+`search_knowledge_base` passages, `web_search` results, and prose from a
+`task` knowledge_base/research specialist — already carries `[n]` labels on a
+single shared count. Those labels are the only citation you write; the server
+resolves each one back to its source after the turn.

-### Channel A — chunk blocks injected this turn
-When `web_search` returns `<document>` / `<chunk id='…'>` blocks in this
-turn:
-
-1. For each factual statement taken from those chunks, add
-   `[citation:chunk_id]` using the **exact** id from a visible
-   `<chunk id='…'>` tag. Copy digit-for-digit (or the URL verbatim);
-   do not retype from memory.
-2. `<document_id>` is the parent doc id, **not** a citation source —
-   only ids inside `<chunk id='…'>` count.
-3. Multiple chunks → `[citation:id1], [citation:id2]` (comma-separated,
-   each id copied individually).
-4. Never invent, normalise, or guess at adjacent ids; if unsure, omit.
-5. Plain brackets only — no markdown links, no footnote numbering.
-
-### Channel B — citations relayed by a `task` specialist
-A `task(...)` tool message may contain `[citation:<chunk_id>]` markers
-the specialist already attached to its prose. The specialist saw the
-underlying `<chunk id='…'>` blocks; you didn't. So:
-
-1. **Preserve those markers verbatim** in your final answer — do not
-   reformat, renumber, drop, or wrap them in markdown links. When you
-   paraphrase a specialist sentence, copy the marker character-for-
-   character; do not regenerate the id from memory (LLMs reliably
-   corrupt nearby digits).
-2. Keep each marker attached to the sentence the specialist attached
-   it to.
-3. Do **not** add new `[citation:…]` markers of your own to a
-   specialist's prose; if a fact has no marker, the specialist
-   couldn't tie it to a chunk and neither can you.
-4. When a specialist returns JSON, the citation markers live inside
-   the prose-bearing fields (e.g. a summary or excerpt). Pull them
-   along with the surrounding sentence when you quote.
-
-If neither channel surfaces citation markers this turn, do not fabricate
-them.
+1. Put the label right after the claim it supports.
+2. Several sources for one claim: stack brackets, `[1][2]`.
+3. Copy labels exactly as shown, a specialist's included — never renumber them,
+   add your own, or write the underlying title, date, id, or URL instead.
+4. Write the bare `[n]` and nothing else: no `[citation:...]`, no markdown links,
+   no footnote marks, no "References" section.
+5. Only label claims the sources support. If nothing shown backs a claim — or you
+   never saw a label — leave it uncited; never invent one.
 </citations>
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/private.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/private.md
@@ -8,20 +8,14 @@ standing instructions. It also reports current character usage versus the
 hard limit so you can manage the budget. Treat it as background colour for
 your answer, not as the task itself.

-`<priority_documents>` lists the workspace documents most relevant to the
-latest user message, ranked by relevance score, with `[USER-MENTIONED]`
-flagged on anything the user explicitly referenced. When the task is about
-workspace content, read these first; matched passages inside each document
-are flagged via `<chunk_index>` so you can jump straight to them.
-
 `<workspace_tree>` shows the full `/documents/` folder and file layout. Use
 it to resolve paths the user describes in natural language ("my Q2 roadmap",
 "last week's meeting notes") into concrete document references before
 delegating to a specialist.

-`<document>` and `<chunk id='…'>` blocks are chunked indexed content returned
-by KB search (backing `<priority_documents>`). Each chunk carries a stable
-`id` attribute.
+`<retrieved_context>` blocks hold knowledge-base passages from
+`search_knowledge_base`; each `<document>` inside is in excerpt view and every
+passage is prefixed with an `[n]` citation label.

 If a block doesn't appear this turn, work from the conversation alone.
 </dynamic_context>
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/team.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/team.md
@@ -7,21 +7,14 @@ decisions, conventions, architecture notes, processes, key facts. It also
 reports current character usage versus the hard limit so you can manage the
 budget. Treat it as background colour for your answer, not as the task itself.

-`<priority_documents>` lists the workspace documents most relevant to the
-latest user message, ranked by relevance score, with `[USER-MENTIONED]`
-flagged on anything someone in the thread explicitly referenced. When the
-task is about workspace content, read these first; matched passages inside
-each document are flagged via `<chunk_index>` so you can jump straight to
-them.
-
 `<workspace_tree>` shows the full `/documents/` folder and file layout. Use
 it to resolve paths described in natural language ("the Q2 roadmap", "last
 week's planning notes") into concrete document references before delegating
 to a specialist.

-`<document>` and `<chunk id='…'>` blocks are chunked indexed content returned
-by KB search (backing `<priority_documents>`). Each chunk carries a stable
-`id` attribute.
+`<retrieved_context>` blocks hold knowledge-base passages from
+`search_knowledge_base`; each `<document>` inside is in excerpt view and every
+passage is prefixed with an `[n]` citation label.

 If a block doesn't appear this turn, work from the conversation alone.
 </dynamic_context>
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/providers/google.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/providers/google.md
@@ -14,5 +14,5 @@ Workflow (Understand → Plan → Act → Verify):

 Discipline:
 - Do not imply access to connectors, MCP tools, or deliverable generators except via **task**.
-- Pass paths to **task(knowledge_base, …)** only when you saw them in `<workspace_tree>` or `<priority_documents>`. Otherwise describe the document in natural language and let the subagent resolve it.
+- Pass paths to **task(knowledge_base, …)** only when you saw them in `<workspace_tree>`. Otherwise describe the document in natural language and let the subagent resolve it.
 </provider_hints>
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/providers/grok.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/providers/grok.md
@@ -8,8 +8,8 @@ Tool discipline:
 - Typically one investigative tool per turn unless several independent read-only queries are clearly needed; don’t repeat identical calls.

 Attribution:
-- When citations are **enabled** (see citation block above) and you answer from chunk-tagged documents, use `[citation:chunk_id]` exactly as specified there.
-- When citations are **disabled**, never emit `[citation:…]` — plain prose and links per tool guidance.
+- When citations are **enabled** (see citation block above) and you answer from labelled passages, cite with the bare `[n]` label exactly as specified there.
+- When citations are **disabled**, never emit `[n]` or `[citation:…]` — plain prose and links per tool guidance.

 Style:
 - No emojis unless asked; flat lists for short answers.
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/providers/openai_codex.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/providers/openai_codex.md
@@ -3,7 +3,7 @@ You are running on an OpenAI Codex-class model (SurfSense **main agent**).

 Output style:
 - Concise; don’t paste huge fetch blobs — summarize.
-- When citations are **enabled** and you rely on chunk-tagged docs, references may use `[citation:chunk_id]` per the citation block above; when **disabled**, use prose and URLs only.
+- When citations are **enabled** and you rely on labelled passages, cite with the bare `[n]` label per the citation block above; when **disabled**, use prose and URLs only.
 - Numbered lists work well when the user should reply with a single option index.
 - No emojis; single-level bullets.

--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/tools/web_search/description.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/tools/web_search/description.md
@@ -4,7 +4,10 @@
    facts, anything outside SurfSense docs and the workspace KB. Reach for
    it whenever freshness matters or you'd otherwise guess from memory.
  - Don't refuse with "I lack network access" — call the tool.
+  - Returns a `<web_results>` block: each result is labelled `[n]`. Cite a
+    result by writing that `[n]` after the statement it supports (when
+    citations are enabled) — do not hand-write the URL as a markdown link.
  - If results are thin, say so and offer to refine the query.
  - Args: `query`, `top_k` (default 10, max 50).
  - Follow up with `scrape_webpage` on the best URL when snippets are too
-    shallow. Present sources with `[label](url)` markdown links.
+    shallow.
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/search_knowledge_base.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/search_knowledge_base.py
@@ -1,12 +1,11 @@
-"""On-demand ``search_knowledge_base`` main-agent tool (OpenCode-style lazy RAG).
+"""On-demand ``search_knowledge_base`` main-agent tool (citation-spine RAG).

-The main agent no longer receives eagerly pre-injected KB context on every
-turn (see :class:`KnowledgePriorityMiddleware`, now gated off by default).
-Instead it calls this tool only when it decides it needs knowledge-base
-content. The tool runs a single hybrid search (embed + DB search, ~0.5s),
-formats the top matches for the model, and writes ``kb_matched_chunk_ids``
-into graph state so matched-section highlighting is preserved when the agent
-later reads a document via ``task(knowledge_base)``.
+The main agent calls this when it decides it needs knowledge-base content. The
+tool runs one hybrid search, renders the matched passages as a
+``<retrieved_context>`` block whose passages carry server-assigned ``[n]``
+labels, and persists the conversation's ``CitationRegistry`` onto graph state so
+the ``[n]`` -> ``[citation:<payload>]`` normalizer can resolve them after the
+turn.
 """

 from __future__ import annotations
@@ -18,153 +17,70 @@ from langchain.tools import ToolRuntime
 from langchain_core.messages import ToolMessage
 from langchain_core.tools import BaseTool, StructuredTool
 from langgraph.types import Command
-from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession

-from app.agents.chat.multi_agent_chat.shared.middleware.knowledge_search import (
-    search_knowledge_base as _hybrid_search_kb,
+from app.agents.chat.multi_agent_chat.shared.citations import load_registry
+from app.agents.chat.multi_agent_chat.shared.retrieval import SearchScope, build_context
+from app.agents.chat.multi_agent_chat.shared.retrieval.hybrid_search import (
+    search_chunks,
 )
 from app.agents.chat.multi_agent_chat.shared.state.filesystem_state import (
    SurfSenseFilesystemState,
 )
-from app.agents.chat.runtime.path_resolver import (
-    PathIndex,
-    build_path_index,
-    doc_to_virtual_path,
-)
-from app.db import Document, shielded_async_session
+from app.agents.chat.runtime.references import referenced_document_ids
+from app.db import shielded_async_session
 from app.utils.perf import get_perf_logger

 _perf_log = get_perf_logger()

 _DEFAULT_TOP_K = 5
 _MAX_TOP_K = 20
-_PER_DOC_SNIPPET_CHARS = 1200
-_MAX_TOTAL_CHARS = 16_000

 _TOOL_DESCRIPTION = (
    "Search the user's knowledge base (their indexed documents, files, and "
    "connector content) for passages relevant to a query, using hybrid "
    "semantic + keyword retrieval.\n\n"
    "Use this FIRST to ground any factual or informational answer about the "
-    "user's own documents, notes, or connected sources. The workspace tree "
-    "shows which files exist; this tool pulls the actual relevant content. "
-    "Each hit returns the document's virtual path, a relevance score, and the "
-    "matched snippets. If you need a document's full text, delegate a read to "
-    "the knowledge_base specialist via `task` using the returned path.\n\n"
+    "user's own documents, notes, or connected sources. It returns a "
+    "<retrieved_context> block: each matched passage is labelled [n]. Cite a "
+    "passage by writing that [n] after the statement it supports.\n\n"
    "Write a focused, specific query containing the concrete entities, "
    "acronyms, people, projects, or terms you are looking for."
 )


-async def _resolve_virtual_paths(
-    results: list[dict[str, Any]],
+def _search_types(
+    available_connectors: list[str] | None,
+    available_document_types: list[str] | None,
+) -> tuple[str, ...] | None:
+    """Merge connector + document-type filters into a scope; ``None`` if unrestricted."""
+    types: set[str] = set()
+    if available_document_types:
+        types.update(available_document_types)
+    if available_connectors:
+        types.update(available_connectors)
+    return tuple(sorted(types)) or None
+
+
+async def _build_search_scope(
+    session: AsyncSession,
    *,
    search_space_id: int,
-) -> dict[int, str]:
-    """Resolve ``Document.id`` -> canonical virtual path for the search hits."""
-    doc_ids = [
-        doc_id
-        for doc_id in (
-            (doc.get("document") or {}).get("id")
-            for doc in results
-            if isinstance(doc, dict)
-        )
-        if isinstance(doc_id, int)
-    ]
-    if not doc_ids:
-        return {}
-
-    async with shielded_async_session() as session:
-        index: PathIndex = await build_path_index(session, search_space_id)
-        folder_rows = await session.execute(
-            select(Document.id, Document.folder_id).where(
-                Document.search_space_id == search_space_id,
-                Document.id.in_(doc_ids),
-            )
-        )
-        folder_by_doc_id = {row.id: row.folder_id for row in folder_rows.all()}
-
-    paths: dict[int, str] = {}
-    for doc in results:
-        doc_meta = doc.get("document") or {}
-        doc_id = doc_meta.get("id")
-        if not isinstance(doc_id, int):
-            continue
-        folder_id = folder_by_doc_id.get(doc_id, doc_meta.get("folder_id"))
-        paths[doc_id] = doc_to_virtual_path(
-            doc_id=doc_id,
-            title=str(doc_meta.get("title") or "untitled"),
-            folder_id=folder_id if isinstance(folder_id, int) else None,
-            index=index,
-        )
-    return paths
-
-
-def _format_hits(
-    results: list[dict[str, Any]],
-    *,
-    paths: dict[int, str],
-    query: str,
-) -> str:
-    """Render search hits as a compact, model-readable block."""
-    if not results:
-        return (
-            f"No knowledge-base matches found for query: {query!r}.\n"
-            "Tell the user nothing relevant was found in their workspace, or "
-            "try a different query."
-        )
-
-    lines: list[str] = [f"<knowledge_base_results query={query!r}>"]
-    total = len(lines[0])
-    for rank, doc in enumerate(results, start=1):
-        doc_meta = doc.get("document") or {}
-        doc_id = doc_meta.get("id")
-        title = str(doc_meta.get("title") or "untitled")
-        doc_type = doc_meta.get("document_type") or doc.get("source") or "document"
-        score = doc.get("score")
-        score_str = f"{score:.3f}" if isinstance(score, int | float) else "n/a"
-        path = paths.get(doc_id) if isinstance(doc_id, int) else None
-
-        header = f"\n{rank}. {title} (type={doc_type}, score={score_str})" + (
-            f"\n   path: {path}" if path else ""
-        )
-
-        content = (doc.get("content") or "").strip()
-        if content:
-            snippet = content[:_PER_DOC_SNIPPET_CHARS].strip()
-            if len(content) > _PER_DOC_SNIPPET_CHARS:
-                snippet += " ..."
-            body = "\n   " + snippet.replace("\n", "\n   ")
-        else:
-            body = "\n   (no preview available; read the document for details)"
-
-        entry = header + body
-        if total + len(entry) > _MAX_TOTAL_CHARS:
-            lines.append("\n<!-- additional matches truncated to fit context -->")
-            break
-        lines.append(entry)
-        total += len(entry)
-
-    lines.append(
-        "\n\nTo read a full document, delegate to the knowledge_base specialist "
-        "with `task`, referencing the path above."
+    document_types: tuple[str, ...] | None,
+    runtime: ToolRuntime[None, SurfSenseFilesystemState],
+) -> SearchScope:
+    """Assemble the retrieval scope: workspace document-type filter + @-mention pins."""
+    ctx = getattr(runtime, "context", None)
+    document_ids = await referenced_document_ids(
+        session,
+        search_space_id=search_space_id,
+        document_ids=getattr(ctx, "mentioned_document_ids", None),
+        folder_ids=getattr(ctx, "mentioned_folder_ids", None),
+    )
+    return SearchScope(
+        document_types=document_types,
+        document_ids=document_ids or None,
    )
-    lines.append("\n</knowledge_base_results>")
-    return "".join(lines)
-
-
-def _matched_chunk_ids(results: list[dict[str, Any]]) -> dict[int, list[int]]:
-    """Extract ``Document.id`` -> matched chunk ids for state hand-off."""
-    matched: dict[int, list[int]] = {}
-    for doc in results:
-        doc_id = (doc.get("document") or {}).get("id")
-        if not isinstance(doc_id, int):
-            continue
-        chunk_ids = doc.get("matched_chunk_ids") or []
-        normalized = [int(cid) for cid in chunk_ids if isinstance(cid, int | str)]
-        if normalized:
-            matched[doc_id] = normalized
-    return matched


 def create_search_knowledge_base_tool(
@@ -176,8 +92,7 @@ def create_search_knowledge_base_tool(
    """Factory for the on-demand ``search_knowledge_base`` tool."""

    _space_id = search_space_id
-    _connectors = available_connectors
-    _doc_types = available_document_types
+    _document_types = _search_types(available_connectors, available_document_types)

    async def _impl(
        query: Annotated[
@@ -195,34 +110,45 @@ def create_search_knowledge_base_tool(
            return "Error: provide a non-empty search query."

        clamped_top_k = min(max(1, top_k), _MAX_TOP_K)
-        t0 = time.perf_counter()
-        results = await _hybrid_search_kb(
-            query=cleaned_query,
-            search_space_id=_space_id,
-            available_connectors=_connectors,
-            available_document_types=_doc_types,
-            top_k=clamped_top_k,
-        )
+        registry = load_registry(getattr(runtime, "state", None))

-        paths = await _resolve_virtual_paths(results, search_space_id=_space_id)
-        rendered = _format_hits(results, paths=paths, query=cleaned_query)
-        matched = _matched_chunk_ids(results)
+        t0 = time.perf_counter()
+        async with shielded_async_session() as session:
+            scope = await _build_search_scope(
+                session,
+                search_space_id=_space_id,
+                document_types=_document_types,
+                runtime=runtime,
+            )
+            hits = await search_chunks(
+                session,
+                search_space_id=_space_id,
+                query=cleaned_query,
+                scope=scope,
+                top_k=clamped_top_k,
+            )
+            rendered = build_context(cleaned_query, hits, registry)

        _perf_log.info(
-            "[search_knowledge_base] tool query=%r results=%d chars=%d in %.3fs",
+            "[search_knowledge_base] tool query=%r docs=%d in %.3fs",
            cleaned_query[:60],
-            len(results),
-            len(rendered),
+            len(hits),
            time.perf_counter() - t0,
        )

+        if rendered is None:
+            return (
+                f"No knowledge-base matches found for query: {cleaned_query!r}.\n"
+                "Tell the user nothing relevant was found in their workspace, or "
+                "try a different query."
+            )
+
        update: dict[str, Any] = {
            "messages": [
                ToolMessage(content=rendered, tool_call_id=runtime.tool_call_id)
            ],
+            "citation_registry": registry,
        }
-        if matched:
-            update["kb_matched_chunk_ids"] = matched
        return Command(update=update)

    return StructuredTool.from_function(
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/__init__.py
@@ -0,0 +1,22 @@
+"""Citation registry: maps model-facing ``[n]`` labels to real sources.
+
+Server-side only; the model sees only the bare ``[n]``.
+"""
+
+from __future__ import annotations
+
+from .markers import to_frontend_payload
+from .models import CitationEntry, CitationSourceType
+from .normalizer import normalize_citations
+from .registry import CitationRegistry, make_key
+from .state import load_registry
+
+__all__ = [
+    "CitationEntry",
+    "CitationRegistry",
+    "CitationSourceType",
+    "load_registry",
+    "make_key",
+    "normalize_citations",
+    "to_frontend_payload",
+]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/markers.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/markers.py
@@ -0,0 +1,32 @@
+"""Map a registered citation to the frontend ``[citation:<payload>]`` payload.
+
+The citation renderer understands a chunk id (``42``), a negative chunk id for
+anonymous uploads (``-3``), and a URL. This is the seam that turns a server-side
+source into one the renderer can resolve; it grows as more source kinds become
+renderable. Kinds with no renderable form yet return ``None`` so the marker is
+dropped rather than emitted broken.
+"""
+
+from __future__ import annotations
+
+from .models import CitationEntry, CitationSourceType
+
+
+def to_frontend_payload(entry: CitationEntry) -> str | None:
+    """Inner payload for ``[citation:<payload>]``, or ``None`` if not renderable."""
+    locator = entry.locator
+    match entry.source_type:
+        case CitationSourceType.KB_CHUNK | CitationSourceType.ANON_CHUNK:
+            chunk_id = locator.get("chunk_id")
+            return str(chunk_id) if chunk_id is not None else None
+        case CitationSourceType.WEB_RESULT:
+            url = locator.get("url")
+            return url or None
+        case _:
+            # Connector items and chat turns have no client-side renderer yet
+            # (the frontend resolves only chunk ids and URLs), so they stay
+            # unmarked until both a registration path and a renderer exist.
+            return None
+
+
+__all__ = ["to_frontend_payload"]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/models.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/models.py
@@ -0,0 +1,31 @@
+"""Data shapes for the citation registry."""
+
+from __future__ import annotations
+
+from enum import Enum
+from typing import Any
+
+from pydantic import BaseModel, Field
+
+
+class CitationSourceType(str, Enum):
+    """Source kind of a citable unit; the value is the stable wire/dedup form."""
+
+    KB_CHUNK = "kb_chunk"
+    KB_DOCUMENT = "kb_document"
+    CONNECTOR_ITEM = "connector_item"
+    WEB_RESULT = "web_result"
+    CHAT_TURN = "chat_turn"
+    ANON_CHUNK = "anon_chunk"
+
+
+class CitationEntry(BaseModel):
+    """A registered unit: ``n`` (the label), ``locator`` (identity), ``display`` (UI only)."""
+
+    n: int
+    source_type: CitationSourceType
+    locator: dict[str, Any]
+    display: dict[str, Any] = Field(default_factory=dict)
+
+
+__all__ = ["CitationEntry", "CitationSourceType"]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/normalizer.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/normalizer.py
@@ -0,0 +1,64 @@
+"""Rewrite model ``[n]`` citations into frontend ``[citation:<payload>]`` markers.
+
+The model cites with tiny ordinals ``[n]`` — one per bracket. Several citations
+are just several brackets (``[1][2]`` or ``[1], [2]``). Each ordinal is resolved
+through the registry and replaced with a marker the citation renderer
+understands. Unknown or not-yet-renderable ordinals are dropped, so a bad
+citation disappears rather than misleads. Code spans are left untouched.
+"""
+
+from __future__ import annotations
+
+import re
+from collections.abc import Callable
+
+from .markers import to_frontend_payload
+from .registry import CitationRegistry
+
+# Fenced (```...```) and inline (`...`) code; mirrors the frontend's single
+# code-region pattern so ordinals inside examples are never rewritten.
+_CODE_REGION = re.compile(r"```[\s\S]*?```|`[^`\n]+`")
+
+# A single ordinal in a bracket: `[1]`, `[12]`. We deliberately match even when
+# glued to the preceding word (`docs[17]`) because the model very frequently
+# writes citations that way — requiring a non-word char before `[` (to dodge
+# `arr[1]`) silently dropped those citations, leaving raw `[n]` that both fails to
+# render and reads like array indexing. Genuine code/array syntax is instead
+# protected by the code-region carve-out below; an unresolved ordinal drops
+# harmlessly. Adjacent citations `[1][2]` are each rewritten.
+_ORDINAL = re.compile(r"\[\s*(\d+)\s*\]")
+
+
+def normalize_citations(text: str, registry: CitationRegistry) -> str:
+    """Replace each ``[n]`` with its resolved marker; drop the unresolved."""
+    if not text:
+        return text
+
+    rewrite = _ordinal_rewriter(registry)
+    return _outside_code(text, lambda span: _ORDINAL.sub(rewrite, span))
+
+
+def _ordinal_rewriter(registry: CitationRegistry) -> Callable[[re.Match[str]], str]:
+    """Build the substitution that turns one ordinal into a marker (or drops it)."""
+
+    def rewrite(match: re.Match[str]) -> str:
+        entry = registry.resolve(int(match.group(1)))
+        payload = to_frontend_payload(entry) if entry else None
+        return f"[citation:{payload}]" if payload is not None else ""
+
+    return rewrite
+
+
+def _outside_code(text: str, transform: Callable[[str], str]) -> str:
+    """Apply ``transform`` to non-code spans only; code regions pass through verbatim."""
+    parts = []
+    last = 0
+    for region in _CODE_REGION.finditer(text):
+        parts.append(transform(text[last : region.start()]))
+        parts.append(region.group(0))
+        last = region.end()
+    parts.append(transform(text[last:]))
+    return "".join(parts)
+
+
+__all__ = ["normalize_citations"]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/registry.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/registry.py
@@ -0,0 +1,91 @@
+"""Maps the model-facing ``[n]`` to its source.
+
+Pydantic for reliable serialization in checkpointed, cross-agent state.
+"""
+
+from __future__ import annotations
+
+import json
+from typing import Any
+
+from pydantic import BaseModel, Field
+
+from .models import CitationEntry, CitationSourceType
+
+
+def make_key(source_type: CitationSourceType, locator: dict[str, Any]) -> str:
+    """Stable, order-insensitive dedup key; ``source_type`` prefix avoids cross-kind collisions."""
+    type_value = (
+        source_type.value
+        if isinstance(source_type, CitationSourceType)
+        else str(source_type)
+    )
+    return f"{type_value}|{json.dumps(locator, sort_keys=True, default=str)}"
+
+
+class CitationRegistry(BaseModel):
+    """Per-conversation ``[n]`` ↔ unit map (find-or-create, monotonic)."""
+
+    by_n: dict[int, CitationEntry] = Field(default_factory=dict)
+    by_key: dict[str, int] = Field(default_factory=dict)
+    next_n: int = 1
+
+    def register(
+        self,
+        source_type: CitationSourceType,
+        locator: dict[str, Any],
+        display: dict[str, Any] | None = None,
+    ) -> int:
+        """Return the ``[n]`` for this unit, minting a new one only if unseen."""
+        key = make_key(source_type, locator)
+        existing = self.by_key.get(key)
+        if existing is not None:
+            return existing
+
+        n = self.next_n
+        self.by_n[n] = CitationEntry(
+            n=n,
+            source_type=source_type,
+            locator=dict(locator),
+            display=dict(display or {}),
+        )
+        self.by_key[key] = n
+        self.next_n = n + 1
+        return n
+
+    def resolve(self, n: int) -> CitationEntry | None:
+        """Map ``[n]`` back to its source; unknown → ``None`` so bad citations drop."""
+        return self.by_n.get(n)
+
+    def merge(self, other: CitationRegistry) -> CitationRegistry:
+        """Union ``self`` with ``other`` (find-or-create), returning a new registry.
+
+        Needed because separate branches (parent + subagents, parallel tool calls)
+        each register into a registry forked from the same base. A plain replace
+        would drop one branch's mappings; this unions them so ``[n]`` stays globally
+        consistent and no source is lost:
+
+        - A source already in ``self`` keeps its existing ``[n]``.
+        - A source only in ``other`` keeps its ``[n]`` when that slot is free.
+        - A collision (same ``[n]``, different source on each side) re-mints the
+          ``other`` entry to a fresh ``[n]`` and advances ``next_n`` past both.
+
+        Pure: neither registry is mutated. Entries are folded in ascending ``[n]``
+        order so the result is deterministic.
+        """
+        merged = self.model_copy(deep=True)
+        for n in sorted(other.by_n):
+            entry = other.by_n[n]
+            key = make_key(entry.source_type, entry.locator)
+            if key in merged.by_key:
+                continue
+            if n in merged.by_n:
+                merged.register(entry.source_type, entry.locator, entry.display)
+            else:
+                merged.by_n[n] = entry.model_copy(deep=True)
+                merged.by_key[key] = n
+                merged.next_n = max(merged.next_n, n + 1)
+        return merged
+
+
+__all__ = ["CitationRegistry", "make_key"]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/state.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/citations/state.py
@@ -0,0 +1,26 @@
+"""Read the conversation's ``CitationRegistry`` out of graph state.
+
+The registry is checkpointed, so it may come back as a live ``CitationRegistry``
+or a plain dict (after (de)serialization). Both the search tool and the read
+path load it the same way before registering new ``[n]`` and writing it back.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import Any
+
+from .registry import CitationRegistry
+
+
+def load_registry(state: Mapping[str, Any] | None) -> CitationRegistry:
+    """Return the registry from ``state``, tolerating a serialized dict or absence."""
+    raw = state.get("citation_registry") if state else None
+    if isinstance(raw, CitationRegistry):
+        return raw
+    if isinstance(raw, dict):
+        return CitationRegistry.model_validate(raw)
+    return CitationRegistry()
+
+
+__all__ = ["load_registry"]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/__init__.py
@@ -0,0 +1,25 @@
+"""Render citable documents for the model: one shape for search, read, and web.
+
+``render_document`` emits one ``<document title=… source=… view="excerpt|full">``
+block whose passages carry server-assigned ``[n]`` labels. ``render_search_context``
+wraps KB excerpt blocks in ``<retrieved_context>``; ``render_web_results`` wraps web
+excerpt blocks in ``<web_results>``. Both cite with the same ``[n]`` spine.
+"""
+
+from __future__ import annotations
+
+from .document import render_document
+from .models import DocumentView, RenderableDocument, RenderablePassage
+from .search_context import render_search_context
+from .source_label import source_label
+from .web_results import render_web_results
+
+__all__ = [
+    "DocumentView",
+    "RenderableDocument",
+    "RenderablePassage",
+    "render_document",
+    "render_search_context",
+    "render_web_results",
+    "source_label",
+]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/document.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/document.py
@@ -0,0 +1,70 @@
+"""Render one citable document as a ``<document>`` block.
+
+Every citable surface (KB search excerpts, KB full reads, web results) uses the
+same block; ``view`` and the passages shown are what differ. Each passage is
+registered for citation as it renders, so its ``[n]`` resolves back to its source
+later.
+"""
+
+from __future__ import annotations
+
+from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
+
+from .models import DocumentView, RenderableDocument, RenderablePassage
+
+
+def render_document(
+    document: RenderableDocument,
+    *,
+    view: DocumentView,
+    registry: CitationRegistry,
+) -> str | None:
+    """Render one ``<document>`` block, registering each passage for citation.
+
+    Returns ``None`` when the document has no passage to show. Mutates ``registry``
+    (find-or-create).
+    """
+    if not document.passages:
+        return None
+
+    lines = [_open_tag(document, view)]
+    for passage in document.passages:
+        lines.append(_render_passage(document, passage, registry))
+    lines.append("</document>")
+    return "\n".join(lines)
+
+
+def _open_tag(document: RenderableDocument, view: DocumentView) -> str:
+    attrs = [f'title="{_attr(document.title)}"']
+    if document.source:
+        attrs.append(f'source="{_attr(document.source)}"')
+    attrs.append(f'view="{view}"')
+    return f"<document {' '.join(attrs)}>"
+
+
+def _render_passage(
+    document: RenderableDocument,
+    passage: RenderablePassage,
+    registry: CitationRegistry,
+) -> str:
+    n = registry.register(
+        passage.source_type,
+        passage.locator,
+        {"title": document.title, "source": document.source},
+    )
+    label = f"  [{n}] "
+    body = passage.content.strip().replace("\n", "\n" + " " * len(label))
+    return f"{label}{body}"
+
+
+def _attr(value: str) -> str:
+    collapsed = " ".join(str(value).split())
+    return (
+        collapsed.replace("&", "&amp;")
+        .replace("<", "&lt;")
+        .replace(">", "&gt;")
+        .replace('"', "&quot;")
+    )
+
+
+__all__ = ["render_document"]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/models.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/models.py
@@ -0,0 +1,42 @@
+"""Inputs for rendering a citable document for the model.
+
+A passage is one citable unit — what the model cites with ``[n]``. A document
+groups the passages shown from one source. The same shapes feed every citable
+surface: KB search excerpts, KB full reads, and web results.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any, Literal
+
+from app.agents.chat.multi_agent_chat.shared.citations import CitationSourceType
+
+DocumentView = Literal["excerpt", "full"]
+"""How much of the source is shown: a search slice, or the whole object."""
+
+
+@dataclass(frozen=True)
+class RenderablePassage:
+    """One citable unit: what the model cites with ``[n]``.
+
+    ``locator`` is the source-specific identity registered for this passage (a KB
+    chunk's ``{document_id, chunk_id}``, a web result's ``{url}``). ``source_type``
+    selects how that locator resolves to a frontend payload.
+    """
+
+    content: str
+    locator: dict[str, Any]
+    source_type: CitationSourceType = CitationSourceType.KB_CHUNK
+
+
+@dataclass(frozen=True)
+class RenderableDocument:
+    """A source document and the passages to render from it, in order."""
+
+    title: str
+    source: str | None = None
+    passages: list[RenderablePassage] = field(default_factory=list)
+
+
+__all__ = ["DocumentView", "RenderableDocument", "RenderablePassage"]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/search_context.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/search_context.py
@@ -0,0 +1,53 @@
+"""Wrap search excerpts in the ``<retrieved_context>`` block.
+
+Each document renders through the shared ``render_document``; this module adds the
+container and the one-time header that teaches the model how to read and cite.
+"""
+
+from __future__ import annotations
+
+from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
+
+from .document import render_document
+from .models import RenderableDocument
+
+_HEADER = (
+    "These are excerpts from the user's knowledge base, selected for this query.\n"
+    "A document is a full source (a file, a Slack thread, a Notion page); each\n"
+    "<document> below is in excerpt view, so you are seeing only the chunks that\n"
+    "matched this query, not the whole source. Cite a chunk with its [n]. Read the\n"
+    "document for full context before claiming it only says X."
+)
+
+
+def render_search_context(
+    documents: list[RenderableDocument],
+    registry: CitationRegistry,
+) -> str | None:
+    """Render retrieved documents as excerpt blocks inside ``<retrieved_context>``.
+
+    Returns ``None`` when no document has a passage to show, so the caller can skip
+    the block. Mutates ``registry`` (find-or-create), so a passage seen again in a
+    later turn keeps its original ``[n]``.
+    """
+    blocks = [
+        block
+        for document in documents
+        if (
+            block := render_document(document, view="excerpt", registry=registry)
+        )
+        is not None
+    ]
+    if not blocks:
+        return None
+
+    return (
+        "<retrieved_context>\n"
+        + _HEADER
+        + "\n"
+        + "\n".join(blocks)
+        + "\n</retrieved_context>"
+    )
+
+
+__all__ = ["render_search_context"]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/source_label.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/source_label.py
@@ -0,0 +1,69 @@
+"""Build a short, honest source label for a knowledge-base document.
+
+A label orients the model about where a passage came from — e.g. ``Slack`` or
+``Web · docs.python.org``. It is derived only from the document's type and any
+URL in its metadata, so it never asserts detail we don't actually have. Search
+hits and full reads both build their ``<document source=…>`` from here, so the
+label a passage carries is identical whichever surface it arrives through.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+from urllib.parse import urlparse
+
+_FRIENDLY_NAMES = {
+    "FILE": "File",
+    "NOTE": "Note",
+    "EXTENSION": "Saved page",
+    "CRAWLED_URL": "Web",
+    "YOUTUBE_VIDEO": "YouTube",
+    "SLACK_CONNECTOR": "Slack",
+    "TEAMS_CONNECTOR": "Teams",
+    "DISCORD_CONNECTOR": "Discord",
+    "NOTION_CONNECTOR": "Notion",
+    "GITHUB_CONNECTOR": "GitHub",
+    "LINEAR_CONNECTOR": "Linear",
+    "JIRA_CONNECTOR": "Jira",
+    "CONFLUENCE_CONNECTOR": "Confluence",
+    "CLICKUP_CONNECTOR": "ClickUp",
+    "AIRTABLE_CONNECTOR": "Airtable",
+    "OBSIDIAN_CONNECTOR": "Obsidian",
+    "BOOKSTACK_CONNECTOR": "BookStack",
+}
+
+_URL_KEYS = ("url", "source_url", "link", "source")
+
+
+def source_label(document_type: str | None, metadata: dict[str, Any]) -> str | None:
+    """``Source`` or ``Source · host``; ``None`` when nothing is known."""
+    name = _friendly_name(document_type)
+    host = _url_host(metadata)
+    if name and host:
+        return f"{name} · {host}"
+    return name or host
+
+
+def _friendly_name(document_type: str | None) -> str | None:
+    if not document_type:
+        return None
+    return _FRIENDLY_NAMES.get(document_type, _prettify(document_type))
+
+
+def _prettify(document_type: str) -> str:
+    """Fallback name for unmapped types: ``GOOGLE_DRIVE_FILE`` → ``Google Drive``."""
+    words = document_type.replace("_CONNECTOR", "").replace("_FILE", "").split("_")
+    return " ".join(word.capitalize() for word in words if word)
+
+
+def _url_host(metadata: dict[str, Any]) -> str | None:
+    for key in _URL_KEYS:
+        value = metadata.get(key)
+        if isinstance(value, str) and value.startswith(("http://", "https://")):
+            host = urlparse(value).netloc
+            if host:
+                return host.removeprefix("www.")
+    return None
+
+
+__all__ = ["source_label"]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/web_results.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/document_render/web_results.py
@@ -0,0 +1,54 @@
+"""Wrap live web-search results in a ``<web_results>`` block.
+
+Each result renders through the shared ``render_document`` (excerpt view), so a
+web result is cited with ``[n]`` exactly like a knowledge-base passage. Only the
+container and header differ — they tell the model these came from the public web,
+not the user's workspace.
+"""
+
+from __future__ import annotations
+
+from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
+
+from .document import render_document
+from .models import RenderableDocument
+
+_HEADER = (
+    "These are live results from a public web search for this query. Each\n"
+    "<document> below is one result in excerpt view; cite a result with its [n]\n"
+    "after the statement it supports. Scrape the URL for full context before\n"
+    "making a definitive claim from a snippet."
+)
+
+
+def render_web_results(
+    documents: list[RenderableDocument],
+    registry: CitationRegistry,
+) -> str | None:
+    """Render web results as excerpt blocks inside ``<web_results>``.
+
+    Returns ``None`` when no result has content to show, so the caller can skip
+    the block. Mutates ``registry`` (find-or-create), so a URL seen again keeps
+    its original ``[n]``.
+    """
+    blocks = [
+        block
+        for document in documents
+        if (
+            block := render_document(document, view="excerpt", registry=registry)
+        )
+        is not None
+    ]
+    if not blocks:
+        return None
+
+    return (
+        "<web_results>\n"
+        + _HEADER
+        + "\n"
+        + "\n".join(blocks)
+        + "\n</web_results>"
+    )
+
+
+__all__ = ["render_web_results"]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/feature_flags.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/feature_flags.py
@@ -53,14 +53,6 @@ class AgentFeatureFlags:
    # Skills + subagents
    enable_skills: bool = True
    enable_specialized_subagents: bool = True
-    enable_kb_planner_runnable: bool = True
-
-    # KB retrieval mode — when False (default), the main agent retrieves KB
-    # content lazily via the on-demand ``search_knowledge_base`` tool and the
-    # expensive per-turn pre-injection (planner LLM + embed + hybrid search,
-    # ~2.3s) is skipped; explicit @-mentions are still surfaced cheaply. Set
-    # True to restore the original eager ``<priority_documents>`` pre-injection.
-    enable_kb_priority_preinjection: bool = False

    # Snapshot / revert
    enable_action_log: bool = True
@@ -118,9 +110,6 @@ class AgentFeatureFlags:
                enable_llm_tool_selector=False,
                enable_skills=False,
                enable_specialized_subagents=False,
-                enable_kb_planner_runnable=False,
-                # Full rollback restores the original eager KB pre-injection.
-                enable_kb_priority_preinjection=True,
                enable_action_log=False,
                enable_revert_route=False,
                enable_plugin_loader=False,
@@ -156,12 +145,6 @@ class AgentFeatureFlags:
            enable_specialized_subagents=_env_bool(
                "SURFSENSE_ENABLE_SPECIALIZED_SUBAGENTS", True
            ),
-            enable_kb_planner_runnable=_env_bool(
-                "SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE", True
-            ),
-            enable_kb_priority_preinjection=_env_bool(
-                "SURFSENSE_ENABLE_KB_PRIORITY_PREINJECTION", False
-            ),
            # Snapshot / revert
            enable_action_log=_env_bool("SURFSENSE_ENABLE_ACTION_LOG", True),
            enable_revert_route=_env_bool("SURFSENSE_ENABLE_REVERT_ROUTE", True),
@@ -198,7 +181,6 @@ class AgentFeatureFlags:
                self.enable_llm_tool_selector,
                self.enable_skills,
                self.enable_specialized_subagents,
-                self.enable_kb_planner_runnable,
                self.enable_action_log,
                self.enable_revert_route,
                self.enable_plugin_loader,
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/citation_state.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/citation_state.py
@@ -0,0 +1,50 @@
+"""Contribute the ``citation_registry`` state channel to a subagent.
+
+The conversation's ``[n]`` -> source registry lives on graph state behind a
+merge reducer (see :mod:`app.agents.chat.multi_agent_chat.shared.state.reducers`).
+The orchestrator and the KB subagent get that channel for free via the filesystem
+state schema, but a citable subagent that does *not* use the filesystem (e.g.
+``research``) still needs the channel declared so its tools can register ``[n]``
+via ``Command(update={"citation_registry": ...})`` and have it merge back up.
+
+This middleware adds *only* that channel — no tools, no behavior — so any subagent
+that mints citations can opt in without inheriting filesystem semantics.
+"""
+
+from __future__ import annotations
+
+from typing import Annotated, NotRequired
+
+from langchain.agents.middleware import AgentMiddleware
+from typing_extensions import TypedDict
+
+from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
+from app.agents.chat.multi_agent_chat.shared.state.reducers import (
+    _citation_registry_merge_reducer,
+)
+
+
+class CitationState(TypedDict):
+    """State carrying just the per-conversation ``[n]`` -> source registry."""
+
+    citation_registry: NotRequired[
+        Annotated[CitationRegistry, _citation_registry_merge_reducer]
+    ]
+
+
+class CitationStateMiddleware(AgentMiddleware):  # type: ignore[type-arg]
+    """Declare the ``citation_registry`` channel; no tools, no hooks."""
+
+    tools = ()
+    state_schema = CitationState
+
+
+def build_citation_state_mw() -> CitationStateMiddleware:
+    return CitationStateMiddleware()
+
+
+__all__ = [
+    "CitationState",
+    "CitationStateMiddleware",
+    "build_citation_state_mw",
+]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/backends/document_xml.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/backends/document_xml.py
@@ -1,103 +0,0 @@
-"""Shared XML builder for KB documents.
-
-Produces the citation-friendly XML used by every read of a knowledge-base
-document (lazy-loaded by :class:`KBPostgresBackend` and synthetic anonymous
-files). The XML carries a ``<chunk_index>`` near the top so the LLM can jump
-directly to matched-chunk line ranges via ``read_file(offset=…, limit=…)``.
-
-Extracted from the original ``knowledge_search.py`` so the backend, the
-priority middleware, and any future renderer share a single implementation.
-"""
-
-from __future__ import annotations
-
-import json
-from typing import Any
-
-
-def build_document_xml(
-    document: dict[str, Any],
-    matched_chunk_ids: set[int] | None = None,
-) -> str:
-    """Build citation-friendly XML with a ``<chunk_index>`` for smart seeking.
-
-    Args:
-        document: Dict shape produced by hybrid search / lazy-load helpers.
-            Expected keys: ``document`` (with ``id``, ``title``,
-            ``document_type``, ``metadata``) and ``chunks``
-            (list of ``{chunk_id, content}``).
-        matched_chunk_ids: Optional set of chunk IDs to flag as
-            ``matched="true"`` in the chunk index.
-    """
-    matched = matched_chunk_ids or set()
-
-    doc_meta = document.get("document") or {}
-    metadata = (doc_meta.get("metadata") or {}) if isinstance(doc_meta, dict) else {}
-    document_id = doc_meta.get("id", document.get("document_id", "unknown"))
-    document_type = doc_meta.get("document_type", document.get("source", "UNKNOWN"))
-    title = doc_meta.get("title") or metadata.get("title") or "Untitled Document"
-    url = (
-        metadata.get("url") or metadata.get("source") or metadata.get("page_url") or ""
-    )
-    metadata_json = json.dumps(metadata, ensure_ascii=False)
-
-    metadata_lines: list[str] = [
-        "<document>",
-        "<document_metadata>",
-        f"  <document_id>{document_id}</document_id>",
-        f"  <document_type>{document_type}</document_type>",
-        f"  <title><![CDATA[{title}]]></title>",
-        f"  <url><![CDATA[{url}]]></url>",
-        f"  <metadata_json><![CDATA[{metadata_json}]]></metadata_json>",
-        "</document_metadata>",
-        "",
-    ]
-
-    chunks = document.get("chunks") or []
-    chunk_entries: list[tuple[int | None, str]] = []
-    if isinstance(chunks, list):
-        for chunk in chunks:
-            if not isinstance(chunk, dict):
-                continue
-            chunk_id = chunk.get("chunk_id") or chunk.get("id")
-            chunk_content = str(chunk.get("content", "")).strip()
-            if not chunk_content:
-                continue
-            if chunk_id is None:
-                xml = f"  <chunk><![CDATA[{chunk_content}]]></chunk>"
-            else:
-                xml = f"  <chunk id='{chunk_id}'><![CDATA[{chunk_content}]]></chunk>"
-            chunk_entries.append((chunk_id, xml))
-
-    index_overhead = 1 + len(chunk_entries) + 1 + 1 + 1
-    first_chunk_line = len(metadata_lines) + index_overhead + 1
-
-    current_line = first_chunk_line
-    index_entry_lines: list[str] = []
-    for cid, xml_str in chunk_entries:
-        num_lines = xml_str.count("\n") + 1
-        end_line = current_line + num_lines - 1
-        matched_attr = ' matched="true"' if cid is not None and cid in matched else ""
-        if cid is not None:
-            index_entry_lines.append(
-                f'  <entry chunk_id="{cid}" lines="{current_line}-{end_line}"{matched_attr}/>'
-            )
-        else:
-            index_entry_lines.append(
-                f'  <entry lines="{current_line}-{end_line}"{matched_attr}/>'
-            )
-        current_line = end_line + 1
-
-    lines = metadata_lines.copy()
-    lines.append("<chunk_index>")
-    lines.extend(index_entry_lines)
-    lines.append("</chunk_index>")
-    lines.append("")
-    lines.append("<document_content>")
-    for _, xml_str in chunk_entries:
-        lines.append(xml_str)
-    lines.extend(["</document_content>", "</document>"])
-    return "\n".join(lines)
-
-
-__all__ = ["build_document_xml"]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/backends/kb_postgres.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/backends/kb_postgres.py
@@ -42,8 +42,15 @@ from langchain.tools import ToolRuntime
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession

-from app.agents.chat.multi_agent_chat.shared.middleware.filesystem.backends.document_xml import (
-    build_document_xml,
+from app.agents.chat.multi_agent_chat.shared.citations import (
+    CitationRegistry,
+    CitationSourceType,
+)
+from app.agents.chat.multi_agent_chat.shared.document_render import (
+    RenderableDocument,
+    RenderablePassage,
+    render_document,
+    source_label,
 )
 from app.agents.chat.runtime.path_resolver import (
    DOCUMENTS_ROOT,
@@ -59,6 +66,21 @@ _TEMP_PREFIX = "temp_"
 _GREP_MAX_TOTAL_MATCHES = 50
 _GREP_MAX_PER_DOC = 5

+_EMPTY_DOCUMENT_NOTICE = "(This document has no readable content.)"
+
+
+def render_full_document(
+    document: RenderableDocument,
+    registry: CitationRegistry,
+) -> str:
+    """Render a whole KB document (``view="full"``), registering each chunk's ``[n]``.
+
+    Falls back to a short notice when the document has no chunks, so a read never
+    returns blank.
+    """
+    rendered = render_document(document, view="full", registry=registry)
+    return rendered if rendered is not None else _EMPTY_DOCUMENT_NOTICE
+

 def _basename(path: str) -> str:
    return path.rsplit("/", 1)[-1]
@@ -127,13 +149,6 @@ class KBPostgresBackend(BackendProtocol):
        anon = self.state.get("kb_anon_doc")
        return anon if isinstance(anon, dict) else None

-    def _matched_chunk_ids(self, doc_id: int) -> set[int]:
-        mapping = self.state.get("kb_matched_chunk_ids") or {}
-        try:
-            return set(mapping.get(doc_id, []) or [])
-        except TypeError:
-            return set()
-
    @staticmethod
    def _file_data_size(file_data: dict[str, Any]) -> int:
        try:
@ -466,80 +481,93 @@ class KBPostgresBackend(BackendProtocol):
    def read(self, file_path: str, offset: int = 0, limit: int = 2000) -> str:  # type: ignore[override]
        return asyncio.run(self.aread(file_path, offset, limit))

-    async def _load_file_data(
+    async def aload_document(
         self,
         path: str,
-    ) -> tuple[dict[str, Any], int | None] | None:
-        """Lazy-load a virtual KB document into a deepagents ``FileData``.
+    ) -> tuple[RenderableDocument, int | None] | None:
+        """Lazy-load a virtual KB document as a :class:`RenderableDocument`.
 
-        Returns ``(file_data, doc_id)`` or ``None`` if the path doesn't map
-        to any known document. ``doc_id`` is ``None`` for the synthetic
-        anonymous document so the caller doesn't track it as a DB-backed file.
+        Returns ``(document, doc_id)`` with every chunk in document order, or
+        ``None`` if the path maps to no known document. ``doc_id`` is ``None``
+        for the synthetic anonymous upload so the caller doesn't track it as a
+        DB-backed file. Pure data — rendering and citation registration happen in
+        the caller (see :meth:`_load_file_data` and the ``read_file`` tool).
+
+        Anonymous-upload chunks without a ``chunk_id`` are skipped, and a chunk
+        whose ``content`` is missing or ``None`` becomes an empty passage.
         """
         anon = self._kb_anon_doc()
         if anon and str(anon.get("path") or "") == path:
-            doc_payload = {
-                "document_id": -1,
-                "chunks": list(anon.get("chunks") or []),
-                "matched_chunk_ids": [],
-                "document": {
-                    "id": -1,
-                    "title": anon.get("title") or "uploaded_document",
-                    "document_type": "FILE",
-                    "metadata": {"source": "anonymous_upload"},
-                },
-                "source": "FILE",
-            }
-            xml = build_document_xml(doc_payload, matched_chunk_ids=set())
-            file_data = create_file_data(xml)
-            return file_data, None
+            document = RenderableDocument(
+                title=str(anon.get("title") or "uploaded_document"),
+                source="Uploaded file",
+                passages=[
+                    RenderablePassage(
+                        # ``or ""`` so an explicit ``None`` content does not
+                        # render as the literal text "None".
+                        content=str(chunk.get("content") or ""),
+                        locator={
+                            "document_id": -1,
+                            "chunk_id": int(chunk["chunk_id"]),
+                        },
+                        source_type=CitationSourceType.ANON_CHUNK,
+                    )
+                    for chunk in (anon.get("chunks") or [])
+                    if isinstance(chunk, dict) and chunk.get("chunk_id") is not None
+                ],
+            )
+            return document, None
 
         if not path.startswith(DOCUMENTS_ROOT):
             return None
 
         async with shielded_async_session() as session:
-            document = await virtual_path_to_doc(
+            document_row = await virtual_path_to_doc(
                 session,
                 search_space_id=self.search_space_id,
                 virtual_path=path,
             )
-            if document is None:
+            if document_row is None:
                 return None
             chunk_rows = await session.execute(
                 select(Chunk.id, Chunk.content)
-                .where(Chunk.document_id == document.id)
+                .where(Chunk.document_id == document_row.id)
                 .order_by(Chunk.position, Chunk.id)
             )
-            chunks = [
-                {"chunk_id": row.id, "content": row.content} for row in chunk_rows.all()
-            ]
+            chunks = chunk_rows.all()
 
-        doc_payload = {
-            "document_id": document.id,
-            "chunks": chunks,
-            "matched_chunk_ids": list(self._matched_chunk_ids(document.id)),
-            "document": {
-                "id": document.id,
-                "title": document.title,
-                "document_type": (
-                    document.document_type.value
-                    if getattr(document, "document_type", None) is not None
-                    else "UNKNOWN"
-                ),
-                "metadata": dict(document.document_metadata or {}),
-            },
-            "source": (
-                document.document_type.value
-                if getattr(document, "document_type", None) is not None
-                else "UNKNOWN"
-            ),
-        }
-        xml = build_document_xml(
-            doc_payload,
-            matched_chunk_ids=self._matched_chunk_ids(document.id),
+        document_type = (
+            document_row.document_type.value
+            if getattr(document_row, "document_type", None) is not None
+            else None
         )
-        file_data = create_file_data(xml)
-        return file_data, document.id
+        metadata = dict(document_row.document_metadata or {})
+        document = RenderableDocument(
+            title=document_row.title,
+            source=source_label(document_type, metadata),
+            passages=[
+                RenderablePassage(
+                    content=row.content,
+                    locator={"document_id": document_row.id, "chunk_id": row.id},
+                )
+                for row in chunks
+            ],
+        )
+        return document, document_row.id
+
+    async def _load_file_data(
+        self,
+        path: str,
+    ) -> tuple[dict[str, Any], int | None] | None:
+        """Load the document at ``path`` and wrap its rendering as ``FileData``.
+
+        Serves the filesystem ops (move/edit existence + content staging) and the
+        backend's own ``aread``/``aedit``. Those callers have no conversation
+        registry to persist into; the citation-persisting read is the
+        ``read_file`` tool, which renders from :meth:`aload_document` against the
+        state registry.
+
+        Returns ``(file_data, doc_id)``, or ``None`` when :meth:`aload_document`
+        finds no document for ``path``.
+        """
+        result = await self.aload_document(path)
+        if result is None:
+            return None
+        document, doc_id = result
+        # A fresh registry per call: the ``[n]`` labels minted here are discarded.
+        scratch_registry = CitationRegistry()
+        text = render_full_document(document, scratch_registry)
+        return create_file_data(text), doc_id

    # ------------------------------------------------------------------ writes

@ -1037,4 +1065,5 @@ __all__ = [
    "KBPostgresBackend",
    "list_tree_listing",
    "paginate_listing",
+    "render_full_document",
 ]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/backends/resolver.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/backends/resolver.py
@ -37,8 +37,8 @@ def build_backend_resolver(

    In cloud mode the resolver returns a fresh :class:`KBPostgresBackend`
    bound to the current ``runtime`` so the backend can read staging state
-    (``staged_dirs``, ``pending_moves``, ``files`` cache, ``kb_anon_doc``,
-    ``kb_matched_chunk_ids``) for each tool call. When no ``search_space_id``
+    (``staged_dirs``, ``pending_moves``, ``files`` cache, ``kb_anon_doc``)
+    for each tool call. When no ``search_space_id``
    is provided, the resolver falls back to :class:`StateBackend` (used by
    sub-agents and tests that don't need DB-backed reads).

--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/system_prompt/cloud.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/system_prompt/cloud.py
@ -35,26 +35,14 @@ current working directory (`cwd`, default `/documents`).
  turn alongside any new/edited documents. Snapshot/revert is enabled
  for every destructive operation when action logging is on.

-## Reading Documents Efficiently
+## Reading Documents

-Documents are formatted as XML. Each document contains:
- `<document_metadata>` — title, type, URL, etc.
- `<chunk_index>` — a table of every chunk with its **line range** and a
-  `matched="true"` flag for chunks that matched the search query.
- `<document_content>` — the actual chunks in original document order.
-
-**Workflow**: when reading a large document, read the first ~20 lines to see
-the `<chunk_index>`, identify chunks marked `matched="true"`, then use
-`read_file(path, offset=<start_line>, limit=<lines>)` to jump directly to
-those sections instead of reading the entire file sequentially.
-
-Use `<chunk id='...'>` values as citation IDs in your answers.
-
-## Priority List
-
-You receive a `<priority_documents>` system message each turn listing the
-top-K paths most relevant to the user's query (by hybrid search). Read those
-first — matched sections are flagged inside each document's `<chunk_index>`.
+A knowledge-base document is returned as a `<document … view="full">` block —
+the whole source, with each passage labelled `[n]`. `view="full"` means you are
+seeing the complete document, not an excerpt. Use `read_file(path, offset, limit)`
+to page through a large document. Cite a passage by writing its `[n]` after the
+statement it supports — the same `[n]` that passage had in
+`search_knowledge_base` results.

 ## Workspace Tree

--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/system_prompt/desktop.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/system_prompt/desktop.py
@ -37,13 +37,4 @@ directory (`cwd`).
 - Cross-mount moves are not supported.
 - Desktop deletes hit disk immediately and cannot be undone via the
  agent's revert flow — confirm before calling `rm`/`rmdir`.
-
-## Priority List
-
-You may receive a `<priority_documents>` system message listing the top-K
-documents from the user's SurfSense knowledge base — these are cloud-ingested
-via connectors (Notion, Slack, etc.), not local files. Treat it as a hint:
-consult it when the task spans both local and cloud sources (e.g. drafting a
-local note from a Notion summary); skip when the task is purely about local
-files.
 """
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/read_file/description.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/read_file/description.py
@ -10,11 +10,11 @@ Usage:
 - By default, reads up to 100 lines from the beginning.
 - Use `offset` and `limit` for pagination when files are large.
 - Results include line numbers.
- Documents contain a `<chunk_index>` near the top listing every chunk with
-  its line range and a `matched="true"` flag for search-relevant chunks.
-  Read the index first, then jump to matched chunks with
-  `read_file(path, offset=<start_line>, limit=<num_lines>)`.
- Use chunk IDs (`<chunk id='...'>`) as citations in answers.
+- A knowledge-base document is returned as a `<document … view="full">` block:
+  the whole source, with each passage labelled `[n]`. `view="full"` means you are
+  seeing the complete document, not an excerpt.
+- Cite a passage by writing its `[n]` after the statement it supports — the same
+  `[n]` you would use for that passage from `search_knowledge_base`.
 """


--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/read_file/index.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/read_file/index.py
@ -4,14 +4,20 @@ from __future__ import annotations

 from typing import TYPE_CHECKING, Annotated, Any

-from deepagents.backends.utils import format_read_response, validate_path
+from deepagents.backends.utils import (
+    create_file_data,
+    format_read_response,
+    validate_path,
+)
 from langchain.tools import ToolRuntime
 from langchain_core.messages import ToolMessage
 from langchain_core.tools import BaseTool, StructuredTool
 from langgraph.types import Command

+from app.agents.chat.multi_agent_chat.shared.citations import load_registry
 from app.agents.chat.multi_agent_chat.shared.middleware.filesystem.backends.kb_postgres import (
    KBPostgresBackend,
+    render_full_document,
 )
 from app.agents.chat.multi_agent_chat.shared.state.filesystem_state import (
    SurfSenseFilesystemState,
@ -55,10 +61,12 @@ def create_read_file_tool(mw: SurfSenseFilesystemMiddleware) -> BaseTool:

        backend = mw._get_backend(runtime)
        if isinstance(backend, KBPostgresBackend):
-            loaded = await backend._load_file_data(validated)
+            loaded = await backend.aload_document(validated)
            if loaded is None:
                return f"Error: File '{validated}' not found"
-            file_data, doc_id = loaded
+            document, doc_id = loaded
+            registry = load_registry(runtime.state)
+            file_data = create_file_data(render_full_document(document, registry))
            rendered = format_read_response(file_data, offset, limit)
            update: dict[str, Any] = {
                "files": {validated: file_data},
@ -68,6 +76,7 @@ def create_read_file_tool(mw: SurfSenseFilesystemMiddleware) -> BaseTool:
                        tool_call_id=runtime.tool_call_id,
                    )
                ],
+                "citation_registry": registry,
            }
            if doc_id is not None:
                update["doc_id_by_path"] = {validated: doc_id}
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/kb_context_projection.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/kb_context_projection.py
@ -1,4 +1,4 @@
-"""Project ``workspace_tree_text`` + ``kb_priority`` from state into SystemMessages."""
+"""Project ``workspace_tree_text`` from state into a SystemMessage."""

 from __future__ import annotations

@ -14,18 +14,15 @@ from app.agents.chat.multi_agent_chat.shared.state.filesystem_state import (
 )
 from app.utils.perf import get_perf_logger

-from .knowledge_search import _render_priority_message
-
 _perf_log = get_perf_logger()


 class KbContextProjectionMiddleware(AgentMiddleware):  # type: ignore[type-arg]
-    """Emit ``<workspace_tree>`` + ``<priority_documents>`` from shared state.
+    """Emit the ``<workspace_tree>`` from shared state.

    Read-only consumer: no DB, no LLM, no state writes. The orchestrator's
-    renderer middlewares populate the source fields; this projection lets any
-    agent (orchestrator or subagent) put the same content in front of its
-    own LLM call.
+    ``KnowledgeTreeMiddleware`` populates ``workspace_tree_text``; this
+    projection lets a subagent put the same tree in front of its own LLM call.
    """

    tools = ()
@ -39,28 +36,19 @@ class KbContextProjectionMiddleware(AgentMiddleware):  # type: ignore[type-arg]
        del runtime
        start = time.perf_counter()
        tree_text = state.get("workspace_tree_text")
-        priority = state.get("kb_priority")
-        if not tree_text and not priority:
+        if not tree_text:
            _perf_log.info(
-                "[kb_context_projection] tree=0 priority=0 elapsed=%.3fs",
+                "[kb_context_projection] tree=0 elapsed=%.3fs",
                time.perf_counter() - start,
            )
            return None

        messages = list(state.get("messages") or [])
        insert_at = max(len(messages) - 1, 0)
-        tree_chars = 0
-        if tree_text:
-            tree_chars = len(tree_text)
-            messages.insert(insert_at, SystemMessage(content=tree_text))
-        priority_count = 0
-        if priority:
-            priority_count = len(priority) if hasattr(priority, "__len__") else 1
-            messages.insert(insert_at, _render_priority_message(priority))
+        messages.insert(insert_at, SystemMessage(content=tree_text))
        _perf_log.info(
-            "[kb_context_projection] tree_chars=%d priority_items=%d elapsed=%.3fs",
-            tree_chars,
-            priority_count,
+            "[kb_context_projection] tree_chars=%d elapsed=%.3fs",
+            len(tree_text),
            time.perf_counter() - start,
        )
        return {"messages": messages}
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/knowledge_search.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/knowledge_search.py
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/retrieval/__init__.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/retrieval/__init__.py
@ -0,0 +1,18 @@
+"""Knowledge-base retrieval: hybrid search rendered as citable evidence.
+
+Public surface is the service (``search_knowledge_base_context``) and its input
+value object (``SearchScope``); the rest are building blocks.
+"""
+
+from __future__ import annotations
+
+from .models import ChunkHit, DocumentHit, SearchScope
+from .service import build_context, search_knowledge_base_context
+
+__all__ = [
+    "ChunkHit",
+    "DocumentHit",
+    "SearchScope",
+    "build_context",
+    "search_knowledge_base_context",
+]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/retrieval/adapter.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/retrieval/adapter.py
@ -0,0 +1,29 @@
+"""Turn retriever ``DocumentHit``s into renderable documents."""
+
+from __future__ import annotations
+
+from app.agents.chat.multi_agent_chat.shared.document_render import (
+    RenderableDocument,
+    RenderablePassage,
+    source_label,
+)
+
+from .models import DocumentHit
+
+
+def to_renderable_document(hit: DocumentHit) -> RenderableDocument:
+    """Convert one search hit into the document shape the renderer consumes.
+
+    Each chunk on the hit becomes one passage whose locator carries the owning
+    document id and the chunk id.
+    """
+    label = source_label(hit.document_type, hit.metadata)
+    passages = []
+    for chunk in hit.chunks:
+        locator = {"document_id": hit.document_id, "chunk_id": chunk.chunk_id}
+        passages.append(RenderablePassage(content=chunk.content, locator=locator))
+    return RenderableDocument(title=hit.title, source=label, passages=passages)
+
+
+__all__ = ["to_renderable_document"]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/retrieval/hybrid_search.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/retrieval/hybrid_search.py
@ -0,0 +1,250 @@
+"""Hybrid (semantic + keyword) chunk search with reciprocal-rank fusion.
+
+Only matched chunks are citable, so the fused result already holds every passage
+shown — there is no second per-document fetch. Returns the top ``top_k``
+documents, each carrying its matched chunks in reading order.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import contextlib
+import time
+
+from sqlalchemy import func, select, text
+from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy.orm import joinedload
+
+from app.config import config
+from app.db import Chunk, Document, DocumentType
+from app.observability import metrics, otel
+from app.utils.perf import get_perf_logger
+
+from .models import ChunkHit, DocumentHit, SearchScope
+
+_RRF_K = 60
+_CANDIDATE_MULTIPLIER = 5  # fused-chunk pool size relative to top_k
+_MAX_PASSAGES_PER_DOC = 12
+_SURFACE = "chunks"
+
+
+async def search_chunks(
+    db_session: AsyncSession,
+    *,
+    search_space_id: int,
+    query: str,
+    scope: SearchScope,
+    top_k: int,
+    query_embedding: list[float] | None = None,
+) -> list[DocumentHit]:
+    """Instrumented entry point for the hybrid chunk search.
+
+    Runs :func:`_search` inside a KB-search span and records the elapsed time
+    as a metric whether or not the search succeeds. On success it also tags the
+    span with the result count and writes a perf-log line, then returns the
+    hits. Exceptions from :func:`_search` propagate to the caller.
+    """
+    t0 = time.perf_counter()
+    span_extra = {"search.surface": _SURFACE, "search.mode": "hybrid"}
+    with otel.kb_search_span(
+        search_space_id=search_space_id,
+        query_chars=len(query),
+        extra=span_extra,
+    ) as span:
+        try:
+            hits = await _search(
+                db_session,
+                search_space_id=search_space_id,
+                query=query,
+                scope=scope,
+                top_k=top_k,
+                query_embedding=query_embedding,
+            )
+        finally:
+            # Runs on failure too, so failed searches are still timed.
+            elapsed_ms = (time.perf_counter() - t0) * 1000
+            metrics.record_kb_search_duration(
+                elapsed_ms, search_space_id=search_space_id, surface=_SURFACE
+            )
+        hit_count = len(hits)
+        span.set_attribute("result.count", hit_count)
+        get_perf_logger().info(
+            "[chunk_search] hybrid in %.3fs docs=%d space=%d",
+            elapsed_ms / 1000,
+            hit_count,
+            search_space_id,
+        )
+        return hits
+
+
+async def _search(
+    db_session: AsyncSession,
+    *,
+    search_space_id: int,
+    query: str,
+    scope: SearchScope,
+    top_k: int,
+    query_embedding: list[float] | None,
+) -> list[DocumentHit]:
+    """Fusion search itself: resolve scope, fuse the two legs, group by document.
+
+    Returns an empty list, without embedding the query or touching the
+    database, when ``top_k`` is not positive or when document types were
+    requested but none of them is recognised. When ``query_embedding`` is
+    ``None`` the query is embedded in a worker thread first.
+    """
+    # A non-positive top_k can never select a document. Returning here also
+    # keeps a negative value from reaching SQL ``LIMIT``, which rejects it.
+    if top_k <= 0:
+        return []
+
+    document_types = _resolve_document_types(scope.document_types)
+    if document_types == []:  # types requested, none recognized → nothing matches
+        return []
+
+    if query_embedding is None:
+        query_embedding = await asyncio.to_thread(
+            config.embedding_model_instance.embed, query
+        )
+
+    conditions = _base_conditions(search_space_id, scope, document_types)
+    rows = await _fused_chunks(
+        db_session,
+        query=query,
+        query_embedding=query_embedding,
+        conditions=conditions,
+        candidate_pool=top_k * _CANDIDATE_MULTIPLIER,
+    )
+    return _group_into_documents(rows, top_k=top_k)
+
+
+def _resolve_document_types(
+    raw: tuple[str, ...] | None,
+) -> list[DocumentType] | None:
+    """Translate type names into ``DocumentType`` members.
+
+    Returns ``None`` when no names were given (no filter). Otherwise returns
+    the members found by name, skipping unknown names, so the list is empty
+    when none of the names is recognised.
+    """
+    if not raw:
+        return None
+    members: list[DocumentType] = []
+    for type_name in raw:
+        try:
+            member = DocumentType[type_name]
+        except KeyError:
+            continue  # not a member name: ignore it
+        members.append(member)
+    return members
+
+
+def _base_conditions(
+    search_space_id: int,
+    scope: SearchScope,
+    document_types: list[DocumentType] | None,
+) -> list:
+    """Build the document-level filters that both search legs apply.
+
+    Always restricts to the search space and excludes documents whose status
+    state is ``"deleting"`` (a missing state is treated as ``"ready"``). The
+    type, id and ``updated_at`` range filters are added only when the matching
+    scope value is set; both date bounds are inclusive.
+    """
+    in_space = Document.search_space_id == search_space_id
+    not_deleting = (
+        func.coalesce(Document.status["state"].astext, "ready") != "deleting"
+    )
+    filters = [in_space, not_deleting]
+
+    if document_types:
+        filters.append(Document.document_type.in_(document_types))
+    if scope.document_ids:
+        filters.append(Document.id.in_(scope.document_ids))
+
+    start, end = scope.start_date, scope.end_date
+    if start is not None:
+        filters.append(Document.updated_at >= start)
+    if end is not None:
+        filters.append(Document.updated_at <= end)
+    return filters
+
+
+async def _fused_chunks(
+    db_session: AsyncSession,
+    *,
+    query: str,
+    query_embedding: list[float],
+    conditions: list,
+    candidate_pool: int,
+):
+    """Run semantic + keyword legs and fuse them with RRF; return (Chunk, score) rows.
+
+    Each leg is a CTE that ranks at most ``candidate_pool`` chunks whose
+    document passes ``conditions``. The legs are combined with a FULL OUTER
+    JOIN on chunk id, so a chunk found by only one leg is kept and its missing
+    leg contributes ``0.0``. The fused score is ``1 / (_RRF_K + rank)`` summed
+    over both legs; at most ``candidate_pool`` rows are returned, ordered by
+    descending score.
+    """
+    # Full-text expressions shared by the keyword leg's filter and ranking.
+    tsvector = func.to_tsvector("english", Chunk.content)
+    tsquery = func.plainto_tsquery("english", query)
+
+    # Semantic leg: rank chunks by ``embedding <=> query_embedding``, ascending.
+    # NOTE(review): ``<=>`` is presumably the pgvector cosine-distance
+    # operator — confirm against the column type of ``Chunk.embedding``.
+    semantic = (
+        select(
+            Chunk.id,
+            func.rank()
+            .over(order_by=Chunk.embedding.op("<=>")(query_embedding))
+            .label("rank"),
+        )
+        .join(Document, Chunk.document_id == Document.id)
+        .where(*conditions)
+        .order_by(Chunk.embedding.op("<=>")(query_embedding))
+        .limit(candidate_pool)
+        .cte("semantic_search")
+    )
+
+    # Keyword leg: only chunks whose text matches the query (``@@``), ranked
+    # by ``ts_rank_cd`` from highest to lowest.
+    keyword = (
+        select(
+            Chunk.id,
+            func.rank()
+            .over(order_by=func.ts_rank_cd(tsvector, tsquery).desc())
+            .label("rank"),
+        )
+        .join(Document, Chunk.document_id == Document.id)
+        .where(*conditions)
+        .where(tsvector.op("@@")(tsquery))
+        .order_by(func.ts_rank_cd(tsvector, tsquery).desc())
+        .limit(candidate_pool)
+        .cte("keyword_search")
+    )
+
+    # Fuse: a rank missing from one leg is NULL after the outer join, which
+    # ``coalesce`` turns into a 0.0 contribution for that leg.
+    fused = (
+        select(
+            Chunk,
+            (
+                func.coalesce(1.0 / (_RRF_K + semantic.c.rank), 0.0)
+                + func.coalesce(1.0 / (_RRF_K + keyword.c.rank), 0.0)
+            ).label("score"),
+        )
+        .select_from(
+            semantic.outerjoin(keyword, semantic.c.id == keyword.c.id, full=True)
+        )
+        # Either leg may supply the id, so join the Chunk row on whichever is set.
+        .join(Chunk, Chunk.id == func.coalesce(semantic.c.id, keyword.c.id))
+        # Callers read ``chunk.document``; load it together with the chunk.
+        .options(joinedload(Chunk.document))
+        .order_by(text("score DESC"))
+        .limit(candidate_pool)
+    )
+
+    result = await db_session.execute(fused)
+    return result.all()
+
+
+def _group_into_documents(rows, *, top_k: int) -> list[DocumentHit]:
+    """Bucket ``(chunk, score)`` rows by document and keep the first ``top_k``.
+
+    Rows are consumed in the order given. Documents keep the order in which
+    they are first seen, and the score of that first row becomes the document
+    score. Each kept document's chunks are passed through
+    :func:`_reading_order`.
+    """
+    grouped: dict[int, list[ChunkHit]] = {}
+    documents: dict[int, Document] = {}
+    top_scores: dict[int, float] = {}
+
+    for chunk, score in rows:
+        value = float(score)
+        owner = chunk.document
+        doc_id = owner.id
+        if doc_id not in grouped:
+            # First sighting: remember the document and its leading score.
+            grouped[doc_id] = []
+            documents[doc_id] = owner
+            top_scores[doc_id] = value
+        grouped[doc_id].append(
+            ChunkHit(
+                chunk_id=chunk.id,
+                content=chunk.content,
+                position=chunk.position,
+                score=value,
+            )
+        )
+
+    hits: list[DocumentHit] = []
+    # Dict insertion order is first-seen order, so slicing keeps the leaders.
+    for doc_id in list(grouped)[:top_k]:
+        owner = documents[doc_id]
+        hits.append(
+            DocumentHit(
+                document_id=doc_id,
+                title=owner.title,
+                document_type=_type_value(owner),
+                metadata=owner.document_metadata or {},
+                score=top_scores[doc_id],
+                chunks=_reading_order(grouped[doc_id]),
+            )
+        )
+    return hits
+
+
+def _reading_order(chunks: list[ChunkHit]) -> list[ChunkHit]:
+    """Keep the most relevant chunks, then present them in document order.
+
+    At most ``_MAX_PASSAGES_PER_DOC`` chunks survive, chosen by descending
+    score. They are returned sorted by ``(position, chunk_id)``.
+    """
+    most_relevant = sorted(chunks, key=lambda c: c.score, reverse=True)[
+        :_MAX_PASSAGES_PER_DOC
+    ]
+    # Break position ties by chunk id. With position alone, chunks sharing a
+    # position would stay in score order (the sort is stable), which is not a
+    # document order.
+    return sorted(most_relevant, key=lambda c: (c.position, c.chunk_id))
+
+
+def _type_value(document: Document) -> str | None:
+    """Return ``document.document_type.value``, or ``None`` if the type is unset."""
+    kind = getattr(document, "document_type", None)
+    if kind is None:
+        return None
+    return kind.value
+
+
+__all__ = ["search_chunks"]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/retrieval/models.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/retrieval/models.py
@ -0,0 +1,47 @@
+"""Value objects for knowledge-base retrieval: the query scope and raw hits.
+
+``SearchScope`` is the optional filter a search runs under. ``DocumentHit`` /
+``ChunkHit`` are the retriever's typed output — matched chunks grouped by their
+document — which the adapter turns into renderable ``RenderableDocument``s.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from datetime import datetime
+from typing import Any
+
+
+@dataclass(frozen=True)
+class SearchScope:
+    """Filters narrowing a search; ``None``/empty means "whole knowledge base"."""
+
+    # Document type names to restrict the search to.
+    document_types: tuple[str, ...] | None = None
+    # Ids of the only documents the search may return.
+    document_ids: tuple[int, ...] | None = None
+    # Lower bound for the date filter; ``None`` leaves it open.
+    start_date: datetime | None = None
+    # Upper bound for the date filter; ``None`` leaves it open.
+    end_date: datetime | None = None
+
+
+@dataclass(frozen=True)
+class ChunkHit:
+    """One matched chunk, with the position that orders it within its document."""
+
+    # Identifier of the matched chunk.
+    chunk_id: int
+    # Text of the chunk.
+    content: str
+    # Ordering key of the chunk inside its document.
+    position: int
+    # Relevance score the search assigned to this chunk.
+    score: float
+
+
+@dataclass(frozen=True)
+class DocumentHit:
+    """A document and the chunks that matched the query, ordered by position."""
+
+    # Identifier of the document the chunks belong to.
+    document_id: int
+    # Document title.
+    title: str
+    # Document type as a string, or ``None`` when the document has no type.
+    document_type: str | None
+    # Free-form document metadata.
+    metadata: dict[str, Any]
+    # Relevance score of the document as a whole.
+    score: float
+    # Matched chunks; a fresh empty list per instance by default.
+    chunks: list[ChunkHit] = field(default_factory=list)
+
+
+__all__ = ["ChunkHit", "DocumentHit", "SearchScope"]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/retrieval/reranking.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/retrieval/reranking.py
@ -0,0 +1,51 @@
+"""Reorder retrieved documents with the configured reranker (no-op if disabled).
+
+Ranking is by concatenated matched-chunk content; ``DocumentHit`` order is
+rewritten to follow the reranker's result.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from .models import DocumentHit
+
+if TYPE_CHECKING:
+    from app.services.reranker_service import RerankerService
+
+
+def rerank_hits(
+    query: str,
+    hits: list[DocumentHit],
+    reranker: RerankerService | None,
+) -> list[DocumentHit]:
+    """Reorder ``hits`` to follow the reranker's ranking.
+
+    ``hits`` is returned untouched when there is no reranker, when there are
+    fewer than two hits, or when the reranker's answer does not map back to
+    exactly as many hits as were sent.
+    """
+    if reranker is None or len(hits) < 2:
+        return hits
+
+    by_id = {hit.document_id: hit for hit in hits}
+    payload = [_as_document(hit) for hit in hits]
+
+    reordered = []
+    for doc in reranker.rerank_documents(query, payload):
+        # Ignore entries whose id is missing or is not one we sent.
+        if doc.get("document_id") in by_id:
+            reordered.append(by_id[doc["document_id"]])
+
+    # A count mismatch means ids were dropped or garbled: keep the input order.
+    if len(reordered) != len(hits):
+        return hits
+    return reordered
+
+
+def _as_document(hit: DocumentHit) -> dict[str, Any]:
+    """Build the dict handed to ``RerankerService.rerank_documents`` for one hit.
+
+    ``content`` is the hit's chunk texts joined by a blank line; the nested
+    ``document`` entry repeats the id alongside the title and document type.
+    """
+    joined_content = "\n\n".join(chunk.content for chunk in hit.chunks)
+    summary = {
+        "id": hit.document_id,
+        "title": hit.title,
+        "document_type": hit.document_type,
+    }
+    return {
+        "document_id": hit.document_id,
+        "content": joined_content,
+        "score": hit.score,
+        "document": summary,
+    }
+
+
+__all__ = ["rerank_hits"]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/retrieval/service.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/retrieval/service.py
@ -0,0 +1,66 @@
+"""Search the knowledge base and render it as model-facing ``<retrieved_context>``.
+
+The retrieval spine end to end: hybrid search → rerank → adapt → render, with
+each shown passage registered for ``[n]`` citation along the way.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
+from app.agents.chat.multi_agent_chat.shared.document_render import (
+    render_search_context,
+)
+
+from .adapter import to_renderable_document
+from .hybrid_search import search_chunks
+from .models import DocumentHit, SearchScope
+from .reranking import rerank_hits
+
+if TYPE_CHECKING:
+    from app.services.reranker_service import RerankerService
+
+_DEFAULT_TOP_K = 10
+
+
+async def search_knowledge_base_context(
+    db_session: AsyncSession,
+    *,
+    search_space_id: int,
+    query: str,
+    registry: CitationRegistry,
+    scope: SearchScope | None = None,
+    reranker: RerankerService | None = None,
+    top_k: int = _DEFAULT_TOP_K,
+) -> str | None:
+    """Search the knowledge base for ``query`` and render the evidence found.
+
+    Runs the hybrid chunk search (under an unrestricted scope when ``scope`` is
+    omitted) and hands the hits to :func:`build_context`, which registers each
+    shown passage's ``[n]`` in ``registry``.
+
+    Returns ``None`` when nothing matched, so the caller can skip the block.
+    """
+    effective_scope = scope or SearchScope()
+    hits = await search_chunks(
+        db_session,
+        search_space_id=search_space_id,
+        query=query,
+        scope=effective_scope,
+        top_k=top_k,
+    )
+    return build_context(query, hits, registry, reranker=reranker)
+
+
+def build_context(
+    query: str,
+    hits: list[DocumentHit],
+    registry: CitationRegistry,
+    *,
+    reranker: RerankerService | None = None,
+) -> str | None:
+    """Turn already-retrieved ``hits`` into rendered context.
+
+    Three steps, none of which touches the database: rerank the hits, adapt
+    each one to a renderable document, then render them against ``registry``.
+    """
+    reranked = rerank_hits(query, hits, reranker)
+    renderables = list(map(to_renderable_document, reranked))
+    return render_search_context(renderables, registry)
+
+
+__all__ = ["build_context", "search_knowledge_base_context"]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/filesystem_state.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/filesystem_state.py
@ -13,9 +13,8 @@ extra fields needed to implement Postgres-backed virtual filesystem semantics:
 * ``dirty_paths`` — paths whose state file content differs from DB.
 * ``dirty_path_tool_calls`` — sidecar map ``path -> latest tool_call_id`` for
  dirty paths; used to bind the per-path snapshot to an action_id.
-* ``kb_priority`` — top-K priority hints rendered into a system message.
-* ``kb_matched_chunk_ids`` — internal hand-off for matched-chunk highlighting.
 * ``kb_anon_doc`` — Redis-loaded anonymous document (if any).
+* ``citation_registry`` — per-conversation ``[n]`` -> source map for citations.
 * ``tree_version`` — bumped by persistence; invalidates the tree render cache.
 * ``workspace_tree_text`` — pre-rendered ``<workspace_tree>`` body for the turn.

@ -30,9 +29,11 @@ from typing import Annotated, Any, NotRequired
 from deepagents.middleware.filesystem import FilesystemState
 from typing_extensions import TypedDict

+from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
 from app.agents.chat.multi_agent_chat.shared.receipts.receipt import Receipt
 from app.agents.chat.multi_agent_chat.shared.state.reducers import (
    _add_unique_reducer,
+    _citation_registry_merge_reducer,
    _dict_merge_with_tombstones_reducer,
    _int_counter_merge_reducer,
    _list_append_reducer,
@ -67,14 +68,6 @@ class PendingDelete(TypedDict, total=False):
    tool_call_id: str


-class KbPriorityEntry(TypedDict, total=False):
-    path: str
-    score: float
-    document_id: int | None
-    title: str
-    mentioned: bool
-
-
 class KbAnonDoc(TypedDict, total=False):
    """In-memory anonymous-session document loaded from Redis."""

@ -159,15 +152,16 @@ class SurfSenseFilesystemState(FilesystemState):
    to the latest action_id (the one the user is most likely to revert).
    """

-    kb_priority: NotRequired[Annotated[list[KbPriorityEntry], _replace_reducer]]
-    """Top-K priority hints rendered as a system message before the user turn."""
-
-    kb_matched_chunk_ids: NotRequired[Annotated[dict[int, list[int]], _replace_reducer]]
-    """Internal: ``Document.id`` -> list of matched chunk IDs from hybrid search."""
-
    kb_anon_doc: NotRequired[Annotated[KbAnonDoc | None, _replace_reducer]]
    """Anonymous-session document loaded from Redis (read-only, no DB row)."""

+    citation_registry: NotRequired[
+        Annotated[CitationRegistry, _citation_registry_merge_reducer]
+    ]
+    """Per-conversation ``[n]`` -> source map; written by retrieval, read by the
+    normalizer. Merges (union, find-or-create) so parallel/subagent registrations
+    stay globally consistent instead of clobbering each other."""
+
    tree_version: NotRequired[Annotated[int, _replace_reducer]]
    """Monotonically increasing counter; bumped when commits change the KB tree."""

@ -206,7 +200,6 @@ class SurfSenseFilesystemState(FilesystemState):

 __all__ = [
    "KbAnonDoc",
-    "KbPriorityEntry",
    "PendingDelete",
    "PendingMove",
    "SurfSenseFilesystemState",
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/reducers.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/reducers.py
@ -2,7 +2,7 @@

 These reducers back the extra state fields used by the cloud-mode filesystem
 agent (`cwd`, `staged_dirs`, `pending_moves`, `dirty_paths`, `doc_id_by_path`,
-`kb_priority`, `kb_matched_chunk_ids`, `kb_anon_doc`, `tree_version`).
+`kb_anon_doc`, `tree_version`).

 Tools mutate these fields ONLY via `Command(update={...})` returns; the
 reducers are responsible for merging successive updates atomically and for
@ -20,6 +20,8 @@ from __future__ import annotations

 from typing import Any, Final, TypeVar

+from app.agents.chat.multi_agent_chat.shared.citations import CitationRegistry
+
 _CLEAR: Final[str] = "\x00__SURFSENSE_FILESYSTEM_CLEAR__\x00"
 """Reset sentinel; pass it inside a list/dict update to request a reset.

@ -204,6 +206,41 @@ def _int_counter_merge_reducer(
    return base


+def _as_registry(value: Any) -> CitationRegistry | None:
+    """Normalize ``value`` to a ``CitationRegistry``; unsupported input -> ``None``.
+
+    An existing registry is returned as-is and a plain ``dict`` is validated
+    into a model with ``model_validate``. Dicts are accepted because the
+    checkpointer serializes ``Command.update`` via ``ormsgpack`` *before*
+    reducers run, so an update may no longer be a model instance.
+    """
+    if value is None or not isinstance(value, (CitationRegistry, dict)):
+        return None
+    if isinstance(value, CitationRegistry):
+        return value
+    return CitationRegistry.model_validate(value)
+
+
+def _citation_registry_merge_reducer(
+    left: Any,
+    right: Any,
+) -> CitationRegistry | None:
+    """Reducer for ``citation_registry``: merge updates rather than overwrite.
+
+    Both sides are coerced with ``_as_registry`` (``right`` first). If only one
+    side yields a registry it is returned unchanged, and ``None`` is returned
+    when neither does; otherwise the result is ``left.merge(right)``. The
+    union / find-or-create behaviour that keeps ``[n]`` consistent across
+    branches is implemented by :meth:`CitationRegistry.merge`.
+    """
+    incoming = _as_registry(right)
+    current = _as_registry(left)
+    if incoming is not None and current is not None:
+        return current.merge(incoming)
+    # At most one side is usable: prefer ``left``'s registry, else ``right``'s.
+    return current if incoming is None else incoming
+
+
 def _initial_filesystem_state() -> dict[str, Any]:
    """Default empty values for SurfSense filesystem state fields.

@ -221,8 +258,6 @@ def _initial_filesystem_state() -> dict[str, Any]:
        "doc_id_by_path": {},
        "dirty_paths": [],
        "dirty_path_tool_calls": {},
-        "kb_priority": [],
-        "kb_matched_chunk_ids": {},
        "kb_anon_doc": None,
        "tree_version": 0,
    }
@ -231,6 +266,7 @@ def _initial_filesystem_state() -> dict[str, Any]:
 __all__ = [
    "_CLEAR",
    "_add_unique_reducer",
+    "_citation_registry_merge_reducer",
    "_dict_merge_with_tombstones_reducer",
    "_initial_filesystem_state",
    "_int_counter_merge_reducer",
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/deliverables/tools/knowledge_base.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/deliverables/tools/knowledge_base.py
@ -1,762 +0,0 @@
-"""
-Knowledge base search tool for the SurfSense agent.
-
-This module provides:
- Connector constants and normalization
- Async knowledge base search across multiple connectors
- Document formatting for LLM context
-"""
-
-import asyncio
-import contextlib
-import json
-import re
-import time
-from datetime import datetime
-from typing import Any
-
-from sqlalchemy.ext.asyncio import AsyncSession
-
-from app.db import NATIVE_TO_LEGACY_DOCTYPE, shielded_async_session
-from app.services.connector_service import ConnectorService
-from app.utils.perf import get_perf_logger
-
-# Connectors that call external live-search APIs. These are handled by the
-# ``web_search`` tool and must be excluded from knowledge-base searches.
-_LIVE_SEARCH_CONNECTORS: set[str] = {
-    "TAVILY_API",
-    "LINKUP_API",
-    "BAIDU_SEARCH_API",
-}
-
-# Patterns that indicate the query has no meaningful search signal.
-# plainto_tsquery('english', '*') produces an empty tsquery and an embedding
-# of '*' is random noise, so both keyword and semantic search degrade to
-# arbitrary ordering — large documents (many chunks) dominate by chance.
-_DEGENERATE_QUERY_RE = re.compile(
-    r"^[\s*?_.#@!\-/\\]+$"  # only wildcards, punctuation, whitespace
-)
-
-# Max chunks per document when doing a recency-based browse instead of
-# a real search.  We want breadth (many docs) over depth (many chunks).
-_BROWSE_MAX_CHUNKS_PER_DOC = 5
-
-
-def _is_degenerate_query(query: str) -> bool:
-    """Return True when the query carries no meaningful search signal.
-
-    Catches wildcard patterns (``*``, ``**``), empty / whitespace-only
-    strings, and single-character non-word tokens.  These queries cause
-    both keyword search (empty tsquery) and semantic search (meaningless
-    embedding) to return effectively random results.
-    """
-    stripped = query.strip()
-    if not stripped:
-        return True
-    return bool(_DEGENERATE_QUERY_RE.match(stripped))
-
-
-async def _browse_recent_documents(
-    search_space_id: int,
-    document_type: str | list[str] | None,
-    top_k: int,
-    start_date: datetime | None,
-    end_date: datetime | None,
-) -> list[dict[str, Any]]:
-    """Return the most-recent documents (recency-ordered, no search ranking).
-
-    Used as a fallback when the search query is degenerate (e.g. ``*``) and
-    semantic / keyword search would produce arbitrary results.  Returns
-    document-grouped dicts in the same shape as ``_combined_rrf_search``
-    so the rest of the pipeline works unchanged.
-    """
-    from sqlalchemy import select
-    from sqlalchemy.orm import joinedload
-
-    from app.db import Chunk, Document, DocumentType
-
-    perf = get_perf_logger()
-    t0 = time.perf_counter()
-
-    base_conditions = [Document.search_space_id == search_space_id]
-
-    if document_type is not None:
-        type_list = (
-            document_type if isinstance(document_type, list) else [document_type]
-        )
-        doc_type_enums = []
-        for dt in type_list:
-            if isinstance(dt, str):
-                with contextlib.suppress(KeyError):
-                    doc_type_enums.append(DocumentType[dt])
-            else:
-                doc_type_enums.append(dt)
-        if not doc_type_enums:
-            return []
-        if len(doc_type_enums) == 1:
-            base_conditions.append(Document.document_type == doc_type_enums[0])
-        else:
-            base_conditions.append(Document.document_type.in_(doc_type_enums))
-
-    if start_date is not None:
-        base_conditions.append(Document.updated_at >= start_date)
-    if end_date is not None:
-        base_conditions.append(Document.updated_at <= end_date)
-
-    async with shielded_async_session() as session:
-        doc_query = (
-            select(Document)
-            .options(joinedload(Document.search_space))
-            .where(*base_conditions)
-            .order_by(Document.updated_at.desc())
-            .limit(top_k)
-        )
-        result = await session.execute(doc_query)
-        documents = result.scalars().unique().all()
-
-        if not documents:
-            return []
-
-        doc_ids = [d.id for d in documents]
-
-        chunk_query = (
-            select(Chunk)
-            .where(Chunk.document_id.in_(doc_ids))
-            .order_by(Chunk.document_id, Chunk.position, Chunk.id)
-        )
-        chunk_result = await session.execute(chunk_query)
-        raw_chunks = chunk_result.scalars().all()
-
-    doc_chunk_counts: dict[int, int] = {}
-    doc_chunks: dict[int, list[dict]] = {d.id: [] for d in documents}
-    for chunk in raw_chunks:
-        did = chunk.document_id
-        count = doc_chunk_counts.get(did, 0)
-        if count < _BROWSE_MAX_CHUNKS_PER_DOC:
-            doc_chunks[did].append({"chunk_id": chunk.id, "content": chunk.content})
-            doc_chunk_counts[did] = count + 1
-
-    results: list[dict[str, Any]] = []
-    for doc in documents:
-        chunks_list = doc_chunks.get(doc.id, [])
-        results.append(
-            {
-                "document_id": doc.id,
-                "content": "\n\n".join(
-                    c["content"] for c in chunks_list if c.get("content")
-                ),
-                "score": 0.0,
-                "chunks": chunks_list,
-                "document": {
-                    "id": doc.id,
-                    "title": doc.title,
-                    "document_type": doc.document_type.value
-                    if getattr(doc, "document_type", None)
-                    else None,
-                    "metadata": doc.document_metadata or {},
-                },
-                "source": doc.document_type.value
-                if getattr(doc, "document_type", None)
-                else None,
-            }
-        )
-
-    perf.info(
-        "[kb_browse] recency browse in %.3fs docs=%d space=%d type=%s",
-        time.perf_counter() - t0,
-        len(results),
-        search_space_id,
-        document_type,
-    )
-    return results
-
-
-# =============================================================================
-# Connector Constants and Normalization
-# =============================================================================
-
-# Canonical connector values used internally by ConnectorService
-# Includes all document types and search source connectors
-_ALL_CONNECTORS: list[str] = [
-    "EXTENSION",
-    "FILE",
-    "SLACK_CONNECTOR",
-    "TEAMS_CONNECTOR",
-    "NOTION_CONNECTOR",
-    "YOUTUBE_VIDEO",
-    "GITHUB_CONNECTOR",
-    "ELASTICSEARCH_CONNECTOR",
-    "LINEAR_CONNECTOR",
-    "JIRA_CONNECTOR",
-    "CONFLUENCE_CONNECTOR",
-    "CLICKUP_CONNECTOR",
-    "GOOGLE_CALENDAR_CONNECTOR",
-    "GOOGLE_GMAIL_CONNECTOR",
-    "GOOGLE_DRIVE_FILE",
-    "DISCORD_CONNECTOR",
-    "AIRTABLE_CONNECTOR",
-    "LUMA_CONNECTOR",
-    "NOTE",
-    "BOOKSTACK_CONNECTOR",
-    "CRAWLED_URL",
-    "CIRCLEBACK",
-    "OBSIDIAN_CONNECTOR",
-    "ONEDRIVE_FILE",
-    "DROPBOX_FILE",
-]
-
-# Human-readable descriptions for each connector type
-# Used for generating dynamic docstrings and informing the LLM
-CONNECTOR_DESCRIPTIONS: dict[str, str] = {
-    "EXTENSION": "Web content saved via SurfSense browser extension (personal browsing history)",
-    "FILE": "User-uploaded documents (PDFs, Word, etc.) (personal files)",
-    "NOTE": "SurfSense Notes (notes created inside SurfSense)",
-    "SLACK_CONNECTOR": "Slack conversations and shared content (personal workspace communications)",
-    "TEAMS_CONNECTOR": "Microsoft Teams messages and conversations (personal Teams communications)",
-    "NOTION_CONNECTOR": "Notion workspace pages and databases (personal knowledge management)",
-    "YOUTUBE_VIDEO": "YouTube video transcripts and metadata (personally saved videos)",
-    "GITHUB_CONNECTOR": "GitHub repository content and issues (personal repositories and interactions)",
-    "ELASTICSEARCH_CONNECTOR": "Elasticsearch indexed documents and data (personal Elasticsearch instances)",
-    "LINEAR_CONNECTOR": "Linear project issues and discussions (personal project management)",
-    "JIRA_CONNECTOR": "Jira project issues, tickets, and comments (personal project tracking)",
-    "CONFLUENCE_CONNECTOR": "Confluence pages and comments (personal project documentation)",
-    "CLICKUP_CONNECTOR": "ClickUp tasks and project data (personal task management)",
-    "GOOGLE_CALENDAR_CONNECTOR": "Google Calendar events, meetings, and schedules (personal calendar)",
-    "GOOGLE_GMAIL_CONNECTOR": "Google Gmail emails and conversations (personal emails)",
-    "GOOGLE_DRIVE_FILE": "Google Drive files and documents (personal cloud storage)",
-    "DISCORD_CONNECTOR": "Discord server conversations and shared content (personal community)",
-    "AIRTABLE_CONNECTOR": "Airtable records, tables, and database content (personal data)",
-    "LUMA_CONNECTOR": "Luma events and meetings",
-    "WEBCRAWLER_CONNECTOR": "Webpages indexed by SurfSense (personally selected websites)",
-    "CRAWLED_URL": "Webpages indexed by SurfSense (personally selected websites)",
-    "BOOKSTACK_CONNECTOR": "BookStack pages (personal documentation)",
-    "CIRCLEBACK": "Circleback meeting notes, transcripts, and action items",
-    "OBSIDIAN_CONNECTOR": "Obsidian vault notes and markdown files (personal notes)",
-    "ONEDRIVE_FILE": "Microsoft OneDrive files and documents (personal cloud storage)",
-    "DROPBOX_FILE": "Dropbox files and documents (cloud storage)",
-}
-
-
-def _normalize_connectors(
-    connectors_to_search: list[str] | None,
-    available_connectors: list[str] | None = None,
-) -> list[str]:
-    """Normalize model-supplied connectors to canonical ConnectorService types.
-
-    Maps user-facing aliases (e.g. WEBCRAWLER_CONNECTOR), drops unknowns, and
-    constrains to ``available_connectors`` when given. Empty input defaults to
-    all available connectors (minus live-search ones).
-    """
-    valid_set = (
-        set(available_connectors) if available_connectors else set(_ALL_CONNECTORS)
-    )
-    valid_set -= _LIVE_SEARCH_CONNECTORS
-
-    if not connectors_to_search:
-        base = (
-            list(available_connectors)
-            if available_connectors
-            else list(_ALL_CONNECTORS)
-        )
-        return [c for c in base if c not in _LIVE_SEARCH_CONNECTORS]
-
-    normalized: list[str] = []
-    for raw in connectors_to_search:
-        c = (raw or "").strip().upper()
-        if not c:
-            continue
-        if c == "WEBCRAWLER_CONNECTOR":
-            c = "CRAWLED_URL"
-        normalized.append(c)
-
-    # De-dupe (order-preserving), keeping only known + available connectors.
-    seen: set[str] = set()
-    out: list[str] = []
-    for c in normalized:
-        if c in seen:
-            continue
-        if c not in _ALL_CONNECTORS:
-            continue
-        if c not in valid_set:
-            continue
-        seen.add(c)
-        out.append(c)
-
-    # Nothing matched: fall back to all available.
-    if not out:
-        base = (
-            list(available_connectors)
-            if available_connectors
-            else list(_ALL_CONNECTORS)
-        )
-        return [c for c in base if c not in _LIVE_SEARCH_CONNECTORS]
-    return out
-
-
-# =============================================================================
-# Document Formatting
-# =============================================================================
-
-
-# Fraction of the model's context window (in characters) that a single tool
-# result is allowed to occupy.  The remainder is reserved for system prompt,
-# conversation history, and model output.  With ~4 chars/token this gives a
-# tool result ≈ 25 % of the context budget in tokens.
-_TOOL_OUTPUT_CONTEXT_FRACTION = 0.25
-_CHARS_PER_TOKEN = 4
-
-# Hard-floor / ceiling so the budget is always sensible regardless of what
-# the model reports.
-_MIN_TOOL_OUTPUT_CHARS = 20_000  # ~5K tokens
-_MAX_TOOL_OUTPUT_CHARS = 200_000  # ~50K tokens
-_MAX_CHUNK_CHARS = 8_000
-
-# Rank-adaptive per-document budget allocation.
-# Top-ranked (most relevant) documents get a larger share of the budget so
-# we pack as much high-quality context as possible.
-#
-#   fraction(rank) = _TOP_DOC_BUDGET_FRACTION / (1 + rank * _RANK_DECAY)
-#
-# Examples (128K budget, 8K chunk cap):
-#   rank 0 → 40% → 6 chunks   |  rank 3 → 19% → 3 chunks
-#   rank 1 → 30% → 4 chunks   |  rank 10 → 10% → 3 chunks (floor)
-#   rank 2 → 24% → 3 chunks   |
-_TOP_DOC_BUDGET_FRACTION = 0.40
-_RANK_DECAY = 0.35
-_MIN_CHUNKS_PER_DOC = 3
-
-
-def _compute_tool_output_budget(max_input_tokens: int | None) -> int:
-    """Derive a character budget from the model's context window.
-
-    Uses ``litellm.get_model_info`` via the value already resolved by
-    ``ChatLiteLLMRouter`` / ``ChatLiteLLM`` and passed through the dependency
-    chain as ``max_input_tokens``.  Falls back to a conservative default when
-    the value is unavailable.
-    """
-    if max_input_tokens is None or max_input_tokens <= 0:
-        return _MIN_TOOL_OUTPUT_CHARS  # conservative fallback
-
-    budget = int(max_input_tokens * _CHARS_PER_TOKEN * _TOOL_OUTPUT_CONTEXT_FRACTION)
-    return max(_MIN_TOOL_OUTPUT_CHARS, min(budget, _MAX_TOOL_OUTPUT_CHARS))
-
-
-_INTERNAL_METADATA_KEYS: frozenset[str] = frozenset(
-    {
-        "message_id",
-        "thread_id",
-        "event_id",
-        "calendar_id",
-        "google_drive_file_id",
-        "onedrive_file_id",
-        "dropbox_file_id",
-        "page_id",
-        "issue_id",
-        "connector_id",
-    }
-)
-
-
-def format_documents_for_context(
-    documents: list[dict[str, Any]],
-    *,
-    max_chars: int = _MAX_TOOL_OUTPUT_CHARS,
-    max_chunk_chars: int = _MAX_CHUNK_CHARS,
-    max_chunks_per_doc: int = 0,
-) -> str:
-    """Format retrieved documents into an XML context string for the LLM.
-
-    Documents are emitted highest-relevance first until ``max_chars`` is hit.
-    ``max_chunks_per_doc=0`` auto-computes a rank-adaptive cap so top results get
-    more chunks and no single large document monopolizes the budget.
-    """
-    if not documents:
-        return ""
-
-    # Group chunks by document id, preserving chunk_id so [citation:123] works.
-    # ConnectorService returns document-grouped results ({document, chunks, source}).
-    grouped: dict[str, dict[str, Any]] = {}
-
-    for doc in documents:
-        document_info = (doc.get("document") or {}) if isinstance(doc, dict) else {}
-        metadata = (
-            (document_info.get("metadata") or {})
-            if isinstance(document_info, dict)
-            else {}
-        )
-        if not metadata and isinstance(doc, dict):
-            # Some result shapes may place metadata at the top level.
-            metadata = doc.get("metadata") or {}
-
-        source = (
-            (doc.get("source") if isinstance(doc, dict) else None)
-            or document_info.get("document_type")
-            or metadata.get("document_type")
-            or "UNKNOWN"
-        )
-
-        # Identity: prefer document_id, else type+title+url.
-        document_id_val = document_info.get("id")
-        title = (
-            document_info.get("title") or metadata.get("title") or "Untitled Document"
-        )
-        url = (
-            metadata.get("url")
-            or metadata.get("source")
-            or metadata.get("page_url")
-            or ""
-        )
-
-        doc_key = (
-            str(document_id_val)
-            if document_id_val is not None
-            else f"{source}::{title}::{url}"
-        )
-
-        if doc_key not in grouped:
-            grouped[doc_key] = {
-                "document_id": document_id_val
-                if document_id_val is not None
-                else doc_key,
-                "document_type": metadata.get("document_type") or source,
-                "title": title,
-                "url": url,
-                "metadata": metadata,
-                "chunks": [],
-            }
-
-        # Prefer document-grouped chunks when present.
-        chunks_list = doc.get("chunks") if isinstance(doc, dict) else None
-        if isinstance(chunks_list, list) and chunks_list:
-            for ch in chunks_list:
-                if not isinstance(ch, dict):
-                    continue
-                chunk_id = ch.get("chunk_id") or ch.get("id")
-                content = (ch.get("content") or "").strip()
-                if not content:
-                    continue
-                grouped[doc_key]["chunks"].append(
-                    {"chunk_id": chunk_id, "content": content}
-                )
-            continue
-
-        # Fallback: treat this as a flat chunk-like object
-        if not isinstance(doc, dict):
-            continue
-        chunk_id = doc.get("chunk_id") or doc.get("id")
-        content = (doc.get("content") or "").strip()
-        if not content:
-            continue
-        grouped[doc_key]["chunks"].append({"chunk_id": chunk_id, "content": content})
-
-    # Live search connectors whose results should be cited by URL rather than
-    # a numeric chunk_id (the numeric IDs are meaningless auto-incremented counters).
-    live_search_connectors = {
-        "TAVILY_API",
-        "LINKUP_API",
-        "BAIDU_SEARCH_API",
-    }
-
-    parts: list[str] = []
-    total_chars = 0
-    total_docs = len(grouped)
-
-    for doc_idx, g in enumerate(grouped.values()):
-        metadata_clean = {
-            k: v for k, v in g["metadata"].items() if k not in _INTERNAL_METADATA_KEYS
-        }
-        metadata_json = json.dumps(metadata_clean, ensure_ascii=False)
-        is_live_search = g["document_type"] in live_search_connectors
-
-        doc_lines: list[str] = [
-            "<document>",
-            "<document_metadata>",
-            f"  <document_id>{g['document_id']}</document_id>",
-            f"  <document_type>{g['document_type']}</document_type>",
-            f"  <title><![CDATA[{g['title']}]]></title>",
-            f"  <url><![CDATA[{g['url']}]]></url>",
-            f"  <metadata_json><![CDATA[{metadata_json}]]></metadata_json>",
-            "</document_metadata>",
-            "",
-            "<document_content>",
-        ]
-
-        # Rank-adaptive per-document chunk cap: top results get more chunks.
-        if max_chunks_per_doc > 0:
-            chunks_allowed = max_chunks_per_doc
-        else:
-            doc_fraction = _TOP_DOC_BUDGET_FRACTION / (1 + doc_idx * _RANK_DECAY)
-            max_doc_chars = int(max_chars * doc_fraction)
-            xml_overhead = 500
-            chunks_allowed = max(
-                (max_doc_chars - xml_overhead) // max(max_chunk_chars, 1),
-                _MIN_CHUNKS_PER_DOC,
-            )
-
-        chunks = g["chunks"]
-        if len(chunks) > chunks_allowed:
-            chunks = chunks[:chunks_allowed]
-
-        for ch in chunks:
-            ch_content = ch["content"]
-            if max_chunk_chars and len(ch_content) > max_chunk_chars:
-                ch_content = ch_content[:max_chunk_chars] + "\n...(truncated)"
-            ch_id = g["url"] if (is_live_search and g["url"]) else ch["chunk_id"]
-            if ch_id is None:
-                doc_lines.append(f"  <chunk><![CDATA[{ch_content}]]></chunk>")
-            else:
-                doc_lines.append(
-                    f"  <chunk id='{ch_id}'><![CDATA[{ch_content}]]></chunk>"
-                )
-
-        doc_lines.extend(["</document_content>", "</document>", ""])
-
-        doc_xml = "\n".join(doc_lines)
-        doc_len = len(doc_xml)
-
-        if total_chars + doc_len > max_chars:
-            remaining = total_docs - doc_idx
-            if doc_idx == 0:
-                parts.append(doc_xml)
-                total_chars += doc_len
-            parts.append(
-                f"<!-- Output truncated: {remaining} more document(s) omitted "
-                f"(budget {max_chars} chars). Refine your query or reduce top_k "
-                f"to retrieve different results. -->"
-            )
-            break
-
-        parts.append(doc_xml)
-        total_chars += doc_len
-
-    result = "\n".join(parts).strip()
-
-    # Hard safety net: if the result is still over budget (e.g. a single massive
-    # first document), forcibly truncate with a closing comment.
-    if len(result) > max_chars:
-        truncation_msg = "\n<!-- ...output forcibly truncated to fit context window -->"
-        result = result[: max_chars - len(truncation_msg)] + truncation_msg
-
-    return result
-
-
-# =============================================================================
-# Knowledge Base Search
-# =============================================================================
-
-
-async def search_knowledge_base_async(
-    query: str,
-    search_space_id: int,
-    db_session: AsyncSession,
-    connector_service: ConnectorService,
-    connectors_to_search: list[str] | None = None,
-    top_k: int = 10,
-    start_date: datetime | None = None,
-    end_date: datetime | None = None,
-    available_connectors: list[str] | None = None,
-    available_document_types: list[str] | None = None,
-    max_input_tokens: int | None = None,
-) -> str:
-    """Search the knowledge base across connectors and return formatted results.
-
-    ``available_document_types`` lets local connectors with no indexed data be
-    skipped (no embedding / DB round-trip), and ``max_input_tokens`` sizes the
-    output to the model's context window.
-    """
-    perf = get_perf_logger()
-    t0 = time.perf_counter()
-
-    deduplicated = await search_knowledge_base_raw_async(
-        query=query,
-        search_space_id=search_space_id,
-        db_session=db_session,
-        connector_service=connector_service,
-        connectors_to_search=connectors_to_search,
-        top_k=top_k,
-        start_date=start_date,
-        end_date=end_date,
-        available_connectors=available_connectors,
-        available_document_types=available_document_types,
-    )
-
-    if not deduplicated:
-        return "No documents found in the knowledge base. The search space has no indexed content yet."
-
-    # Use browse chunk cap for degenerate queries, otherwise adaptive chunking.
-    max_chunks_per_doc = (
-        _BROWSE_MAX_CHUNKS_PER_DOC if _is_degenerate_query(query) else 0
-    )
-    output_budget = _compute_tool_output_budget(max_input_tokens)
-    result = format_documents_for_context(
-        deduplicated,
-        max_chars=output_budget,
-        max_chunks_per_doc=max_chunks_per_doc,
-    )
-
-    if len(result) > output_budget:
-        perf.warning(
-            "[kb_search] output STILL exceeds budget after format (%d > %d), "
-            "hard truncation should have fired",
-            len(result),
-            output_budget,
-        )
-
-    perf.info(
-        "[kb_search] TOTAL in %.3fs total_docs=%d deduped=%d output_chars=%d "
-        "budget=%d max_input_tokens=%s space=%d",
-        time.perf_counter() - t0,
-        len(deduplicated),
-        len(deduplicated),
-        len(result),
-        output_budget,
-        max_input_tokens,
-        search_space_id,
-    )
-    return result
-
-
-async def search_knowledge_base_raw_async(
-    query: str,
-    search_space_id: int,
-    db_session: AsyncSession,
-    connector_service: ConnectorService,
-    connectors_to_search: list[str] | None = None,
-    top_k: int = 10,
-    start_date: datetime | None = None,
-    end_date: datetime | None = None,
-    available_connectors: list[str] | None = None,
-    available_document_types: list[str] | None = None,
-    query_embedding: list[float] | None = None,
-) -> list[dict[str, Any]]:
-    """Search knowledge base and return raw document dicts (no XML formatting)."""
-    perf = get_perf_logger()
-    t0 = time.perf_counter()
-    all_documents: list[dict[str, Any]] = []
-
-    # Preserve the public signature for compatibility even if values are unused.
-    _ = (db_session, connector_service)
-
-    from app.agents.chat.multi_agent_chat.shared.date_filters import resolve_date_range
-
-    resolved_start_date, resolved_end_date = resolve_date_range(
-        start_date=start_date,
-        end_date=end_date,
-    )
-
-    connectors = _normalize_connectors(connectors_to_search, available_connectors)
-
-    if available_document_types:
-        doc_types_set = set(available_document_types)
-        connectors = [
-            c
-            for c in connectors
-            if c in doc_types_set
-            or NATIVE_TO_LEGACY_DOCTYPE.get(c, "") in doc_types_set
-        ]
-
-    if not connectors:
-        return []
-
-    if _is_degenerate_query(query):
-        perf.info(
-            "[kb_search_raw] degenerate query %r detected - recency browse",
-            query,
-        )
-        browse_connectors = connectors if connectors else [None]  # type: ignore[list-item]
-        expanded_browse = []
-        for connector in browse_connectors:
-            if connector is not None and connector in NATIVE_TO_LEGACY_DOCTYPE:
-                expanded_browse.append([connector, NATIVE_TO_LEGACY_DOCTYPE[connector]])
-            else:
-                expanded_browse.append(connector)
-        browse_results = await asyncio.gather(
-            *[
-                _browse_recent_documents(
-                    search_space_id=search_space_id,
-                    document_type=connector,
-                    top_k=top_k,
-                    start_date=resolved_start_date,
-                    end_date=resolved_end_date,
-                )
-                for connector in expanded_browse
-            ]
-        )
-        for docs in browse_results:
-            all_documents.extend(docs)
-    else:
-        if query_embedding is None:
-            from app.config import config as app_config
-
-            query_embedding = app_config.embedding_model_instance.embed(query)
-
-        max_parallel_searches = 4
-        semaphore = asyncio.Semaphore(max_parallel_searches)
-
-        async def _search_one_connector(connector: str) -> list[dict[str, Any]]:
-            try:
-                async with semaphore, shielded_async_session() as isolated_session:
-                    svc = ConnectorService(isolated_session, search_space_id)
-                    return await svc._combined_rrf_search(
-                        query_text=query,
-                        search_space_id=search_space_id,
-                        document_type=connector,
-                        top_k=top_k,
-                        start_date=resolved_start_date,
-                        end_date=resolved_end_date,
-                        query_embedding=query_embedding,
-                    )
-            except Exception as exc:
-                perf.warning("[kb_search_raw] connector=%s FAILED: %s", connector, exc)
-                return []
-
-        connector_results = await asyncio.gather(
-            *[_search_one_connector(connector) for connector in connectors]
-        )
-        for docs in connector_results:
-            all_documents.extend(docs)
-
-    seen_doc_ids: set[Any] = set()
-    seen_content_hashes: set[int] = set()
-    deduplicated: list[dict[str, Any]] = []
-
-    def _content_fingerprint(document: dict[str, Any]) -> int | None:
-        chunks = document.get("chunks")
-        if isinstance(chunks, list):
-            chunk_texts = []
-            for chunk in chunks:
-                if not isinstance(chunk, dict):
-                    continue
-                chunk_content = (chunk.get("content") or "").strip()
-                if chunk_content:
-                    chunk_texts.append(chunk_content)
-            if chunk_texts:
-                return hash("||".join(chunk_texts))
-        flat_content = (document.get("content") or "").strip()
-        if flat_content:
-            return hash(flat_content)
-        return None
-
-    for doc in all_documents:
-        doc_id = (doc.get("document", {}) or {}).get("id")
-        if doc_id is not None:
-            if doc_id in seen_doc_ids:
-                continue
-            seen_doc_ids.add(doc_id)
-            deduplicated.append(doc)
-            continue
-        content_hash = _content_fingerprint(doc)
-        if content_hash is not None and content_hash in seen_content_hashes:
-            continue
-        if content_hash is not None:
-            seen_content_hashes.add(content_hash)
-        deduplicated.append(doc)
-
-    deduplicated.sort(key=lambda doc: doc.get("score", 0), reverse=True)
-    perf.info(
-        "[kb_search_raw] done in %.3fs total=%d deduped=%d",
-        time.perf_counter() - t0,
-        len(all_documents),
-        len(deduplicated),
-    )
-    return deduplicated
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/deliverables/tools/report.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/deliverables/tools/report.py
@ -23,6 +23,45 @@ from app.services.llm_service import get_agent_llm

 logger = logging.getLogger(__name__)

+
+def _report_search_types(
+    available_connectors: list[str] | None,
+    available_document_types: list[str] | None,
+) -> tuple[str, ...] | None:
+    """Build the document-type scope for the shared KB search.
+
+    ``None`` means "search every indexed type"; a tuple narrows the scope to the
+    connectors/document types the search space actually has.
+
+    The two inputs are merged into one de-duplicated set and returned as a
+    sorted tuple, so the result does not depend on input order. ``None`` is
+    returned when both inputs are ``None`` or empty.
+    """
+    types: set[str] = set()
+    if available_document_types:
+        types.update(available_document_types)
+    if available_connectors:
+        types.update(available_connectors)
+    # An empty tuple is falsy, so "nothing to narrow by" collapses to ``None``.
+    return tuple(sorted(types)) or None
+
+
+def _render_kb_hits_for_report(hits: list[Any]) -> str:
+    """Render KB hits as plain titled source text for the report writer.
+
+    Citations are intentionally omitted from reports for now, so no ``[n]``
+    labels or chunk ids are emitted — just titled document content for grounding.
+
+    Each hit is read for ``title``, ``document_type``, ``metadata`` and
+    ``chunks`` (each chunk exposing ``content``). A hit becomes one Markdown
+    section: a ``## title (label)`` heading (or ``## title`` when no label is
+    available) followed by its non-blank chunk texts separated by blank lines.
+    Hits with no non-blank chunk text are skipped, so the result is ``""`` when
+    nothing renders.
+    """
+    # NOTE(review): imported at call time rather than module level — presumably
+    # to avoid an import cycle; confirm before hoisting.
+    from app.agents.chat.multi_agent_chat.shared.document_render import source_label
+
+    blocks: list[str] = []
+    for hit in hits:
+        label = source_label(hit.document_type, hit.metadata)
+        # Only append the parenthesised source label when one was resolved.
+        header = f"{hit.title} ({label})" if label else hit.title
+        # Whitespace-only chunks are dropped; the rest are trimmed and joined.
+        body = "\n\n".join(
+            chunk.content.strip() for chunk in hit.chunks if chunk.content.strip()
+        )
+        if not body:
+            continue
+        blocks.append(f"## {header}\n\n{body}")
+    return "\n\n".join(blocks)
+
+
 # ─── Shared Formatting Rules ────────────────────────────────────────────────
 # Reusable formatting instructions appended to section-level and review prompts.

@ -788,31 +827,46 @@ def create_generate_report_tool(
                    f"{query_count} queries: {search_queries[:5]}"
                )
                try:
-                    from .knowledge_base import search_knowledge_base_async
+                    from app.agents.chat.multi_agent_chat.shared.retrieval.hybrid_search import (
+                        search_chunks,
+                    )
+                    from app.agents.chat.multi_agent_chat.shared.retrieval.models import (
+                        DocumentHit,
+                        SearchScope,
+                    )
+
+                    scope = SearchScope(
+                        document_types=_report_search_types(
+                            available_connectors, available_document_types
+                        )
+                    )

                    # Each query gets its own short-lived session.
-                    async def _run_single_query(q: str) -> str:
+                    async def _run_single_query(q: str) -> list[DocumentHit]:
                        async with shielded_async_session() as kb_session:
-                            kb_connector_svc = ConnectorService(
-                                kb_session, search_space_id
-                            )
-                            return await search_knowledge_base_async(
-                                query=q,
+                            return await search_chunks(
+                                kb_session,
                                search_space_id=search_space_id,
-                                db_session=kb_session,
-                                connector_service=kb_connector_svc,
+                                query=q,
+                                scope=scope,
                                top_k=10,
-                                available_connectors=available_connectors,
-                                available_document_types=available_document_types,
                            )

-                    kb_results = await asyncio.gather(
+                    hits_per_query = await asyncio.gather(
                        *[_run_single_query(q) for q in search_queries[:5]]
                    )

-                    kb_text_parts = [r for r in kb_results if r and r.strip()]
-                    if kb_text_parts:
-                        kb_combined = "\n\n---\n\n".join(kb_text_parts)
+                    seen_doc_ids: set[int] = set()
+                    merged_hits: list[DocumentHit] = []
+                    for hits in hits_per_query:
+                        for hit in hits:
+                            if hit.document_id in seen_doc_ids:
+                                continue
+                            seen_doc_ids.add(hit.document_id)
+                            merged_hits.append(hit)
+
+                    kb_combined = _render_kb_hits_for_report(merged_hits)
+                    if kb_combined.strip():
                        if effective_source.strip():
                            effective_source = (
                                effective_source
@ -822,20 +876,17 @@ def create_generate_report_tool(
                        else:
                            effective_source = kb_combined

-                        # Count docs found (rough: count <document> tags)
-                        doc_count = kb_combined.count("<document>")
+                        doc_count = len(merged_hits)
                        dispatch_custom_event(
                            "report_progress",
                            {
                                "phase": "kb_search_done",
-                                "message": f"Found {doc_count} relevant documents"
-                                if doc_count
-                                else f"Found results from {len(kb_text_parts)} queries",
+                                "message": f"Found {doc_count} relevant documents",
                            },
                        )
                        logger.info(
                            f"[generate_report] KB search added ~{len(kb_combined)} chars "
-                            f"from {len(kb_text_parts)} queries"
+                            f"from {doc_count} documents"
                        )
                    else:
                        dispatch_custom_event(
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/description_readonly.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/description_readonly.md
@ -2,4 +2,4 @@ Read-only specialist for the user's workspace (documents and folders). Use to fi

 Pass your full question as one string. The specialist runs in isolation: it cannot see this thread, so include any path hints, filters, or constraints it needs.

-The specialist returns plain prose with absolute paths and `[citation:<chunk_id>]` markers when claims came from KB-indexed chunks. Preserve those markers verbatim if you forward the answer.
+The specialist returns plain prose with absolute paths and `[n]` citation labels when claims came from KB-indexed documents. Preserve those labels verbatim if you forward the answer.
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md
@ -6,10 +6,9 @@ You are the SurfSense knowledge base specialist for the user's `/documents/` wor

 - If the supervisor already provided a precise path (e.g. `/documents/notes/2026-05-11.md`), use it directly — skip the lookup steps below.
 - Otherwise, most requests reference documents by description (`"my meeting notes from last week"`, `"the design doc"`). Resolve them yourself:
-  1. Consult `<priority_documents>` — it's a hint about top-K likely matches, not a directive. Skip when the ranked entries don't fit the task.
-  2. Walk `<workspace_tree>` for descriptive folder/filename matches.
-  3. Use the `glob` tool for filename patterns the tree didn't surface, and the `grep` tool when the description points at *content* rather than a name.
-  4. Only return `status=blocked` with `missing_fields=["path"]` when the description is genuinely ambiguous after a thorough lookup.
+  1. Walk `<workspace_tree>` for descriptive folder/filename matches.
+  2. Use the `glob` tool for filename patterns the tree didn't surface, and the `grep` tool when the description points at *content* rather than a name.
+  3. Only return `status=blocked` with `missing_fields=["path"]` when the description is genuinely ambiguous after a thorough lookup.

 For writes (where you choose the path yourself):

@ -35,42 +34,31 @@ Map outcomes to your `status`:

 You construct the structured `evidence` fields from your own knowledge of what you called and what you observed — the tools do not return them. Never report values you did not actually see.

-## Chunk citations in your prose
+## Citations in your prose

-When `read_file` returns a KB-indexed document under `/documents/`, the response includes `<chunk id='…'>` blocks. Whenever a fact in your `action_summary` or `evidence.content_excerpt` came from a specific chunk, append `[citation:<chunk_id>]` to the sentence stating that fact, using the **exact** id from the `<chunk id='…'>` tag. The caller relays these markers to the end user verbatim, and the UI resolves each id by exact match against the database, so a wrong id silently breaks the citation.
+When `read_file` returns a KB-indexed document under `/documents/`, it comes back as a `<document … view="full">` block whose passages are each prefixed with a bracketed label — `[1]`, `[2]`, `[3]`. That `[n]` is the citation label. Whenever a fact in your `action_summary` or `evidence.content_excerpt` came from a specific passage, append its `[n]` to the sentence stating that fact, copying the label **exactly** as shown. The caller relays these labels verbatim and the server resolves each one, so a wrong number silently breaks the citation.

-### Where chunk ids live in `read_file` output
+### Where the labels live in `read_file` output

-A KB document's XML has three numeric attributes — only **one** is a citation source:
+A KB document reads back like this — only the bracketed `[n]` is a citation label:

 ```
-<document>
-<document_metadata>
-  <document_id>42</document_id>          ← NOT a citation. Parent doc id; ignore for citations.
-  ...
-</document_metadata>
-<chunk_index>
-  <entry chunk_id="128" lines="14-22"/>  ← Index hint; the same id also appears below.
-  <entry chunk_id="129" lines="23-30" matched="true"/>
-</chunk_index>
-<document_content>
-  <chunk id='128'><![CDATA[…]]></chunk>  ← This is the citation source.
-  <chunk id='129'><![CDATA[…]]></chunk>
-</document_content>
+<document title="Q2 Roadmap" source="File" view="full">
+  [3] First milestone is …
+  [4] Second milestone is …
 </document>
 ```

 ### Rules

- Use the **exact** id from a `<chunk id='…'>` tag whose content you actually quoted or paraphrased. Copy digit-for-digit; do **not** retype from memory.
- Before emitting `[citation:N]`, confirm the literal substring `<chunk id='N'>` (or its index twin `chunk_id="N"`) appears in the tool result you are summarising this turn. If you can't see it, omit the citation.
- Never cite `<document_id>` — that's the parent doc, not a chunk.
- Never invent, normalise, shorten, or guess at adjacent ids. If unsure between two candidates, omit rather than pick.
+- Use the **exact** `[n]` shown next to the passage you actually quoted or paraphrased. Copy it digit-for-digit; do **not** retype from memory or renumber.
+- Before emitting an `[n]`, confirm that bracketed label appears in the `read_file` output you are summarising this turn. If you can't see it, omit the citation.
+- Labels are **not** sequential by position — a passage may be `[7]` while the one above it is `[3]` (numbering is shared across the whole conversation). Copy what you see; never guess an adjacent number.
+- Write the bare label `[n]` only — no `[citation:…]` wrapper, no markdown links, no parentheses, no footnote numbers.
+- Several passages behind one point → each in its own brackets with nothing between: `[3][4]`. Never `[3, 4]` and never a range like `[3-4]`.
 - Prefer **fewer accurate citations** over many speculative ones.
- Multiple chunks supporting the same point → comma-separated and copied individually: `[citation:128], [citation:129]`.
- Plain square brackets only — no markdown links, no parentheses, no footnote numbers.
- Tool results without `<chunk id='…'>` (write/edit/move confirmations, `ls` / `glob` / `grep` listings, error strings) carry no chunk id and need none.
- Populate `evidence.chunk_ids` with **only** ids you actually emitted in `[citation:…]` markers — same set, same digits.
+- Tool results without `[n]` labels (write/edit/move confirmations, `ls` / `glob` / `grep` listings, error strings) carry no label and need none.
+- Populate `evidence.citations` with **only** the labels you actually emitted, written as bare numbers — e.g. `[3][4]` in prose becomes `"citations": [3, 4]`.

 ## Examples

@ -89,7 +77,7 @@ A KB document's XML has three numeric attributes — only **one** is a citation
      "path": "/documents/meetings/2026-05-11-meeting.md",
      "matched_candidates": null,
      "content_excerpt": null,
-      "chunk_ids": null
+      "citations": null
    },
    "next_step": null,
    "missing_fields": null,
@ -100,7 +88,7 @@ A KB document's XML has three numeric attributes — only **one** is a citation
 **Example 2 — edit by inference:**

 - *Supervisor task:* `"Add a bullet about the new feature flag to my Q2 roadmap"`
- *You:* search for the roadmap doc — check `<priority_documents>` and `<workspace_tree>` first; if neither surfaces it, widen with the `glob` tool (try filename patterns the user's language suggests) or the `grep` tool (search by content). Suppose `<priority_documents>` hits `/documents/planning/q2-roadmap.md` → `read_file("/documents/planning/q2-roadmap.md")` → `edit_file("/documents/planning/q2-roadmap.md", old, new)` → success.
+- *You:* search for the roadmap doc — check `<workspace_tree>` first; if it doesn't surface the doc, widen with the `glob` tool (try filename patterns the user's language suggests) or the `grep` tool (search by content). Suppose the tree hits `/documents/planning/q2-roadmap.md` → `read_file("/documents/planning/q2-roadmap.md")` → `edit_file("/documents/planning/q2-roadmap.md", old, new)` → success.
 - *Output:* `status=success`, evidence includes path and the inserted snippet.

 **Example 3 — blocked, multiple candidates:**
@ -121,7 +109,7 @@ A KB document's XML has three numeric attributes — only **one** is a citation
        { "id": "/documents/design/auth-rework.md", "label": "Auth Rework" }
      ],
      "content_excerpt": null,
-      "chunk_ids": null
+      "citations": null
    },
    "next_step": "Ask the user which design doc to update.",
    "missing_fields": ["path"],
@ -142,7 +130,7 @@ Return **only** one JSON object (no markdown or prose outside it):
    "path": string | null,
    "matched_candidates": [ { "id": string, "label": string } ] | null,
    "content_excerpt": string | null,
-    "chunk_ids": string[] | null
+    "citations": number[] | null
  },
  "next_step": string | null,
  "missing_fields": string[] | null,
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md
@ -9,8 +9,7 @@ You are the SurfSense workspace specialist for the user's local folders.
  1. If you do not know which mounts exist, call `ls('/')` first.
  2. Walk likely folders with the `ls` and `list_tree` tools.
  3. Use the `glob` tool for filename patterns; use the `grep` tool when the description points at *content* rather than a name.
-  4. `<priority_documents>` lists top-K cloud-ingested docs, not local files — consult it only when the task spans both worlds (e.g. drafting a local note from a Notion source). Skip otherwise.
-  5. Only return `status=blocked` with `missing_fields=["path"]` when the description is genuinely ambiguous after a thorough lookup.
+  4. Only return `status=blocked` with `missing_fields=["path"]` when the description is genuinely ambiguous after a thorough lookup.

 For writes (where you choose the path yourself):

@ -33,11 +32,11 @@ Map outcomes to your `status`:
 - Any other `"Error: …"` → `status=error` and relay the tool's message verbatim as `next_step`.
 - HITL rejection → `status=blocked` with `next_step="User declined this filesystem action. Do not retry."`.

-You construct the structured `evidence` fields from your own knowledge of what you called and what you observed — the tools do not return them. Never report values you did not actually see. (`chunk_ids` is always `null` in desktop mode — see "Chunk citations in your prose" below.)
+You construct the structured `evidence` fields from your own knowledge of what you called and what you observed — the tools do not return them. Never report values you did not actually see. (`citations` is always `null` in desktop mode — see "Citations in your prose" below.)

-## Chunk citations in your prose
+## Citations in your prose

-In desktop mode your filesystem tools read local files only, and local-file tool results do **not** carry `<chunk id='…'>` tags. Do not emit `[citation:…]` markers in `action_summary` or `evidence.content_excerpt`, and leave `evidence.chunk_ids` `null` — the absolute path is the only reference for local-file work.
+In desktop mode your filesystem tools read local files only, which are not KB-indexed and carry no `[n]` citation labels. Do not emit `[n]` or `[citation:…]` markers in `action_summary` or `evidence.content_excerpt`, and leave `evidence.citations` `null` — the absolute path is the only reference for local-file work.

 ## Examples

@ -56,7 +55,7 @@ In desktop mode your filesystem tools read local files only, and local-file tool
      "path": "/notes/meetings/2026-05-11-meeting.md",
      "matched_candidates": null,
      "content_excerpt": null,
-      "chunk_ids": null
+      "citations": null
    },
    "next_step": null,
    "missing_fields": null,
@ -88,7 +87,7 @@ In desktop mode your filesystem tools read local files only, and local-file tool
        { "id": "/projects/web/design/auth-rework.md", "label": "Auth Rework" }
      ],
      "content_excerpt": null,
-      "chunk_ids": null
+      "citations": null
    },
    "next_step": "Ask the user which design doc to update.",
    "missing_fields": ["path"],
@ -109,7 +108,7 @@ Return **only** one JSON object (no markdown or prose outside it):
    "path": string | null,
    "matched_candidates": [ { "id": string, "label": string } ] | null,
    "content_excerpt": string | null,
-    "chunk_ids": string[] | null
+    "citations": number[] | null
  },
  "next_step": string | null,
  "missing_fields": string[] | null,
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md
@ -6,9 +6,8 @@ You answer workspace questions for another agent. The end user does **not** see

 The caller's question often references documents by description (`"my meeting notes from last week"`, `"the design doc"`). Resolve them yourself:

-1. Consult `<priority_documents>` — a hint about top-K likely matches, not a directive. Skip when the ranked entries don't fit.
-2. Walk `<workspace_tree>` for descriptive folder/filename matches.
-3. Use `glob` for filename patterns the tree didn't surface, and `grep` when the description points at *content* rather than a name.
+1. Walk `<workspace_tree>` for descriptive folder/filename matches.
+2. Use `glob` for filename patterns the tree didn't surface, and `grep` when the description points at *content* rather than a name.

 If a precise path was already given, use it directly — skip the lookup.

@ -28,41 +27,30 @@ Reply in plain prose:
 - If the workspace does not contain the requested information, say so explicitly. Do not fabricate paths or content.
 - If the question is genuinely ambiguous after a thorough lookup, list the candidates with their paths and stop.

-## Chunk citations
+## Citations

-When the evidence for a claim came from a `read_file` response that included `<chunk id='…'>` blocks (i.e. a KB-indexed document under `/documents/`), append `[citation:<chunk_id>]` to the sentence stating that claim. The caller passes these markers through to the end user verbatim, and the UI resolves each id by exact match against the database, so a wrong id silently breaks the citation.
+When the evidence for a claim came from a `read_file` response for a KB-indexed document under `/documents/`, the document reads back as a `<document … view="full">` block whose passages are each prefixed with a bracketed label — `[1]`, `[2]`, `[3]`. That `[n]` is the citation label. Append the relevant `[n]` to the sentence stating the claim, copying it **exactly** as shown. The caller passes these labels through verbatim and the server resolves each one, so a wrong number silently breaks the citation.

-### Where chunk ids live in `read_file` output
+### Where the labels live in `read_file` output

-A KB document's XML has three numeric attributes — only **one** is a citation source:
+A KB document reads back like this — only the bracketed `[n]` is a citation label:

 ```
-<document>
-<document_metadata>
-  <document_id>42</document_id>          ← NOT a citation. Parent doc id; ignore for citations.
-  ...
-</document_metadata>
-<chunk_index>
-  <entry chunk_id="128" lines="14-22"/>  ← Index hint; the same id also appears below.
-  <entry chunk_id="129" lines="23-30" matched="true"/>
-</chunk_index>
-<document_content>
-  <chunk id='128'><![CDATA[…]]></chunk>  ← This is the citation source.
-  <chunk id='129'><![CDATA[…]]></chunk>
-</document_content>
+<document title="Q2 Roadmap" source="File" view="full">
+  [3] First milestone is …
+  [4] Second milestone is …
 </document>
 ```

 ### Rules

- Use the **exact** id from a `<chunk id='…'>` tag whose content you actually quoted or paraphrased. Copy digit-for-digit; do **not** retype from memory.
- Before emitting `[citation:N]`, confirm the literal substring `<chunk id='N'>` (or its index twin `chunk_id="N"`) appears in the tool result you are summarising this turn. If you can't see it, omit the citation.
- Never cite `<document_id>` — that's the parent doc, not a chunk.
- Never invent, normalise, shorten, or guess at adjacent ids. If unsure between two candidates, omit rather than pick.
- Prefer **fewer accurate citations** over many speculative ones. One correct `[citation:128]` is more useful than a string of wrong ids.
- Multiple chunks supporting the same point → comma-separated and copied individually: `[citation:128], [citation:129]`.
- Plain square brackets only — no markdown links, no parentheses, no footnote numbers.
- If a claim came from a tool result that did **not** carry a chunk id (`ls`, `glob`, `grep` listings, error strings, or files without `<chunk id='…'>`), skip the citation.
- The absolute path under `/documents/` is always required; chunk citations are additive, they do not replace the path reference.
+- Use the **exact** `[n]` shown next to the passage you actually quoted or paraphrased. Copy it digit-for-digit; do **not** retype from memory or renumber.
+- Before emitting an `[n]`, confirm that bracketed label appears in the `read_file` output you are summarising this turn. If you can't see it, omit the citation.
+- Labels are **not** sequential by position — a passage may be `[7]` while the one above it is `[3]` (numbering is shared across the whole conversation). Copy what you see; never guess an adjacent number.
+- Prefer **fewer accurate citations** over many speculative ones. One correct `[3]` is more useful than a string of wrong numbers.
+- Several passages behind one point → each in its own brackets with nothing between: `[3][4]`. Never `[3, 4]` and never a range like `[3-4]`.
+- Write the bare label `[n]` only — no `[citation:…]` wrapper, no markdown links, no parentheses, no footnote numbers.
+- If a claim came from a tool result that did **not** carry `[n]` labels (`ls`, `glob`, `grep` listings, error strings), skip the citation.
+- The absolute path under `/documents/` is always required; `[n]` labels are additive, they do not replace the path reference.

-Example: `The Q2 roadmap lists three milestones (/documents/planning/q2-roadmap.md) [citation:128], [citation:129].`
+Example: `The Q2 roadmap lists three milestones (/documents/planning/q2-roadmap.md) [3][4].`
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_desktop.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_desktop.md
@ -9,7 +9,6 @@ The caller's question often references files by description (`"my meeting notes
 1. If you do not know which mounts exist, call `ls('/')` first.
 2. Walk likely folders with the `ls` and `list_tree` tools.
 3. Use `glob` for filename patterns; use `grep` when the description points at *content* rather than a name.
-4. `<priority_documents>` lists top-K cloud-ingested docs, not local files — consult it only when the task spans both worlds (e.g. drafting a local note from a Notion source). Skip otherwise.

 If a precise path was already given, use it directly — skip the lookup.

@ -29,6 +28,6 @@ Reply in plain prose:
 - If the workspace does not contain the requested information, say so explicitly. Do not fabricate paths or content.
 - If the question is genuinely ambiguous after a thorough lookup, list the candidates with their paths and stop.

-## Chunk citations
+## Citations

-In desktop mode your filesystem tools read local files only, and local-file `read_file` responses do **not** carry `<chunk id='…'>` tags. Cite each claim with the absolute local path; do not emit `[citation:…]` markers — your caller has nothing to resolve them against.
+In desktop mode your filesystem tools read local files only, which are not KB-indexed and carry no `[n]` citation labels. Cite each claim with the absolute local path; do not emit `[n]` or `[citation:…]` markers — your caller has nothing to resolve them against.
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/research/agent.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/research/agent.py
@ -7,6 +7,9 @@ from typing import Any
 from langchain_core.language_models import BaseChatModel
 from langchain_core.tools import BaseTool

+from app.agents.chat.multi_agent_chat.shared.middleware.citation_state import (
+    build_citation_state_mw,
+)
 from app.agents.chat.multi_agent_chat.subagents.shared.md_file_reader import (
    read_md_file,
 )
@ -31,6 +34,12 @@ def build_subagent(
        or "Handles research tasks for this workspace."
    )
    system_prompt = read_md_file(__package__, "system_prompt").strip()
+    # web_search registers WEB_RESULT citations via Command(update=...); the
+    # citation-state middleware declares the channel so those [n] merge back up.
+    middleware_with_citations = {
+        **(middleware_stack or {}),
+        "citation_state": build_citation_state_mw(),
+    }
    return pack_subagent(
        name=NAME,
        description=description,
@ -39,5 +48,5 @@ def build_subagent(
        ruleset=RULESET,
        dependencies=dependencies,
        model=model,
-        middleware_stack=middleware_stack,
+        middleware_stack=middleware_with_citations,
    )
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/research/system_prompt.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/research/system_prompt.md
@ -17,6 +17,16 @@ Gather and synthesize evidence using SurfSense research tools with clear citatio
 - Never fabricate facts, citations, URLs, or quote text.
 </tool_policy>

+<citations>
+`web_search` returns a `<web_results>` block whose results are each prefixed with a bracketed label — `[1]`, `[2]`, `[3]`. That `[n]` is the citation label. When a finding came from a specific result, append its `[n]` to that finding, copying the label **exactly** as shown. The caller relays these labels verbatim and the server resolves each one, so a wrong number silently breaks the citation.
+
+- Use the exact `[n]` shown next to the result you actually used; never renumber, guess, or invent a label.
+- Before emitting an `[n]`, confirm that bracketed label appears in the `web_search` output this turn. If you can't see it, omit it.
+- Write the bare label `[n]` only — no `[citation:…]` wrapper, no markdown links.
+- Several results behind one finding → each in its own brackets with nothing between: `[1][2]`.
+- `scrape_webpage` returns raw page text with no `[n]` labels; a fact drawn only from a scrape carries no citation (report the URL in `evidence.sources` instead).
+</citations>
+
 <out_of_scope>
 - Do not execute connector mutations (email/calendar/docs/chat writes) or deliverable generation.
 </out_of_scope>
@ -47,6 +57,6 @@ Return **only** one JSON object (no markdown/prose):
 }
 <include snippet="output_contract_base"/>
 Route-specific rules:
- `evidence.findings`: max 10 entries, each a single sentence stating one distinct fact. Do not paste raw paragraphs, scraped pages, or quote blocks.
- `evidence.sources`: max 10 URLs, one per finding when applicable. List each URL once.
+- `evidence.findings`: max 10 entries, each a single sentence stating one distinct fact. Append the supporting `[n]` to each finding drawn from a `web_search` result. Do not paste raw paragraphs, scraped pages, or quote blocks.
+- `evidence.sources`: max 10 URLs, one per finding when applicable. List each URL once. (Citations travel as `[n]`; `sources` is for transparency and for scrape-only facts that carry no `[n]`.)
 </output_contract>
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/research/tools/init.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/research/tools/init.py
@ -1,7 +1,8 @@
-"""Research-stage tools: web search and scrape."""
+"""Research-stage tools: web search (shared) and scrape."""
+
+from app.agents.chat.shared.tools.web_search import create_web_search_tool

 from .scrape_webpage import create_scrape_webpage_tool
-from .web_search import create_web_search_tool

 __all__ = [
    "create_scrape_webpage_tool",
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/research/tools/index.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/research/tools/index.py
@ -7,9 +7,9 @@ from typing import Any
 from langchain_core.tools import BaseTool

 from app.agents.chat.multi_agent_chat.shared.permissions import Ruleset
+from app.agents.chat.shared.tools.web_search import create_web_search_tool

 from .scrape_webpage import create_scrape_webpage_tool
-from .web_search import create_web_search_tool

 NAME = "research"

--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/research/tools/web_search.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/research/tools/web_search.py
@ -1,241 +0,0 @@
-"""Real-time web search: SearXNG plus configured live-search connectors (Tavily, Linkup, Baidu, etc.)."""
-
-import asyncio
-import json
-import time
-from typing import Any
-
-from langchain_core.tools import StructuredTool
-from pydantic import BaseModel, Field
-
-from app.db import shielded_async_session
-from app.services.connector_service import ConnectorService
-from app.utils.perf import get_perf_logger
-
-_LIVE_SEARCH_CONNECTORS: set[str] = {
-    "TAVILY_API",
-    "LINKUP_API",
-    "BAIDU_SEARCH_API",
-}
-
-_LIVE_CONNECTOR_SPECS: dict[str, tuple[str, bool, bool, dict[str, Any]]] = {
-    "TAVILY_API": ("search_tavily", False, True, {}),
-    "LINKUP_API": ("search_linkup", False, False, {"mode": "standard"}),
-    "BAIDU_SEARCH_API": ("search_baidu", False, True, {}),
-}
-
-_CONNECTOR_LABELS: dict[str, str] = {
-    "TAVILY_API": "Tavily",
-    "LINKUP_API": "Linkup",
-    "BAIDU_SEARCH_API": "Baidu",
-}
-
-
-class WebSearchInput(BaseModel):
-    """Input schema for the web_search tool."""
-
-    query: str = Field(
-        description="The search query to look up on the web. Use specific, descriptive terms.",
-    )
-    top_k: int = Field(
-        default=10,
-        description="Number of results to retrieve (default: 10, max: 50).",
-    )
-
-
-def _format_web_results(
-    documents: list[dict[str, Any]],
-    *,
-    max_chars: int = 50_000,
-) -> str:
-    """Format web search results into XML suitable for the LLM context."""
-    if not documents:
-        return "No web search results found."
-
-    parts: list[str] = []
-    total_chars = 0
-
-    for doc in documents:
-        doc_info = doc.get("document") or {}
-        metadata = doc_info.get("metadata") or {}
-        title = doc_info.get("title") or "Web Result"
-        url = metadata.get("url") or ""
-        content = (doc.get("content") or "").strip()
-        source = metadata.get("document_type") or doc.get("source") or "WEB_SEARCH"
-        if not content:
-            continue
-
-        metadata_json = json.dumps(metadata, ensure_ascii=False)
-        doc_xml = "\n".join(
-            [
-                "<document>",
-                "<document_metadata>",
-                f"  <document_type>{source}</document_type>",
-                f"  <title><![CDATA[{title}]]></title>",
-                f"  <url><![CDATA[{url}]]></url>",
-                f"  <metadata_json><![CDATA[{metadata_json}]]></metadata_json>",
-                "</document_metadata>",
-                "<document_content>",
-                f"  <chunk id='{url}'><![CDATA[{content}]]></chunk>",
-                "</document_content>",
-                "</document>",
-                "",
-            ]
-        )
-
-        if total_chars + len(doc_xml) > max_chars:
-            parts.append("<!-- Output truncated to fit context window -->")
-            break
-
-        parts.append(doc_xml)
-        total_chars += len(doc_xml)
-
-    return "\n".join(parts).strip() or "No web search results found."
-
-
-async def _search_live_connector(
-    connector: str,
-    query: str,
-    search_space_id: int,
-    top_k: int,
-    semaphore: asyncio.Semaphore,
-) -> list[dict[str, Any]]:
-    """Dispatch a single live-search connector (Tavily / Linkup / Baidu)."""
-    perf = get_perf_logger()
-    spec = _LIVE_CONNECTOR_SPECS.get(connector)
-    if spec is None:
-        return []
-
-    method_name, _includes_date_range, includes_top_k, extra_kwargs = spec
-    kwargs: dict[str, Any] = {
-        "user_query": query,
-        "search_space_id": search_space_id,
-        **extra_kwargs,
-    }
-    if includes_top_k:
-        kwargs["top_k"] = top_k
-
-    try:
-        t0 = time.perf_counter()
-        async with semaphore, shielded_async_session() as session:
-            svc = ConnectorService(session, search_space_id)
-            _, chunks = await getattr(svc, method_name)(**kwargs)
-            perf.info(
-                "[web_search] connector=%s results=%d in %.3fs",
-                connector,
-                len(chunks),
-                time.perf_counter() - t0,
-            )
-            return chunks
-    except Exception as e:
-        perf.warning("[web_search] connector=%s FAILED: %s", connector, e)
-        return []
-
-
-def create_web_search_tool(
-    search_space_id: int | None = None,
-    available_connectors: list[str] | None = None,
-) -> StructuredTool:
-    """Factory for the ``web_search`` tool.
-
-    Dispatches in parallel to the platform SearXNG instance and any
-    user-configured live-search connectors (Tavily, Linkup, Baidu).
-    """
-    active_live_connectors: list[str] = []
-    if available_connectors:
-        active_live_connectors = [
-            c for c in available_connectors if c in _LIVE_SEARCH_CONNECTORS
-        ]
-
-    engine_names = ["SearXNG (platform default)"]
-    engine_names.extend(_CONNECTOR_LABELS.get(c, c) for c in active_live_connectors)
-    engines_summary = ", ".join(engine_names)
-
-    description = (
-        "Search the web for real-time information. "
-        "Use this for current events, news, prices, weather, public facts, or any "
-        "question that requires up-to-date information from the internet.\n\n"
-        f"Active search engines: {engines_summary}.\n"
-        "All configured engines are queried in parallel and results are merged."
-    )
-
-    _search_space_id = search_space_id
-    _active_live = active_live_connectors
-
-    async def _web_search_impl(query: str, top_k: int = 10) -> str:
-        from app.services import web_search_service
-
-        perf = get_perf_logger()
-        t0 = time.perf_counter()
-        clamped_top_k = min(max(1, top_k), 50)
-
-        semaphore = asyncio.Semaphore(4)
-        tasks: list[asyncio.Task[list[dict[str, Any]]]] = []
-
-        if web_search_service.is_available():
-
-            async def _searxng() -> list[dict[str, Any]]:
-                async with semaphore:
-                    _result_obj, docs = await web_search_service.search(
-                        query=query,
-                        top_k=clamped_top_k,
-                    )
-                    return docs
-
-            tasks.append(asyncio.ensure_future(_searxng()))
-
-        if _search_space_id is not None:
-            for connector in _active_live:
-                tasks.append(
-                    asyncio.ensure_future(
-                        _search_live_connector(
-                            connector=connector,
-                            query=query,
-                            search_space_id=_search_space_id,
-                            top_k=clamped_top_k,
-                            semaphore=semaphore,
-                        )
-                    )
-                )
-
-        if not tasks:
-            return "Web search is not available — no search engines are configured."
-
-        results_lists = await asyncio.gather(*tasks, return_exceptions=True)
-
-        all_documents: list[dict[str, Any]] = []
-        for result in results_lists:
-            if isinstance(result, BaseException):
-                perf.warning("[web_search] a search engine failed: %s", result)
-                continue
-            all_documents.extend(result)
-
-        seen_urls: set[str] = set()
-        deduplicated: list[dict[str, Any]] = []
-        for doc in all_documents:
-            url = ((doc.get("document") or {}).get("metadata") or {}).get("url", "")
-            if url and url in seen_urls:
-                continue
-            if url:
-                seen_urls.add(url)
-            deduplicated.append(doc)
-
-        formatted = _format_web_results(deduplicated)
-
-        perf.info(
-            "[web_search] query=%r engines=%d results=%d deduped=%d chars=%d in %.3fs",
-            query[:60],
-            len(tasks),
-            len(all_documents),
-            len(deduplicated),
-            len(formatted),
-            time.perf_counter() - t0,
-        )
-        return formatted
-
-    return StructuredTool(
-        name="web_search",
-        description=description,
-        coroutine=_web_search_impl,
-        args_schema=WebSearchInput,
-    )
--- a/surfsense_backend/app/agents/chat/runtime/mention_resolver.py
+++ b/surfsense_backend/app/agents/chat/runtime/mention_resolver.py
@ -74,8 +74,9 @@ class ResolvedMentionSet:
    ``@Project``).

    ``mentioned_document_ids`` is an ordered, deduped list consumed by
-    the priority middleware downstream — see
-    ``KnowledgePriorityMiddleware._compute_priority_paths``.
+    the on-demand ``search_knowledge_base`` tool downstream (via
+    ``referenced_document_ids``) to pin @-mentioned docs into the
+    retrieval scope.
    """

    mentions: list[ResolvedMention] = field(default_factory=list)
@ -113,8 +114,8 @@ async def resolve_mentions(

    * Legacy clients that haven't migrated to the unified chip list
      still send the id arrays — we treat the union as authoritative.
-    * The id arrays are the canonical input to
-      ``KnowledgePriorityMiddleware`` (via ``SurfSenseContextSchema``);
+    * The id arrays are the canonical input to the retrieval scope
+      (via ``SurfSenseContextSchema`` → ``referenced_document_ids``);
      returning the deduped, validated lists lets the route forward
      them unchanged.

--- a/surfsense_backend/app/agents/chat/runtime/path_resolver.py
+++ b/surfsense_backend/app/agents/chat/runtime/path_resolver.py
@ -4,7 +4,6 @@ This module is the single source of truth for mapping ``Document`` rows to
 virtual paths under ``/documents/`` and back. It is used by:

 * :class:`KnowledgeTreeMiddleware` (rendering the workspace tree)
-* :class:`KnowledgePriorityMiddleware` (computing priority paths)
 * :class:`KBPostgresBackend` (``als_info`` / ``aread`` / move operations)
 * :class:`KnowledgeBasePersistenceMiddleware` (resolving moves and creates)

--- a/surfsense_backend/app/agents/chat/runtime/references/__init__.py
+++ b/surfsense_backend/app/agents/chat/runtime/references/__init__.py
@ -0,0 +1,95 @@
+"""Resolved ``@``-references and their pointer block.
+
+References are scope, not content: they tell the model what the user pointed
+at this turn so it can retrieve from those sources with tools.
+"""
+
+from __future__ import annotations
+
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.agents.chat.runtime.path_resolver import build_path_index
+from app.schemas.new_chat import MentionedDocumentInfo
+
+from .chat import resolve_chat_references
+from .connectors import resolve_connector_references
+from .documents import referenced_document_ids, resolve_document_references
+from .folders import resolve_folder_references
+from .models import (
+    ChatReference,
+    ConnectorReference,
+    DocumentReference,
+    FolderReference,
+    Reference,
+    ReferenceKind,
+)
+from .reference_pointers import render_reference_pointers
+
+
+async def resolve_references(
+    session: AsyncSession,
+    *,
+    search_space_id: int,
+    requesting_user_id: str | None,
+    current_chat_id: int,
+    document_ids: list[int] | None = None,
+    folder_ids: list[int] | None = None,
+    connector_ids: list[int] | None = None,
+    connector_chips: list[MentionedDocumentInfo] | None = None,
+    thread_ids: list[int] | None = None,
+) -> list[Reference]:
+    """Resolve a turn's ``@``-references into one ordered pointer list.
+
+    Order is documents, folders, connectors, chats. The path index is built
+    once and shared by the document and folder resolvers.
+
+    Args:
+        session: Database session handed to the path index builder and to
+            every per-kind resolver.
+        search_space_id: Search space forwarded to every lookup.
+        requesting_user_id: Requester id; only forwarded to the ``@chat``
+            resolver.
+        current_chat_id: Id of the active chat; only forwarded to the
+            ``@chat`` resolver.
+        document_ids: ``@document`` ids; ``None`` or empty skips the step.
+        folder_ids: ``@folder`` ids; ``None`` or empty skips the step.
+        connector_ids: ``@connector`` ids; ``None`` or empty skips the step.
+        connector_chips: Mention chips forwarded to the connector resolver
+            as ``chips``; ignored when ``connector_ids`` is empty.
+        thread_ids: ``@chat`` thread ids; ``None`` or empty skips the step.
+
+    Returns:
+        The concatenated references (possibly empty) in the order above.
+    """
+    references: list[Reference] = []
+
+    # Only the document and folder resolvers take the path index, so it is
+    # not built at all when neither kind was referenced.
+    if document_ids or folder_ids:
+        index = await build_path_index(session, search_space_id)
+        if document_ids:
+            references += await resolve_document_references(
+                session,
+                search_space_id=search_space_id,
+                document_ids=document_ids,
+                index=index,
+            )
+        if folder_ids:
+            references += await resolve_folder_references(
+                session,
+                search_space_id=search_space_id,
+                folder_ids=folder_ids,
+                index=index,
+            )
+
+    if connector_ids:
+        references += await resolve_connector_references(
+            session,
+            search_space_id=search_space_id,
+            connector_ids=connector_ids,
+            chips=connector_chips,
+        )
+
+    if thread_ids:
+        references += await resolve_chat_references(
+            session,
+            search_space_id=search_space_id,
+            requesting_user_id=requesting_user_id,
+            current_chat_id=current_chat_id,
+            thread_ids=thread_ids,
+        )
+
+    return references
+
+
+__all__ = [
+    "ChatReference",
+    "ConnectorReference",
+    "DocumentReference",
+    "FolderReference",
+    "Reference",
+    "ReferenceKind",
+    "referenced_document_ids",
+    "render_reference_pointers",
+    "resolve_references",
+]
--- a/surfsense_backend/app/agents/chat/runtime/references/chat/__init__.py
+++ b/surfsense_backend/app/agents/chat/runtime/references/chat/__init__.py
@ -0,0 +1,7 @@
+"""Resolve ``@chat`` mentions into pointers, access-checked, titles only."""
+
+from __future__ import annotations
+
+from .resolver import resolve_chat_references
+
+__all__ = ["resolve_chat_references"]
--- a/surfsense_backend/app/agents/chat/runtime/references/chat/access.py
+++ b/surfsense_backend/app/agents/chat/runtime/references/chat/access.py
@ -0,0 +1,79 @@
+"""Access-checked lookup of chat threads the requester may read.
+
+The single place chat visibility is enforced: a thread is readable when it is
+shared with the search space, the requester created it, or it is a legacy
+null-creator thread and the requester owns the search space. Anything else is
+dropped (fail-closed).
+"""
+
+from __future__ import annotations
+
+import logging
+from uuid import UUID
+
+from sqlalchemy import or_, select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.db import ChatVisibility, NewChatThread, SearchSpace
+
+logger = logging.getLogger(__name__)
+
+
+def _visibility_predicate(user_uuid: UUID | None, *, include_legacy: bool):
+    """SQL predicate for threads the requester may read.
+
+    The result is an ``OR`` of: the thread has ``SEARCH_SPACE`` visibility
+    (always included); the thread was created by ``user_uuid`` (only when a
+    uuid is given); the thread has no creator (only when ``include_legacy``
+    is true).
+    """
+    conditions = [NewChatThread.visibility == ChatVisibility.SEARCH_SPACE]
+    if user_uuid is not None:
+        conditions.append(NewChatThread.created_by_id == user_uuid)
+    if include_legacy:
+        conditions.append(NewChatThread.created_by_id.is_(None))
+    return or_(*conditions)
+
+
+async def accessible_threads(
+    session: AsyncSession,
+    *,
+    search_space_id: int,
+    requesting_user_id: str | None,
+    thread_ids: list[int],
+    exclude_thread_id: int | None = None,
+) -> list[NewChatThread]:
+    """Threads in this space the requester may read, in requested order.
+
+    Input order is preserved and de-duplicated; ``exclude_thread_id`` (the
+    active chat) is removed so a chat never references itself. Inaccessible or
+    foreign ids are silently dropped.
+
+    Args:
+        session: Database session used for the owner and thread queries.
+        search_space_id: Only threads belonging to this search space match.
+        requesting_user_id: Requester id as a UUID string. ``None``, empty, or
+            unparsable values leave the requester unidentified, so only
+            threads with ``SEARCH_SPACE`` visibility can match.
+        thread_ids: Candidate thread ids, possibly with duplicates.
+        exclude_thread_id: Id removed from the candidates before querying.
+
+    Returns:
+        The readable threads, ordered like the de-duplicated input.
+    """
+    # ``dict.fromkeys`` de-duplicates while keeping first-seen order.
+    requested = [tid for tid in dict.fromkeys(thread_ids) if tid != exclude_thread_id]
+    if not requested:
+        return []
+
+    user_uuid: UUID | None = None
+    if requesting_user_id:
+        try:
+            user_uuid = UUID(requesting_user_id)
+        except (TypeError, ValueError):
+            # Fail closed: keep ``user_uuid`` as ``None`` instead of raising.
+            logger.warning(
+                "accessible_threads: invalid user_id=%r; restricting to shared",
+                requesting_user_id,
+            )
+
+    # Legacy null-creator threads are readable only by the search-space owner.
+    include_legacy = False
+    if user_uuid is not None:
+        owner_id = await session.scalar(
+            select(SearchSpace.user_id).where(SearchSpace.id == search_space_id)
+        )
+        include_legacy = owner_id == user_uuid
+
+    rows = await session.execute(
+        select(NewChatThread).where(
+            NewChatThread.id.in_(requested),
+            NewChatThread.search_space_id == search_space_id,
+            _visibility_predicate(user_uuid, include_legacy=include_legacy),
+        )
+    )
+    # The query result is re-ordered to match the request; ids the query did
+    # not return are skipped.
+    threads_by_id = {row.id: row for row in rows.scalars().all()}
+    return [threads_by_id[tid] for tid in requested if tid in threads_by_id]
+
+
+__all__ = ["accessible_threads"]
--- a/surfsense_backend/app/agents/chat/runtime/references/chat/resolver.py
+++ b/surfsense_backend/app/agents/chat/runtime/references/chat/resolver.py
@ -0,0 +1,41 @@
+"""Resolve ``@chat`` mentions into pointer references.
+
+Chats are not KB-indexed, so a chat reference is a pointer only; its turns are
+read on demand via the chat read tool, not injected here. Only the title is
+needed, so this takes the cheap access-checked path and never loads transcripts.
+"""
+
+from __future__ import annotations
+
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from ..models import ChatReference
+from .access import accessible_threads
+
+
+async def resolve_chat_references(
+    session: AsyncSession,
+    *,
+    search_space_id: int,
+    requesting_user_id: str | None,
+    current_chat_id: int,
+    thread_ids: list[int],
+) -> list[ChatReference]:
+    """Map ``@chat`` thread ids to access-checked pointers (titles only).
+
+    Args:
+        session: Database session passed to ``accessible_threads``.
+        search_space_id: Search space the threads must belong to.
+        requesting_user_id: Requester id used for the access check.
+        current_chat_id: Active chat id, passed as ``exclude_thread_id`` so
+            the current chat is never returned as a reference.
+        thread_ids: Thread ids the user mentioned.
+
+    Returns:
+        One ``ChatReference`` per thread returned by ``accessible_threads``;
+        a thread without a title is labelled ``"Untitled chat"``.
+    """
+    if not thread_ids:
+        return []
+
+    threads = await accessible_threads(
+        session,
+        search_space_id=search_space_id,
+        requesting_user_id=requesting_user_id,
+        thread_ids=thread_ids,
+        exclude_thread_id=current_chat_id,
+    )
+    return [
+        ChatReference(entity_id=thread.id, label=str(thread.title or "Untitled chat"))
+        for thread in threads
+    ]
+
+
+__all__ = ["resolve_chat_references"]
--- a/surfsense_backend/app/agents/chat/runtime/references/connectors.py
+++ b/surfsense_backend/app/agents/chat/runtime/references/connectors.py
@ -0,0 +1,83 @@
+"""Resolve ``@connector`` account mentions into references for the pointer block."""
+
+from __future__ import annotations
+
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.db import SearchSourceConnector
+from app.schemas.new_chat import MentionedDocumentInfo
+
+from .models import ConnectorReference
+
+
+def connector_pointer_fields(
+    *,
+    account_name: str | None,
+    connector_type: str | None,
+    fallback_name: str | None,
+) -> tuple[str, str | None]:
+    """Pick the account label and provider for a connector pointer.
+
+    Prefers the chip the user selected (``account_name`` / ``connector_type``)
+    and falls back to the stored connector name.
+
+    Returns:
+        ``(label, provider)``. ``label`` is the first non-empty value of
+        ``account_name``, ``fallback_name`` and the literal ``"account"``.
+        ``provider`` is ``connector_type``, with an empty string normalised
+        to ``None``.
+    """
+    label = account_name or fallback_name or "account"
+    return label, connector_type or None
+
+
+async def resolve_connector_references(
+    session: AsyncSession,
+    *,
+    search_space_id: int,
+    connector_ids: list[int],
+    chips: list[MentionedDocumentInfo] | None = None,
+) -> list[ConnectorReference]:
+    """Map ``@connector`` ids to references; ids outside the space are dropped.
+
+    The DB check confirms the connector belongs to this search space.
+    Display fields prefer the user's chip and fall back to the stored
+    connector name and type when the chip is missing or leaves them empty.
+
+    Args:
+        session: Database session used for the connector lookup.
+        search_space_id: Search space the connectors must belong to.
+        connector_ids: Mentioned connector ids, possibly with duplicates.
+        chips: Mention chips; only those with ``kind == "connector"`` are
+            used, matched to connectors by ``id``.
+
+    Returns:
+        One reference per distinct accessible id, in first-seen input order.
+    """
+    if not connector_ids:
+        return []
+
+    rows = await session.execute(
+        select(
+            SearchSourceConnector.id,
+            SearchSourceConnector.name,
+            SearchSourceConnector.connector_type,
+        ).where(
+            SearchSourceConnector.search_space_id == search_space_id,
+            SearchSourceConnector.id.in_(connector_ids),
+        )
+    )
+    accessible = {row.id: row for row in rows.all()}
+
+    chip_by_id = {
+        chip.id: chip for chip in (chips or []) if chip.kind == "connector"
+    }
+
+    references: list[ConnectorReference] = []
+    # ``dict.fromkeys`` de-duplicates while keeping first-seen order.
+    for connector_id in dict.fromkeys(connector_ids):
+        row = accessible.get(connector_id)
+        if row is None:
+            continue
+        chip = chip_by_id.get(connector_id)
+        # Use ``.value`` when the stored type exposes one (presumably an enum
+        # member — confirm against the model), otherwise the raw value.
+        stored_type = getattr(row.connector_type, "value", row.connector_type)
+        label, provider = connector_pointer_fields(
+            account_name=chip.account_name if chip else None,
+            connector_type=(chip.connector_type if chip else None)
+            or (str(stored_type) if stored_type else None),
+            fallback_name=str(row.name or ""),
+        )
+        references.append(
+            ConnectorReference(
+                entity_id=connector_id,
+                label=label,
+                provider=provider,
+            )
+        )
+    return references
+
+
+__all__ = ["connector_pointer_fields", "resolve_connector_references"]
--- a/surfsense_backend/app/agents/chat/runtime/references/documents/__init__.py
+++ b/surfsense_backend/app/agents/chat/runtime/references/documents/__init__.py
@ -0,0 +1,13 @@
+"""Resolve ``@document`` references.
+
+Two concerns, one subject: ``resolver`` turns document ids into pointer
+references for the model, ``referenced`` turns ``@document`` / ``@folder``
+mentions into the document ids a retrieval is confined to.
+"""
+
+from __future__ import annotations
+
+from .referenced import referenced_document_ids
+from .resolver import resolve_document_references
+
+__all__ = ["referenced_document_ids", "resolve_document_references"]
--- a/surfsense_backend/app/agents/chat/runtime/references/documents/referenced.py
+++ b/surfsense_backend/app/agents/chat/runtime/references/documents/referenced.py
@ -0,0 +1,39 @@
+"""Resolve ``@document`` / ``@folder`` mentions to the documents they point at.
+
+Reference resolution, not retrieval: this answers "which knowledge-base
+documents did the user point at this turn?". ``@document`` ids pass through;
+``@folder`` ids expand to the documents directly inside each folder within this
+search space (direct children only, not nested subfolders). The caller turns the
+returned ids into a retrieval ``SearchScope``.
+"""
+
+from __future__ import annotations
+
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.db import Document
+
+
+async def referenced_document_ids(
+    session: AsyncSession,
+    *,
+    search_space_id: int,
+    document_ids: list[int] | None = None,
+    folder_ids: list[int] | None = None,
+) -> tuple[int, ...]:
+    """Sorted document ids the user pointed at (empty = nothing referenced).
+
+    Args:
+        session: Database session used for the folder expansion query.
+        search_space_id: Search space the folder expansion is confined to.
+        document_ids: ``@document`` ids, added to the result as given.
+        folder_ids: ``@folder`` ids, expanded to the ids of documents whose
+            ``folder_id`` is one of them.
+
+    Returns:
+        The de-duplicated union of both sources, sorted ascending.
+    """
+    # NOTE(review): ``document_ids`` are not checked against
+    # ``search_space_id`` here; only the folder expansion below is scoped.
+    # Presumably the caller or the retrieval layer enforces that — confirm.
+    doc_ids = set(document_ids or [])
+    folders = list(folder_ids or [])
+    if folders:
+        rows = await session.execute(
+            select(Document.id).where(
+                Document.search_space_id == search_space_id,
+                Document.folder_id.in_(folders),
+            )
+        )
+        doc_ids.update(rows.scalars().all())
+    return tuple(sorted(doc_ids))
+
+
+__all__ = ["referenced_document_ids"]
--- a/surfsense_backend/app/agents/chat/runtime/references/documents/resolver.py
+++ b/surfsense_backend/app/agents/chat/runtime/references/documents/resolver.py
@ -0,0 +1,58 @@
+"""Resolve ``@document`` ids into references for the pointer block."""
+
+from __future__ import annotations
+
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.agents.chat.runtime.path_resolver import PathIndex, doc_to_virtual_path
+from app.db import Document
+
+from ..models import DocumentReference
+
+
+async def resolve_document_references(
+    session: AsyncSession,
+    *,
+    search_space_id: int,
+    document_ids: list[int],
+    index: PathIndex,
+) -> list[DocumentReference]:
+    """Map document ids to references in input order; unknown ids are dropped.
+
+    Best-effort and fail-closed: an id outside ``search_space_id`` (deleted or
+    foreign) simply does not produce a reference.
+
+    Args:
+        session: Database session used for the document lookup.
+        search_space_id: Search space the documents must belong to.
+        document_ids: Mentioned document ids, possibly with duplicates.
+        index: Path index passed to ``doc_to_virtual_path``.
+
+    Returns:
+        One reference per distinct id that was found, in first-seen order.
+    """
+    if not document_ids:
+        return []
+
+    rows = await session.execute(
+        select(Document).where(
+            Document.search_space_id == search_space_id,
+            Document.id.in_(document_ids),
+        )
+    )
+    documents_by_id = {row.id: row for row in rows.scalars().all()}
+
+    references: list[DocumentReference] = []
+    # ``dict.fromkeys`` de-duplicates while keeping first-seen order.
+    for document_id in dict.fromkeys(document_ids):
+        document = documents_by_id.get(document_id)
+        if document is None:
+            continue
+        # The same fallback title feeds both the label and the virtual path.
+        title = str(document.title or "untitled")
+        references.append(
+            DocumentReference(
+                entity_id=document.id,
+                label=title,
+                path=doc_to_virtual_path(
+                    doc_id=document.id,
+                    title=title,
+                    folder_id=document.folder_id,
+                    index=index,
+                ),
+            )
+        )
+    return references
+
+
+__all__ = ["resolve_document_references"]
--- a/surfsense_backend/app/agents/chat/runtime/references/folders.py
+++ b/surfsense_backend/app/agents/chat/runtime/references/folders.py
@ -0,0 +1,54 @@
+"""Resolve ``@folder`` ids into references for the pointer block."""
+
+from __future__ import annotations
+
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.agents.chat.runtime.path_resolver import DOCUMENTS_ROOT, PathIndex
+from app.db import Folder
+
+from .models import FolderReference
+
+
+def folder_pointer_path(folder_id: int, folder_paths: dict[int, str]) -> str:
+    """Trailing-slash virtual path so the model reads the pointer as a directory.
+
+    A ``folder_id`` missing from ``folder_paths`` falls back to
+    ``DOCUMENTS_ROOT``. A ``/`` is appended only when the path does not
+    already end with one.
+    """
+    base = folder_paths.get(folder_id, DOCUMENTS_ROOT)
+    return base if base.endswith("/") else f"{base}/"
+
+
+async def resolve_folder_references(
+    session: AsyncSession,
+    *,
+    search_space_id: int,
+    folder_ids: list[int],
+    index: PathIndex,
+) -> list[FolderReference]:
+    """Map folder ids to references in input order; unknown ids are dropped.
+
+    Args:
+        session: Database session used for the folder lookup.
+        search_space_id: Search space the folders must belong to.
+        folder_ids: Mentioned folder ids, possibly with duplicates.
+        index: Path index whose ``folder_paths`` supplies each folder's path.
+
+    Returns:
+        One reference per distinct id that was found, in first-seen order;
+        a folder without a name is labelled ``"untitled"``.
+    """
+    if not folder_ids:
+        return []
+
+    rows = await session.execute(
+        select(Folder).where(
+            Folder.search_space_id == search_space_id,
+            Folder.id.in_(folder_ids),
+        )
+    )
+    folders_by_id = {row.id: row for row in rows.scalars().all()}
+
+    references: list[FolderReference] = []
+    # ``dict.fromkeys`` de-duplicates while keeping first-seen order.
+    for folder_id in dict.fromkeys(folder_ids):
+        folder = folders_by_id.get(folder_id)
+        if folder is None:
+            continue
+        references.append(
+            FolderReference(
+                entity_id=folder.id,
+                label=str(folder.name or "untitled"),
+                path=folder_pointer_path(folder.id, index.folder_paths),
+            )
+        )
+    return references
+
+
+__all__ = ["folder_pointer_path", "resolve_folder_references"]
--- a/surfsense_backend/app/agents/chat/runtime/references/models.py
+++ b/surfsense_backend/app/agents/chat/runtime/references/models.py
@ -0,0 +1,73 @@
+"""Data shapes for resolved ``@``-references.
+
+One type per kind so each carries exactly the fields it needs: documents and
+folders have a path, connectors have a provider, chats have neither. ``kind`` is
+a class-level discriminator used by the renderer and scope builder.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from enum import Enum
+from typing import ClassVar
+
+
+class ReferenceKind(str, Enum):
+    """What the user pointed at; the value is the label shown to the model.
+
+    Mixing in ``str`` makes each member a string equal to its value.
+    """
+
+    DOCUMENT = "document"
+    FOLDER = "folder"
+    CONNECTOR = "connector"
+    CHAT = "chat"
+
+
+@dataclass(frozen=True)
+class _Reference:
+    """Identity shared by every reference kind."""
+
+    # Id of the referenced entity (document, folder, connector or thread).
+    entity_id: int
+    # Display name: a title, a folder name or an account label.
+    label: str
+
+
+@dataclass(frozen=True)
+class DocumentReference(_Reference):
+    """A referenced document, reachable by its virtual path."""
+
+    # Virtual path of the document.
+    path: str
+    # ``ClassVar`` keeps the discriminator out of the dataclass fields, so it
+    # is not an ``__init__`` argument.
+    kind: ClassVar[ReferenceKind] = ReferenceKind.DOCUMENT
+
+
+@dataclass(frozen=True)
+class FolderReference(_Reference):
+    """A referenced folder, reachable by its virtual path."""
+
+    # Virtual path of the folder.
+    path: str
+    # ``ClassVar`` keeps the discriminator out of the dataclass fields, so it
+    # is not an ``__init__`` argument.
+    kind: ClassVar[ReferenceKind] = ReferenceKind.FOLDER
+
+
+@dataclass(frozen=True)
+class ConnectorReference(_Reference):
+    """A referenced connector account; ``provider`` is its type label."""
+
+    # Connector type label; ``None`` when no type is known.
+    provider: str | None = None
+    # ``ClassVar`` keeps the discriminator out of the dataclass fields, so it
+    # is not an ``__init__`` argument.
+    kind: ClassVar[ReferenceKind] = ReferenceKind.CONNECTOR
+
+
+@dataclass(frozen=True)
+class ChatReference(_Reference):
+    """A referenced chat thread; its turns are read on demand, not here."""
+
+    # ``ClassVar`` keeps the discriminator out of the dataclass fields, so it
+    # is not an ``__init__`` argument.
+    kind: ClassVar[ReferenceKind] = ReferenceKind.CHAT
+
+
+Reference = DocumentReference | FolderReference | ConnectorReference | ChatReference
+
+
+__all__ = [
+    "ChatReference",
+    "ConnectorReference",
+    "DocumentReference",
+    "FolderReference",
+    "Reference",
+    "ReferenceKind",
+]
--- a/surfsense_backend/app/agents/chat/runtime/references/reference_pointers.py
+++ b/surfsense_backend/app/agents/chat/runtime/references/reference_pointers.py
@ -0,0 +1,66 @@
+"""Render resolved references into a ``<referenced_this_turn>`` pointer block.
+
+Pointers, not content: each line names what the user referenced and how to
+reach it (a path, a connector handle, a title) so the model knows what to
+retrieve from. Actual content is pulled later via tools, never injected here.
+"""
+
+from __future__ import annotations
+
+from .models import (
+    ChatReference,
+    ConnectorReference,
+    DocumentReference,
+    FolderReference,
+    Reference,
+)
+
+_HEADER = (
+    "The user pointed at these with @ this turn. They are scope, not content "
+    "— when the question is about them, retrieve from them before answering."
+)
+
+
+def render_reference_pointers(references: list[Reference]) -> str | None:
+    """Render references as one read-only pointer block.
+
+    Returns ``None`` when there is nothing to render so callers can skip the
+    block entirely. Otherwise the block is the ``<referenced_this_turn>``
+    opening tag, the header sentence, one pointer line per reference in
+    input order, and the closing tag, each on its own line.
+    """
+    if not references:
+        return None
+
+    lines = [_render_pointer(reference) for reference in references]
+    return (
+        "<referenced_this_turn>\n"
+        f"{_HEADER}\n"
+        + "\n".join(lines)
+        + "\n</referenced_this_turn>"
+    )
+
+
+def _render_pointer(reference: Reference) -> str:
+    """One ``- {kind} {id} — {handle}`` line, shaped per kind.
+
+    ``kind`` is the reference's ``ReferenceKind`` value and ``id`` its
+    ``entity_id``; the handle comes from ``_handle``.
+    """
+    head = f"- {reference.kind.value} {reference.entity_id} — "
+    return head + _handle(reference)
+
+
+def _handle(reference: Reference) -> str:
+    """The human-reachable handle: a path, a connector provider, or a title.
+
+    Documents and folders render as ``"label" (path)``; connectors render as
+    ``provider (label)``, or just the label when no provider is set; chats
+    render as ``"label"``. The label is whitespace-collapsed first.
+    """
+    label = _clean(reference.label)
+    # NOTE(review): there is no ``case _`` arm, so a value outside the
+    # ``Reference`` union would fall through and return ``None``.
+    match reference:
+        case DocumentReference() | FolderReference():
+            return f'"{label}" ({reference.path})'
+        case ConnectorReference():
+            provider = _clean(reference.provider) if reference.provider else ""
+            return f"{provider} ({label})" if provider else label
+        case ChatReference():
+            return f'"{label}"'
+
+
+def _clean(text: str) -> str:
+    """Collapse whitespace so a title can't break the one-line pointer.
+
+    Every run of whitespace (including newlines) becomes a single space and
+    leading/trailing whitespace is removed.
+    """
+    return " ".join(text.split())
+
+
+__all__ = ["render_reference_pointers"]
--- a/surfsense_backend/app/agents/chat/shared/context.py
+++ b/surfsense_backend/app/agents/chat/shared/context.py
@ -11,9 +11,9 @@ MUST live on this context object instead of being captured into a
 middleware ``__init__`` closure. Middlewares read fields back via
 ``runtime.context.<field>``; tools read them via ``runtime.context``.

-This object is read inside both ``KnowledgePriorityMiddleware`` (for
-``mentioned_document_ids``) and any future middleware that needs
-per-request state without invalidating the compiled-agent cache.
+This object is read by the ``search_knowledge_base`` tool (for
+``mentioned_document_ids``) and any middleware that needs per-request
+state without invalidating the compiled-agent cache.
 """

 from __future__ import annotations
@ -43,13 +43,12 @@ class SurfSenseContextSchema:
    Phase 1.5 fields:
        search_space_id: Search space the request is scoped to.
        mentioned_document_ids: KB documents the user @-mentioned this turn.
-            Read by ``KnowledgePriorityMiddleware`` to seed its priority
-            list. Stays out of the compiled-agent cache key — that's the
-            whole point of putting it here.
+            Read by the ``search_knowledge_base`` tool to pin these docs
+            into the retrieval scope. Stays out of the compiled-agent cache
+            key — that's the whole point of putting it here.
        mentioned_folder_ids: KB folders the user @-mentioned this turn
-            (cloud filesystem mode). Surfaced as ``[USER-MENTIONED]``
-            entries in ``<priority_documents>`` so the agent prioritises
-            walking those folders with ``ls`` / ``find_documents``.
+            (cloud filesystem mode). Pinned into the ``search_knowledge_base``
+            retrieval scope so matches from those folders are prioritised.
        file_operation_contract: One-shot file operation contract for the
            upcoming turn (reserved; not currently populated).
        turn_id / request_id: Correlation IDs surfaced by the streaming
--- a/surfsense_backend/app/agents/chat/shared/middleware/compaction.py
+++ b/surfsense_backend/app/agents/chat/shared/middleware/compaction.py
@ -4,7 +4,7 @@ Extends ``SummarizationMiddleware`` with three SurfSense behaviors:

 1. A structured summary template (:data:`SURFSENSE_SUMMARY_PROMPT`) instead of
   the base freeform prompt.
-2. Protected SystemMessages (injected hints like ``<priority_documents>``) are
+2. Protected SystemMessages (injected hints like ``<workspace_tree>``) are
   kept verbatim instead of being summarized away.
 3. ``content=None`` is sanitized before ``get_buffer_string`` (some providers
   stream tool-only AIMessages with ``None`` content, which would crash it).
@ -77,7 +77,6 @@ Respond ONLY with the structured summary. Do not include any text before or afte
 # compaction step happens *before* re-injection in some paths, so we
 # must preserve them verbatim across the cutoff.
 PROTECTED_SYSTEM_PREFIXES: tuple[str, ...] = (
-    "<priority_documents>",  # KnowledgePriorityMiddleware
    "<workspace_tree>",  # KnowledgeTreeMiddleware
    "<file_operation_contract>",  # reserved file-operation contract prefix
    "<user_memory>",  # MemoryInjectionMiddleware
--- a/surfsense_backend/app/agents/chat/shared/tools/web_search.py
+++ b/surfsense_backend/app/agents/chat/shared/tools/web_search.py
@ -4,20 +4,40 @@ Web search tool for the SurfSense agent.
 Provides a unified tool for real-time web searches that dispatches to all
 configured search engines: the platform SearXNG instance (always available)
 plus any user-configured live-search connectors (Tavily, Linkup, Baidu).
+
+Each result is registered into the conversation citation registry as a
+``WEB_RESULT`` and rendered with a server-assigned ``[n]`` label, so the model
+cites the web exactly like the knowledge base — one ``[n]`` spine, no special
+web citation form.
 """

-import asyncio
-import json
-import time
-from typing import Any
+from __future__ import annotations

-from langchain_core.tools import StructuredTool
-from pydantic import BaseModel, Field
+import asyncio
+import time
+from typing import TYPE_CHECKING, Annotated, Any
+from urllib.parse import urlparse
+
+from langchain.tools import ToolRuntime
+from langchain_core.messages import ToolMessage
+from langchain_core.tools import BaseTool, StructuredTool
+from langgraph.types import Command

 from app.db import shielded_async_session
 from app.services.connector_service import ConnectorService
 from app.utils.perf import get_perf_logger

+if TYPE_CHECKING:
+    from app.agents.chat.multi_agent_chat.shared.document_render import (
+        RenderableDocument,
+    )
+
+# NOTE: imports from ``app.agents.chat.multi_agent_chat`` are done lazily inside
+# the functions below. This module lives under ``app.agents.chat.shared`` but is
+# imported during the ``multi_agent_chat`` package's own init cascade (via the
+# research subagent); importing that package at module load would re-enter a
+# partially-initialized module. Lazy imports break that cycle.
+
 _LIVE_SEARCH_CONNECTORS: set[str] = {
    "TAVILY_API",
    "LINKUP_API",
@ -37,28 +57,29 @@ _CONNECTOR_LABELS: dict[str, str] = {
 }


-class WebSearchInput(BaseModel):
-    """Input schema for the web_search tool."""
-
-    query: str = Field(
-        description="The search query to look up on the web. Use specific, descriptive terms.",
-    )
-    top_k: int = Field(
-        default=10,
-        description="Number of results to retrieve (default: 10, max: 50).",
-    )
+def _web_source_label(url: str) -> str:
+    """A compact, human-readable source for the ``<document source=…>`` attr."""
+    domain = urlparse(url).netloc.removeprefix("www.") if url else ""
+    return f"Web · {domain}" if domain else "Web"


-def _format_web_results(
+def _to_renderable_web_documents(
    documents: list[dict[str, Any]],
    *,
    max_chars: int = 50_000,
-) -> str:
-    """Format web search results into XML suitable for the LLM context."""
-    if not documents:
-        return "No web search results found."
+) -> list[RenderableDocument]:
+    """Map raw web results to renderable documents, one passage (the snippet) each.

-    parts: list[str] = []
+    A result with no URL is skipped: ``url`` is the citation locator, so without
+    it the result cannot be registered or resolved.
+    """
+    from app.agents.chat.multi_agent_chat.shared.citations import CitationSourceType
+    from app.agents.chat.multi_agent_chat.shared.document_render import (
+        RenderableDocument,
+        RenderablePassage,
+    )
+
+    renderables: list[RenderableDocument] = []
    total_chars = 0

    for doc in documents:
@ -67,36 +88,28 @@ def _format_web_results(
        title = doc_info.get("title") or "Web Result"
        url = metadata.get("url") or ""
        content = (doc.get("content") or "").strip()
-        source = metadata.get("document_type") or doc.get("source") or "WEB_SEARCH"
-        if not content:
+        if not content or not url:
            continue

-        metadata_json = json.dumps(metadata, ensure_ascii=False)
-        doc_xml = "\n".join(
-            [
-                "<document>",
-                "<document_metadata>",
-                f"  <document_type>{source}</document_type>",
-                f"  <title><![CDATA[{title}]]></title>",
-                f"  <url><![CDATA[{url}]]></url>",
-                f"  <metadata_json><![CDATA[{metadata_json}]]></metadata_json>",
-                "</document_metadata>",
-                "<document_content>",
-                f"  <chunk id='{url}'><![CDATA[{content}]]></chunk>",
-                "</document_content>",
-                "</document>",
-                "",
-            ]
-        )
-
-        if total_chars + len(doc_xml) > max_chars:
-            parts.append("<!-- Output truncated to fit context window -->")
+        total_chars += len(content)
+        if total_chars > max_chars:
            break

-        parts.append(doc_xml)
-        total_chars += len(doc_xml)
+        renderables.append(
+            RenderableDocument(
+                title=title,
+                source=_web_source_label(url),
+                passages=[
+                    RenderablePassage(
+                        content=content,
+                        locator={"url": url},
+                        source_type=CitationSourceType.WEB_RESULT,
+                    )
+                ],
+            )
+        )

-    return "\n".join(parts).strip() or "No web search results found."
+    return renderables


 async def _search_live_connector(
@ -141,7 +154,7 @@ async def _search_live_connector(
 def create_web_search_tool(
    search_space_id: int | None = None,
    available_connectors: list[str] | None = None,
-) -> StructuredTool:
+) -> BaseTool:
    """Factory for the ``web_search`` tool.

    Dispatches in parallel to the platform SearXNG instance and any
@ -168,7 +181,17 @@ def create_web_search_tool(
    _search_space_id = search_space_id
    _active_live = active_live_connectors

-    async def _web_search_impl(query: str, top_k: int = 10) -> str:
+    async def _web_search_impl(
+        query: Annotated[
+            str,
+            "The search query to look up on the web. Use specific, descriptive terms.",
+        ],
+        runtime: ToolRuntime,
+        top_k: Annotated[
+            int,
+            "Number of results to retrieve (default: 10, max: 50).",
+        ] = 10,
+    ) -> Command | str:
        from app.services import web_search_service

        perf = get_perf_logger()
@ -226,22 +249,39 @@ def create_web_search_tool(
                seen_urls.add(url)
            deduplicated.append(doc)

-        formatted = _format_web_results(deduplicated)
+        from app.agents.chat.multi_agent_chat.shared.citations import load_registry
+        from app.agents.chat.multi_agent_chat.shared.document_render import (
+            render_web_results,
+        )
+
+        registry = load_registry(getattr(runtime, "state", None))
+        renderables = _to_renderable_web_documents(deduplicated)
+        rendered = render_web_results(renderables, registry)

        perf.info(
-            "[web_search] query=%r engines=%d results=%d deduped=%d chars=%d in %.3fs",
+            "[web_search] query=%r engines=%d results=%d deduped=%d renderable=%d in %.3fs",
            query[:60],
            len(tasks),
            len(all_documents),
            len(deduplicated),
-            len(formatted),
+            len(renderables),
            time.perf_counter() - t0,
        )
-        return formatted

-    return StructuredTool(
+        if rendered is None:
+            return "No web search results found."
+
+        return Command(
+            update={
+                "messages": [
+                    ToolMessage(content=rendered, tool_call_id=runtime.tool_call_id)
+                ],
+                "citation_registry": registry,
+            }
+        )
+
+    return StructuredTool.from_function(
        name="web_search",
        description=description,
        coroutine=_web_search_impl,
-        args_schema=WebSearchInput,
    )
--- a/surfsense_backend/app/automations/actions/builtin/agent_task/invoke.py
+++ b/surfsense_backend/app/automations/actions/builtin/agent_task/invoke.py
@ -78,7 +78,7 @@ async def _resolve_mention_context(
    Automation always runs in cloud filesystem mode, so we mirror the chat
    ``new_chat`` flow: substitute ``@title`` tokens with canonical
    ``/documents/...`` paths, prepend a ``<mentioned_connectors>`` block, and
-    build a ``SurfSenseContextSchema`` that ``KnowledgePriorityMiddleware``
+    build a ``SurfSenseContextSchema`` that the ``search_knowledge_base`` tool
    reads via ``runtime.context``. Returns ``(query, None)`` unchanged when
    there are no mentions.
    """
@ -210,7 +210,7 @@ async def run_agent_task(
            runtime_context.turn_id = turn_id

        # The compiled graph declares ``context_schema=SurfSenseContextSchema``;
-        # mentions only reach ``KnowledgePriorityMiddleware`` via ``context=``.
+        # mentions only reach the ``search_knowledge_base`` tool via ``context=``.
        invoke_kwargs: dict[str, Any] = {"config": config}
        if runtime_context is not None:
            invoke_kwargs["context"] = runtime_context
--- a/surfsense_backend/app/prompts/default_system_instructions.py
+++ b/surfsense_backend/app/prompts/default_system_instructions.py
@ -1,135 +0,0 @@
-"""
-Thin compatibility wrapper around :mod:`app.prompts.system_prompt_composer.composer`.
-
-The composer split the previous monolithic prompt string into a fragment
-tree under ``prompts/`` plus a model-family dispatch step (see the
-composer module docstring for credits). This module preserves the public
-function surface (``build_surfsense_system_prompt`` /
-``build_configurable_system_prompt`` /
-``get_default_system_instructions`` / ``SURFSENSE_SYSTEM_PROMPT``) so
-that existing call sites — the multi-agent chat factory, anonymous chat
-routes, and the configurable-prompt admin path — keep working without churn.
-
-For new call sites prefer importing ``compose_system_prompt`` directly
-from :mod:`app.prompts.system_prompt_composer.composer`.
-"""
-
-from __future__ import annotations
-
-from datetime import UTC, datetime
-
-from app.db import ChatVisibility
-
-from .system_prompt_composer.composer import (
-    _read_fragment,
-    compose_system_prompt,
-    detect_provider_variant,
-)
-
-# Optional routing fragments under ``prompts/routing/`` (see composer).
-_DEFAULT_CONNECTOR_ROUTING: tuple[str, ...] = ("linear", "slack")
-
-# Public re-exports for backwards compatibility (some legacy code reads the
-# raw default-instructions text directly).
-SURFSENSE_SYSTEM_INSTRUCTIONS_TEMPLATE = (
-    "<system_instruction>\nDefault SurfSense agent system instructions are now\n"
-    "composed from prompts/base/*.md. See compose_system_prompt() for details.\n"
-    "</system_instruction>"
-)
-
-# Citation block re-exposed for legacy importers that referenced this constant
-# directly. The composer is the canonical source; this is a frozen snapshot
-# loaded at module-init time.
-SURFSENSE_CITATION_INSTRUCTIONS = _read_fragment("base/citations_on.md")
-SURFSENSE_NO_CITATION_INSTRUCTIONS = _read_fragment("base/citations_off.md")
-
-
-def build_surfsense_system_prompt(
-    today: datetime | None = None,
-    thread_visibility: ChatVisibility | None = None,
-    enabled_tool_names: set[str] | None = None,
-    disabled_tool_names: set[str] | None = None,
-    mcp_connector_tools: dict[str, list[str]] | None = None,
-    *,
-    model_name: str | None = None,
-) -> str:
-    """Build the default SurfSense system prompt (citations on, defaults).
-
-    See :func:`app.prompts.system_prompt_composer.composer.compose_system_prompt`
-    for full parameter docs.
-    """
-    return compose_system_prompt(
-        today=today,
-        thread_visibility=thread_visibility,
-        enabled_tool_names=enabled_tool_names,
-        disabled_tool_names=disabled_tool_names,
-        mcp_connector_tools=mcp_connector_tools,
-        citations_enabled=True,
-        model_name=model_name,
-        connector_routing=_DEFAULT_CONNECTOR_ROUTING,
-    )
-
-
-def build_configurable_system_prompt(
-    custom_system_instructions: str | None = None,
-    use_default_system_instructions: bool = True,
-    citations_enabled: bool = True,
-    today: datetime | None = None,
-    thread_visibility: ChatVisibility | None = None,
-    enabled_tool_names: set[str] | None = None,
-    disabled_tool_names: set[str] | None = None,
-    mcp_connector_tools: dict[str, list[str]] | None = None,
-    *,
-    model_name: str | None = None,
-) -> str:
-    """Build a configurable SurfSense system prompt.
-
-    See :func:`app.prompts.system_prompt_composer.composer.compose_system_prompt`
-    for full parameter docs.
-    """
-    return compose_system_prompt(
-        today=today,
-        thread_visibility=thread_visibility,
-        enabled_tool_names=enabled_tool_names,
-        disabled_tool_names=disabled_tool_names,
-        mcp_connector_tools=mcp_connector_tools,
-        custom_system_instructions=custom_system_instructions,
-        use_default_system_instructions=use_default_system_instructions,
-        citations_enabled=citations_enabled,
-        model_name=model_name,
-        connector_routing=_DEFAULT_CONNECTOR_ROUTING,
-    )
-
-
-def get_default_system_instructions() -> str:
-    """Return the default ``<system_instruction>`` block (no tools / citations).
-
-    Useful for populating the UI when editing custom system instructions.
-    The output reflects the current fragment tree, not a baked-in constant.
-    """
-    resolved_today = datetime.now(UTC).date().isoformat()
-    from .system_prompt_composer.composer import (
-        _build_system_instructions,  # local import
-    )
-
-    return _build_system_instructions(
-        visibility=ChatVisibility.PRIVATE,
-        resolved_today=resolved_today,
-    ).strip()
-
-
-# Backwards compatibility — some modules import the constant directly.
-SURFSENSE_SYSTEM_PROMPT = build_surfsense_system_prompt()
-
-
-__all__ = [
-    "SURFSENSE_CITATION_INSTRUCTIONS",
-    "SURFSENSE_NO_CITATION_INSTRUCTIONS",
-    "SURFSENSE_SYSTEM_INSTRUCTIONS_TEMPLATE",
-    "SURFSENSE_SYSTEM_PROMPT",
-    "build_configurable_system_prompt",
-    "build_surfsense_system_prompt",
-    "compose_system_prompt",
-    "detect_provider_variant",
-    "get_default_system_instructions",
-]
--- a/surfsense_backend/app/prompts/system_prompt_composer/init.py
+++ b/surfsense_backend/app/prompts/system_prompt_composer/init.py
@ -1,7 +0,0 @@
-"""SurfSense agent prompt fragments.
-
-The prompt is composed at runtime by :mod:`composer` from the markdown
-fragments under ``base/``, ``providers/``, ``tools/``, ``examples/``, and
-``routing/``. ``system_prompt.py`` is now a thin wrapper that delegates
-to :func:`composer.compose_system_prompt`.
-"""
--- a/surfsense_backend/app/prompts/system_prompt_composer/base/init.py
+++ b/surfsense_backend/app/prompts/system_prompt_composer/base/init.py
@ -1 +0,0 @@
-
--- a/surfsense_backend/app/prompts/system_prompt_composer/base/agent_private.md
+++ b/surfsense_backend/app/prompts/system_prompt_composer/base/agent_private.md
@ -1,7 +0,0 @@
-You are SurfSense, a reasoning and acting AI agent designed to answer user questions using the user's personal knowledge base.
-
-Today's date (UTC): {resolved_today}
-
-When writing mathematical formulas or equations, ALWAYS use LaTeX notation. NEVER use backtick code spans or Unicode symbols for math.
-
-NEVER expose internal tool parameter names, backend IDs, or implementation details to the user. Always use natural, user-friendly language instead.
--- a/surfsense_backend/app/prompts/system_prompt_composer/base/agent_team.md
+++ b/surfsense_backend/app/prompts/system_prompt_composer/base/agent_team.md
@ -1,9 +0,0 @@
-You are SurfSense, a reasoning and acting AI agent designed to answer questions in this team space using the team's shared knowledge base.
-
-In this team thread, each message is prefixed with **[DisplayName of the author]**. Use this to attribute and reference the author of anything in the discussion (who asked a question, made a suggestion, or contributed an idea) and to cite who said what in your answers.
-
-Today's date (UTC): {resolved_today}
-
-When writing mathematical formulas or equations, ALWAYS use LaTeX notation. NEVER use backtick code spans or Unicode symbols for math.
-
-NEVER expose internal tool parameter names, backend IDs, or implementation details to the user. Always use natural, user-friendly language instead.
--- a/surfsense_backend/app/prompts/system_prompt_composer/base/citations_off.md
+++ b/surfsense_backend/app/prompts/system_prompt_composer/base/citations_off.md
@ -1,16 +0,0 @@
-<citation_instructions>
-IMPORTANT: Citations are DISABLED for this configuration.
-
-DO NOT include any citations in your responses. Specifically:
-1. Do NOT use the [citation:chunk_id] format anywhere in your response.
-2. Do NOT reference document IDs, chunk IDs, or source IDs.
-3. Simply provide the information naturally without any citation markers.
-4. Write your response as if you're having a normal conversation, incorporating the information from your knowledge seamlessly.
-
-When answering questions based on documents from the knowledge base:
- Present the information directly and confidently
- Do not mention that information comes from specific documents or chunks
- Integrate facts naturally into your response without attribution markers
-
-Your goal is to provide helpful, informative answers in a clean, readable format without any citation notation.
-</citation_instructions>
--- a/surfsense_backend/app/prompts/system_prompt_composer/base/citations_on.md
+++ b/surfsense_backend/app/prompts/system_prompt_composer/base/citations_on.md
@ -1,89 +0,0 @@
-<citation_instructions>
-CRITICAL CITATION REQUIREMENTS:
-
-1. For EVERY piece of information you include from the documents, add a citation in the format [citation:chunk_id] where chunk_id is the exact value from the `<chunk id='...'>` tag inside `<document_content>`.
-2. Make sure ALL factual statements from the documents have proper citations.
-3. If multiple chunks support the same point, include all relevant citations [citation:chunk_id1], [citation:chunk_id2].
-4. You MUST use the exact chunk_id values from the `<chunk id='...'>` attributes. Do not create your own citation numbers.
-5. Every citation MUST be in the format [citation:chunk_id] where chunk_id is the exact chunk id value.
-6. Never modify or change the chunk_id - always use the original values exactly as provided in the chunk tags.
-7. Do not return citations as clickable links.
-8. Never format citations as markdown links like "([citation:5](https://example.com))". Always use plain square brackets only.
-9. Citations must ONLY appear as [citation:chunk_id] or [citation:chunk_id1], [citation:chunk_id2] format - never with parentheses, hyperlinks, or other formatting.
-10. Never make up chunk IDs. Only use chunk_id values that are explicitly provided in the `<chunk id='...'>` tags.
-11. If you are unsure about a chunk_id, do not include a citation rather than guessing or making one up.
-
-<document_structure_example>
-The documents you receive are structured like this:
-
-**Knowledge base documents (numeric chunk IDs):**
-<document>
-<document_metadata>
-  <document_id>42</document_id>
-  <document_type>GITHUB_CONNECTOR</document_type>
-  <title><![CDATA[Some repo / file / issue title]]></title>
-  <url><![CDATA[https://example.com]]></url>
-  <metadata_json><![CDATA[{{"any":"other metadata"}}]]></metadata_json>
-</document_metadata>
-
-<document_content>
-  <chunk id='123'><![CDATA[First chunk text...]]></chunk>
-  <chunk id='124'><![CDATA[Second chunk text...]]></chunk>
-</document_content>
-</document>
-
-**Web search results (URL chunk IDs):**
-<document>
-<document_metadata>
-  <document_type>WEB_SEARCH</document_type>
-  <title><![CDATA[Some web search result]]></title>
-  <url><![CDATA[https://example.com/article]]></url>
-</document_metadata>
-
-<document_content>
-  <chunk id='https://example.com/article'><![CDATA[Content from web search...]]></chunk>
-</document_content>
-</document>
-
-IMPORTANT: You MUST cite using the EXACT chunk ids from the `<chunk id='...'>` tags.
- For knowledge base documents, chunk ids are numeric (e.g. 123, 124) or prefixed (e.g. doc-45).
- For live web search results, chunk ids are URLs (e.g. https://example.com/article).
-Do NOT cite document_id. Always use the chunk id.
-</document_structure_example>
-
-<citation_format>
- Every fact from the documents must have a citation in the format [citation:chunk_id] where chunk_id is the EXACT id value from a `<chunk id='...'>` tag
- Citations should appear at the end of the sentence containing the information they support
- Multiple citations should be separated by commas: [citation:chunk_id1], [citation:chunk_id2], [citation:chunk_id3]
- No need to return references section. Just citations in answer.
- NEVER create your own citation format - use the exact chunk_id values from the documents in the [citation:chunk_id] format
- NEVER format citations as clickable links or as markdown links like "([citation:5](https://example.com))". Always use plain square brackets only
- NEVER make up chunk IDs if you are unsure about the chunk_id. It is better to omit the citation than to guess
- Copy the EXACT chunk id from the XML - if it says `<chunk id='5'>`, use [citation:5]
- If the chunk id is a URL like `<chunk id='https://example.com/page'>`, use [citation:https://example.com/page]
-</citation_format>
-
-<citation_examples>
-CORRECT citation formats:
- [citation:5] (numeric chunk ID from knowledge base)
- [citation:https://example.com/article] (URL chunk ID from web search results)
- [citation:chunk_id1], [citation:chunk_id2], [citation:chunk_id3] (multiple citations)
-
-INCORRECT citation formats (DO NOT use):
- Using parentheses and markdown links: ([citation:5](https://github.com/MODSetter/SurfSense))
- Using parentheses around brackets: ([citation:5])
- Using hyperlinked text: [link to source 5](https://example.com)
- Using footnote style: ... library¹
- Making up source IDs when source_id is unknown
- Using old IEEE format: [1], [2], [3]
- Using source types instead of IDs: [citation:GITHUB_CONNECTOR] instead of [citation:5]
-</citation_examples>
-
-<citation_output_example>
-Based on your GitHub repositories and video content, Python's asyncio library provides tools for writing concurrent code using the async/await syntax [citation:5]. It's particularly useful for I/O-bound and high-level structured network code [citation:5].
-
-According to web search results, the key advantage of asyncio is that it can improve performance by allowing other code to run while waiting for I/O operations to complete [citation:https://docs.python.org/3/library/asyncio.html]. This makes it excellent for scenarios like web scraping, API calls, database operations, or any situation where your program spends time waiting for external resources.
-
-However, from your video learning, it's important to note that asyncio is not suitable for CPU-bound tasks as it runs on a single thread [citation:12]. For computationally intensive work, you'd want to use multiprocessing instead.
-</citation_output_example>
-</citation_instructions>
--- a/surfsense_backend/app/prompts/system_prompt_composer/base/kb_only_policy_private.md
+++ b/surfsense_backend/app/prompts/system_prompt_composer/base/kb_only_policy_private.md
@ -1,15 +0,0 @@
-<knowledge_base_only_policy>
-CRITICAL RULE — KNOWLEDGE BASE FIRST, NEVER DEFAULT TO GENERAL KNOWLEDGE:
- You MUST answer questions ONLY using information retrieved from the user's knowledge base, web search results, scraped webpages, or other tool outputs.
- You MUST NOT answer factual or informational questions from your own training data or general knowledge unless the user explicitly grants permission.
- If the knowledge base search returns no relevant results AND no other tool provides the answer, you MUST:
-  1. Inform the user that you could not find relevant information in their knowledge base.
-  2. Ask the user: "Would you like me to answer from my general knowledge instead?"
-  3. ONLY provide a general-knowledge answer AFTER the user explicitly says yes.
- This policy does NOT apply to:
-  * Casual conversation, greetings, or meta-questions about SurfSense itself (e.g., "what can you do?"). For "how do I use SurfSense" / product-documentation questions, point the user to https://www.surfsense.com/docs.
-  * Formatting, summarization, or analysis of content already present in the conversation
-  * Following user instructions that are clearly task-oriented (e.g., "rewrite this in bullet points")
-  * Tool-usage actions like generating reports, podcasts, images, or scraping webpages
-  * Queries about services that have direct tools (Linear, ClickUp, Jira, Slack, Airtable) — see <tool_routing> below
-</knowledge_base_only_policy>
--- a/surfsense_backend/app/prompts/system_prompt_composer/base/kb_only_policy_team.md
+++ b/surfsense_backend/app/prompts/system_prompt_composer/base/kb_only_policy_team.md
@ -1,15 +0,0 @@
-<knowledge_base_only_policy>
-CRITICAL RULE — KNOWLEDGE BASE FIRST, NEVER DEFAULT TO GENERAL KNOWLEDGE:
- You MUST answer questions ONLY using information retrieved from the team's shared knowledge base, web search results, scraped webpages, or other tool outputs.
- You MUST NOT answer factual or informational questions from your own training data or general knowledge unless a team member explicitly grants permission.
- If the knowledge base search returns no relevant results AND no other tool provides the answer, you MUST:
-  1. Inform the team that you could not find relevant information in the shared knowledge base.
-  2. Ask: "Would you like me to answer from my general knowledge instead?"
-  3. ONLY provide a general-knowledge answer AFTER a team member explicitly says yes.
- This policy does NOT apply to:
-  * Casual conversation, greetings, or meta-questions about SurfSense itself (e.g., "what can you do?"). For "how do I use SurfSense" / product-documentation questions, point the user to https://www.surfsense.com/docs.
-  * Formatting, summarization, or analysis of content already present in the conversation
-  * Following user instructions that are clearly task-oriented (e.g., "rewrite this in bullet points")
-  * Tool-usage actions like generating reports, podcasts, images, or scraping webpages
-  * Queries about services that have direct tools (Linear, ClickUp, Jira, Slack, Airtable) — see <tool_routing> below
-</knowledge_base_only_policy>
--- a/surfsense_backend/app/prompts/system_prompt_composer/base/memory_protocol_private.md
+++ b/surfsense_backend/app/prompts/system_prompt_composer/base/memory_protocol_private.md
@ -1,12 +0,0 @@
-<memory_protocol>
-IMPORTANT — After understanding each user message, ALWAYS check: does this message
-reveal durable facts about the user (role, interests, preferences, projects,
-background, or standing instructions)? If yes, you MUST call update_memory
-alongside your normal response — do not defer this to a later turn.
-
-Memory is stored as a heading-based markdown document. New entries should be
-under `##` headings such as `## Facts`, `## Preferences`, or `## Instructions`
-with bullets like `- YYYY-MM-DD: text`. If existing memory contains legacy
-`(YYYY-MM-DD) [fact|pref|instr]` markers, preserve the information but write
-new saves in the heading-based format.
-</memory_protocol>
--- a/surfsense_backend/app/prompts/system_prompt_composer/base/memory_protocol_team.md
+++ b/surfsense_backend/app/prompts/system_prompt_composer/base/memory_protocol_team.md
@ -1,14 +0,0 @@
-<memory_protocol>
-IMPORTANT — After understanding each user message, ALWAYS check: does this message
-reveal durable facts about the team (decisions, conventions, architecture, processes,
-or key facts)? If yes, you MUST call update_memory alongside your normal response —
-do not defer this to a later turn.
-
-Team memory is stored as a heading-based markdown document. New entries should
-be under `##` headings such as `## Product Decisions`,
-`## Engineering Conventions`, `## Project Facts`, or `## Open Questions` with
-bullets like `- YYYY-MM-DD: text`. If existing memory contains legacy
-`(YYYY-MM-DD) [fact]` markers, preserve the information but write new saves in
-the heading-based format. Do not create personal headings such as
-`## Preferences` or `## Instructions`.
-</memory_protocol>
--- a/surfsense_backend/app/prompts/system_prompt_composer/base/parameter_resolution.md
+++ b/surfsense_backend/app/prompts/system_prompt_composer/base/parameter_resolution.md
@@ -1,39 +0,0 @@
-<parameter_resolution>
-Some service tools require identifiers or context you do not have (account IDs,
-workspace names, channel IDs, project keys, etc.). NEVER ask the user for raw
-IDs or technical identifiers — they cannot memorise them.
-
-Instead, follow this discovery pattern:
-1. Call a listing/discovery tool to find available options.
-2. ONE result → use it silently, no question to the user.
-3. MULTIPLE results → present the options by their display names and let the
-   user choose. Never show raw UUIDs — always use friendly names.
-
-Discovery tools by level:
- Which account/workspace? → get_connected_accounts("<service>")
- Which Jira site (cloudId)? → getAccessibleAtlassianResources
- Which Jira project?  → getVisibleJiraProjects (after resolving cloudId)
- Which Jira issue type? → getJiraProjectIssueTypesMetadata (after resolving project)
- Which channel?  → slack_search_channels
- Which base?     → list_bases
- Which table?    → list_tables_for_base (after resolving baseId)
- Which task?     → clickup_search
- Which issue?    → list_issues (Linear) or searchJiraIssuesUsingJql (Jira)
-
-For Jira specifically: ALWAYS call getAccessibleAtlassianResources first to
-obtain the cloudId, then pass it to other Jira tools. When creating an issue,
-chain: getAccessibleAtlassianResources → getVisibleJiraProjects → createJiraIssue.
-If there is only one option at each step, use it silently. If multiple, present
-friendly names.
-
-Chain discovery when needed — e.g. for Airtable records: list_bases → pick
-base → list_tables_for_base → pick table → list_records_for_table.
-
-MULTI-ACCOUNT TOOL NAMING: When the user has multiple accounts connected for
-the same service, tool names are prefixed to avoid collisions — e.g.
-linear_25_list_issues and linear_30_list_issues instead of two list_issues.
-Each prefixed tool's description starts with [Account: <display_name>] so you
-know which account it targets. Use get_connected_accounts("<service>") to see
-the full list of accounts with their connector IDs and display names.
-When only one account is connected, tools have their normal unprefixed names.
-</parameter_resolution>
--- a/surfsense_backend/app/prompts/system_prompt_composer/base/tool_routing_private.md
+++ b/surfsense_backend/app/prompts/system_prompt_composer/base/tool_routing_private.md
@@ -1,24 +0,0 @@
-<tool_routing>
-CRITICAL — You have direct tools for these services: Linear, ClickUp, Jira, Slack, Airtable.
-Their data is NEVER in the knowledge base. You MUST call their tools immediately — never
-say "I don't see it in the knowledge base" or ask the user if they want you to check.
-Ignore any knowledge base results for these services.
-
-When to use which tool:
- Linear (issues, teams, users, projects when MCP exposes them) → hosted Linear MCP read tools (e.g. `list_issues`, `get_issue`, `list_teams`, `list_users`, …) and `save_issue` for create/update; native SurfSense Linear issue tools when present. For **multi-step Linear-only** work (several reads, structured evidence), delegate with the `task` tool to subagent **`linear_specialist`** instead of mixing unrelated tools.
- ClickUp (tasks) → clickup_search, clickup_get_task
- Jira (issues) → getAccessibleAtlassianResources (cloudId discovery), getVisibleJiraProjects (project discovery), getJiraProjectIssueTypesMetadata (issue type discovery), searchJiraIssuesUsingJql, createJiraIssue, editJiraIssue
- Slack (messages, channels) → `slack_search_channels`, `slack_read_channel`, `slack_read_thread`, and other `slack_*` tools when connected. For **multi-step Slack-only** work, delegate with `task` to **`slack_specialist`**.
- Airtable (bases, tables, records) → list_bases, list_tables_for_base, list_records_for_table
- Knowledge base content (Notion, GitHub, files, notes) → automatically searched
- Real-time public web data → call web_search
- Reading a specific webpage → call scrape_webpage
- SurfSense product / how-to questions (setup, configuration, connectors, feature behavior) → point the user to the documentation: https://www.surfsense.com/docs
-
-**`task` subagents (when to delegate):**
- **`linear_specialist`** — Linear-only investigations and tool use.
- **`slack_specialist`** — Slack-only investigations and tool use.
- **`connector_negotiator`** — **Cross-connector** chains (e.g. data from Slack then action in Linear).
- **`explore`** — Read-only KB + web research with citations.
- **`report_writer`** — Single `generate_report` deliverable.
-</tool_routing>
--- a/surfsense_backend/app/prompts/system_prompt_composer/base/tool_routing_team.md
+++ b/surfsense_backend/app/prompts/system_prompt_composer/base/tool_routing_team.md
@@ -1,24 +0,0 @@
-<tool_routing>
-CRITICAL — You have direct tools for these services: Linear, ClickUp, Jira, Slack, Airtable.
-Their data is NEVER in the knowledge base. You MUST call their tools immediately — never
-say "I don't see it in the knowledge base" or ask if they want you to check.
-Ignore any knowledge base results for these services.
-
-When to use which tool:
- Linear (issues, teams, users, projects when MCP exposes them) → hosted Linear MCP read tools (e.g. `list_issues`, `get_issue`, `list_teams`, `list_users`, …) and `save_issue` for create/update; native SurfSense Linear issue tools when present. For **multi-step Linear-only** work (several reads, structured evidence), delegate with the `task` tool to subagent **`linear_specialist`** instead of mixing unrelated tools.
- ClickUp (tasks) → clickup_search, clickup_get_task
- Jira (issues) → getAccessibleAtlassianResources (cloudId discovery), getVisibleJiraProjects (project discovery), getJiraProjectIssueTypesMetadata (issue type discovery), searchJiraIssuesUsingJql, createJiraIssue, editJiraIssue
- Slack (messages, channels) → `slack_search_channels`, `slack_read_channel`, `slack_read_thread`, and other `slack_*` tools when connected. For **multi-step Slack-only** work, delegate with `task` to **`slack_specialist`**.
- Airtable (bases, tables, records) → list_bases, list_tables_for_base, list_records_for_table
- Knowledge base content (Notion, GitHub, files, notes) → automatically searched
- Real-time public web data → call web_search
- Reading a specific webpage → call scrape_webpage
- SurfSense product / how-to questions (setup, configuration, connectors, feature behavior) → point the user to the documentation: https://www.surfsense.com/docs
-
-**`task` subagents (when to delegate):**
- **`linear_specialist`** — Linear-only investigations and tool use.
- **`slack_specialist`** — Slack-only investigations and tool use.
- **`connector_negotiator`** — **Cross-connector** chains (e.g. data from Slack then action in Linear).
- **`explore`** — Read-only KB + web research with citations.
- **`report_writer`** — Single `generate_report` deliverable.
-</tool_routing>
--- a/surfsense_backend/app/prompts/system_prompt_composer/composer.py
+++ b/surfsense_backend/app/prompts/system_prompt_composer/composer.py
@@ -1,403 +0,0 @@
-"""
-Prompt composer for the SurfSense ``new_chat`` agent.
-
-This module assembles the agent's system prompt from the markdown fragments
-under :mod:`app.prompts.system_prompt_composer`. It replaces the monolithic
-``system_prompt.py`` with a clean, fragment-based composition:
-
-::
-
-    prompts/
-      base/                  # agent identity, KB policy, tool routing, …
-      providers/             # provider-specific tweaks (anthropic, gpt5, …)
-      tools/                 # one ``<name>.md`` per tool
-      examples/              # one ``<name>.md`` per tool with call examples
-      routing/               # connector-specific routing notes (linear, slack, …)
-
-The model-family dispatch step (see :func:`detect_provider_variant`)
-mirrors OpenCode's ``packages/opencode/src/session/system.ts`` — different
-model families respond best to differently-styled prompts (Claude likes
-XML/narrative, GPT-5 wants channel-aware pragmatic, Codex needs
-terse/file:line, Gemini wants formal numbered steps, etc.). LangChain's
-``dynamic_prompt`` helper supports per-call prompt swaps but ships no
-out-of-the-box family classifier, so we keep our own.
-
-Backwards compatibility
-=======================
-
-``system_prompt.py`` re-exports :func:`compose_system_prompt` and wraps it
-in functions with the same signatures as the legacy
-``build_surfsense_system_prompt`` / ``build_configurable_system_prompt`` so
-existing call sites do not change.
-"""
-
-from __future__ import annotations
-
-import re
-from collections.abc import Iterable
-from datetime import UTC, datetime
-from importlib import resources
-
-from app.db import ChatVisibility
-
-# -----------------------------------------------------------------------------
-# Provider variant detection
-# -----------------------------------------------------------------------------
-
-# String literal alias for the supported provider-specific prompt variants.
-# When adding a new variant, also drop a matching ``providers/<variant>.md``
-# file in this package and (if appropriate) extend the regex matchers below.
-#
-# Stylistic clusters: each variant is a focused style nudge, NOT a full
-# system prompt — the main prompt is already assembled from base/ +
-# tools/ + routing/. The clustering itself (which models map to which
-# style) follows OpenCode's ``system.ts`` family table; see the module
-# docstring for credits.
-ProviderVariant = str
-# Known values:
-#   "anthropic"        — Claude family (XML-friendly, narrative todos)
-#   "openai_reasoning" — GPT-5 / o-series (channel-aware pragmatic)
-#   "openai_classic"   — GPT-4 family (autonomous persistence)
-#   "openai_codex"     — gpt-*-codex (code-purist, terse, file:line refs)
-#   "google"           — Gemini (formal, <3-line, numbered workflow)
-#   "kimi"             — Moonshot Kimi-K* (action-bias, parallel tools)
-#   "grok"             — xAI Grok (extreme-terse, one-word ok)
-#   "deepseek"         — DeepSeek V3 / R1 (terse, R1-aware reasoning)
-#   "default"          — fallback, no provider-specific block emitted
-
-# IMPORTANT: order of evaluation matters in :func:`detect_provider_variant`.
-# More specific patterns must come first (e.g. ``codex`` before
-# ``openai_reasoning`` because codex model ids contain ``gpt``).
-
-_OPENAI_CODEX_RE = re.compile(
-    r"\b(gpt-codex|codex-mini|gpt-[\d.]+-codex)\b", re.IGNORECASE
-)
-_OPENAI_REASONING_RE = re.compile(r"\b(gpt-5|o\d|o-)", re.IGNORECASE)
-_OPENAI_CLASSIC_RE = re.compile(r"\bgpt-4", re.IGNORECASE)
-_ANTHROPIC_RE = re.compile(r"\bclaude\b", re.IGNORECASE)
-_GOOGLE_RE = re.compile(r"\bgemini\b", re.IGNORECASE)
-_KIMI_RE = re.compile(r"\b(kimi[-\d.]*|moonshot)\b", re.IGNORECASE)
-_GROK_RE = re.compile(r"\bgrok\b", re.IGNORECASE)
-_DEEPSEEK_RE = re.compile(r"\bdeepseek\b", re.IGNORECASE)
-
-
-def detect_provider_variant(model_name: str | None) -> ProviderVariant:
-    """Pick a provider-specific prompt variant from a model id string.
-
-    Heuristic match on the model id; returns ``"default"`` when nothing
-    matches so the composer can fall back to the empty placeholder file.
-
-    Order is significant: more-specific patterns are tried first so
-    ``gpt-5-codex`` routes to ``"openai_codex"`` rather than
-    ``"openai_reasoning"`` — same dispatch order as OpenCode's
-    ``packages/opencode/src/session/system.ts``.
-    """
-    if not model_name:
-        return "default"
-    name = model_name.strip()
-    if _OPENAI_CODEX_RE.search(name):
-        return "openai_codex"
-    if _OPENAI_REASONING_RE.search(name):
-        return "openai_reasoning"
-    if _OPENAI_CLASSIC_RE.search(name):
-        return "openai_classic"
-    if _ANTHROPIC_RE.search(name):
-        return "anthropic"
-    if _GOOGLE_RE.search(name):
-        return "google"
-    if _KIMI_RE.search(name):
-        return "kimi"
-    if _GROK_RE.search(name):
-        return "grok"
-    if _DEEPSEEK_RE.search(name):
-        return "deepseek"
-    return "default"
-
-
-# -----------------------------------------------------------------------------
-# Fragment loading
-# -----------------------------------------------------------------------------
-
-
-_PROMPTS_PACKAGE = "app.prompts.system_prompt_composer"
-
-
-def _read_fragment(subpath: str) -> str:
-    """Read a fragment file from the ``prompts/`` resource tree.
-
-    Returns the raw contents stripped of any single trailing newline so
-    composition can append explicit separators without compounding blank
-    lines. Missing files return an empty string so optional fragments
-    (e.g. provider hints) act as no-ops.
-    """
-    parts = subpath.split("/")
-    try:
-        ref = resources.files(_PROMPTS_PACKAGE).joinpath(*parts)
-        if not ref.is_file():
-            return ""
-        text = ref.read_text(encoding="utf-8")
-    except (FileNotFoundError, ModuleNotFoundError):
-        return ""
-    if text.endswith("\n"):
-        text = text[:-1]
-    return text
-
-
-# -----------------------------------------------------------------------------
-# Tool ordering + memory variant resolution
-# -----------------------------------------------------------------------------
-
-
-# Ordered for reading flow: fundamentals first, then artifact generators,
-# then memory at the end (mirrors the legacy ``_ALL_TOOL_NAMES_ORDERED``).
-ALL_TOOL_NAMES_ORDERED: tuple[str, ...] = (
-    "web_search",
-    "generate_podcast",
-    "generate_video_presentation",
-    "generate_report",
-    "generate_resume",
-    "generate_image",
-    "scrape_webpage",
-    "update_memory",
-)
-
-
-_MEMORY_VARIANT_TOOLS: frozenset[str] = frozenset({"update_memory"})
-
-
-def _tool_fragment_path(tool_name: str, variant: str) -> str:
-    """Resolve a tool's instruction fragment path.
-
-    Tools listed in :data:`_MEMORY_VARIANT_TOOLS` switch on the conversation
-    visibility and load ``tools/<name>_<variant>.md``; everything else
-    falls back to ``tools/<name>.md``.
-    """
-    if tool_name in _MEMORY_VARIANT_TOOLS:
-        return f"tools/{tool_name}_{variant}.md"
-    return f"tools/{tool_name}.md"
-
-
-def _example_fragment_path(tool_name: str, variant: str) -> str:
-    if tool_name in _MEMORY_VARIANT_TOOLS:
-        return f"examples/{tool_name}_{variant}.md"
-    return f"examples/{tool_name}.md"
-
-
-def _format_tool_label(tool_name: str) -> str:
-    return tool_name.replace("_", " ").title()
-
-
-# -----------------------------------------------------------------------------
-# Section builders
-# -----------------------------------------------------------------------------
-
-
-def _build_system_instructions(
-    *,
-    visibility: ChatVisibility,
-    resolved_today: str,
-) -> str:
-    """Reconstruct the legacy ``<system_instruction>`` block from fragments."""
-    variant = "team" if visibility == ChatVisibility.SEARCH_SPACE else "private"
-
-    sections = [
-        _read_fragment(f"base/agent_{variant}.md"),
-        _read_fragment(f"base/kb_only_policy_{variant}.md"),
-        _read_fragment(f"base/tool_routing_{variant}.md"),
-        _read_fragment("base/parameter_resolution.md"),
-        _read_fragment(f"base/memory_protocol_{variant}.md"),
-    ]
-    body = "\n\n".join(s for s in sections if s)
-    block = f"\n<system_instruction>\n{body}\n\n</system_instruction>\n"
-    return block.format(resolved_today=resolved_today)
-
-
-def _build_mcp_routing_block(
-    mcp_connector_tools: dict[str, list[str]] | None,
-) -> str:
-    """Emit the ``<mcp_tool_routing>`` block when at least one MCP server is wired."""
-    if not mcp_connector_tools:
-        return ""
-    lines: list[str] = [
-        "\n<mcp_tool_routing>",
-        "You also have direct tools from these user-connected MCP servers.",
-        "Their data is NEVER in the knowledge base — call their tools directly.",
-        "",
-    ]
-    for server_name, tool_names in mcp_connector_tools.items():
-        lines.append(f"- {server_name} → {', '.join(tool_names)}")
-    lines.append("</mcp_tool_routing>\n")
-    return "\n".join(lines)
-
-
-def _build_tools_section(
-    *,
-    visibility: ChatVisibility,
-    enabled_tool_names: set[str] | None,
-    disabled_tool_names: set[str] | None,
-) -> str:
-    """Reconstruct the ``<tools>`` block + ``<tool_call_examples>`` block."""
-    variant = "team" if visibility == ChatVisibility.SEARCH_SPACE else "private"
-
-    parts: list[str] = []
-    preamble = _read_fragment("tools/_preamble.md")
-    if preamble:
-        parts.append(preamble + "\n")
-
-    examples: list[str] = []
-
-    for tool_name in ALL_TOOL_NAMES_ORDERED:
-        if enabled_tool_names is not None and tool_name not in enabled_tool_names:
-            continue
-
-        instruction = _read_fragment(_tool_fragment_path(tool_name, variant))
-        if instruction:
-            parts.append(instruction + "\n")
-
-        example = _read_fragment(_example_fragment_path(tool_name, variant))
-        if example:
-            examples.append(example + "\n")
-
-    known_disabled = (
-        set(disabled_tool_names) & set(ALL_TOOL_NAMES_ORDERED)
-        if disabled_tool_names
-        else set()
-    )
-    if known_disabled:
-        disabled_list = ", ".join(
-            _format_tool_label(n) for n in ALL_TOOL_NAMES_ORDERED if n in known_disabled
-        )
-        parts.append(
-            "\n"
-            "DISABLED TOOLS (by user):\n"
-            f"The following tools are available in SurfSense but have been disabled by the user for this session: {disabled_list}.\n"
-            "You do NOT have access to these tools and MUST NOT claim you can use them.\n"
-            "If the user asks about a capability provided by a disabled tool, let them know the relevant tool\n"
-            "is currently disabled and they can re-enable it.\n"
-        )
-
-    parts.append("\n</tools>\n")
-
-    if examples:
-        parts.append("<tool_call_examples>")
-        parts.extend(examples)
-        parts.append("</tool_call_examples>\n")
-
-    return "".join(parts)
-
-
-def _build_provider_block(provider_variant: ProviderVariant) -> str:
-    """Optional provider-tuned hints. Empty for ``"default"``."""
-    if not provider_variant or provider_variant == "default":
-        return ""
-    text = _read_fragment(f"providers/{provider_variant}.md")
-    return f"\n{text}\n" if text else ""
-
-
-def _build_routing_block(connector_routing: Iterable[str] | None) -> str:
-    if not connector_routing:
-        return ""
-    fragments: list[str] = []
-    for name in connector_routing:
-        text = _read_fragment(f"routing/{name}.md")
-        if text:
-            fragments.append(text)
-    if not fragments:
-        return ""
-    return "\n" + "\n\n".join(fragments) + "\n"
-
-
-def _build_citation_block(citations_enabled: bool) -> str:
-    fragment = (
-        _read_fragment("base/citations_on.md")
-        if citations_enabled
-        else _read_fragment("base/citations_off.md")
-    )
-    return f"\n{fragment}\n" if fragment else ""
-
-
-# -----------------------------------------------------------------------------
-# Public API
-# -----------------------------------------------------------------------------
-
-
-def compose_system_prompt(
-    *,
-    today: datetime | None = None,
-    thread_visibility: ChatVisibility | None = None,
-    enabled_tool_names: set[str] | None = None,
-    disabled_tool_names: set[str] | None = None,
-    mcp_connector_tools: dict[str, list[str]] | None = None,
-    custom_system_instructions: str | None = None,
-    use_default_system_instructions: bool = True,
-    citations_enabled: bool = True,
-    provider_variant: ProviderVariant | None = None,
-    model_name: str | None = None,
-    connector_routing: Iterable[str] | None = None,
-) -> str:
-    """Assemble the SurfSense system prompt from disk fragments.
-
-    Args:
-        today: Optional clock injection for tests.
-        thread_visibility: Private vs shared (team) — drives memory wording
-            and a few base block variants.
-        enabled_tool_names: When provided, only these tools' instructions
-            are included; ``None`` keeps the legacy "include everything"
-            behavior.
-        disabled_tool_names: User-disabled tools (note appended to prompt).
-        mcp_connector_tools: ``{server_name: [tool_names...]}`` to inject
-            an explicit MCP routing block.
-        custom_system_instructions: Free-form instructions that override
-            the default ``<system_instruction>`` block.
-        use_default_system_instructions: When ``custom_system_instructions``
-            is empty/None, fall back to defaults (legacy semantics).
-        citations_enabled: Include ``citations_on.md`` (true) or
-            ``citations_off.md`` (false).
-        provider_variant: Explicit provider variant override
-            (``"anthropic" | "openai_reasoning" | "openai_classic" | "google" | "default"``).
-            When ``None``, falls back to :func:`detect_provider_variant`
-            on ``model_name``.
-        model_name: Used to auto-detect ``provider_variant`` when not
-            provided explicitly.
-        connector_routing: Optional list of routing fragment names
-            (``["linear", "slack", ...]``) to include from
-            ``prompts/routing/``.
-
-    Returns:
-        The fully composed system prompt string.
-    """
-    resolved_today = (today or datetime.now(UTC)).astimezone(UTC).date().isoformat()
-    visibility = thread_visibility or ChatVisibility.PRIVATE
-
-    if custom_system_instructions and custom_system_instructions.strip():
-        sys_block = custom_system_instructions.format(resolved_today=resolved_today)
-    elif use_default_system_instructions:
-        sys_block = _build_system_instructions(
-            visibility=visibility, resolved_today=resolved_today
-        )
-    else:
-        sys_block = ""
-
-    sys_block += _build_mcp_routing_block(mcp_connector_tools)
-
-    if provider_variant is None:
-        provider_variant = detect_provider_variant(model_name)
-    sys_block += _build_provider_block(provider_variant)
-    sys_block += _build_routing_block(connector_routing)
-
-    tools_block = _build_tools_section(
-        visibility=visibility,
-        enabled_tool_names=enabled_tool_names,
-        disabled_tool_names=disabled_tool_names,
-    )
-    citation_block = _build_citation_block(citations_enabled)
-
-    return sys_block + tools_block + citation_block
-
-
-__all__ = [
-    "ALL_TOOL_NAMES_ORDERED",
-    "ProviderVariant",
-    "compose_system_prompt",
-    "detect_provider_variant",
-]
--- a/surfsense_backend/app/prompts/system_prompt_composer/examples/__init__.py
+++ b/surfsense_backend/app/prompts/system_prompt_composer/examples/__init__.py
@@ -1 +0,0 @@
-
--- a/surfsense_backend/app/prompts/system_prompt_composer/examples/generate_image.md
+++ b/surfsense_backend/app/prompts/system_prompt_composer/examples/generate_image.md
@@ -1,12 +0,0 @@
-
- User: "Generate an image of a cat"
-  - Call: `generate_image(prompt="A fluffy orange tabby cat sitting on a windowsill, bathed in warm golden sunlight, soft bokeh background with green houseplants, photorealistic style, cozy atmosphere")`
-  - The generated image will automatically be displayed in the chat.
- User: "Draw me a logo for a coffee shop called Bean Dream"
-  - Call: `generate_image(prompt="Minimalist modern logo design for a coffee shop called 'Bean Dream', featuring a stylized coffee bean with dream-like swirls of steam, clean vector style, warm brown and cream color palette, white background, professional branding")`
-  - The generated image will automatically be displayed in the chat.
- User: "Show me this image: https://example.com/image.png"
-  - Simply include it in your response using markdown: `![Image](https://example.com/image.png)`
- User uploads an image file and asks: "What is this image about?"
-  - The user's uploaded image is already visible in the chat.
-  - Simply analyze the image content and respond directly.
--- a/surfsense_backend/app/prompts/system_prompt_composer/examples/generate_podcast.md
+++ b/surfsense_backend/app/prompts/system_prompt_composer/examples/generate_podcast.md
@@ -1,7 +0,0 @@
-
- User: "Give me a podcast about AI trends based on what we discussed"
-  - First search for relevant content, then call: `generate_podcast(source_content="Based on our conversation and search results: [detailed summary of chat + search findings]", podcast_title="AI Trends Podcast")`
- User: "Create a podcast summary of this conversation"
-  - Call: `generate_podcast(source_content="Complete conversation summary:\n\nUser asked about [topic 1]:\n[Your detailed response]\n\nUser then asked about [topic 2]:\n[Your detailed response]\n\n[Continue for all exchanges in the conversation]", podcast_title="Conversation Summary")`
- User: "Make a podcast about quantum computing"
-  - First explore `/documents/` (ls/glob/grep/read_file), then: `generate_podcast(source_content="Key insights about quantum computing from retrieved files:\n\n[Comprehensive summary of findings]", podcast_title="Quantum Computing Explained")`
--- a/surfsense_backend/app/prompts/system_prompt_composer/examples/generate_report.md
+++ b/surfsense_backend/app/prompts/system_prompt_composer/examples/generate_report.md
@@ -1,13 +0,0 @@
-
- User: "Generate a report about AI trends"
-  - Call: `generate_report(topic="AI Trends Report", source_strategy="kb_search", search_queries=["AI trends recent developments", "artificial intelligence industry trends", "AI market growth and predictions"], report_style="detailed")`
-  - WHY: Has creation verb "generate" → call the tool. No prior discussion → use kb_search.
- User: "Write a research report from this conversation"
-  - Call: `generate_report(topic="Research Report", source_strategy="conversation", source_content="Complete conversation summary:\n\n...", report_style="deep_research")`
-  - WHY: Has creation verb "write" → call the tool. Conversation has the content → use source_strategy="conversation".
- User: (after a report on Climate Change was generated) "Add a section about carbon capture technologies"
-  - Call: `generate_report(topic="Climate Crisis: Causes, Impacts, and Solutions", source_strategy="conversation", source_content="[summary of conversation context if any]", parent_report_id=<previous_report_id>, user_instructions="Add a new section about carbon capture technologies")`
-  - WHY: Has modification verb "add" + specific deliverable target → call the tool with parent_report_id.
- User: (after a report was generated) "What else could we add to have more depth?"
-  - Do NOT call generate_report. Answer in chat with suggestions.
-  - WHY: No creation/modification verb directed at producing a deliverable.
--- a/surfsense_backend/app/prompts/system_prompt_composer/examples/generate_resume.md
+++ b/surfsense_backend/app/prompts/system_prompt_composer/examples/generate_resume.md
@@ -1,19 +0,0 @@
-
- User: "Build me a resume. I'm John Doe, engineer at Acme Corp..."
-  - Call: `generate_resume(user_info="John Doe, engineer at Acme Corp...", max_pages=1)`
-  - WHY: Has creation verb "build" + resume → call the tool.
- User: "Create my CV with this info: [experience, education, skills]"
-  - Call: `generate_resume(user_info="[experience, education, skills]", max_pages=1)`
- User: "Build me a resume" (and there is a resume/CV document in the conversation context)
-  - Extract the FULL content from the document in context, then call:
-    `generate_resume(user_info="Name: John Doe\nEmail: john@example.com\n\nExperience:\n- Senior Engineer at Acme Corp (2020-2024)\n  Led team of 5...\n\nEducation:\n- BS Computer Science, MIT (2016-2020)\n\nSkills: Python, TypeScript, AWS...", max_pages=1)`
-  - WHY: Document content is available in context — extract ALL of it into user_info. Do NOT ignore referenced documents.
- User: (after resume generated) "Change my title to Senior Engineer"
-  - Call: `generate_resume(user_info="", user_instructions="Change the job title to Senior Engineer", parent_report_id=<previous_report_id>, max_pages=1)`
-  - WHY: Modification verb "change" + refers to existing resume → set parent_report_id.
- User: (after resume generated) "Make this 2 pages and expand projects"
-  - Call: `generate_resume(user_info="", user_instructions="Expand projects and keep this to at most 2 pages", parent_report_id=<previous_report_id>, max_pages=2)`
-  - WHY: Explicit page increase request → set max_pages to 2.
- User: "How should I structure my resume?"
-  - Do NOT call generate_resume. Answer in chat with advice.
-  - WHY: No creation/modification verb.
--- a/surfsense_backend/app/prompts/system_prompt_composer/examples/generate_video_presentation.md
+++ b/surfsense_backend/app/prompts/system_prompt_composer/examples/generate_video_presentation.md
@@ -1,7 +0,0 @@
-
- User: "Give me a presentation about AI trends based on what we discussed"
-  - First search for relevant content, then call: `generate_video_presentation(source_content="Based on our conversation and search results: [detailed summary of chat + search findings]", video_title="AI Trends Presentation")`
- User: "Create slides summarizing this conversation"
-  - Call: `generate_video_presentation(source_content="Complete conversation summary:\n\nUser asked about [topic 1]:\n[Your detailed response]\n\nUser then asked about [topic 2]:\n[Your detailed response]\n\n[Continue for all exchanges in the conversation]", video_title="Conversation Summary")`
- User: "Make a video presentation about quantum computing"
-  - First explore `/documents/` (ls/glob/grep/read_file), then: `generate_video_presentation(source_content="Key insights about quantum computing from retrieved files:\n\n[Comprehensive summary of findings]", video_title="Quantum Computing Explained")`
--- a/surfsense_backend/app/prompts/system_prompt_composer/examples/scrape_webpage.md
+++ b/surfsense_backend/app/prompts/system_prompt_composer/examples/scrape_webpage.md
@@ -1,13 +0,0 @@
-
- User: "Check out https://dev.to/some-article"
-  - Call: `scrape_webpage(url="https://dev.to/some-article")`
-  - Respond with a structured analysis — key points, takeaways.
- User: "Read this article and summarize it for me: https://example.com/blog/ai-trends"
-  - Call: `scrape_webpage(url="https://example.com/blog/ai-trends")`
-  - Respond with a thorough summary using headings and bullet points.
- User: (after discussing https://example.com/stats) "Can you get the live data from that page?"
-  - Call: `scrape_webpage(url="https://example.com/stats")`
-  - IMPORTANT: Always attempt scraping first. Never refuse before trying the tool.
- User: "https://example.com/blog/weekend-recipes"
-  - Call: `scrape_webpage(url="https://example.com/blog/weekend-recipes")`
-  - When a user sends just a URL with no instructions, scrape it and provide a concise summary of the content.
--- a/surfsense_backend/app/prompts/system_prompt_composer/examples/update_memory_private.md
+++ b/surfsense_backend/app/prompts/system_prompt_composer/examples/update_memory_private.md
@@ -1,16 +0,0 @@
-
- <user_name>Alex</user_name>, <user_memory> is empty. User: "I'm a space enthusiast, explain astrophage to me"
-  - The user casually shared a durable fact:
-    update_memory(updated_memory="## Facts\n- 2025-03-15: Alex is a space enthusiast\n")
- User: "Remember that I prefer concise answers over detailed explanations"
-  - Durable preference. Merge with existing memory:
-    update_memory(updated_memory="## Facts\n- 2025-03-15: Alex is a space enthusiast\n\n## Preferences\n- 2025-03-15: Alex prefers concise answers over detailed explanations\n")
- User: "I actually moved to Tokyo last month"
-  - Updated fact, date prefix reflects when recorded:
-    update_memory(updated_memory="## Facts\n- 2025-03-15: Alex lives in Tokyo (previously London)\n...")
- User: "I'm a freelance photographer working on a nature documentary"
-  - Durable background info under a fitting heading:
-    update_memory(updated_memory="...\n\n## Current Focus\n- 2025-03-15: Alex is a freelance photographer\n- 2025-03-15: Alex is working on a nature documentary\n")
- User: "Always respond in bullet points"
-  - Standing instruction:
-    update_memory(updated_memory="...\n\n## Instructions\n- 2025-03-15: Always respond to Alex in bullet points\n")
--- a/surfsense_backend/app/prompts/system_prompt_composer/examples/update_memory_team.md
+++ b/surfsense_backend/app/prompts/system_prompt_composer/examples/update_memory_team.md
@@ -1,7 +0,0 @@
-
- User: "Let's remember that we decided to do weekly standup meetings on Mondays"
-  - Durable team decision:
-    update_memory(updated_memory="## Product Decisions\n- 2025-03-15: Weekly standup meetings happen on Mondays\n...")
- User: "Our office is in downtown Seattle, 5th floor"
-  - Durable team fact:
-    update_memory(updated_memory="## Project Facts\n- 2025-03-15: Office location is downtown Seattle, 5th floor\n...")
--- a/surfsense_backend/app/prompts/system_prompt_composer/examples/web_search.md
+++ b/surfsense_backend/app/prompts/system_prompt_composer/examples/web_search.md
@@ -1,8 +0,0 @@
-
- User: "What's the current USD to INR exchange rate?"
-  - Call: `web_search(query="current USD to INR exchange rate")`
-  - Then answer using the returned web results with citations.
- User: "What's the latest news about AI?"
-  - Call: `web_search(query="latest AI news today")`
- User: "What's the weather in New York?"
-  - Call: `web_search(query="weather New York today")`
--- a/surfsense_backend/app/prompts/system_prompt_composer/providers/__init__.py
+++ b/surfsense_backend/app/prompts/system_prompt_composer/providers/__init__.py
@@ -1 +0,0 @@
-
--- a/Show more
+++ b/Show more