Merge remote-tracking branch 'upstream/dev' into feat/api-key

2026-06-24 21:38:09 +02:00 · 2026-06-23 13:09:53 +05:30 · 2026-06-23 13:09:53 +05:30 · 3695e1d5c5
commit 3695e1d5c5
parent 96c1dd9d4f 1dc3fac81d
64 changed files with 1043 additions and 1852 deletions
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/kb_persistence/middleware.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/kb_persistence/middleware.py
@ -18,6 +18,7 @@ skipped (e.g. client disconnect).

 from __future__ import annotations

+import asyncio
 import logging
 from datetime import UTC, datetime
 from typing import Any
@ -57,8 +58,9 @@ from app.db import (
    FolderRevision,
    shielded_async_session,
 )
-from app.indexing_pipeline.cache.cached_indexing import build_chunk_embeddings
+from app.indexing_pipeline.document_chunker import chunk_text
 from app.utils.document_converters import (
+    embed_texts,
    generate_content_hash,
    generate_unique_identifier_hash,
 )
@ -232,23 +234,24 @@ async def _create_document(
    session.add(doc)
    await session.flush()

-    summary_embedding, chunk_embeddings = await build_chunk_embeddings(
-        content, use_code_chunker=False
-    )
+    summary_embedding = (await asyncio.to_thread(embed_texts, [content]))[0]
    doc.embedding = summary_embedding
-    session.add_all(
-        [
-            Chunk(
-                document_id=doc.id,
-                content=sl.text,
-                embedding=embedding,
-                position=i,
-                start_char=sl.start_char,
-                end_char=sl.end_char,
-            )
-            for i, (sl, embedding) in enumerate(chunk_embeddings)
-        ]
-    )
+    chunks = chunk_text(content)
+    if chunks:
+        chunk_embeddings = await asyncio.to_thread(embed_texts, chunks)
+        session.add_all(
+            [
+                Chunk(
+                    document_id=doc.id,
+                    content=text,
+                    embedding=embedding,
+                    position=i,
+                )
+                for i, (text, embedding) in enumerate(
+                    zip(chunks, chunk_embeddings, strict=True)
+                )
+            ]
+        )
    return doc


@ -284,25 +287,26 @@ async def _update_document(
        search_space_id,
    )

-    summary_embedding, chunk_embeddings = await build_chunk_embeddings(
-        content, use_code_chunker=False
-    )
+    summary_embedding = (await asyncio.to_thread(embed_texts, [content]))[0]
    document.embedding = summary_embedding

    await session.execute(delete(Chunk).where(Chunk.document_id == document.id))
-    session.add_all(
-        [
-            Chunk(
-                document_id=document.id,
-                content=sl.text,
-                embedding=embedding,
-                position=i,
-                start_char=sl.start_char,
-                end_char=sl.end_char,
-            )
-            for i, (sl, embedding) in enumerate(chunk_embeddings)
-        ]
-    )
+    chunks = chunk_text(content)
+    if chunks:
+        chunk_embeddings = await asyncio.to_thread(embed_texts, chunks)
+        session.add_all(
+            [
+                Chunk(
+                    document_id=document.id,
+                    content=text,
+                    embedding=embedding,
+                    position=i,
+                )
+                for i, (text, embedding) in enumerate(
+                    zip(chunks, chunk_embeddings, strict=True)
+                )
+            ]
+        )
    return document


--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md
@ -1,58 +1,42 @@
 <citations>
-Citations reach the answer through three channels. Use whichever applies, and
-never invent ids you didn't see: ids are matched exactly, so a wrong one
-silently breaks the link — when in doubt, omit. Always write a citation as
-plain `[citation:…]` brackets — no markdown links, no footnote numbers, no
-parentheses.
+Citations reach the answer through two channels. Use whichever applies — and
+never invent ids you didn't see. Citation ids are resolved by exact-match
+lookup; a wrong id silently breaks the link, so when in doubt, omit.

-### Channel A — web_search chunk blocks injected this turn
+### Channel A — chunk blocks injected this turn
 When `web_search` returns `<document>` / `<chunk id='…'>` blocks in this
-turn, the chunk `id` is the result's URL:
+turn:

-1. For each factual statement taken from a chunk, add `[citation:<url>]`
-   using the **exact** id from a visible `<chunk id='…'>` tag. Copy the
-   URL verbatim; do not retype it from memory.
-2. Multiple chunks → `[citation:url1], [citation:url2]` (comma-separated,
+1. For each factual statement taken from those chunks, add
+   `[citation:chunk_id]` using the **exact** id from a visible
+   `<chunk id='…'>` tag. Copy digit-for-digit (or the URL verbatim);
+   do not retype from memory.
+2. `<document_id>` is the parent doc id, **not** a citation source —
+   only ids inside `<chunk id='…'>` count.
+3. Multiple chunks → `[citation:id1], [citation:id2]` (comma-separated,
   each id copied individually).
-3. Never invent, normalise, or guess at a URL; if unsure, omit.
+4. Never invent, normalise, or guess at adjacent ids; if unsure, omit.
+5. Plain brackets only — no markdown links, no footnote numbering.

 ### Channel B — citations relayed by a `task` specialist
-A `task(...)` tool message may contain `[citation:…]` markers the
-specialist already attached to its prose — line citations
-(`[citation:d<id>#L<a>-<b>]`) or chunk ids (`[citation:N]`). The
-specialist read the underlying document and tied each marker to a
-passage; you didn't. So:
+A `task(...)` tool message may contain `[citation:<chunk_id>]` markers
+the specialist already attached to its prose. The specialist saw the
+underlying `<chunk id='…'>` blocks; you didn't. So:

 1. **Preserve those markers verbatim** in your final answer — do not
   reformat, renumber, drop, or wrap them in markdown links. When you
   paraphrase a specialist sentence, copy the marker character-for-
-   character; do not regenerate it from memory (LLMs reliably corrupt
-   nearby digits).
+   character; do not regenerate the id from memory (LLMs reliably
+   corrupt nearby digits).
 2. Keep each marker attached to the sentence the specialist attached
   it to.
 3. Do **not** add new `[citation:…]` markers of your own to a
   specialist's prose; if a fact has no marker, the specialist
-   couldn't tie it to a source and neither can you.
+   couldn't tie it to a chunk and neither can you.
 4. When a specialist returns JSON, the citation markers live inside
   the prose-bearing fields (e.g. a summary or excerpt). Pull them
   along with the surrounding sentence when you quote.

-### Channel C — your knowledge base (search hits and `read_file`)
-Knowledge-base facts are cited by line range using the document id:
-`[citation:d<document_id>#L<start>-<end>]` (a single line is `#L<n>-<n>`).
-
-1. `search_knowledge_base` prints a ready `[citation:d…#L…-…]` token above each
-   matched passage. When that passage supports your point, copy the token
-   verbatim — that is the entire citation.
-2. When you `read_file` a `/documents/...` path, its header gives the
-   `<document_id>` and an optional `<matched_lines>` pointer, and the body is
-   shown with line numbers; cite the lines you actually used. Use `read_file`
-   when you need more context than a search passage shows.
-3. Copy document ids and line numbers exactly as shown — never estimate,
-   shift, or invent them.
-4. Older documents without a numbered body instead show `<chunk id='N'>`
-   blocks; cite those with `[citation:N]`, copying the id exactly.
-
-If none of these channels surfaces a citable source this turn, do not
-fabricate citations.
+If neither channel surfaces citation markers this turn, do not fabricate
+them.
 </citations>
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/search_knowledge_base.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/search_knowledge_base.py
@ -33,7 +33,6 @@ from app.agents.chat.runtime.path_resolver import (
 )
 from app.db import Document, shielded_async_session
 from app.utils.perf import get_perf_logger
-from app.utils.text_spans import char_span_to_line_range

 _perf_log = get_perf_logger()

@ -57,16 +56,12 @@ _TOOL_DESCRIPTION = (
 )


-async def _resolve_doc_context(
+async def _resolve_virtual_paths(
    results: list[dict[str, Any]],
    *,
    search_space_id: int,
-) -> tuple[dict[int, str], dict[int, str]]:
-    """Resolve ``Document.id`` -> (canonical virtual path, source_markdown).
-
-    ``source_markdown`` is the canonical body the chunk spans index into; the
-    renderer uses it to turn a chunk's char span into a line range.
-    """
+) -> dict[int, str]:
+    """Resolve ``Document.id`` -> canonical virtual path for the search hits."""
    doc_ids = [
        doc_id
        for doc_id in (
@ -77,24 +72,17 @@ async def _resolve_doc_context(
        if isinstance(doc_id, int)
    ]
    if not doc_ids:
-        return {}, {}
+        return {}

    async with shielded_async_session() as session:
        index: PathIndex = await build_path_index(session, search_space_id)
-        rows = await session.execute(
-            select(
-                Document.id, Document.folder_id, Document.source_markdown
-            ).where(
+        folder_rows = await session.execute(
+            select(Document.id, Document.folder_id).where(
                Document.search_space_id == search_space_id,
                Document.id.in_(doc_ids),
            )
        )
-        folder_by_doc_id: dict[int, int | None] = {}
-        bodies: dict[int, str] = {}
-        for row in rows.all():
-            folder_by_doc_id[row.id] = row.folder_id
-            if row.source_markdown:
-                bodies[row.id] = row.source_markdown
+        folder_by_doc_id = {row.id: row.folder_id for row in folder_rows.all()}

    paths: dict[int, str] = {}
    for doc in results:
@ -109,76 +97,13 @@ async def _resolve_doc_context(
            folder_id=folder_id if isinstance(folder_id, int) else None,
            index=index,
        )
-    return paths, bodies
-
-
-def _citation_token(chunk: dict[str, Any], body: str | None, doc_id: int | None) -> str:
-    """Ready-to-copy ``[citation:dID#Lstart-end]`` token, or '' without spans."""
-    start = chunk.get("start_char")
-    end = chunk.get("end_char")
-    if (
-        not body
-        or not isinstance(doc_id, int)
-        or not isinstance(start, int)
-        or not isinstance(end, int)
-    ):
-        return ""
-    start_line, end_line = char_span_to_line_range(body, start, end)
-    return f"[citation:d{doc_id}#L{start_line}-{end_line}]"
-
-
-def _render_passage(
-    chunk: dict[str, Any], body: str | None, doc_id: int | None
-) -> str | None:
-    """Render one matched chunk as an indented passage tagged with its token."""
-    content = (chunk.get("content") or "").strip()
-    if not content:
-        return None
-    snippet = content[:_PER_DOC_SNIPPET_CHARS].strip()
-    if len(content) > _PER_DOC_SNIPPET_CHARS:
-        snippet += " ..."
-    indented = snippet.replace("\n", "\n   ")
-    token = _citation_token(chunk, body, doc_id)
-    head = f"\n   {token}" if token else ""
-    return f"{head}\n   {indented}"
-
-
-def _matched_passages(
-    doc: dict[str, Any], body: str | None, doc_id: int | None
-) -> str:
-    """Render the RRF-matched chunks; '' when none can be rendered."""
-    by_id = {
-        c.get("chunk_id"): c
-        for c in (doc.get("chunks") or [])
-        if isinstance(c, dict)
-    }
-    rendered: list[str] = []
-    for chunk_id in doc.get("matched_chunk_ids") or []:
-        chunk = by_id.get(chunk_id)
-        if chunk is None:
-            continue
-        passage = _render_passage(chunk, body, doc_id)
-        if passage:
-            rendered.append(passage)
-    return "".join(rendered)
-
-
-def _fallback_snippet(doc: dict[str, Any]) -> str:
-    """Top-of-document preview, used only when no matched chunk is available."""
-    content = (doc.get("content") or "").strip()
-    if not content:
-        return "\n   (no preview available; read the document for details)"
-    snippet = content[:_PER_DOC_SNIPPET_CHARS].strip()
-    if len(content) > _PER_DOC_SNIPPET_CHARS:
-        snippet += " ..."
-    return "\n   " + snippet.replace("\n", "\n   ")
+    return paths


 def _format_hits(
    results: list[dict[str, Any]],
    *,
    paths: dict[int, str],
-    bodies: dict[int, str],
    query: str,
 ) -> str:
    """Render search hits as a compact, model-readable block."""
@ -199,15 +124,21 @@ def _format_hits(
        score = doc.get("score")
        score_str = f"{score:.3f}" if isinstance(score, int | float) else "n/a"
        path = paths.get(doc_id) if isinstance(doc_id, int) else None
-        body = bodies.get(doc_id) if isinstance(doc_id, int) else None

-        id_str = f"id={doc_id}, " if isinstance(doc_id, int) else ""
-        header = f"\n{rank}. {title} ({id_str}type={doc_type}, score={score_str})" + (
+        header = f"\n{rank}. {title} (type={doc_type}, score={score_str})" + (
            f"\n   path: {path}" if path else ""
        )

-        passages = _matched_passages(doc, body, doc_id if isinstance(doc_id, int) else None)
-        entry = header + (passages or _fallback_snippet(doc))
+        content = (doc.get("content") or "").strip()
+        if content:
+            snippet = content[:_PER_DOC_SNIPPET_CHARS].strip()
+            if len(content) > _PER_DOC_SNIPPET_CHARS:
+                snippet += " ..."
+            body = "\n   " + snippet.replace("\n", "\n   ")
+        else:
+            body = "\n   (no preview available; read the document for details)"
+
+        entry = header + body
        if total + len(entry) > _MAX_TOTAL_CHARS:
            lines.append("\n<!-- additional matches truncated to fit context -->")
            break
@ -215,9 +146,8 @@ def _format_hits(
        total += len(entry)

    lines.append(
-        "\n\nTo cite a matched passage, copy its [citation:dID#Lstart-end] token "
-        "verbatim. To quote more context or read the full document, delegate to "
-        "the knowledge_base specialist with `task` using the path above."
+        "\n\nTo read a full document, delegate to the knowledge_base specialist "
+        "with `task`, referencing the path above."
    )
    lines.append("\n</knowledge_base_results>")
    return "".join(lines)
@ -274,10 +204,8 @@ def create_search_knowledge_base_tool(
            top_k=clamped_top_k,
        )

-        paths, bodies = await _resolve_doc_context(results, search_space_id=_space_id)
-        rendered = _format_hits(
-            results, paths=paths, bodies=bodies, query=cleaned_query
-        )
+        paths = await _resolve_virtual_paths(results, search_space_id=_space_id)
+        rendered = _format_hits(results, paths=paths, query=cleaned_query)
        matched = _matched_chunk_ids(results)

        _perf_log.info(
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/backends/kb_postgres.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/backends/kb_postgres.py
@ -45,10 +45,6 @@ from sqlalchemy.ext.asyncio import AsyncSession
 from app.agents.chat.multi_agent_chat.shared.middleware.filesystem.backends.document_xml import (
    build_document_xml,
 )
-from app.agents.chat.multi_agent_chat.shared.middleware.filesystem.backends.numbered_document import (
-    build_read_preamble,
-    compute_matched_line_ranges,
-)
 from app.agents.chat.runtime.path_resolver import (
    DOCUMENTS_ROOT,
    build_path_index,
@ -68,12 +64,6 @@ def _basename(path: str) -> str:
    return path.rsplit("/", 1)[-1]


-def _metadata_url(metadata: dict[str, Any]) -> str:
-    return (
-        metadata.get("url") or metadata.get("source") or metadata.get("page_url") or ""
-    )
-
-
 def _is_under(child: str, parent: str) -> bool:
    """Return True iff ``child`` is at-or-under ``parent`` (directory semantics)."""
    if parent == "/":
@ -470,11 +460,8 @@ class KBPostgresBackend(BackendProtocol):
        loaded = await self._load_file_data(file_path)
        if loaded is None:
            return f"Error: File '{file_path}' not found"
-        file_data, _, preamble = loaded
-        body = format_read_response(file_data, offset, limit)
-        if preamble and offset == 0:
-            return preamble + body
-        return body
+        file_data, _ = loaded
+        return format_read_response(file_data, offset, limit)

    def read(self, file_path: str, offset: int = 0, limit: int = 2000) -> str:  # type: ignore[override]
        return asyncio.run(self.aread(file_path, offset, limit))
@ -482,14 +469,12 @@ class KBPostgresBackend(BackendProtocol):
    async def _load_file_data(
        self,
        path: str,
-    ) -> tuple[dict[str, Any], int | None, str | None] | None:
+    ) -> tuple[dict[str, Any], int | None] | None:
        """Lazy-load a virtual KB document into a deepagents ``FileData``.

-        Returns ``(file_data, doc_id, preamble)`` or ``None`` if the path
-        doesn't map to any known document. ``doc_id`` is ``None`` for the
-        synthetic anonymous document. ``preamble`` is the metadata header to
-        show above a numbered ``source_markdown`` body (``None`` for the legacy
-        chunk-reconstructed XML reads used when a document has no body).
+        Returns ``(file_data, doc_id)`` or ``None`` if the path doesn't map
+        to any known document. ``doc_id`` is ``None`` for the synthetic
+        anonymous document so the caller doesn't track it as a DB-backed file.
        """
        anon = self._kb_anon_doc()
        if anon and str(anon.get("path") or "") == path:
@ -507,7 +492,7 @@ class KBPostgresBackend(BackendProtocol):
            }
            xml = build_document_xml(doc_payload, matched_chunk_ids=set())
            file_data = create_file_data(xml)
-            return file_data, None, None
+            return file_data, None

        if not path.startswith(DOCUMENTS_ROOT):
            return None
@ -520,58 +505,41 @@ class KBPostgresBackend(BackendProtocol):
            )
            if document is None:
                return None
-            source_markdown = document.source_markdown or ""
-            document_type = (
-                document.document_type.value
-                if getattr(document, "document_type", None) is not None
-                else "UNKNOWN"
-            )
-            metadata = dict(document.document_metadata or {})
            chunk_rows = await session.execute(
-                select(Chunk.id, Chunk.content, Chunk.start_char, Chunk.end_char)
+                select(Chunk.id, Chunk.content)
                .where(Chunk.document_id == document.id)
                .order_by(Chunk.position, Chunk.id)
            )
-            chunk_records = chunk_rows.all()
-            document_id = document.id
-            document_title = document.title
+            chunks = [
+                {"chunk_id": row.id, "content": row.content} for row in chunk_rows.all()
+            ]

-        matched = self._matched_chunk_ids(document_id)
-
-        # Canonical read: serve the verbatim body with cat -n line numbers that
-        # line up with chunk char spans, so the agent cites real source lines.
-        if source_markdown:
-            ranges = compute_matched_line_ranges(
-                source_markdown,
-                [(r.id, r.start_char, r.end_char) for r in chunk_records],
-                matched,
-            )
-            preamble = build_read_preamble(
-                document_id=document_id,
-                document_type=document_type,
-                title=document_title,
-                url=_metadata_url(metadata),
-                matched_line_ranges=ranges,
-            )
-            return create_file_data(source_markdown), document_id, preamble
-
-        # Legacy fallback: no canonical body, reconstruct from chunks as XML.
        doc_payload = {
-            "document_id": document_id,
-            "chunks": [
-                {"chunk_id": r.id, "content": r.content} for r in chunk_records
-            ],
-            "matched_chunk_ids": list(matched),
+            "document_id": document.id,
+            "chunks": chunks,
+            "matched_chunk_ids": list(self._matched_chunk_ids(document.id)),
            "document": {
-                "id": document_id,
-                "title": document_title,
-                "document_type": document_type,
-                "metadata": metadata,
+                "id": document.id,
+                "title": document.title,
+                "document_type": (
+                    document.document_type.value
+                    if getattr(document, "document_type", None) is not None
+                    else "UNKNOWN"
+                ),
+                "metadata": dict(document.document_metadata or {}),
            },
-            "source": document_type,
+            "source": (
+                document.document_type.value
+                if getattr(document, "document_type", None) is not None
+                else "UNKNOWN"
+            ),
        }
-        xml = build_document_xml(doc_payload, matched_chunk_ids=matched)
-        return create_file_data(xml), document_id, None
+        xml = build_document_xml(
+            doc_payload,
+            matched_chunk_ids=self._matched_chunk_ids(document.id),
+        )
+        file_data = create_file_data(xml)
+        return file_data, document.id

    # ------------------------------------------------------------------ writes

@ -603,7 +571,7 @@ class KBPostgresBackend(BackendProtocol):
            loaded = await self._load_file_data(file_path)
            if loaded is None:
                return EditResult(error=f"Error: File '{file_path}' not found")
-            file_data, _, _ = loaded
+            file_data, _ = loaded

        content = file_data_to_string(file_data)
        result = perform_string_replacement(
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/backends/numbered_document.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/backends/numbered_document.py
@ -1,73 +0,0 @@
-"""Read preamble for canonical (numbered ``source_markdown``) KB reads.
-
-The KB read tool numbers the body lines ``cat -n`` style, so serving the raw
-``source_markdown`` makes those line numbers line up exactly with the chunk
-char spans and the editor highlight. This module renders the small header the
-agent sees above that body: document identity plus the matched line ranges to
-seek to, and a concrete reminder of the line-citation token shape.
-"""
-
-from __future__ import annotations
-
-from collections.abc import Iterable
-
-from app.utils.text_spans import char_span_to_line_range
-
-
-def _format_range(start: int, end: int) -> str:
-    return f"{start}" if start == end else f"{start}-{end}"
-
-
-def compute_matched_line_ranges(
-    source_markdown: str,
-    chunks: Iterable[tuple[int, int | None, int | None]],
-    matched_chunk_ids: set[int],
-) -> list[tuple[int, int]]:
-    """Map matched chunks to sorted, de-duplicated 1-based line ranges.
-
-    ``chunks`` are ``(chunk_id, start_char, end_char)`` triples. Chunks without
-    spans (legacy rows) are skipped — they have no resolvable location.
-    """
-    ranges: set[tuple[int, int]] = set()
-    for chunk_id, start_char, end_char in chunks:
-        if chunk_id not in matched_chunk_ids:
-            continue
-        if start_char is None or end_char is None:
-            continue
-        ranges.add(char_span_to_line_range(source_markdown, start_char, end_char))
-    return sorted(ranges)
-
-
-def build_read_preamble(
-    *,
-    document_id: int,
-    document_type: str,
-    title: str,
-    url: str,
-    matched_line_ranges: list[tuple[int, int]],
-) -> str:
-    """Render the metadata header shown above a numbered ``source_markdown`` body.
-
-    ``matched_line_ranges`` are 1-based inclusive line ranges (already derived
-    from chunk char spans) to point the agent at the relevant lines.
-    """
-    lines = [
-        "<document_metadata>",
-        f"  <document_id>{document_id}</document_id>",
-        f"  <document_type>{document_type}</document_type>",
-        f"  <title><![CDATA[{title}]]></title>",
-        f"  <url><![CDATA[{url}]]></url>",
-    ]
-    if matched_line_ranges:
-        ranges = ", ".join(_format_range(s, e) for s, e in matched_line_ranges)
-        lines.append(f"  <matched_lines>{ranges}</matched_lines>")
-    lines.append("</document_metadata>")
-    lines.append(
-        f"Cite lines from this document as [citation:d{document_id}#L<start>-<end>] "
-        "using the line numbers shown below."
-    )
-    lines.append("")
-    return "\n".join(lines)
-
-
-__all__ = ["build_read_preamble", "compute_matched_line_ranges"]
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/edit_file/index.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/edit_file/index.py
@ -73,7 +73,7 @@ def create_edit_file_tool(mw: SurfSenseFilesystemMiddleware) -> BaseTool:
            loaded = await backend._load_file_data(validated)
            if loaded is None:
                return f"Error: File '{validated}' not found"
-            _, doc_id_to_attach, _ = loaded
+            _, doc_id_to_attach = loaded

        res: EditResult = await backend.aedit(
            validated, old_string, new_string, replace_all=replace_all
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/move_file/helpers.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/move_file/helpers.py
@ -75,7 +75,7 @@ async def cloud_move_file(
        loaded = await backend._load_file_data(source)
        if loaded is None:
            return f"Error: source '{source}' not found."
-        source_file_data, loaded_doc_id, _ = loaded
+        source_file_data, loaded_doc_id = loaded
        if source_doc_id is None:
            source_doc_id = loaded_doc_id

--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/read_file/index.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/read_file/index.py
@ -58,10 +58,8 @@ def create_read_file_tool(mw: SurfSenseFilesystemMiddleware) -> BaseTool:
            loaded = await backend._load_file_data(validated)
            if loaded is None:
                return f"Error: File '{validated}' not found"
-            file_data, doc_id, preamble = loaded
+            file_data, doc_id = loaded
            rendered = format_read_response(file_data, offset, limit)
-            if preamble and offset == 0:
-                rendered = preamble + rendered
            update: dict[str, Any] = {
                "files": {validated: file_data},
                "messages": [
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/rm/helpers.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/rm/helpers.py
@ -74,7 +74,7 @@ async def cloud_rm(
        loaded = await backend._load_file_data(validated)
        if loaded is None:
            return f"Error: file '{validated}' not found."
-        _, resolved_doc_id, _ = loaded
+        _, resolved_doc_id = loaded

    files_update: dict[str, Any] = {validated: None}
    update: dict[str, Any] = {
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/deliverables/tools/generate_image.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/deliverables/tools/generate_image.py
@ -240,24 +240,23 @@ def create_generate_image_tool(
                    error="No images were generated",
                )

+            # Rewrite every relative image URL in response_dict (one starting with "/", e.g. /files/image.png) to an absolute URL on the provider's origin (for the serving endpoint)
+            from urllib.parse import urlparse
+            for image in images:
+                if image.get("url"):
+                    raw_url: str = image["url"]
+                    if raw_url.startswith("/") and provider_base_url:
+                        parsed = urlparse(provider_base_url)
+                        origin = f"{parsed.scheme}://{parsed.netloc}"
+                        image["url"] = f"{origin}{raw_url}"  # Update the stored dict!
+
            first_image = images[0]
            revised_prompt = first_image.get("revised_prompt", prompt)

            # b64_json (e.g. gpt-image-1) is served via our backend endpoint so
            # megabytes of base64 don't bloat the LLM context.
-            # Some OpenAI-compatible backends (e.g. Xinference) return a relative
-            # URL like /files/image.png. Browsers can't resolve these, so we
-            # prepend the provider's base origin when the URL starts with "/".
            if first_image.get("url"):
-                raw_url: str = first_image["url"]
-                if raw_url.startswith("/") and provider_base_url:
-                    from urllib.parse import urlparse
-
-                    parsed = urlparse(provider_base_url)
-                    origin = f"{parsed.scheme}://{parsed.netloc}"
-                    image_url = f"{origin}{raw_url}"
-                else:
-                    image_url = raw_url
+                image_url = first_image["url"]
            elif first_image.get("b64_json"):
                backend_url = config.BACKEND_URL or "http://localhost:8000"
                image_url = (
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md
@ -35,24 +35,42 @@ Map outcomes to your `status`:

 You construct the structured `evidence` fields from your own knowledge of what you called and what you observed — the tools do not return them. Never report values you did not actually see.

-## Citations in your prose
+## Chunk citations in your prose

-`read_file` on a KB document under `/documents/` serves it in one of two forms. Cite from whichever you actually see, attach the marker to the sentence in `action_summary` or `evidence.content_excerpt` stating that fact, and list every marker you emit in `evidence.citations`. The caller relays these markers to the end user verbatim, and the UI resolves each by exact match, so a wrong id or line number silently breaks the citation.
+When `read_file` returns a KB-indexed document under `/documents/`, the response includes `<chunk id='…'>` blocks. Whenever a fact in your `action_summary` or `evidence.content_excerpt` came from a specific chunk, append `[citation:<chunk_id>]` to the sentence stating that fact, using the **exact** id from the `<chunk id='…'>` tag. The caller relays these markers to the end user verbatim, and the UI resolves each id by exact match against the database, so a wrong id silently breaks the citation.

-**Numbered body (default).** A `<document_metadata>` header gives the `<document_id>` and an optional `<matched_lines>` pointer, then the body is shown with line numbers. Cite the lines a fact came from as `[citation:d<document_id>#L<start>-<end>]` (a single line is `#L<n>-<n>`).
+### Where chunk ids live in `read_file` output

-**Legacy chunk blocks (older docs without a stored body).** The response is XML with `<chunk id='N'>` blocks. Cite the chunk a fact came from as `[citation:N]`, using the **exact** id from a `<chunk id='…'>` tag.
+A KB document's XML carries numeric ids in three places — only **one** is a citation source:
+
+```
+<document>
+<document_metadata>
+  <document_id>42</document_id>          ← NOT a citation. Parent doc id; ignore for citations.
+  ...
+</document_metadata>
+<chunk_index>
+  <entry chunk_id="128" lines="14-22"/>  ← Index hint; the same id also appears below.
+  <entry chunk_id="129" lines="23-30" matched="true"/>
+</chunk_index>
+<document_content>
+  <chunk id='128'><![CDATA[…]]></chunk>  ← This is the citation source.
+  <chunk id='129'><![CDATA[…]]></chunk>
+</document_content>
+</document>
+```

 ### Rules

- Cite only from a passage you actually quoted or paraphrased this turn. Copy document ids, line numbers, and chunk ids character-for-character; never retype from memory.
- Never cite `<document_id>` on its own — it identifies the document, not a passage. In the numbered form it is only the `d<document_id>` prefix of a line citation.
- Never invent, normalise, shorten, shift, or guess at ids or line numbers. If unsure, omit rather than pick.
+- Use the **exact** id from a `<chunk id='…'>` tag whose content you actually quoted or paraphrased. Copy digit-for-digit; do **not** retype from memory.
+- Before emitting `[citation:N]`, confirm the literal substring `<chunk id='N'>` (or its index twin `chunk_id="N"`) appears in the tool result you are summarising this turn. If you can't see it, omit the citation.
+- Never cite `<document_id>` — that's the parent doc, not a chunk.
+- Never invent, normalise, shorten, or guess at adjacent ids. If unsure between two candidates, omit rather than pick.
 - Prefer **fewer accurate citations** over many speculative ones.
- Multiple passages supporting the same point → comma-separated and copied individually: `[citation:d42#L14-22], [citation:d42#L31-39]`.
+- Multiple chunks supporting the same point → comma-separated and copied individually: `[citation:128], [citation:129]`.
 - Plain square brackets only — no markdown links, no parentheses, no footnote numbers.
- Tool results with no body passage (write/edit/move confirmations, `ls` / `glob` / `grep` listings, error strings) carry nothing to cite.
- Populate `evidence.citations` with **only** the markers you actually emitted — same set, same characters.
+- Tool results without `<chunk id='…'>` (write/edit/move confirmations, `ls` / `glob` / `grep` listings, error strings) carry no chunk id and need none.
+- Populate `evidence.chunk_ids` with **only** ids you actually emitted in `[citation:…]` markers — same set, same digits.

 ## Examples

@ -71,7 +89,7 @@ You construct the structured `evidence` fields from your own knowledge of what y
      "path": "/documents/meetings/2026-05-11-meeting.md",
      "matched_candidates": null,
      "content_excerpt": null,
-      "citations": null
+      "chunk_ids": null
    },
    "next_step": null,
    "missing_fields": null,
@ -103,7 +121,7 @@ You construct the structured `evidence` fields from your own knowledge of what y
        { "id": "/documents/design/auth-rework.md", "label": "Auth Rework" }
      ],
      "content_excerpt": null,
-      "citations": null
+      "chunk_ids": null
    },
    "next_step": "Ask the user which design doc to update.",
    "missing_fields": ["path"],
@ -124,7 +142,7 @@ Return **only** one JSON object (no markdown or prose outside it):
    "path": string | null,
    "matched_candidates": [ { "id": string, "label": string } ] | null,
    "content_excerpt": string | null,
-    "citations": string[] | null
+    "chunk_ids": string[] | null
  },
  "next_step": string | null,
  "missing_fields": string[] | null,
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md
@ -33,11 +33,11 @@ Map outcomes to your `status`:
 - Any other `"Error: …"` → `status=error` and relay the tool's message verbatim as `next_step`.
 - HITL rejection → `status=blocked` with `next_step="User declined this filesystem action. Do not retry."`.

-You construct the structured `evidence` fields from your own knowledge of what you called and what you observed — the tools do not return them. Never report values you did not actually see. (`citations` is always `null` in desktop mode — see "Citations in your prose" below.)
+You construct the structured `evidence` fields from your own knowledge of what you called and what you observed — the tools do not return them. Never report values you did not actually see. (`chunk_ids` is always `null` in desktop mode — see "Chunk citations in your prose" below.)

-## Citations in your prose
+## Chunk citations in your prose

-In desktop mode your filesystem tools read local files only, and local-file tool results do **not** carry chunk ids or numbered KB bodies. Do not emit `[citation:…]` markers in `action_summary` or `evidence.content_excerpt`, and leave `evidence.citations` `null` — the absolute path is the only reference for local-file work.
+In desktop mode your filesystem tools read local files only, and local-file tool results do **not** carry `<chunk id='…'>` tags. Do not emit `[citation:…]` markers in `action_summary` or `evidence.content_excerpt`, and leave `evidence.chunk_ids` `null` — the absolute path is the only reference for local-file work.

 ## Examples

@ -56,7 +56,7 @@ In desktop mode your filesystem tools read local files only, and local-file tool
      "path": "/notes/meetings/2026-05-11-meeting.md",
      "matched_candidates": null,
      "content_excerpt": null,
-      "citations": null
+      "chunk_ids": null
    },
    "next_step": null,
    "missing_fields": null,
@ -88,7 +88,7 @@ In desktop mode your filesystem tools read local files only, and local-file tool
        { "id": "/projects/web/design/auth-rework.md", "label": "Auth Rework" }
      ],
      "content_excerpt": null,
-      "citations": null
+      "chunk_ids": null
    },
    "next_step": "Ask the user which design doc to update.",
    "missing_fields": ["path"],
@ -109,7 +109,7 @@ Return **only** one JSON object (no markdown or prose outside it):
    "path": string | null,
    "matched_candidates": [ { "id": string, "label": string } ] | null,
    "content_excerpt": string | null,
-    "citations": string[] | null
+    "chunk_ids": string[] | null
  },
  "next_step": string | null,
  "missing_fields": string[] | null,
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md
@ -28,21 +28,41 @@ Reply in plain prose:
 - If the workspace does not contain the requested information, say so explicitly. Do not fabricate paths or content.
 - If the question is genuinely ambiguous after a thorough lookup, list the candidates with their paths and stop.

-## Citations
+## Chunk citations

-`read_file` on a KB document under `/documents/` serves it in one of two forms; cite a claim from whichever you actually see, alongside the path. The caller passes these markers through to the end user verbatim, and the UI resolves each by exact match, so a wrong id or line number silently breaks the citation.
+When the evidence for a claim came from a `read_file` response that included `<chunk id='…'>` blocks (i.e. a KB-indexed document under `/documents/`), append `[citation:<chunk_id>]` to the sentence stating that claim. The caller passes these markers through to the end user verbatim, and the UI resolves each id by exact match against the database, so a wrong id silently breaks the citation.

- **Numbered body (default).** A `<document_metadata>` header gives the `<document_id>`, and the body is shown with line numbers. Cite the lines a claim came from as `[citation:d<document_id>#L<start>-<end>]` (a single line is `#L<n>-<n>`).
- **Legacy chunk blocks (older docs).** XML with `<chunk id='N'>` blocks. Cite the chunk a claim came from as `[citation:N]`.
+### Where chunk ids live in `read_file` output
+
+A KB document's XML carries numeric ids in three places — only **one** is a citation source:
+
+```
+<document>
+<document_metadata>
+  <document_id>42</document_id>          ← NOT a citation. Parent doc id; ignore for citations.
+  ...
+</document_metadata>
+<chunk_index>
+  <entry chunk_id="128" lines="14-22"/>  ← Index hint; the same id also appears below.
+  <entry chunk_id="129" lines="23-30" matched="true"/>
+</chunk_index>
+<document_content>
+  <chunk id='128'><![CDATA[…]]></chunk>  ← This is the citation source.
+  <chunk id='129'><![CDATA[…]]></chunk>
+</document_content>
+</document>
+```

 ### Rules

- Copy document ids, line numbers, and chunk ids character-for-character; never retype from memory. If you cannot see the id/lines for a claim, omit the citation.
- Never cite `<document_id>` on its own — in the numbered form it is only the `d<document_id>` prefix of a line citation.
- Never invent, normalise, shorten, shift, or guess. Prefer **fewer accurate citations** over many speculative ones.
- Multiple passages supporting the same point → comma-separated and copied individually.
+- Use the **exact** id from a `<chunk id='…'>` tag whose content you actually quoted or paraphrased. Copy digit-for-digit; do **not** retype from memory.
+- Before emitting `[citation:N]`, confirm the literal substring `<chunk id='N'>` (or its index twin `chunk_id="N"`) appears in the tool result you are summarising this turn. If you can't see it, omit the citation.
+- Never cite `<document_id>` — that's the parent doc, not a chunk.
+- Never invent, normalise, shorten, or guess at adjacent ids. If unsure between two candidates, omit rather than pick.
+- Prefer **fewer accurate citations** over many speculative ones. One correct `[citation:128]` is more useful than a string of wrong ids.
+- Multiple chunks supporting the same point → comma-separated and copied individually: `[citation:128], [citation:129]`.
 - Plain square brackets only — no markdown links, no parentheses, no footnote numbers.
- Listings (`ls` / `glob` / `grep`), error strings, and files without either form carry nothing to cite.
- The absolute path under `/documents/` is always required; citations are additive, they do not replace the path reference.
+- If a claim came from a tool result that did **not** carry a chunk id (`ls`, `glob`, `grep` listings, error strings, or files without `<chunk id='…'>`), skip the citation.
+- The absolute path under `/documents/` is always required; chunk citations are additive, they do not replace the path reference.

-Example: `The Q2 roadmap lists three milestones (/documents/planning/q2-roadmap.md) [citation:d42#L3-9].`
+Example: `The Q2 roadmap lists three milestones (/documents/planning/q2-roadmap.md) [citation:128], [citation:129].`