@@ -918,6 +923,8 @@ function DesktopEditorPanel() {
searchSpaceId={panelState.searchSpaceId ?? undefined}
title={panelState.title}
onClose={closePanel}
+ highlightLines={panelState.highlightLines}
+ forceSourceView={panelState.forceSourceView}
/>
);
@@ -957,6 +964,8 @@ function MobileEditorDrawer() {
memoryScope={panelState.memoryScope ?? undefined}
searchSpaceId={panelState.searchSpaceId ?? undefined}
title={panelState.title}
+ highlightLines={panelState.highlightLines}
+ forceSourceView={panelState.forceSourceView}
/>
From 1741fdc9c8d692a07a76acd18b2933b6f8a81bc6 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Fri, 19 Jun 2026 15:43:21 +0200
Subject: [PATCH 36/47] feat: numbered-read preamble and matched line ranges
---
.../filesystem/backends/numbered_document.py | 73 +++++++++++++++++++
1 file changed, 73 insertions(+)
create mode 100644 surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/backends/numbered_document.py
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/backends/numbered_document.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/backends/numbered_document.py
new file mode 100644
index 000000000..ced77096f
--- /dev/null
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/backends/numbered_document.py
@@ -0,0 +1,73 @@
+"""Read preamble for canonical (numbered ``source_markdown``) KB reads.
+
+The KB read tool numbers the body lines ``cat -n`` style, so serving the raw
+``source_markdown`` makes those line numbers line up exactly with the chunk
+char spans and the editor highlight. This module renders the small header the
+agent sees above that body: document identity plus the matched line ranges to
+seek to, and a concrete reminder of the line-citation token shape.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Iterable
+
+from app.utils.text_spans import char_span_to_line_range
+
+
+def _format_range(start: int, end: int) -> str:
+    return f"{start}-{end}" if start != end else f"{start}"  # one line -> "N"
+
+
+def compute_matched_line_ranges(
+    source_markdown: str,
+    chunks: Iterable[tuple[int, int | None, int | None]],
+    matched_chunk_ids: set[int],
+) -> list[tuple[int, int]]:
+    """Return the distinct line ranges of the matched chunks, sorted.
+
+    ``chunks`` holds ``(chunk_id, start_char, end_char)`` triples; unmatched
+    ids and chunks missing either offset (legacy rows) are left out.
+    """
+    located = {
+        char_span_to_line_range(source_markdown, begin, finish)
+        for cid, begin, finish in chunks
+        if cid in matched_chunk_ids
+        and begin is not None
+        and finish is not None
+    }
+    return sorted(located)
+
+
+def build_read_preamble(
+    *,
+    document_id: int,
+    document_type: str,
+    title: str,
+    url: str,
+    matched_line_ranges: list[tuple[int, int]],
+) -> str:
+    """Render the metadata header shown above a numbered ``source_markdown`` body.
+
+    Emits document id, type, title and url, then ``matched_line_ranges`` (1-based
+    inclusive ranges derived from chunk char spans) and a citation-format hint.
+    """
+    lines = [
+        "<document>",
+        f"  <document_id>{document_id}</document_id>",
+        f"  <document_type>{document_type}</document_type>",
+        f"  <title>{title}</title>",
+        f"  <url>{url}</url>",
+    ]
+    if matched_line_ranges:
+        ranges = ", ".join(_format_range(s, e) for s, e in matched_line_ranges)
+        lines.append(f"  <matched_lines>{ranges}</matched_lines>")
+    lines.append("</document>")
+    lines.append(
+        f"Cite lines from this document as [citation:d{document_id}#L<start>-<end>] "
+        "using the line numbers shown below."
+    )
+    lines.append("")
+    return "\n".join(lines)
+
+
+__all__ = ["build_read_preamble", "compute_matched_line_ranges"]
From 691685dd162892aad0edca290b112e3a8de031e1 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Fri, 19 Jun 2026 15:43:21 +0200
Subject: [PATCH 37/47] test: cover read preamble and matched line ranges
---
.../unit/middleware/test_numbered_document.py | 92 +++++++++++++++++++
1 file changed, 92 insertions(+)
create mode 100644 surfsense_backend/tests/unit/middleware/test_numbered_document.py
diff --git a/surfsense_backend/tests/unit/middleware/test_numbered_document.py b/surfsense_backend/tests/unit/middleware/test_numbered_document.py
new file mode 100644
index 000000000..955c619b5
--- /dev/null
+++ b/surfsense_backend/tests/unit/middleware/test_numbered_document.py
@@ -0,0 +1,92 @@
+"""Unit tests for the numbered-document read preamble."""
+
+import pytest
+
+from app.agents.chat.multi_agent_chat.shared.middleware.filesystem.backends.numbered_document import (
+ build_read_preamble,
+ compute_matched_line_ranges,
+)
+
+pytestmark = pytest.mark.unit
+
+
+_BODY = "alpha\nbravo\ncharlie\ndelta"
+
+
+class TestComputeMatchedLineRanges:
+    def test_maps_matched_chunk_spans_to_line_ranges(self):
+        # Offset 12 is where "charlie" begins, so chunk 2 spans lines 3-4.
+        spans = [(1, 0, 12), (2, 12, len(_BODY))]
+        assert compute_matched_line_ranges(_BODY, spans, {2}) == [(3, 4)]
+
+    def test_includes_only_matched_chunks(self):
+        # Chunk 2 has a valid span but is not in the matched set.
+        spans = [(1, 0, 5), (2, 6, 11)]
+        assert compute_matched_line_ranges(_BODY, spans, {1}) == [(1, 1)]
+
+    def test_skips_chunks_without_spans(self):
+        # A matched chunk with no offsets yields no range at all.
+        spans = [(1, None, None)]
+        assert compute_matched_line_ranges(_BODY, spans, {1}) == []
+
+    def test_sorted_and_deduplicated(self):
+        # Chunks 2 and 3 share a span; chunk 1 is listed first but sorts last.
+        spans = [(1, 12, len(_BODY)), (2, 0, 5), (3, 0, 5)]
+        assert compute_matched_line_ranges(_BODY, spans, {1, 2, 3}) == [(1, 1), (3, 4)]
+
+
+class TestBuildReadPreamble:
+    def test_contains_document_metadata(self):
+        preamble = build_read_preamble(
+            document_id=42,
+            document_type="FILE",
+            title="Test Doc",
+            url="https://example.com",
+            matched_line_ranges=[],
+        )
+        assert "42" in preamble
+        assert "FILE" in preamble
+        assert "Test Doc" in preamble
+        assert "https://example.com" in preamble
+
+    def test_citation_hint_uses_document_id(self):
+        preamble = build_read_preamble(
+            document_id=42,
+            document_type="FILE",
+            title="Test Doc",
+            url="",
+            matched_line_ranges=[],
+        )
+        assert "[citation:d42#L" in preamble
+
+    def test_lists_matched_line_ranges(self):
+        preamble = build_read_preamble(
+            document_id=7,
+            document_type="NOTE",
+            title="Notes",
+            url="",
+            matched_line_ranges=[(12, 18), (40, 40)],
+        )
+        assert "<matched_lines>" in preamble
+        assert "12-18" in preamble
+        assert "40" in preamble
+
+    def test_omits_matched_lines_block_when_empty(self):
+        preamble = build_read_preamble(
+            document_id=7,
+            document_type="NOTE",
+            title="Notes",
+            url="",
+            matched_line_ranges=[],
+        )
+        assert "<matched_lines>" not in preamble
+
+    def test_ends_with_trailing_newline_so_body_follows_cleanly(self):
+        preamble = build_read_preamble(
+            document_id=1,
+            document_type="FILE",
+            title="t",
+            url="",
+            matched_line_ranges=[],
+        )
+        assert preamble.endswith("\n")
From 141801f1ccd5d42caf826506f6f5dcd666334e68 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Fri, 19 Jun 2026 17:32:45 +0200
Subject: [PATCH 38/47] docs: clarify web/kb/legacy citation channels
---
.../system_prompt/prompts/citations/on.md | 60 ++++++++++++-------
1 file changed, 38 insertions(+), 22 deletions(-)
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md
index 2abd95d5a..8e67615d0 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md
@@ -1,42 +1,58 @@
-Citations reach the answer through two channels. Use whichever applies — and
-never invent ids you didn't see. Citation ids are resolved by exact-match
-lookup; a wrong id silently breaks the link, so when in doubt, omit.
+Citations reach the answer through three channels. Use whichever applies, and
+never invent ids you didn't see: ids are matched exactly, so a wrong one
+silently breaks the link — when in doubt, omit. Always write a citation as
+plain `[citation:…]` brackets — no markdown links, no footnote numbers, no
+parentheses.
-### Channel A — chunk blocks injected this turn
+### Channel A — web_search chunk blocks injected this turn
When `web_search` returns `<document>` / `<chunk id='…'>` blocks in this
-turn:
+turn, the chunk `id` is the result's URL:
-1. For each factual statement taken from those chunks, add
- `[citation:chunk_id]` using the **exact** id from a visible
- `` tag. Copy digit-for-digit (or the URL verbatim);
- do not retype from memory.
-2. `` is the parent doc id, **not** a citation source —
- only ids inside `` count.
-3. Multiple chunks → `[citation:id1], [citation:id2]` (comma-separated,
+1. For each factual statement taken from a chunk, add `[citation:<url>]`
+ using the **exact** id from a visible `<chunk id='…'>` tag. Copy the
+ URL verbatim; do not retype it from memory.
+2. Multiple chunks → `[citation:url1], [citation:url2]` (comma-separated,
each id copied individually).
-4. Never invent, normalise, or guess at adjacent ids; if unsure, omit.
-5. Plain brackets only — no markdown links, no footnote numbering.
+3. Never invent, normalise, or guess at a URL; if unsure, omit.
### Channel B — citations relayed by a `task` specialist
-A `task(...)` tool message may contain `[citation:]` markers
-the specialist already attached to its prose. The specialist saw the
-underlying `` blocks; you didn't. So:
+A `task(...)` tool message may contain `[citation:…]` markers the
+specialist already attached to its prose — line citations
+(`[citation:d<document_id>#L<start>-<end>]`) or chunk ids (`[citation:N]`). The
+specialist read the underlying document and tied each marker to a
+passage; you didn't. So:
1. **Preserve those markers verbatim** in your final answer — do not
reformat, renumber, drop, or wrap them in markdown links. When you
paraphrase a specialist sentence, copy the marker character-for-
- character; do not regenerate the id from memory (LLMs reliably
- corrupt nearby digits).
+ character; do not regenerate it from memory (LLMs reliably corrupt
+ nearby digits).
2. Keep each marker attached to the sentence the specialist attached
it to.
3. Do **not** add new `[citation:…]` markers of your own to a
specialist's prose; if a fact has no marker, the specialist
- couldn't tie it to a chunk and neither can you.
+ couldn't tie it to a source and neither can you.
4. When a specialist returns JSON, the citation markers live inside
the prose-bearing fields (e.g. a summary or excerpt). Pull them
along with the surrounding sentence when you quote.
-If neither channel surfaces citation markers this turn, do not fabricate
-them.
+### Channel C — your knowledge base (search hits and `read_file`)
+Knowledge-base facts are cited by line range using the document id:
+`[citation:d<document_id>#L<start>-<end>]` (a single line is `#L<n>-<n>`).
+
+1. `search_knowledge_base` prints a ready `[citation:d…#L…-…]` token above each
+ matched passage. When that passage supports your point, copy the token
+ verbatim — that is the entire citation.
+2. When you `read_file` a `/documents/...` path, its header gives the
+ `<document_id>` and an optional `<matched_lines>` pointer, and the body is
+ shown with line numbers; cite the lines you actually used. Use `read_file`
+ when you need more context than a search passage shows.
+3. Copy document ids and line numbers exactly as shown — never estimate,
+ shift, or invent them.
+4. Older documents without a numbered body instead show `<chunk id='N'>`
+ blocks; cite those with `[citation:N]`, copying the id exactly.
+
+If none of these channels surfaces a citable source this turn, do not
+fabricate citations.
From 3c63a7bcd3428caeea475c5708a9ec94f1fdc3ec Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Fri, 19 Jun 2026 17:32:45 +0200
Subject: [PATCH 39/47] docs: kb specialist cites numbered or legacy chunk form
---
.../knowledge_base/system_prompt_cloud.md | 44 ++++++-------------
1 file changed, 13 insertions(+), 31 deletions(-)
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md
index c4e36fc73..f377db311 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md
@@ -35,42 +35,24 @@ Map outcomes to your `status`:
You construct the structured `evidence` fields from your own knowledge of what you called and what you observed — the tools do not return them. Never report values you did not actually see.
-## Chunk citations in your prose
+## Citations in your prose
-When `read_file` returns a KB-indexed document under `/documents/`, the response includes `` blocks. Whenever a fact in your `action_summary` or `evidence.content_excerpt` came from a specific chunk, append `[citation:]` to the sentence stating that fact, using the **exact** id from the `` tag. The caller relays these markers to the end user verbatim, and the UI resolves each id by exact match against the database, so a wrong id silently breaks the citation.
+`read_file` on a KB document under `/documents/` serves it in one of two forms. Cite from whichever you actually see, attach the marker to the sentence in `action_summary` or `evidence.content_excerpt` stating that fact, and list every marker you emit in `evidence.citations`. The caller relays these markers to the end user verbatim, and the UI resolves each by exact match, so a wrong id or line number silently breaks the citation.
-### Where chunk ids live in `read_file` output
+**Numbered body (default).** A `<document>` header gives the `<document_id>` and an optional `<matched_lines>` pointer, then the body is shown with line numbers. Cite the lines a fact came from as `[citation:d<document_id>#L<start>-<end>]` (a single line is `#L<n>-<n>`).
-A KB document's XML has three numeric attributes — only **one** is a citation source:
-
-```
-
-
- 42 ← NOT a citation. Parent doc id; ignore for citations.
- ...
-
-
- ← Index hint; the same id also appears below.
-
-
-
- ← This is the citation source.
-
-
-
-```
+**Legacy chunk blocks (older docs without a stored body).** The response is XML with `<chunk id='N'>` blocks. Cite the chunk a fact came from as `[citation:N]`, using the **exact** id from a `<chunk id='N'>` tag.
### Rules
-- Use the **exact** id from a `` tag whose content you actually quoted or paraphrased. Copy digit-for-digit; do **not** retype from memory.
-- Before emitting `[citation:N]`, confirm the literal substring `` (or its index twin `chunk_id="N"`) appears in the tool result you are summarising this turn. If you can't see it, omit the citation.
-- Never cite `` — that's the parent doc, not a chunk.
-- Never invent, normalise, shorten, or guess at adjacent ids. If unsure between two candidates, omit rather than pick.
+- Cite only from a passage you actually quoted or paraphrased this turn. Copy document ids, line numbers, and chunk ids character-for-character; never retype from memory.
+- Never cite `<document_id>` on its own — it identifies the document, not a passage. In the numbered form it is only the `d<document_id>` prefix of a line citation.
+- Never invent, normalise, shorten, shift, or guess at ids or line numbers. If unsure, omit rather than pick.
- Prefer **fewer accurate citations** over many speculative ones.
-- Multiple chunks supporting the same point → comma-separated and copied individually: `[citation:128], [citation:129]`.
+- Multiple passages supporting the same point → comma-separated and copied individually: `[citation:d42#L14-22], [citation:d42#L31-39]`.
- Plain square brackets only — no markdown links, no parentheses, no footnote numbers.
-- Tool results without `` (write/edit/move confirmations, `ls` / `glob` / `grep` listings, error strings) carry no chunk id and need none.
-- Populate `evidence.chunk_ids` with **only** ids you actually emitted in `[citation:…]` markers — same set, same digits.
+- Tool results with no body passage (write/edit/move confirmations, `ls` / `glob` / `grep` listings, error strings) carry nothing to cite.
+- Populate `evidence.citations` with **only** the markers you actually emitted — same set, same characters.
## Examples
@@ -89,7 +71,7 @@ A KB document's XML has three numeric attributes — only **one** is a citation
"path": "/documents/meetings/2026-05-11-meeting.md",
"matched_candidates": null,
"content_excerpt": null,
- "chunk_ids": null
+ "citations": null
},
"next_step": null,
"missing_fields": null,
@@ -121,7 +103,7 @@ A KB document's XML has three numeric attributes — only **one** is a citation
{ "id": "/documents/design/auth-rework.md", "label": "Auth Rework" }
],
"content_excerpt": null,
- "chunk_ids": null
+ "citations": null
},
"next_step": "Ask the user which design doc to update.",
"missing_fields": ["path"],
@@ -142,7 +124,7 @@ Return **only** one JSON object (no markdown or prose outside it):
"path": string | null,
"matched_candidates": [ { "id": string, "label": string } ] | null,
"content_excerpt": string | null,
- "chunk_ids": string[] | null
+ "citations": string[] | null
},
"next_step": string | null,
"missing_fields": string[] | null,
From 30ca0e1ef5d8767cd66efa053dd7b49ee4f9b1a2 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Fri, 19 Jun 2026 17:32:45 +0200
Subject: [PATCH 40/47] docs: readonly kb specialist cites line or chunk form
---
.../system_prompt_readonly_cloud.md | 42 +++++--------------
1 file changed, 11 insertions(+), 31 deletions(-)
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md
index c7813e71d..f0aa8403e 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md
@@ -28,41 +28,21 @@ Reply in plain prose:
- If the workspace does not contain the requested information, say so explicitly. Do not fabricate paths or content.
- If the question is genuinely ambiguous after a thorough lookup, list the candidates with their paths and stop.
-## Chunk citations
+## Citations
-When the evidence for a claim came from a `read_file` response that included `` blocks (i.e. a KB-indexed document under `/documents/`), append `[citation:]` to the sentence stating that claim. The caller passes these markers through to the end user verbatim, and the UI resolves each id by exact match against the database, so a wrong id silently breaks the citation.
+`read_file` on a KB document under `/documents/` serves it in one of two forms; cite a claim from whichever you actually see, alongside the path. The caller passes these markers through to the end user verbatim, and the UI resolves each by exact match, so a wrong id or line number silently breaks the citation.
-### Where chunk ids live in `read_file` output
-
-A KB document's XML has three numeric attributes — only **one** is a citation source:
-
-```
-
-
- 42 ← NOT a citation. Parent doc id; ignore for citations.
- ...
-
-
- ← Index hint; the same id also appears below.
-
-
-
- ← This is the citation source.
-
-
-
-```
+- **Numbered body (default).** A `<document>` header gives the `<document_id>`, and the body is shown with line numbers. Cite the lines a claim came from as `[citation:d<document_id>#L<start>-<end>]` (a single line is `#L<n>-<n>`).
+- **Legacy chunk blocks (older docs).** XML with `<chunk id='N'>` blocks. Cite the chunk a claim came from as `[citation:N]`.
### Rules
-- Use the **exact** id from a `` tag whose content you actually quoted or paraphrased. Copy digit-for-digit; do **not** retype from memory.
-- Before emitting `[citation:N]`, confirm the literal substring `` (or its index twin `chunk_id="N"`) appears in the tool result you are summarising this turn. If you can't see it, omit the citation.
-- Never cite `` — that's the parent doc, not a chunk.
-- Never invent, normalise, shorten, or guess at adjacent ids. If unsure between two candidates, omit rather than pick.
-- Prefer **fewer accurate citations** over many speculative ones. One correct `[citation:128]` is more useful than a string of wrong ids.
-- Multiple chunks supporting the same point → comma-separated and copied individually: `[citation:128], [citation:129]`.
+- Copy document ids, line numbers, and chunk ids character-for-character; never retype from memory. If you cannot see the id/lines for a claim, omit the citation.
+- Never cite `<document_id>` on its own — in the numbered form it is only the `d<document_id>` prefix of a line citation.
+- Never invent, normalise, shorten, shift, or guess. Prefer **fewer accurate citations** over many speculative ones.
+- Multiple passages supporting the same point → comma-separated and copied individually.
- Plain square brackets only — no markdown links, no parentheses, no footnote numbers.
-- If a claim came from a tool result that did **not** carry a chunk id (`ls`, `glob`, `grep` listings, error strings, or files without ``), skip the citation.
-- The absolute path under `/documents/` is always required; chunk citations are additive, they do not replace the path reference.
+- Listings (`ls` / `glob` / `grep`), error strings, and files without either form carry nothing to cite.
+- The absolute path under `/documents/` is always required; citations are additive, they do not replace the path reference.
-Example: `The Q2 roadmap lists three milestones (/documents/planning/q2-roadmap.md) [citation:128], [citation:129].`
+Example: `The Q2 roadmap lists three milestones (/documents/planning/q2-roadmap.md) [citation:d42#L3-9].`
From fc17b9becdf5c5cf09da184f5de1c188b26ea56d Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Fri, 19 Jun 2026 17:32:45 +0200
Subject: [PATCH 41/47] docs: rename evidence.chunk_ids to citations in desktop
kb prompt
---
.../builtins/knowledge_base/system_prompt_desktop.md | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md
index 25dafa3df..72a921c4f 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md
@@ -33,11 +33,11 @@ Map outcomes to your `status`:
- Any other `"Error: …"` → `status=error` and relay the tool's message verbatim as `next_step`.
- HITL rejection → `status=blocked` with `next_step="User declined this filesystem action. Do not retry."`.
-You construct the structured `evidence` fields from your own knowledge of what you called and what you observed — the tools do not return them. Never report values you did not actually see. (`chunk_ids` is always `null` in desktop mode — see "Chunk citations in your prose" below.)
+You construct the structured `evidence` fields from your own knowledge of what you called and what you observed — the tools do not return them. Never report values you did not actually see. (`citations` is always `null` in desktop mode — see "Citations in your prose" below.)
-## Chunk citations in your prose
+## Citations in your prose
-In desktop mode your filesystem tools read local files only, and local-file tool results do **not** carry `` tags. Do not emit `[citation:…]` markers in `action_summary` or `evidence.content_excerpt`, and leave `evidence.chunk_ids` `null` — the absolute path is the only reference for local-file work.
+In desktop mode your filesystem tools read local files only, and local-file tool results do **not** carry chunk ids or numbered KB bodies. Do not emit `[citation:…]` markers in `action_summary` or `evidence.content_excerpt`, and leave `evidence.citations` `null` — the absolute path is the only reference for local-file work.
## Examples
@@ -56,7 +56,7 @@ In desktop mode your filesystem tools read local files only, and local-file tool
"path": "/notes/meetings/2026-05-11-meeting.md",
"matched_candidates": null,
"content_excerpt": null,
- "chunk_ids": null
+ "citations": null
},
"next_step": null,
"missing_fields": null,
@@ -88,7 +88,7 @@ In desktop mode your filesystem tools read local files only, and local-file tool
{ "id": "/projects/web/design/auth-rework.md", "label": "Auth Rework" }
],
"content_excerpt": null,
- "chunk_ids": null
+ "citations": null
},
"next_step": "Ask the user which design doc to update.",
"missing_fields": ["path"],
@@ -109,7 +109,7 @@ Return **only** one JSON object (no markdown or prose outside it):
"path": string | null,
"matched_candidates": [ { "id": string, "label": string } ] | null,
"content_excerpt": string | null,
- "chunk_ids": string[] | null
+ "citations": string[] | null
},
"next_step": string | null,
"missing_fields": string[] | null,
From 188ae053aca5b5d79fa06e51999860697c336948 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Fri, 19 Jun 2026 17:37:41 +0200
Subject: [PATCH 42/47] feat: serve numbered source_markdown reads with
citation preamble
---
.../filesystem/backends/kb_postgres.py | 110 +++++++++++-------
.../filesystem/tools/edit_file/index.py | 2 +-
.../filesystem/tools/move_file/helpers.py | 2 +-
.../filesystem/tools/read_file/index.py | 4 +-
.../middleware/filesystem/tools/rm/helpers.py | 2 +-
.../test_b_filesystem_rm_rmdir_cloud.py | 2 +-
6 files changed, 78 insertions(+), 44 deletions(-)
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/backends/kb_postgres.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/backends/kb_postgres.py
index e13196537..e704d5599 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/backends/kb_postgres.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/backends/kb_postgres.py
@@ -45,6 +45,10 @@ from sqlalchemy.ext.asyncio import AsyncSession
from app.agents.chat.multi_agent_chat.shared.middleware.filesystem.backends.document_xml import (
build_document_xml,
)
+from app.agents.chat.multi_agent_chat.shared.middleware.filesystem.backends.numbered_document import (
+ build_read_preamble,
+ compute_matched_line_ranges,
+)
from app.agents.chat.runtime.path_resolver import (
DOCUMENTS_ROOT,
build_path_index,
@@ -64,6 +68,12 @@ def _basename(path: str) -> str:
return path.rsplit("/", 1)[-1]
+def _metadata_url(metadata: dict[str, Any]) -> str:
+    # First non-empty of url / source / page_url; "" when none is set.
+    candidates = (metadata.get(k) for k in ("url", "source", "page_url"))
+    return next((value for value in candidates if value), "")
+
+
def _is_under(child: str, parent: str) -> bool:
"""Return True iff ``child`` is at-or-under ``parent`` (directory semantics)."""
if parent == "/":
@@ -460,8 +470,11 @@ class KBPostgresBackend(BackendProtocol):
loaded = await self._load_file_data(file_path)
if loaded is None:
return f"Error: File '{file_path}' not found"
- file_data, _ = loaded
- return format_read_response(file_data, offset, limit)
+ file_data, _, preamble = loaded
+ body = format_read_response(file_data, offset, limit)
+ if preamble and offset == 0:
+ return preamble + body
+ return body
def read(self, file_path: str, offset: int = 0, limit: int = 2000) -> str: # type: ignore[override]
return asyncio.run(self.aread(file_path, offset, limit))
@@ -469,12 +482,14 @@ class KBPostgresBackend(BackendProtocol):
async def _load_file_data(
self,
path: str,
- ) -> tuple[dict[str, Any], int | None] | None:
+ ) -> tuple[dict[str, Any], int | None, str | None] | None:
"""Lazy-load a virtual KB document into a deepagents ``FileData``.
- Returns ``(file_data, doc_id)`` or ``None`` if the path doesn't map
- to any known document. ``doc_id`` is ``None`` for the synthetic
- anonymous document so the caller doesn't track it as a DB-backed file.
+ Returns ``(file_data, doc_id, preamble)`` or ``None`` if the path
+ doesn't map to any known document. ``doc_id`` is ``None`` for the
+ synthetic anonymous document. ``preamble`` is the metadata header to
+ show above a numbered ``source_markdown`` body (``None`` for the legacy
+ chunk-reconstructed XML reads used when a document has no body).
"""
anon = self._kb_anon_doc()
if anon and str(anon.get("path") or "") == path:
@@ -492,7 +507,7 @@ class KBPostgresBackend(BackendProtocol):
}
xml = build_document_xml(doc_payload, matched_chunk_ids=set())
file_data = create_file_data(xml)
- return file_data, None
+ return file_data, None, None
if not path.startswith(DOCUMENTS_ROOT):
return None
@@ -505,41 +520,58 @@ class KBPostgresBackend(BackendProtocol):
)
if document is None:
return None
- chunk_rows = await session.execute(
- select(Chunk.id, Chunk.content)
- .where(Chunk.document_id == document.id)
- .order_by(Chunk.position, Chunk.id)
- )
- chunks = [
- {"chunk_id": row.id, "content": row.content} for row in chunk_rows.all()
- ]
-
- doc_payload = {
- "document_id": document.id,
- "chunks": chunks,
- "matched_chunk_ids": list(self._matched_chunk_ids(document.id)),
- "document": {
- "id": document.id,
- "title": document.title,
- "document_type": (
- document.document_type.value
- if getattr(document, "document_type", None) is not None
- else "UNKNOWN"
- ),
- "metadata": dict(document.document_metadata or {}),
- },
- "source": (
+ source_markdown = document.source_markdown or ""
+ document_type = (
document.document_type.value
if getattr(document, "document_type", None) is not None
else "UNKNOWN"
- ),
+ )
+ metadata = dict(document.document_metadata or {})
+ chunk_rows = await session.execute(
+ select(Chunk.id, Chunk.content, Chunk.start_char, Chunk.end_char)
+ .where(Chunk.document_id == document.id)
+ .order_by(Chunk.position, Chunk.id)
+ )
+ chunk_records = chunk_rows.all()
+ document_id = document.id
+ document_title = document.title
+
+ matched = self._matched_chunk_ids(document_id)
+
+ # Canonical read: serve the verbatim body with cat -n line numbers that
+ # line up with chunk char spans, so the agent cites real source lines.
+ if source_markdown:
+ ranges = compute_matched_line_ranges(
+ source_markdown,
+ [(r.id, r.start_char, r.end_char) for r in chunk_records],
+ matched,
+ )
+ preamble = build_read_preamble(
+ document_id=document_id,
+ document_type=document_type,
+ title=document_title,
+ url=_metadata_url(metadata),
+ matched_line_ranges=ranges,
+ )
+ return create_file_data(source_markdown), document_id, preamble
+
+ # Legacy fallback: no canonical body, reconstruct from chunks as XML.
+ doc_payload = {
+ "document_id": document_id,
+ "chunks": [
+ {"chunk_id": r.id, "content": r.content} for r in chunk_records
+ ],
+ "matched_chunk_ids": list(matched),
+ "document": {
+ "id": document_id,
+ "title": document_title,
+ "document_type": document_type,
+ "metadata": metadata,
+ },
+ "source": document_type,
}
- xml = build_document_xml(
- doc_payload,
- matched_chunk_ids=self._matched_chunk_ids(document.id),
- )
- file_data = create_file_data(xml)
- return file_data, document.id
+ xml = build_document_xml(doc_payload, matched_chunk_ids=matched)
+ return create_file_data(xml), document_id, None
# ------------------------------------------------------------------ writes
@@ -571,7 +603,7 @@ class KBPostgresBackend(BackendProtocol):
loaded = await self._load_file_data(file_path)
if loaded is None:
return EditResult(error=f"Error: File '{file_path}' not found")
- file_data, _ = loaded
+ file_data, _, _ = loaded
content = file_data_to_string(file_data)
result = perform_string_replacement(
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/edit_file/index.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/edit_file/index.py
index 775469531..036617d8d 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/edit_file/index.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/edit_file/index.py
@@ -73,7 +73,7 @@ def create_edit_file_tool(mw: SurfSenseFilesystemMiddleware) -> BaseTool:
loaded = await backend._load_file_data(validated)
if loaded is None:
return f"Error: File '{validated}' not found"
- _, doc_id_to_attach = loaded
+ _, doc_id_to_attach, _ = loaded
res: EditResult = await backend.aedit(
validated, old_string, new_string, replace_all=replace_all
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/move_file/helpers.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/move_file/helpers.py
index ded4701f9..be61ca94f 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/move_file/helpers.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/move_file/helpers.py
@@ -75,7 +75,7 @@ async def cloud_move_file(
loaded = await backend._load_file_data(source)
if loaded is None:
return f"Error: source '{source}' not found."
- source_file_data, loaded_doc_id = loaded
+ source_file_data, loaded_doc_id, _ = loaded
if source_doc_id is None:
source_doc_id = loaded_doc_id
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/read_file/index.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/read_file/index.py
index 5c20619d6..6cbbe6ae5 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/read_file/index.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/read_file/index.py
@@ -58,8 +58,10 @@ def create_read_file_tool(mw: SurfSenseFilesystemMiddleware) -> BaseTool:
loaded = await backend._load_file_data(validated)
if loaded is None:
return f"Error: File '{validated}' not found"
- file_data, doc_id = loaded
+ file_data, doc_id, preamble = loaded
rendered = format_read_response(file_data, offset, limit)
+ if preamble and offset == 0:
+ rendered = preamble + rendered
update: dict[str, Any] = {
"files": {validated: file_data},
"messages": [
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/rm/helpers.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/rm/helpers.py
index e2e445d08..020200cbd 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/rm/helpers.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/tools/rm/helpers.py
@@ -74,7 +74,7 @@ async def cloud_rm(
loaded = await backend._load_file_data(validated)
if loaded is None:
return f"Error: file '{validated}' not found."
- _, resolved_doc_id = loaded
+ _, resolved_doc_id, _ = loaded
files_update: dict[str, Any] = {validated: None}
update: dict[str, Any] = {
diff --git a/surfsense_backend/tests/unit/middleware/test_b_filesystem_rm_rmdir_cloud.py b/surfsense_backend/tests/unit/middleware/test_b_filesystem_rm_rmdir_cloud.py
index 898ec3765..27653c544 100644
--- a/surfsense_backend/tests/unit/middleware/test_b_filesystem_rm_rmdir_cloud.py
+++ b/surfsense_backend/tests/unit/middleware/test_b_filesystem_rm_rmdir_cloud.py
@@ -71,7 +71,7 @@ class _KBBackendStub(KBPostgresBackend):
def __init__(self, *, children=None, file_data=None) -> None:
self.als_info = AsyncMock(return_value=children or [])
self._load_file_data = AsyncMock(
- return_value=(file_data, 17) if file_data is not None else None
+ return_value=(file_data, 17, None) if file_data is not None else None
)
From 73dd4e8e3a3e8026880cf738cef6008d584ce5a3 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Fri, 19 Jun 2026 17:37:41 +0200
Subject: [PATCH 43/47] feat: embed line-citation tokens in search hits
---
.../main_agent/tools/search_knowledge_base.py | 41 +++++++++++--------
.../tools/test_search_knowledge_base.py | 18 +++++---
2 files changed, 38 insertions(+), 21 deletions(-)
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/search_knowledge_base.py b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/search_knowledge_base.py
index ad47816f9..0696dc92e 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/search_knowledge_base.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/tools/search_knowledge_base.py
@@ -112,20 +112,25 @@ async def _resolve_doc_context(
return paths, bodies
-def _line_label(chunk: dict[str, Any], body: str | None) -> str:
- """``[lines X-Y]`` for a span-bearing chunk, or '' when spans are absent."""
+def _citation_token(chunk: dict[str, Any], body: str | None, doc_id: int | None) -> str:
+ """Ready-to-copy ``[citation:dID#Lstart-end]`` token, or '' without spans."""
start = chunk.get("start_char")
end = chunk.get("end_char")
- if not body or not isinstance(start, int) or not isinstance(end, int):
+ if (
+ not body
+ or not isinstance(doc_id, int)
+ or not isinstance(start, int)
+ or not isinstance(end, int)
+ ):
return ""
start_line, end_line = char_span_to_line_range(body, start, end)
- if start_line == end_line:
- return f"[line {start_line}]"
- return f"[lines {start_line}-{end_line}]"
+ return f"[citation:d{doc_id}#L{start_line}-{end_line}]"
-def _render_passage(chunk: dict[str, Any], body: str | None) -> str | None:
- """Render one matched chunk as an indented, line-annotated passage."""
+def _render_passage(
+ chunk: dict[str, Any], body: str | None, doc_id: int | None
+) -> str | None:
+ """Render one matched chunk as an indented passage tagged with its token."""
content = (chunk.get("content") or "").strip()
if not content:
return None
@@ -133,12 +138,14 @@ def _render_passage(chunk: dict[str, Any], body: str | None) -> str | None:
if len(content) > _PER_DOC_SNIPPET_CHARS:
snippet += " ..."
indented = snippet.replace("\n", "\n ")
- label = _line_label(chunk, body)
- head = f"\n {label}" if label else ""
+ token = _citation_token(chunk, body, doc_id)
+ head = f"\n {token}" if token else ""
return f"{head}\n {indented}"
-def _matched_passages(doc: dict[str, Any], body: str | None) -> str:
+def _matched_passages(
+ doc: dict[str, Any], body: str | None, doc_id: int | None
+) -> str:
"""Render the RRF-matched chunks; '' when none can be rendered."""
by_id = {
c.get("chunk_id"): c
@@ -150,7 +157,7 @@ def _matched_passages(doc: dict[str, Any], body: str | None) -> str:
chunk = by_id.get(chunk_id)
if chunk is None:
continue
- passage = _render_passage(chunk, body)
+ passage = _render_passage(chunk, body, doc_id)
if passage:
rendered.append(passage)
return "".join(rendered)
@@ -194,11 +201,12 @@ def _format_hits(
path = paths.get(doc_id) if isinstance(doc_id, int) else None
body = bodies.get(doc_id) if isinstance(doc_id, int) else None
- header = f"\n{rank}. {title} (type={doc_type}, score={score_str})" + (
+ id_str = f"id={doc_id}, " if isinstance(doc_id, int) else ""
+ header = f"\n{rank}. {title} ({id_str}type={doc_type}, score={score_str})" + (
f"\n path: {path}" if path else ""
)
- passages = _matched_passages(doc, body)
+ passages = _matched_passages(doc, body, doc_id if isinstance(doc_id, int) else None)
entry = header + (passages or _fallback_snippet(doc))
if total + len(entry) > _MAX_TOTAL_CHARS:
lines.append("\n")
@@ -207,8 +215,9 @@ def _format_hits(
total += len(entry)
lines.append(
- "\n\nTo read a full document, delegate to the knowledge_base specialist "
- "with `task`, referencing the path above."
+ "\n\nTo cite a matched passage, copy its [citation:dID#Lstart-end] token "
+ "verbatim. To quote more context or read the full document, delegate to "
+ "the knowledge_base specialist with `task` using the path above."
)
lines.append("\n")
return "".join(lines)
diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/tools/test_search_knowledge_base.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/tools/test_search_knowledge_base.py
index eadfcd30d..e068792b1 100644
--- a/surfsense_backend/tests/unit/agents/multi_agent_chat/tools/test_search_knowledge_base.py
+++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/tools/test_search_knowledge_base.py
@@ -51,20 +51,28 @@ def test_renders_matched_passage_not_top_of_doc() -> None:
assert "Intro paragraph." not in out
-def test_includes_line_range_when_spans_present() -> None:
+def test_emits_copyable_line_citation_token_when_spans_present() -> None:
out = _format_hits([_hit()], paths={7: "/documents/note.md"}, bodies={7: _BODY}, query="q")
- # "Matched passage here." sits on line 3 of the body.
- assert "line 3" in out
+ # "Matched passage here." sits on line 3 of the body; the hit must surface
+ # a ready-to-copy token so the agent can cite without a separate read.
+ assert "[citation:d7#L3-3]" in out
-def test_omits_line_range_when_spans_absent() -> None:
+def test_header_includes_document_id() -> None:
+ out = _format_hits([_hit()], paths={7: "/documents/note.md"}, bodies={7: _BODY}, query="q")
+ assert "id=7" in out
+
+
+def test_omits_citation_token_when_spans_absent() -> None:
hit = _hit()
for chunk in hit["chunks"]:
chunk["start_char"] = None
chunk["end_char"] = None
out = _format_hits([hit], paths={7: "/documents/note.md"}, bodies={7: _BODY}, query="q")
assert "Matched passage here." in out
- assert "[line" not in out
+ # No concrete, copyable token for this document without spans (the closing
+ # instruction's placeholder template doesn't count).
+ assert "[citation:d7#L" not in out
def test_falls_back_to_content_when_no_matched_ids() -> None:
From 5f341bdd2fa35b0184f5522f6fc2d5543b945f28 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Fri, 19 Jun 2026 17:37:41 +0200
Subject: [PATCH 44/47] feat: parse and render kb line citations
---
.../assistant-ui/inline-citation.tsx | 46 +++++++++++++++++++
.../citations/citation-renderer.tsx | 12 ++++-
.../editor/plugins/citation-kit.tsx | 35 +++++++++++---
.../lib/citations/citation-parser.ts | 18 ++++++--
4 files changed, 101 insertions(+), 10 deletions(-)
diff --git a/surfsense_web/components/assistant-ui/inline-citation.tsx b/surfsense_web/components/assistant-ui/inline-citation.tsx
index 59a10739c..28f5212ae 100644
--- a/surfsense_web/components/assistant-ui/inline-citation.tsx
+++ b/surfsense_web/components/assistant-ui/inline-citation.tsx
@@ -2,9 +2,11 @@
import { useSetAtom } from "jotai";
import { FileText } from "lucide-react";
+import { useParams } from "next/navigation";
import type { FC } from "react";
import { useId, useState } from "react";
import { openCitationPanelAtom } from "@/atoms/citation/citation-panel.atom";
+import { openEditorPanelAtom } from "@/atoms/editor/editor-panel.atom";
import { useCitationMetadata } from "@/components/assistant-ui/citation-metadata-context";
import { CitationPanelContent } from "@/components/citation-panel/citation-panel";
import { Citation } from "@/components/tool-ui/citation";
@@ -108,6 +110,50 @@ const NumericChunkCitation: FC<{ chunkId: number }> = ({ chunkId }) => {
);
};
+interface LineCitationProps {
+ documentId: number;
+ startLine: number;
+ endLine: number;
+}
+
+/**
+ * Inline citation for a knowledge-base document line range
+ * (`[citation:dID#Lstart-end]`). Clicking opens the document in
+ * the editor's read-only source view, scrolled to and highlighting the cited
+ * lines — the same anchor the citation panel uses for chunk citations.
+ */
+export const LineCitation: FC = ({ documentId, startLine, endLine }) => {
+ const openEditorPanel = useSetAtom(openEditorPanelAtom);
+ const params = useParams();
+ const searchSpaceId = Number(params?.search_space_id);
+
+ const label = startLine === endLine ? `L${startLine}` : `L${startLine}-${endLine}`;
+
+ const handleClick = () => {
+ if (!Number.isFinite(searchSpaceId)) return;
+ openEditorPanel({
+ documentId,
+ searchSpaceId,
+ highlightLines: { start: startLine, end: endLine },
+ forceSourceView: true,
+ });
+ };
+
+ return (
+
+ );
+};
+
import { tryGetHostname } from "@/lib/url";
interface UrlCitationProps {
diff --git a/surfsense_web/components/citations/citation-renderer.tsx b/surfsense_web/components/citations/citation-renderer.tsx
index f2de4b27d..b0ab13f84 100644
--- a/surfsense_web/components/citations/citation-renderer.tsx
+++ b/surfsense_web/components/citations/citation-renderer.tsx
@@ -1,7 +1,7 @@
"use client";
import type { ReactNode } from "react";
-import { InlineCitation, UrlCitation } from "@/components/assistant-ui/inline-citation";
+import { InlineCitation, LineCitation, UrlCitation } from "@/components/assistant-ui/inline-citation";
import {
type CitationToken,
type CitationUrlMap,
@@ -21,6 +21,16 @@ export function renderCitationToken(token: CitationToken, ordinalKey: number): R
if (token.kind === "url") {
return ;
}
+ if (token.kind === "line") {
+ return (
+
+ );
+ }
return (
> = ({
element,
}) => {
const isUrl = element.kind === "url";
+ const isLine =
+ element.kind === "line" &&
+ element.documentId !== undefined &&
+ element.startLine !== undefined &&
+ element.endLine !== undefined;
return (
{isUrl && element.url ? (
+ ) : isLine ? (
+
) : element.chunkId !== undefined ? (
) : null}
@@ -97,10 +112,7 @@ function copyMarks(textNode: SlateText): Record {
return marks;
}
-function makeCitationElement(
- rawText: string,
- segment: { kind: "url"; url: string } | { kind: "chunk"; chunkId: number; isDocsChunk: boolean }
-): CitationElementNode {
+function makeCitationElement(rawText: string, segment: CitationToken): CitationElementNode {
if (segment.kind === "url") {
return {
type: CITATION_TYPE,
@@ -110,6 +122,17 @@ function makeCitationElement(
children: [{ text: "" }],
};
}
+ if (segment.kind === "line") {
+ return {
+ type: CITATION_TYPE,
+ kind: "line",
+ documentId: segment.documentId,
+ startLine: segment.startLine,
+ endLine: segment.endLine,
+ rawText,
+ children: [{ text: "" }],
+ };
+ }
return {
type: CITATION_TYPE,
kind: segment.isDocsChunk ? "doc" : "chunk",
diff --git a/surfsense_web/lib/citations/citation-parser.ts b/surfsense_web/lib/citations/citation-parser.ts
index 533c644c2..0d320956f 100644
--- a/surfsense_web/lib/citations/citation-parser.ts
+++ b/surfsense_web/lib/citations/citation-parser.ts
@@ -18,12 +18,16 @@ import { FENCED_OR_INLINE_CODE } from "@/lib/markdown/code-regions";
* sometimes emit.
*/
export const CITATION_REGEX =
- /[[【]\u200B?citation:\s*(https?:\/\/[^\]】\u200B]+|urlcite\d+|(?:doc-)?-?\d+(?:\s*,\s*(?:doc-)?-?\d+)*)\s*\u200B?[\]】]/g;
+ /[[【]\u200B?citation:\s*(https?:\/\/[^\]】\u200B]+|urlcite\d+|d\d+#L\d+-\d+|(?:doc-)?-?\d+(?:\s*,\s*(?:doc-)?-?\d+)*)\s*\u200B?[\]】]/g;
+
+/** Matches the knowledge-base line-citation form `dID#Lstart-end` (e.g. `d7#L3-3`). */
+const LINE_CITATION_REGEX = /^d(\d+)#L(\d+)-(\d+)$/;
/** A single parsed citation reference. */
export type CitationToken =
| { kind: "url"; url: string }
- | { kind: "chunk"; chunkId: number; isDocsChunk: boolean };
+ | { kind: "chunk"; chunkId: number; isDocsChunk: boolean }
+ | { kind: "line"; documentId: number; startLine: number; endLine: number };
/** Output of `parseTextWithCitations` — interleaved text + citation tokens. */
export type ParsedSegment = string | CitationToken;
@@ -95,7 +99,15 @@ export function parseTextWithCitations(text: string, urlMap: CitationUrlMap): Pa
const captured = match[1];
- if (captured.startsWith("http://") || captured.startsWith("https://")) {
+ const lineMatch = LINE_CITATION_REGEX.exec(captured);
+ if (lineMatch) {
+ segments.push({
+ kind: "line",
+ documentId: Number.parseInt(lineMatch[1], 10),
+ startLine: Number.parseInt(lineMatch[2], 10),
+ endLine: Number.parseInt(lineMatch[3], 10),
+ });
+ } else if (captured.startsWith("http://") || captured.startsWith("https://")) {
segments.push({ kind: "url", url: captured.trim() });
} else if (captured.startsWith("urlcite")) {
const url = urlMap.get(captured);
From cfafed09bc76c5bcb6427998091b2c120a2a2185 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Fri, 19 Jun 2026 19:12:14 +0200
Subject: [PATCH 45/47] fix: forward citation line anchor to editor panel and
harden reveal
---
.../components/editor/source-code-editor.tsx | 33 ++++++++++++-------
.../layout/ui/right-panel/RightPanel.tsx | 2 ++
2 files changed, 24 insertions(+), 11 deletions(-)
diff --git a/surfsense_web/components/editor/source-code-editor.tsx b/surfsense_web/components/editor/source-code-editor.tsx
index 0277cde85..4af4f2125 100644
--- a/surfsense_web/components/editor/source-code-editor.tsx
+++ b/surfsense_web/components/editor/source-code-editor.tsx
@@ -49,15 +49,20 @@ export function SourceCodeEditor({
}
const range = highlightLinesRef.current;
if (!range) return;
- const start = Math.max(1, Math.floor(range.start));
- const end = Math.max(start, Math.floor(range.end));
- decorationsRef.current = editor.createDecorationsCollection([
- {
- range: new monaco.Range(start, 1, end, 1),
- options: { isWholeLine: true, className: "citation-line-highlight" },
- },
- ]);
- editor.revealLinesInCenter(start, end);
+ const lineCount = editor.getModel()?.getLineCount() ?? range.end;
+ const start = Math.min(Math.max(1, Math.floor(range.start)), lineCount);
+ const end = Math.min(Math.max(start, Math.floor(range.end)), lineCount);
+ try {
+ decorationsRef.current = editor.createDecorationsCollection([
+ {
+ range: new monaco.Range(start, 1, end, 1),
+ options: { isWholeLine: true, className: "citation-line-highlight" },
+ },
+ ]);
+ } catch {
+ // Decoration failure must not block the reveal below.
+ }
+ editor.revealLinesInCenter(start, end, monaco.editor.ScrollType.Immediate);
}, []);
useEffect(() => {
@@ -138,8 +143,14 @@ export function SourceCodeEditor({
monacoRef.current = monaco;
editorRef.current = editor;
applySidebarTheme(monaco);
- // Defer one frame so the model is laid out before revealing.
- requestAnimationFrame(() => applyHighlight());
+ // Reveal now, then once more after the first layout settles:
+ // the panel slide-in animation means the editor often has no
+ // usable viewport height on the initial frame.
+ applyHighlight();
+ const layoutSub = editor.onDidLayoutChange(() => {
+ applyHighlight();
+ layoutSub.dispose();
+ });
if (!isManualSaveEnabled) return;
editor.addCommand(monaco.KeyMod.CtrlCmd | monaco.KeyCode.KeyS, () => {
void onSaveRef.current?.();
diff --git a/surfsense_web/components/layout/ui/right-panel/RightPanel.tsx b/surfsense_web/components/layout/ui/right-panel/RightPanel.tsx
index 5a7588979..bfad44dd8 100644
--- a/surfsense_web/components/layout/ui/right-panel/RightPanel.tsx
+++ b/surfsense_web/components/layout/ui/right-panel/RightPanel.tsx
@@ -308,6 +308,8 @@ export function RightPanel({
searchSpaceId={editorState.searchSpaceId ?? undefined}
title={editorState.title}
onClose={closeEditor}
+ highlightLines={editorState.highlightLines}
+ forceSourceView={editorState.forceSourceView}
/>
)}
From cfc3be5b1fcc19d7e857c23ec21ad3337ec226a0 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Fri, 19 Jun 2026 19:22:19 +0200
Subject: [PATCH 46/47] fix: gate desktop right panel to prevent duplicate
mobile editor
---
.../components/layout/ui/right-panel/RightPanel.tsx | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/surfsense_web/components/layout/ui/right-panel/RightPanel.tsx b/surfsense_web/components/layout/ui/right-panel/RightPanel.tsx
index bfad44dd8..6662d7830 100644
--- a/surfsense_web/components/layout/ui/right-panel/RightPanel.tsx
+++ b/surfsense_web/components/layout/ui/right-panel/RightPanel.tsx
@@ -12,6 +12,7 @@ import { rightPanelCollapsedAtom, rightPanelTabAtom } from "@/atoms/layout/right
import { Button } from "@/components/ui/button";
import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
import { closeHitlEditPanelAtom, hitlEditPanelAtom } from "@/features/chat-messages/hitl";
+import { useMediaQuery } from "@/hooks/use-media-query";
import { cn } from "@/lib/utils";
import { DocumentsSidebar } from "../sidebar";
@@ -196,6 +197,9 @@ export function RightPanel({
const citationState = useAtomValue(citationPanelAtom);
const closeCitation = useSetAtom(closeCitationPanelAtom);
const [collapsed, setCollapsed] = useAtom(rightPanelCollapsedAtom);
+ // Desktop-only surface; mobile uses the dedicated Mobile* drawers. Without
+ // this guard both render together and two editors fight over one model.
+ const isDesktop = useMediaQuery("(min-width: 1024px)");
const documentsOpen = documentsPanel?.open ?? false;
const reportOpen = reportState.isOpen && !!reportState.reportId;
@@ -267,7 +271,7 @@ export function RightPanel({
setCollapsed(true)} />
) : null;
- if (!isVisible) return null;
+ if (!isVisible || !isDesktop) return null;
return (