feat: various UI fixes, prompt optimizations, and allowing duplicate docs

- Updated `content_hash` in the `Document` model to remove global uniqueness, allowing identical content across different paths.
- Enhanced `_create_document` function to handle path uniqueness and prevent session-poisoning from `IntegrityError`.
- Added detailed comments for clarity on the changes and their implications.
- Introduced new citation handling in the editor for improved user experience with citation jumps.
- Updated package dependencies in the frontend for better functionality.
This commit is contained in:
DESKTOP-RTLN3BA\$punk 2026-04-28 21:30:53 -07:00
parent e6433f78c4
commit b9a66cb417
26 changed files with 1540 additions and 852 deletions

View file

@ -0,0 +1,107 @@
"""133_drop_documents_content_hash_unique
Revision ID: 133
Revises: 132
Create Date: 2026-04-29
Drop the global UNIQUE constraint on ``documents.content_hash`` so the
new-chat agent's ``write_file`` flow can persist legitimate file copies
(two paths, identical content) without hitting a constraint that mirrors
no real filesystem semantic.
Path uniqueness still lives on ``documents.unique_identifier_hash`` (per
search space), which is the right invariant — exactly like an inode at a
given path on a POSIX filesystem.
The non-unique INDEX on ``content_hash`` is preserved so connector
indexers' "have we seen this content before?" lookup
(:func:`app.tasks.document_processors.base.check_duplicate_document`,
which already uses ``.scalars().first()`` and is therefore tolerant of
duplicates) stays cheap.
"""
from __future__ import annotations
from collections.abc import Sequence
from sqlalchemy import inspect
from alembic import op
# Alembic linkage for this migration step: applies on top of revision 132.
revision: str = "133"
down_revision: str | None = "132"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def _existing_constraint_names(bind, table: str) -> set[str]:
    """Return the names of every UNIQUE constraint currently on *table*.

    Used to make the migration idempotent: DDL is only issued for
    constraints that actually exist on this database.
    """
    found = inspect(bind).get_unique_constraints(table)
    return {constraint["name"] for constraint in found}
def _existing_index_names(bind, table: str) -> set[str]:
    """Return the names of every index currently on *table*.

    Mirrors :func:`_existing_constraint_names` so ``upgrade``/``downgrade``
    can guard each DDL statement against the live schema.
    """
    found = inspect(bind).get_indexes(table)
    return {index["name"] for index in found}
def upgrade() -> None:
    """Drop the global UNIQUE on ``documents.content_hash``.

    Handles both representations the old constraint may take on Postgres
    (a named UniqueConstraint and a same-named unique index) and then
    makes sure the plain non-unique lookup index is in place.
    """
    bind = op.get_bind()

    # Both the named UniqueConstraint (added in revision 8) and the
    # implicit-unique-index variant SQLAlchemy may emit need draining.
    if "uq_documents_content_hash" in _existing_constraint_names(bind, "documents"):
        op.drop_constraint(
            "uq_documents_content_hash", "documents", type_="unique"
        )

    index_names = _existing_index_names(bind, "documents")

    # Some Postgres versions surface the unique constraint via a unique
    # index of the same name; check for that too.
    if "uq_documents_content_hash" in index_names:
        op.drop_index("uq_documents_content_hash", table_name="documents")

    # Ensure the non-unique index is present for fast lookups.
    if "ix_documents_content_hash" not in index_names:
        op.create_index(
            "ix_documents_content_hash",
            "documents",
            ["content_hash"],
            unique=False,
        )
def downgrade() -> None:
    """Restore the legacy global UNIQUE constraint on ``content_hash``.

    WARNING: destructive by necessity. Duplicate-content rows that became
    legal after the upgrade are deleted (the lowest-id row per
    ``content_hash`` survives) before the constraint can be re-created.
    """
    bind = op.get_bind()
    # Re-applying UNIQUE is destructive: there may now be legitimate
    # duplicates (e.g. two NOTE documents that share content because the
    # user explicitly copied one to a new path). To avoid the migration
    # silently deleting user data, we keep only the lowest-id row per
    # content_hash — same strategy revision 8 used when first introducing
    # the constraint.
    op.execute(
        """
        DELETE FROM documents
        WHERE id NOT IN (
            SELECT MIN(id)
            FROM documents
            GROUP BY content_hash
        )
        """
    )
    # Recreate the pre-133 shape: drop the plain index if present, then
    # emit a fresh non-unique index plus the named UNIQUE constraint.
    indexes = _existing_index_names(bind, "documents")
    if "ix_documents_content_hash" in indexes:
        op.drop_index("ix_documents_content_hash", table_name="documents")
    op.create_index(
        "ix_documents_content_hash",
        "documents",
        ["content_hash"],
        unique=False,
    )
    op.create_unique_constraint(
        "uq_documents_content_hash", "documents", ["content_hash"]
    )

View file

@ -28,6 +28,7 @@ from langchain.agents.middleware import AgentMiddleware, AgentState
from langchain_core.callbacks import dispatch_custom_event
from langgraph.runtime import Runtime
from sqlalchemy import delete, select
from sqlalchemy.exc import IntegrityError
from sqlalchemy.ext.asyncio import AsyncSession
from app.agents.new_chat.filesystem_selection import FilesystemMode
@ -150,10 +151,11 @@ async def _create_document(
virtual_path,
search_space_id,
)
# Guard against the unique_identifier_hash constraint: another row at the
# same virtual_path (this search space) already owns the hash. Callers are
# expected to upsert via the wrapper, but this defends against bypasses
# and gives a clean ValueError instead of a session-poisoning IntegrityError.
# Filesystem-parity invariant: the only thing that *must* be unique is
# the path. Two notes can legitimately share content (e.g. ``cp a b``).
# Guard against the path-derived ``unique_identifier_hash`` constraint
# so we surface a clean ValueError instead of letting the INSERT poison
# the session with an IntegrityError.
path_collision = await session.execute(
select(Document.id).where(
Document.search_space_id == search_space_id,
@ -165,17 +167,14 @@ async def _create_document(
f"a document already exists at path '{virtual_path}' "
"(unique_identifier_hash collision)"
)
# ``content_hash`` is intentionally NOT checked for uniqueness here.
# In a real filesystem two files at different paths can hold identical
# bytes, and the agent's ``write_file`` path needs that semantic to
# support copy/duplicate operations. The hash remains useful as a
# change-detection hint for connector indexers, which still consult it
# via :func:`check_duplicate_document` but do so with a non-unique
# lookup (``.first()``).
content_hash = generate_content_hash(content, search_space_id)
content_collision = await session.execute(
select(Document.id).where(
Document.search_space_id == search_space_id,
Document.content_hash == content_hash,
)
)
if content_collision.scalar_one_or_none() is not None:
raise ValueError(
f"a document with identical content already exists for path '{virtual_path}'"
)
doc = Document(
title=title,
document_type=DocumentType.NOTE,
@ -493,19 +492,43 @@ async def commit_staged_filesystem_state(
}
)
else:
# Wrap each create in a SAVEPOINT so a residual
# ``IntegrityError`` (e.g. a deployment that hasn't run
# migration 133 yet, where ``documents.content_hash``
# still carries its legacy global UNIQUE constraint)
# rolls back only this one create instead of poisoning
# the whole turn's transaction.
try:
new_doc = await _create_document(
session,
virtual_path=path,
content=content,
search_space_id=search_space_id,
created_by_id=created_by_id,
)
async with session.begin_nested():
new_doc = await _create_document(
session,
virtual_path=path,
content=content,
search_space_id=search_space_id,
created_by_id=created_by_id,
)
except ValueError as exc:
logger.warning(
"kb_persistence: skipping %s create: %s", path, exc
)
continue
except IntegrityError as exc:
# The path-uniqueness check above already protected
# against ``unique_identifier_hash`` collisions, so
# the most likely culprit is the legacy
# ``ix_documents_content_hash`` UNIQUE constraint
# that migration 133 drops. Log loudly so operators
# know to run the migration; do NOT silently swallow.
msg = str(exc.orig) if exc.orig is not None else str(exc)
logger.error(
"kb_persistence: IntegrityError creating %s: %s. "
"If this mentions content_hash, run alembic "
"upgrade to apply migration 133 which drops the "
"global UNIQUE constraint on documents.content_hash.",
path,
msg,
)
continue
doc_id_by_path[path] = new_doc.id
committed_creates.append(
{

View file

@ -38,12 +38,38 @@ from app.db import ChatVisibility
# Provider variant detection
# -----------------------------------------------------------------------------
ProviderVariant = str # "anthropic" | "openai_reasoning" | "openai_classic" | "google" | "default"
# String literal alias for the supported provider-specific prompt variants.
# When adding a new variant, also drop a matching ``providers/<variant>.md``
# file in this package and (if appropriate) extend the regex matchers below.
#
# Stylistic clusters mirror OpenCode's prompt-per-family layout but adapted
# to SurfSense's "supplemental hints" architecture (each fragment is a
# focused style nudge, NOT a full system prompt — the main prompt is
# already assembled from base/ + tools/ + routing/).
ProviderVariant = str
# Known values:
# "anthropic" — Claude family (XML-friendly, narrative todos)
# "openai_reasoning" — GPT-5 / o-series (channel-aware pragmatic)
# "openai_classic" — GPT-4 family (autonomous persistence)
# "openai_codex" — gpt-*-codex (code-purist, terse, file:line refs)
# "google" — Gemini (formal, <3-line, numbered workflow)
# "kimi" — Moonshot Kimi-K* (action-bias, parallel tools)
# "grok" — xAI Grok (extreme-terse, one-word ok)
# "deepseek" — DeepSeek V3 / R1 (terse, R1-aware reasoning)
# "default" — fallback, no provider-specific block emitted
# IMPORTANT: order of evaluation matters in :func:`detect_provider_variant`.
# More specific patterns must come first (e.g. ``codex`` before
# ``openai_reasoning`` because codex model ids contain ``gpt``).
_OPENAI_CODEX_RE = re.compile(r"\b(gpt-codex|codex-mini|gpt-[\d.]+-codex)\b", re.IGNORECASE)
_OPENAI_REASONING_RE = re.compile(r"\b(gpt-5|o\d|o-)", re.IGNORECASE)
_OPENAI_CLASSIC_RE = re.compile(r"\bgpt-4", re.IGNORECASE)
_ANTHROPIC_RE = re.compile(r"\bclaude\b", re.IGNORECASE)
_GOOGLE_RE = re.compile(r"\bgemini\b", re.IGNORECASE)
_KIMI_RE = re.compile(r"\b(kimi[-\d.]*|moonshot)\b", re.IGNORECASE)
_GROK_RE = re.compile(r"\bgrok\b", re.IGNORECASE)
_DEEPSEEK_RE = re.compile(r"\bdeepseek\b", re.IGNORECASE)
def detect_provider_variant(model_name: str | None) -> ProviderVariant:
@ -51,10 +77,17 @@ def detect_provider_variant(model_name: str | None) -> ProviderVariant:
Heuristic match on the model id; returns ``"default"`` when nothing
matches so the composer can fall back to the empty placeholder file.
Order is significant: more-specific patterns are tried first so
``gpt-5-codex`` routes to ``"openai_codex"`` rather than
``"openai_reasoning"`` (mirrors OpenCode's
``packages/opencode/src/session/system.ts`` dispatch).
"""
if not model_name:
return "default"
name = model_name.strip()
if _OPENAI_CODEX_RE.search(name):
return "openai_codex"
if _OPENAI_REASONING_RE.search(name):
return "openai_reasoning"
if _OPENAI_CLASSIC_RE.search(name):
@ -63,6 +96,12 @@ def detect_provider_variant(model_name: str | None) -> ProviderVariant:
return "anthropic"
if _GOOGLE_RE.search(name):
return "google"
if _KIMI_RE.search(name):
return "kimi"
if _GROK_RE.search(name):
return "grok"
if _DEEPSEEK_RE.search(name):
return "deepseek"
return "default"

View file

@ -1,5 +1,20 @@
<provider_hints>
You are running on an Anthropic Claude model. Use XML tags liberally to structure
intermediate reasoning when the task is complex. Prefer step-by-step plans inside
`<thinking>` blocks before producing the final answer.
You are running on an Anthropic Claude model.
Structured reasoning:
- Use XML tags liberally to organise intermediate reasoning when a task is non-trivial. `<thinking>...</thinking>` blocks are encouraged before tool calls or before producing a complex final answer.
- For multi-step requests, briefly outline a plan inside a `<plan>` block before issuing the first tool call.
Professional objectivity:
- Prioritise technical accuracy over validating the user's beliefs. Provide direct, factual guidance without unnecessary superlatives, praise, or emotional validation.
- When uncertain, investigate (search the KB, fetch the page) rather than confirming the user's assumption.
- Disagree with the user when the evidence warrants it; respectful correction beats false agreement.
Task management:
- For tasks with 3+ distinct steps use the todo / planning tool aggressively. Mark items in_progress before starting, completed immediately when finished — do not batch completions.
- Narrate progress through the todo list itself, not through chatty status lines.
Tool calls:
- Run independent tool calls in parallel within one response. Sequence them only when a later call genuinely needs an earlier one's output.
- Never chain bash-like commands with `;` or `&&` to "narrate" — use prose between tool calls instead.
</provider_hints>

View file

@ -0,0 +1,18 @@
<provider_hints>
You are running on a DeepSeek model (DeepSeek-V3 chat / DeepSeek-R1 reasoning).
Reasoning hygiene (R1-aware):
- If the model surfaces explicit `<think>` blocks, keep that internal scratch focused — do NOT restate the user's question inside it; jump straight to the analysis.
- Never paste the contents of `<think>` into your final answer. Final answer should reflect only the conclusion, citations, and any user-facing rationale.
- Do not let chain-of-thought leak into tool-call arguments — keep tool inputs minimal and structural.
Output style:
- Be concise. Default to a one-paragraph answer; expand only when the user asks for detail.
- Don't open with sycophantic phrasing ("Great question", "Sure, here you go"). Lead with the answer or the next action.
- For factual answers, cite once with `[citation:chunk_id]` and stop.
Tool calls:
- Issue independent tool calls in parallel within a single turn.
- Prefer the knowledge-base search tools before any web-search; this model has strong recall but stale training data.
- Don't fabricate file paths, chunk ids, or URLs — only use values returned by tools or provided by the user.
</provider_hints>

View file

@ -1,4 +1,20 @@
<provider_hints>
You are running on a Google Gemini model. Prefer concise, structured responses.
When using tools, follow the function-calling protocol and avoid verbose preludes.
You are running on a Google Gemini model.
Output style:
- Concise & direct. Aim for fewer than 3 lines of prose (excluding tool output, citations, and code/snippets) when the task allows.
- No conversational filler — skip openers like "Okay, I will now…" and closers like "I have finished the changes…". Get straight to the action or answer.
- Format with GitHub-flavoured Markdown; assume monospace rendering.
- For one-line factual answers, just answer. No headers, no bullets.
Workflow for non-trivial tasks (Understand → Plan → Act → Verify):
1. **Understand:** read the user's request and the relevant KB / connector context. Use search and read tools (in parallel when independent) before assuming anything.
2. **Plan:** when the task touches multiple steps, share an extremely concise plan first.
3. **Act:** call the appropriate tools, strictly adhering to the prompts/routing already established for this agent.
4. **Verify:** confirm with a follow-up read or search where it materially de-risks the answer.
Discipline:
- Do not take significant actions beyond the clear scope of the user's request without confirming first.
- Do not assume a connector / tool / file exists — check (e.g. via `get_connected_accounts`) before referencing it.
- Path arguments must be the exact strings returned by tools; do not synthesise file paths.
</provider_hints>

View file

@ -0,0 +1,17 @@
<provider_hints>
You are running on an xAI Grok model.
Maximum terseness:
- Answer in fewer than 4 lines unless the user asks for detail. One-word answers are best when they suffice.
- No preamble ("The answer is", "Here's what I'll do"), no postamble ("Hope that helps", "Let me know"). Get straight to the answer.
- Avoid restating the user's question.
- For factual lookups inside the knowledge base, give the answer with a single `[citation:chunk_id]` and stop.
Tool discipline:
- Use exactly ONE tool per assistant turn when investigating; wait for the result before deciding the next call. Do not loop on the same tool with the same arguments — pick a result and act.
- For obviously parallelizable read-only batches (multiple independent searches), one turn with several tool calls is fine — but never chain into a fishing expedition.
Style:
- No emojis unless the user asked. No nested bullets, no headers for short answers.
- If you can't help, say so in 1-2 sentences without explaining "why this could lead to…".
</provider_hints>

View file

@ -0,0 +1,21 @@
<provider_hints>
You are running on a Moonshot Kimi model (Kimi-K1.5 / Kimi-K2 / Kimi-K2.5+).
Action bias:
- Default to taking action with tools rather than describing solutions in prose. If a tool can answer the question, call the tool.
- Don't narrate routine reads, searches, or obvious next steps. Combine related progress into one short status line.
- Be thorough in actions (test what you build, verify what you change). Be brief in explanations.
Tool calls:
- Output multiple non-interfering tool calls in a SINGLE response — parallelism is a major efficiency win on this model.
- When the `task` tool is available, delegate focused subtasks to a subagent with full context (subagents don't inherit yours).
- Don't apologise or pre-announce tool calls. The tool call itself is self-explanatory.
Language:
- Respond in the SAME language as the user's most recent turn unless explicitly instructed otherwise.
Discipline:
- Stay on track. Never give the user more than what they asked for.
- Fact-check before stating anything as factual; don't fabricate citations.
- Keep it stupidly simple. Don't overcomplicate.
</provider_hints>

View file

@ -1,5 +1,21 @@
<provider_hints>
You are running on a classic OpenAI chat model (GPT-4 family). Use direct
function-calling for tools. When editing files, use the standard `edit_file`
or `write_file` tools rather than diff-based patches.
You are running on a classic OpenAI chat model (GPT-4 family).
Persistence:
- Keep going until the user's query is completely resolved before yielding back. Don't end the turn at "I would do X" — actually do X.
- When you say "Next I will…" or "Now I will…", you MUST actually take that action in the same turn.
- If a tool call fails, diagnose and try again with corrected arguments; do not surface the raw error and stop.
Planning:
- Plan extensively before each tool call and reflect briefly on the result of the previous call. For tasks with 3+ steps, use the todo / planning tool and mark items as `in_progress` / `completed` as you go.
- Always announce the next action in ONE concise sentence before making a non-trivial tool call ("I'll search the KB for the migration spec.").
Output style:
- Conversational but professional. Plain prose for explanations, bullet points for findings, fenced code blocks (with language tags) for code.
- Don't dump tool output verbatim — summarise the relevant lines.
- Don't add a closing recap unless the user asked for one. After completing the work, just stop.
Tool calls:
- Issue independent tool calls in parallel within one response.
- Use specialised tools over generic ones (e.g. KB search before web search; named connectors over MCP fallback).
</provider_hints>

View file

@ -0,0 +1,19 @@
<provider_hints>
You are running on an OpenAI Codex-class model (gpt-codex / codex-mini / gpt-*-codex).
Output style:
- Be concise. Don't dump fetched/searched content back at the user — reference paths or chunk ids instead.
- Reference sources as `path:line` (or `chunk:<id>`) so they're clickable. Stand-alone paths per reference, even when repeated.
- Prefer numbered lists (`1.`, `2.`, `3.`) when offering options the user can pick by replying with a single number.
- Skip headers and heavy formatting for simple confirmations.
- No emojis, no em-dashes, no nested bullets. Single-level lists only.
Code & structured-output tasks:
- Lead with a one-sentence explanation of the change before context. Don't open with "Summary:" — jump in.
- Suggest natural next steps (run tests, diff review, commit) only when they're genuinely the next move.
- For multi-line snippets use fenced code blocks with a language tag.
Tool calls:
- Run independent tool calls in parallel; chain only when later calls need earlier results.
- Don't ask permission ("Should I proceed?") — proceed with the most reasonable default and state what you did.
</provider_hints>

View file

@ -1,5 +1,21 @@
<provider_hints>
You are running on an OpenAI reasoning model (o-series / GPT-5+). Be terse and
direct in your responses. When editing files, prefer the `apply_patch` tool format
where available. Avoid restating the user request before answering.
You are running on an OpenAI reasoning model (GPT-5+ / o-series).
Output style:
- Be terse and direct. Don't restate the user's request before answering.
- Don't begin with conversational openers ("Done!", "Got it", "Great question", "Sure thing"). Get to the answer or the action.
- Match response complexity to the task: simple questions → one-line answer; substantial work → lead with the outcome, then context, then any next steps.
- No nested bullets — keep lists flat (single level). For options the user can pick by replying with a number, use `1.` `2.` `3.`.
- Use inline backticks for paths/commands/identifiers; fenced code blocks (with language tags) for multi-line snippets.
Channels (for clients that support them):
- `commentary` — short progress updates only when they add genuinely new information (a discovery, a tradeoff, a blocker, the start of a non-trivial step). Don't narrate routine reads or obvious next steps.
- `final` — the completed response. Keep it self-contained; no "see above" / "see below" cross-references.
Tool calls:
- Parallelise independent tool calls in a single response (`multi_tool_use.parallel` where supported). Only sequence when a later call needs an earlier one's output.
- Don't ask permission ("Should I proceed?", "Do you want me to…?"). Pick the most reasonable default, do it, and state what you did.
Autonomy:
- Persist until the task is fully resolved within the current turn whenever feasible. Don't stop at analysis when the user clearly wants the change applied.
</provider_hints>

View file

@ -976,7 +976,15 @@ class Document(BaseModel, TimestampMixin):
document_metadata = Column(JSON, nullable=True)
content = Column(Text, nullable=False)
content_hash = Column(String, nullable=False, index=True, unique=True)
# ``content_hash`` is intentionally NOT globally unique. In a real
# filesystem two files at different paths can hold identical bytes,
# and the agent's ``write_file`` flow needs that semantic to support
# copy / duplicate operations. Path uniqueness lives on
# ``unique_identifier_hash`` (per search space). The hash remains
# indexed because connector indexers consult it as a change-detection
# / cross-source dedup hint via :func:`check_duplicate_document`.
# See migration 133.
content_hash = Column(String, nullable=False, index=True)
unique_identifier_hash = Column(String, nullable=True, index=True, unique=True)
embedding = Column(Vector(config.embedding_model_instance.dimension))

View file

@ -25,17 +25,33 @@ class TestProviderVariantDetection:
@pytest.mark.parametrize(
"model_name,expected",
[
# GPT-4 family routes to "classic" (autonomous-persistence style)
("openai:gpt-4o-mini", "openai_classic"),
("openai:gpt-4-turbo", "openai_classic"),
# GPT-5 / o-series route to "reasoning" (channel-aware pragmatic)
("openai:gpt-5", "openai_reasoning"),
("openai:gpt-5-codex", "openai_reasoning"),
("openai:o1-preview", "openai_reasoning"),
("openai:o3-mini", "openai_reasoning"),
# Codex family beats reasoning (more specific). Mirrors OpenCode
# ``system.ts`` — ``gpt-*-codex`` gets the code-purist prompt.
("openai:gpt-5-codex", "openai_codex"),
("openai:gpt-codex", "openai_codex"),
("openai:codex-mini", "openai_codex"),
# Anthropic + Google
("anthropic:claude-3-5-sonnet", "anthropic"),
("anthropic/claude-opus-4", "anthropic"),
("google:gemini-2.0-flash", "google"),
("vertex:gemini-1.5-pro", "google"),
# Newly-covered families
("moonshot:kimi-k2", "kimi"),
("openrouter:moonshot/kimi-k2.5", "kimi"),
("xai:grok-2", "grok"),
("openrouter:x-ai/grok-3", "grok"),
("openai:deepseek-v3", "deepseek"),
("deepseek:deepseek-r1", "deepseek"),
# Unknown families fall back to default (no provider block emitted)
("groq:mixtral-8x7b", "default"),
("together:llama-3.1-70b", "default"),
(None, "default"),
("", "default"),
],
@ -43,6 +59,16 @@ class TestProviderVariantDetection:
def test_detection(self, model_name: str | None, expected: str) -> None:
assert detect_provider_variant(model_name) == expected
def test_codex_takes_precedence_over_reasoning(self) -> None:
"""Regression guard: ``gpt-5-codex`` must NOT match the generic
``gpt-5`` reasoning regex first. Codex is the more specialised
prompt and mirrors OpenCode's dispatch order.
"""
from app.agents.new_chat.prompts.composer import detect_provider_variant
assert detect_provider_variant("openai:gpt-5-codex") == "openai_codex"
assert detect_provider_variant("openai:gpt-5") == "openai_reasoning"
class TestCompose:
def test_default_prompt_has_required_blocks(self, fixed_today: datetime) -> None:
@ -149,6 +175,52 @@ class TestCompose:
prompt = compose_system_prompt(today=fixed_today, model_name="custom:foo")
assert "<provider_hints>" not in prompt
@pytest.mark.parametrize(
"model_name,expected_marker",
[
# Each marker is a unique-ish phrase from the corresponding fragment.
# If a fragment is renamed/rewritten such that the marker is gone,
# update both the fragment and this test deliberately.
("openai:gpt-5-codex", "Codex-class"),
("openai:gpt-5", "OpenAI reasoning model"),
("openai:gpt-4o", "classic OpenAI chat model"),
("anthropic:claude-3-5-sonnet", "Anthropic Claude"),
("google:gemini-2.0-flash", "Google Gemini"),
("moonshot:kimi-k2", "Moonshot Kimi"),
("xai:grok-2", "xAI Grok"),
("deepseek:deepseek-r1", "DeepSeek"),
],
)
def test_each_known_variant_renders_with_its_marker(
self,
fixed_today: datetime,
model_name: str,
expected_marker: str,
) -> None:
"""Every supported variant must produce a ``<provider_hints>`` block
containing its identifying marker. This pins the dispatch + the
on-disk fragments together so a missing/renamed file is caught
immediately.
"""
prompt = compose_system_prompt(today=fixed_today, model_name=model_name)
assert "<provider_hints>" in prompt, (
f"variant for {model_name!r} did not emit a provider_hints block; "
"the corresponding providers/<variant>.md may be missing"
)
assert expected_marker in prompt, (
f"variant for {model_name!r} emitted hints but lacked the "
f"expected marker {expected_marker!r} — the fragment may have "
"drifted from the dispatch table"
)
def test_provider_blocks_are_byte_stable_across_calls(
self, fixed_today: datetime
) -> None:
"""Cache-stability guard: same model id → byte-identical prompt."""
a = compose_system_prompt(today=fixed_today, model_name="moonshot:kimi-k2")
b = compose_system_prompt(today=fixed_today, model_name="moonshot:kimi-k2")
assert a == b
def test_custom_system_instructions_override_default(
self, fixed_today: datetime
) -> None:

View file

@ -0,0 +1,168 @@
"""Unit tests for kb_persistence filesystem-parity invariants.
Specifically, these tests pin down that the agent-driven write_file flow
treats path uniqueness — not content uniqueness — as the only hard
invariant. This mirrors a real filesystem: ``cp a b`` produces two files
with identical bytes living at different paths, and that should round-trip
through :class:`KnowledgeBasePersistenceMiddleware` without losing the copy.
"""
from __future__ import annotations
from typing import Any
from unittest.mock import AsyncMock, MagicMock
import numpy as np
import pytest
from app.agents.new_chat.middleware import kb_persistence
from app.db import Document
class _FakeResult:
    """Tiny double for ``sqlalchemy.engine.Result``.

    Both scalar accessors hand back whatever single value the test seeded
    the result with; ``None`` plays the role of "no row matched".
    """

    def __init__(self, value: Any = None) -> None:
        # The one value every accessor returns.
        self._seeded = value

    def scalar_one_or_none(self) -> Any:
        """Mimic ``Result.scalar_one_or_none``: seeded value or ``None``."""
        return self._seeded

    def scalar(self) -> Any:
        """Mimic ``Result.scalar``: same seeded value."""
        return self._seeded
class _FakeSession:
    """Minimal AsyncSession stand-in scoped to ``_create_document`` needs.

    Every ``add``/``add_all`` is recorded so tests can assert against the
    resulting Documents and Chunks. By default ``execute`` answers every
    query with "no row" — i.e. no folder hierarchy preexists and no path
    collision exists; a test that wants a collision overrides ``execute``
    on a per-call basis.
    """

    def __init__(self) -> None:
        self.added: list[Any] = []
        # Counter backing the fake primary-key assignment done on flush.
        self._next_id = 1
        self.execute = AsyncMock(return_value=_FakeResult(None))
        # ``await session.flush()`` assigns ids exactly like the ORM would:
        # every added object still lacking an id receives the next counter.
        self.flush = AsyncMock(side_effect=self._assign_ids_on_flush)

    async def _assign_ids_on_flush(self) -> None:
        # Only objects without an id get one; re-flushing is a no-op for
        # rows that were already "persisted".
        for pending in self.added:
            if getattr(pending, "id", None) is None:
                pending.id = self._next_id
                self._next_id += 1

    def add(self, obj: Any) -> None:
        self.added.append(obj)

    def add_all(self, objs: list[Any]) -> None:
        self.added.extend(objs)
@pytest.fixture(autouse=True)
def _stub_embeddings_and_chunks(monkeypatch: pytest.MonkeyPatch) -> None:
    """Keep these unit tests hermetic: stub embedding + chunking.

    Loading the real embedding model is slow and needs weights on disk;
    instead every text embeds to an 8-dim zero vector and every document
    "chunks" into a single chunk holding the whole content.
    """

    def _zero_vectors(texts):
        return [np.zeros(8, dtype=np.float32) for _ in texts]

    def _single_chunk(content):
        return [content]

    monkeypatch.setattr(kb_persistence, "embed_texts", _zero_vectors)
    monkeypatch.setattr(kb_persistence, "chunk_text", _single_chunk)
@pytest.mark.asyncio
async def test_create_document_allows_identical_content_at_different_paths() -> None:
    """The core regression: ``cp /a/notes.md /b/notes-copy.md``.

    Both create calls must succeed even though the bytes are byte-for-byte
    identical, because path is the only filesystem-style unique key.
    """
    session = _FakeSession()
    content = "# Same body\n\nIdentical content used by two different paths.\n"

    # First write: the "original" file lands without interference.
    first = await kb_persistence._create_document(
        session,  # type: ignore[arg-type]
        virtual_path="/documents/a/notes.md",
        content=content,
        search_space_id=42,
        created_by_id="user-1",
    )
    assert isinstance(first, Document)
    assert first.title == "notes.md"

    # Second create with byte-identical content at a different path should
    # not raise — that's the whole point of the filesystem-parity fix.
    second = await kb_persistence._create_document(
        session,  # type: ignore[arg-type]
        virtual_path="/documents/b/notes-copy.md",
        content=content,
        search_space_id=42,
        created_by_id="user-1",
    )
    assert isinstance(second, Document)
    assert second.title == "notes-copy.md"

    # Both rows share the same content_hash but live at distinct paths
    # (distinct ``unique_identifier_hash``). That's the desired contract.
    assert first.content_hash == second.content_hash
    assert first.unique_identifier_hash != second.unique_identifier_hash
@pytest.mark.asyncio
async def test_create_document_still_rejects_path_collision() -> None:
    """Path uniqueness remains the hard invariant.

    If ``unique_identifier_hash`` already points at an existing row in
    the same search space, the create call must raise ``ValueError``
    with a clear message matching the behavior the commit loop relies
    on to upsert via the existing-row code path.
    """
    session = _FakeSession()
    # Path with no folder parts so ``_ensure_folder_hierarchy`` is a
    # no-op and the only SELECT executed is the path-collision check.
    # That SELECT returns an existing doc id, triggering the guard.
    session.execute = AsyncMock(return_value=_FakeResult(value=99))

    # The guard must surface a clean ValueError (not a session-poisoning
    # IntegrityError) and must name the colliding path in its message.
    with pytest.raises(ValueError, match="already exists at path"):
        await kb_persistence._create_document(
            session,  # type: ignore[arg-type]
            virtual_path="/documents/notes.md",
            content="anything",
            search_space_id=42,
            created_by_id="user-1",
        )
@pytest.mark.asyncio
async def test_create_document_does_not_query_for_content_hash_collision(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Regression guard: the legacy second SELECT (content_hash collision
    pre-check) must be gone. Counting ``execute`` calls is a brittle but
    effective way to lock that in.

    The current flow runs exactly one ``execute`` for the path-collision
    SELECT (no folder parts in this path, so ``_ensure_folder_hierarchy``
    short-circuits). If a future refactor reintroduces a content-hash
    SELECT, this test will fail loud.
    """
    # NOTE: ``monkeypatch`` is unused directly here; the autouse fixture
    # already stubs embeddings/chunking for every test in this module.
    session = _FakeSession()
    await kb_persistence._create_document(
        session,  # type: ignore[arg-type]
        virtual_path="/documents/notes.md",
        content="hello",
        search_space_id=42,
        created_by_id="user-1",
    )

    # Path-collision SELECT only. No content_hash SELECT.
    assert session.execute.await_count == 1, (
        f"Unexpected execute count {session.execute.await_count}; "
        "did the legacy content_hash collision pre-check get re-added?"
    )

View file

@ -210,6 +210,27 @@ button {
}
}
/* Citation-jump highlight entrance pulse only. The `SearchHighlightLeaf`
(see components/ui/search-highlight-node.tsx) is otherwise statically
tinted; this animation runs once on mount to draw the eye to the cited
text after `scrollIntoView` lands. The highlight itself is permanent
until the user clicks inside the editor (or another dismissal trigger
fires in `EditorPanelContent`). */
@keyframes citation-flash-in {
	/* Start fully invisible so the pulse reads as an entrance, not a flicker. */
	0% {
		background-color: transparent;
		box-shadow: 0 0 0 0 transparent;
	}
	/* Peak of the pulse: strongest tint plus a 3px ring to draw the eye. */
	40% {
		background-color: color-mix(in oklab, var(--primary) 30%, transparent);
		box-shadow: 0 0 0 3px color-mix(in oklab, var(--primary) 25%, transparent);
	}
	/* Settle into the persistent resting tint (kept until dismissal —
	   see the comment above this keyframes block). */
	100% {
		background-color: color-mix(in oklab, var(--primary) 15%, transparent);
		box-shadow: 0 0 0 1px color-mix(in oklab, var(--primary) 40%, transparent);
	}
}
/* Human-in-the-loop approval card animations */
@keyframes pulse-subtle {
0%,

View file

@ -0,0 +1,19 @@
import { atom } from "jotai";
/**
* Cross-component handoff for citation jumps. Set by `InlineCitation` when a
* numeric chunk badge is clicked (after the document has been resolved); read
* by `DocumentTabContent` once the matching document tab mounts so it can
* scroll to and softly highlight the cited chunk inside the rendered markdown.
*
 * Cleared by `DocumentTabContent` only after a terminal state — exact /
 * approximate / miss — has been reached, so that an escalation refetch (2MB
 * preview → 16MB) keeps the pending intent alive across the re-render.
*/
export interface PendingChunkHighlight {
	/** Id of the document that contains the cited chunk. */
	documentId: number;
	/** Id of the cited chunk itself. */
	chunkId: number;
	/** Raw chunk text used to locate/highlight the citation in the rendered doc. */
	chunkText: string;
}

/** Null when no citation jump is pending. */
export const pendingChunkHighlightAtom = atom<PendingChunkHighlight | null>(null);

View file

@ -1,26 +1,45 @@
"use client";
import { FileText } from "lucide-react";
import { useQuery, useQueryClient } from "@tanstack/react-query";
import { useSetAtom } from "jotai";
import { ExternalLink, FileText } from "lucide-react";
import type { FC } from "react";
import { useState } from "react";
import { useCallback, useEffect, useRef, useState } from "react";
import { toast } from "sonner";
import { pendingChunkHighlightAtom } from "@/atoms/document-viewer/pending-chunk-highlight.atom";
import { openEditorPanelAtom } from "@/atoms/editor/editor-panel.atom";
import { useCitationMetadata } from "@/components/assistant-ui/citation-metadata-context";
import { SourceDetailPanel } from "@/components/new-chat/source-detail-panel";
import { MarkdownViewer } from "@/components/markdown-viewer";
import { Citation } from "@/components/tool-ui/citation";
import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover";
import { Spinner } from "@/components/ui/spinner";
import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
import { documentsApiService } from "@/lib/apis/documents-api.service";
import { cacheKeys } from "@/lib/query-client/cache-keys";
interface InlineCitationProps {
chunkId: number;
isDocsChunk?: boolean;
}
const POPOVER_HOVER_CLOSE_DELAY_MS = 150;
/**
* Inline citation for knowledge-base chunks (numeric chunk IDs).
* Renders a clickable badge showing the actual chunk ID that opens the SourceDetailPanel.
* Negative chunk IDs indicate anonymous/synthetic uploads and render as a static badge.
* Inline citation badge for knowledge-base chunks (numeric chunk IDs) and
* Surfsense documentation chunks (`isDocsChunk`). Negative chunk IDs render as
* a static "doc" pill (anonymous/synthetic uploads).
*
* Numeric KB chunks: clicking resolves the parent document via
 * `getDocumentByChunk`, opens the document in the right side panel (alongside
 * the chat — it does not replace it), and stages the cited chunk text in
* `pendingChunkHighlightAtom` so `EditorPanelContent` can scroll to and softly
* highlight it inside the rendered markdown.
*
* Surfsense docs chunks: rendered as a hover-controlled shadcn Popover that
* lazily fetches and previews the cited chunk inline, since those docs aren't
* indexed into the user's search space and have no tab to open.
*/
export const InlineCitation: FC<InlineCitationProps> = ({ chunkId, isDocsChunk = false }) => {
const [isOpen, setIsOpen] = useState(false);
if (chunkId < 0) {
return (
<Tooltip>
@ -38,26 +57,185 @@ export const InlineCitation: FC<InlineCitationProps> = ({ chunkId, isDocsChunk =
);
}
if (isDocsChunk) {
return <SurfsenseDocCitation chunkId={chunkId} />;
}
return <NumericChunkCitation chunkId={chunkId} />;
};
const NumericChunkCitation: FC<{ chunkId: number }> = ({ chunkId }) => {
const queryClient = useQueryClient();
const setPendingHighlight = useSetAtom(pendingChunkHighlightAtom);
const openEditorPanel = useSetAtom(openEditorPanelAtom);
const [resolving, setResolving] = useState(false);
const handleClick = useCallback(async () => {
if (resolving) return;
setResolving(true);
console.log("[citation:click] start", { chunkId });
try {
const data = await queryClient.fetchQuery({
// Local key with explicit window. The shared `cacheKeys.documents.byChunk`
// is window-agnostic (latent footgun); namespace the call to avoid
// reusing a different-window cached result.
queryKey: ["documents", "by-chunk", chunkId, "w0"] as const,
queryFn: () =>
documentsApiService.getDocumentByChunk({ chunk_id: chunkId, chunk_window: 0 }),
staleTime: 5 * 60 * 1000,
});
const cited = data.chunks.find((c) => c.id === chunkId) ?? data.chunks[0];
console.log("[citation:click] fetched doc-by-chunk", {
docId: data.id,
docTitle: data.title,
chunksReturned: data.chunks.length,
citedChunkId: cited?.id,
citedChunkContentLen: cited?.content?.length ?? 0,
citedChunkPreview:
cited?.content && cited.content.length > 120
? `${cited.content.slice(0, 120)}…(+${cited.content.length - 120})`
: (cited?.content ?? ""),
});
// Stage the highlight BEFORE opening the panel so `EditorPanelContent`
// already sees the pending intent on its very first render — avoids a
// "fetch → render → no-pending → next-tick render with pending" race.
setPendingHighlight({
documentId: data.id,
chunkId,
chunkText: cited?.content ?? "",
});
openEditorPanel({
documentId: data.id,
searchSpaceId: data.search_space_id,
title: data.title,
});
console.log("[citation:click] staged highlight + opened editor panel", {
documentId: data.id,
});
} catch (err) {
console.warn("[citation:click] failed", err);
toast.error(err instanceof Error ? err.message : "Couldn't open cited document");
} finally {
setResolving(false);
}
}, [chunkId, openEditorPanel, queryClient, resolving, setPendingHighlight]);
return (
<SourceDetailPanel
open={isOpen}
onOpenChange={setIsOpen}
chunkId={chunkId}
sourceType={isDocsChunk ? "SURFSENSE_DOCS" : ""}
title={isDocsChunk ? "Surfsense Documentation" : "Source"}
description=""
url=""
isDocsChunk={isDocsChunk}
<button
type="button"
onClick={handleClick}
disabled={resolving}
className="ml-0.5 inline-flex h-5 min-w-5 cursor-pointer items-center justify-center rounded-md bg-muted/60 px-1.5 text-[11px] font-medium text-muted-foreground align-baseline shadow-sm transition-colors hover:bg-muted hover:text-foreground focus-visible:ring-ring focus-visible:ring-2 focus-visible:outline-none disabled:cursor-progress disabled:opacity-70"
title={`View source chunk #${chunkId}`}
aria-label={`Jump to cited chunk ${chunkId}`}
>
<button
type="button"
onClick={() => setIsOpen(true)}
className="ml-0.5 inline-flex h-5 min-w-5 cursor-pointer items-center justify-center rounded-md bg-muted/60 px-1.5 text-[11px] font-medium text-muted-foreground align-baseline shadow-sm transition-colors hover:bg-muted hover:text-foreground focus-visible:ring-ring focus-visible:ring-2 focus-visible:outline-none"
title={`View source chunk #${chunkId}`}
{resolving ? <Spinner size="xs" /> : chunkId}
</button>
);
};
const SurfsenseDocCitation: FC<{ chunkId: number }> = ({ chunkId }) => {
const [open, setOpen] = useState(false);
const closeTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
const cancelClose = useCallback(() => {
if (closeTimerRef.current) {
clearTimeout(closeTimerRef.current);
closeTimerRef.current = null;
}
}, []);
const scheduleClose = useCallback(() => {
cancelClose();
closeTimerRef.current = setTimeout(() => {
setOpen(false);
closeTimerRef.current = null;
}, POPOVER_HOVER_CLOSE_DELAY_MS);
}, [cancelClose]);
useEffect(() => () => cancelClose(), [cancelClose]);
const { data, isLoading, error } = useQuery({
queryKey: cacheKeys.documents.byChunk(`doc-${chunkId}`),
queryFn: () => documentsApiService.getSurfsenseDocByChunk(chunkId),
enabled: open,
staleTime: 5 * 60 * 1000,
});
const citedChunk = data?.chunks.find((c) => c.id === chunkId) ?? data?.chunks[0];
return (
<Popover open={open} onOpenChange={setOpen}>
<PopoverTrigger asChild>
<button
type="button"
onClick={() => setOpen((prev) => !prev)}
onMouseEnter={() => {
cancelClose();
setOpen(true);
}}
onMouseLeave={scheduleClose}
onFocus={() => {
cancelClose();
setOpen(true);
}}
onBlur={scheduleClose}
className="ml-0.5 inline-flex h-5 min-w-5 cursor-pointer items-center justify-center gap-0.5 rounded-md bg-primary/10 px-1.5 text-[11px] font-medium text-primary align-baseline shadow-sm transition-colors hover:bg-primary/15 focus-visible:ring-ring focus-visible:ring-2 focus-visible:outline-none"
aria-label={`Show Surfsense documentation chunk ${chunkId}`}
title="Surfsense documentation"
>
<FileText className="size-3" />
doc
</button>
</PopoverTrigger>
<PopoverContent
className="w-96 max-w-[calc(100vw-2rem)] p-0"
align="start"
sideOffset={6}
onMouseEnter={cancelClose}
onMouseLeave={scheduleClose}
onOpenAutoFocus={(e) => e.preventDefault()}
>
{chunkId}
</button>
</SourceDetailPanel>
<div className="flex items-center justify-between gap-2 border-b px-3 py-2">
<div className="min-w-0">
<p className="truncate text-sm font-medium">
{data?.title ?? "Surfsense documentation"}
</p>
<p className="text-[11px] text-muted-foreground">Chunk #{chunkId}</p>
</div>
{data?.source && (
<a
href={data.source}
target="_blank"
rel="noopener noreferrer"
className="inline-flex shrink-0 items-center gap-1 rounded-md px-2 py-1 text-[11px] font-medium text-primary hover:bg-primary/10"
>
<ExternalLink className="size-3" />
Open
</a>
)}
</div>
<div className="max-h-72 overflow-auto px-3 py-2 text-sm">
{isLoading && (
<div className="flex items-center gap-2 py-4 text-muted-foreground">
<Spinner size="xs" />
<span className="text-xs">Loading</span>
</div>
)}
{error && (
<p className="py-4 text-xs text-destructive">
{error instanceof Error ? error.message : "Failed to load chunk"}
</p>
)}
{!isLoading && !error && citedChunk?.content && (
<MarkdownViewer content={citedChunk.content} maxLength={1500} />
)}
{!isLoading && !error && !citedChunk?.content && (
<p className="py-4 text-xs text-muted-foreground">No content available.</p>
)}
</div>
</PopoverContent>
</Popover>
);
};

View file

@ -1,5 +1,6 @@
"use client";
import { FindReplacePlugin } from "@platejs/find-replace";
import { useAtomValue, useSetAtom } from "jotai";
import {
Check,
@ -14,17 +15,21 @@ import {
import dynamic from "next/dynamic";
import { useCallback, useEffect, useRef, useState } from "react";
import { toast } from "sonner";
import { pendingChunkHighlightAtom } from "@/atoms/document-viewer/pending-chunk-highlight.atom";
import { closeEditorPanelAtom, editorPanelAtom } from "@/atoms/editor/editor-panel.atom";
import { VersionHistoryButton } from "@/components/documents/version-history";
import type { PlateEditorInstance } from "@/components/editor/plate-editor";
import { SourceCodeEditor } from "@/components/editor/source-code-editor";
import { MarkdownViewer } from "@/components/markdown-viewer";
import { Alert, AlertDescription } from "@/components/ui/alert";
import { Button } from "@/components/ui/button";
import { Drawer, DrawerContent, DrawerHandle, DrawerTitle } from "@/components/ui/drawer";
import { CITATION_HIGHLIGHT_CLASS } from "@/components/ui/search-highlight-node";
import { Spinner } from "@/components/ui/spinner";
import { useMediaQuery } from "@/hooks/use-media-query";
import { useElectronAPI } from "@/hooks/use-platform";
import { authenticatedFetch, getBearerToken, redirectToLogin } from "@/lib/auth-utils";
import { buildCitationSearchCandidates } from "@/lib/citation-search";
import { inferMonacoLanguageFromPath } from "@/lib/editor-language";
const PlateEditor = dynamic(
@ -32,7 +37,10 @@ const PlateEditor = dynamic(
{ ssr: false, loading: () => <EditorPanelSkeleton /> }
);
type CitationHighlightStatus = "exact" | "miss";
const LARGE_DOCUMENT_THRESHOLD = 2 * 1024 * 1024; // 2MB
const CITATION_MAX_LENGTH = 16 * 1024 * 1024; // 16MB on-demand cap for citation jumps
interface EditorContent {
document_id: number;
@ -136,6 +144,61 @@ export function EditorPanelContent({
const [displayTitle, setDisplayTitle] = useState(title || "Untitled");
const isLocalFileMode = kind === "local_file";
const editorRenderMode: EditorRenderMode = isLocalFileMode ? "source_code" : "rich_markdown";
// --- Citation-jump highlight wiring ----------------------------------
// `EditorPanelContent` is the consumer of `pendingChunkHighlightAtom`: when
// a citation badge is clicked, the badge stages `{documentId, chunkId,
// chunkText}` and opens this panel. We drive Plate's `FindReplacePlugin`
// (registered in every preset) to highlight the cited text natively via
// Slate decorations — no DOM walking, no Range gymnastics. The state
// machine below escalates the document fetch from 2MB → 16MB once if no
// candidate snippet matched in the preview, and surfaces miss outcomes
// via an inline alert.
const pending = useAtomValue(pendingChunkHighlightAtom);
const setPendingHighlight = useSetAtom(pendingChunkHighlightAtom);
const [fetchKey, setFetchKey] = useState(0);
const [maxLengthOverride, setMaxLengthOverride] = useState<number | null>(null);
const [highlightResult, setHighlightResult] = useState<CitationHighlightStatus | null>(null);
const editorRef = useRef<PlateEditorInstance | null>(null);
const escalatedForRef = useRef<number | null>(null);
const lastAppliedChunkIdRef = useRef<number | null>(null);
// Tracks whether a citation highlight is currently decorated in the
// editor. We use a ref (not state) because the click-to-dismiss handler
// runs in a stable callback that would otherwise close over stale state.
const isHighlightActiveRef = useRef(false);
// Once a citation jump targets this doc we have to keep `PlateEditor`
// mounted for the *rest of the doc session* — even after the highlight
// effect clears `pendingChunkHighlightAtom` (which it does as soon as
// the decoration is applied, so a follow-up citation on the same chunk
// can re-trigger). Without this latch, non-editable docs would re-render
// back into `MarkdownViewer` the instant `pending` is released, tearing
// down the Plate decorations and dropping the highlight after a frame.
const [stickyPlateMode, setStickyPlateMode] = useState(false);
const clearCitationSearch = useCallback(() => {
isHighlightActiveRef.current = false;
const editor = editorRef.current;
if (!editor) return;
try {
editor.setOption(FindReplacePlugin, "search", "");
editor.api.redecorate();
} catch (err) {
console.warn("[EditorPanelContent] clearCitationSearch failed:", err);
}
}, []);
// Dismiss the highlight when the user interacts with the editor surface.
// `onPointerDown` fires before focus / selection changes so the click
// itself feels responsive — the highlight clears in the same event tick
// that places the cursor. No-op when nothing is highlighted, so we don't
// thrash `redecorate` on every click in normal editing.
const handleEditorPointerDown = useCallback(() => {
if (!isHighlightActiveRef.current) return;
clearCitationSearch();
setHighlightResult(null);
}, [clearCitationSearch]);
const isCitationTarget = !!pending && !isLocalFileMode && pending.documentId === documentId;
const resolveLocalVirtualPath = useCallback(
async (candidatePath: string): Promise<string> => {
if (!electronAPI?.getAgentFilesystemMounts) {
@ -155,6 +218,8 @@ export function EditorPanelContent({
const isLargeDocument = (editorDoc?.content_size_bytes ?? 0) > LARGE_DOCUMENT_THRESHOLD;
// `fetchKey` is an explicit re-fetch trigger (escalation bumps it to force
// a new request even when documentId/searchSpaceId haven't changed).
useEffect(() => {
const controller = new AbortController();
setIsLoading(true);
@ -166,6 +231,12 @@ export function EditorPanelContent({
setIsEditing(false);
initialLoadDone.current = false;
changeCountRef.current = 0;
// Clear any in-flight FindReplacePlugin search before the editor
// re-mounts on new content (a fresh editor key is generated below
// from documentId + isEditing, so the previous editor + its
// decorations are about to be discarded anyway, but we belt-and-
// brace here for the case where only `fetchKey` changed).
clearCitationSearch();
const doFetch = async () => {
try {
@ -210,7 +281,11 @@ export function EditorPanelContent({
const url = new URL(
`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/search-spaces/${searchSpaceId}/documents/${documentId}/editor-content`
);
url.searchParams.set("max_length", String(LARGE_DOCUMENT_THRESHOLD));
url.searchParams.set("max_length", String(maxLengthOverride ?? LARGE_DOCUMENT_THRESHOLD));
// `fetchKey` participates here so biome's noUnusedVariables sees it
// as consumed; bumping it forces a fresh request even when the URL
// is otherwise identical.
if (fetchKey > 0) url.searchParams.set("_n", String(fetchKey));
const response = await authenticatedFetch(url.toString(), { method: "GET" });
@ -256,8 +331,259 @@ export function EditorPanelContent({
resolveLocalVirtualPath,
searchSpaceId,
title,
fetchKey,
maxLengthOverride,
clearCitationSearch,
]);
// Reset citation-jump bookkeeping whenever the panel switches to a different
// document (or local file). Body only writes setters — the deps are the
// real triggers we want to react to.
// biome-ignore lint/correctness/useExhaustiveDependencies: documentId/localFilePath are intentional triggers.
useEffect(() => {
clearCitationSearch();
escalatedForRef.current = null;
lastAppliedChunkIdRef.current = null;
setHighlightResult(null);
setMaxLengthOverride(null);
setFetchKey(0);
// Drop sticky Plate mode when the panel moves to a different doc
// — the next doc starts in its preferred render mode (Plate for
// editable, MarkdownViewer for everything else) until/unless a
// citation jump targets it.
setStickyPlateMode(false);
}, [documentId, localFilePath, clearCitationSearch]);
// Latch sticky Plate mode the first time a citation jump targets this
// doc. We keep it sticky for the remainder of this doc session so the
// highlight effect's `setPendingHighlight(null)` doesn't unmount the
// editor mid-flight (see comment on `stickyPlateMode` declaration).
useEffect(() => {
if (isCitationTarget) setStickyPlateMode(true);
}, [isCitationTarget]);
// `isEditorReady` is what `useEffect` actually depends on — `editorRef`
// is a ref so changes don't trigger re-runs. We flip this to `true` once
// `PlateEditor` calls back with its live editor instance (its
// `usePlateEditor` value-init runs synchronously, so by the time this
// flips true the markdown is already deserialized into the Slate tree).
const [isEditorReady, setIsEditorReady] = useState(false);
const handleEditorReady = useCallback((editor: PlateEditorInstance | null) => {
console.log("[citation:editor] handleEditorReady", { ready: !!editor });
editorRef.current = editor;
setIsEditorReady(!!editor);
}, []);
// --- Citation jump highlight effect -----------------------------------
// Drives Plate's FindReplacePlugin to highlight the cited chunk:
// 1. Build candidate snippets from the chunk text (first sentence,
// first 8 words, full chunk if short). Plate's decorate runs per-
// block and won't cross block boundaries, so the shorter
// candidates exist to give us something that fits in one
// paragraph / heading.
// 2. For each candidate: setOption('search', ...) → redecorate →
// wait two animation frames for React to flush → query the editor
// DOM for `.${CITATION_HIGHLIGHT_CLASS}`. First hit wins.
//
// Why a className and not a `data-*` attribute? Plate's
// `PlateLeaf` runs its props through `useNodeAttributes`, which
// only forwards `attributes`, `className`, `ref`, and `style` —
// arbitrary `data-*` attributes are silently dropped. `className`
// is the only escape hatch guaranteed to survive into the DOM.
// 3. On hit: smooth-scroll the first match into view, mark the
// highlight active (so a click inside the editor can dismiss it),
// release the pending atom.
// 4. On terminal miss: if the doc was truncated and we haven't
// escalated yet, bump the fetch's `max_length` to the citation
// cap and re-fetch — the post-refetch render will re-run this
// effect against the larger preview. Otherwise, release the
// atom and show the miss alert.
useEffect(() => {
console.log("[citation:effect] fired", {
isCitationTarget,
pendingDocId: pending?.documentId,
pendingChunkId: pending?.chunkId,
pendingChunkTextLen: pending?.chunkText?.length,
documentId,
isLocalFileMode,
isEditing,
hasMarkdown: !!editorDoc?.source_markdown,
markdownLen: editorDoc?.source_markdown?.length,
truncated: editorDoc?.truncated,
isEditorReady,
editorRefSet: !!editorRef.current,
maxLengthOverride,
});
if (!isCitationTarget || !pending) {
console.log("[citation:effect] guard ✗ no citation target / no pending");
return;
}
if (isLocalFileMode || isEditing) {
console.log("[citation:effect] guard ✗ localFileMode/editing");
return;
}
if (!editorDoc?.source_markdown) {
console.log("[citation:effect] guard ✗ source_markdown not ready");
return;
}
if (!isEditorReady) {
console.log("[citation:effect] guard ✗ editor not ready yet");
return;
}
const editor = editorRef.current;
if (!editor) {
console.log("[citation:effect] guard ✗ editorRef.current is null");
return;
}
if (lastAppliedChunkIdRef.current !== pending.chunkId) {
lastAppliedChunkIdRef.current = pending.chunkId;
}
let cancelled = false;
const finishMiss = () => {
console.log("[citation:effect] terminal miss — no candidate matched");
try {
editor.setOption(FindReplacePlugin, "search", "");
editor.api.redecorate();
} catch (err) {
console.warn("[EditorPanelContent] reset search after miss failed:", err);
}
const canEscalate =
editorDoc.truncated === true &&
(maxLengthOverride ?? LARGE_DOCUMENT_THRESHOLD) < CITATION_MAX_LENGTH &&
escalatedForRef.current !== pending.chunkId;
console.log("[citation:effect] miss decision", {
truncated: editorDoc.truncated,
currentMaxLength: maxLengthOverride ?? LARGE_DOCUMENT_THRESHOLD,
canEscalate,
});
if (canEscalate) {
escalatedForRef.current = pending.chunkId;
setMaxLengthOverride(CITATION_MAX_LENGTH);
setFetchKey((k) => k + 1);
// Keep the atom set so the post-refetch render re-runs.
return;
}
setHighlightResult("miss");
setPendingHighlight(null);
};
const tryCandidates = async () => {
const candidates = buildCitationSearchCandidates(pending.chunkText);
console.log("[citation:effect] candidates built", {
count: candidates.length,
previews: candidates.map((c) => c.slice(0, 60)),
});
if (candidates.length === 0) {
if (!cancelled) finishMiss();
return;
}
// Resolve the editor's rendered DOM root via Slate's stable
// `[data-slate-editor="true"]` attribute (set by slate-react's
// `<Editable>`). Scoping queries to this root prevents
// `<mark>` elements rendered elsewhere on the page (e.g. chat
// search-highlight leaves in another mounted PlateEditor) from
// being mistaken for citation hits.
const editorRoot = document.querySelector<HTMLElement>('[data-slate-editor="true"]');
console.log("[citation:effect] editor root", {
hasRoot: !!editorRoot,
});
const root: ParentNode = editorRoot ?? document;
for (let i = 0; i < candidates.length; i++) {
const candidate = candidates[i];
if (cancelled) return;
try {
editor.setOption(FindReplacePlugin, "search", candidate);
editor.api.redecorate();
console.log(`[citation:effect] try #${i} setOption + redecorate`, {
len: candidate.length,
preview: candidate.slice(0, 80),
});
} catch (err) {
console.warn("[EditorPanelContent] setOption/redecorate failed:", err);
continue;
}
// Two rAFs: first lets Slate flush its onChange, second lets
// React commit the decoration leaves into the DOM.
await new Promise<void>((resolve) =>
requestAnimationFrame(() => requestAnimationFrame(() => resolve()))
);
if (cancelled) return;
// Primary probe: by our stable class on the rendered <mark>.
let el = root.querySelector<HTMLElement>(`.${CITATION_HIGHLIGHT_CLASS}`);
const classMarkCount = root.querySelectorAll(`.${CITATION_HIGHLIGHT_CLASS}`).length;
// Diagnostic fallback: any <mark> inside the editor root.
// If we ever see allMarks > 0 but classMarkCount === 0,
// the className was stripped again and we need to revisit
// `useNodeAttributes` filtering.
const allMarkCount = root.querySelectorAll("mark").length;
if (!el && allMarkCount > 0) {
el = root.querySelector<HTMLElement>("mark");
}
console.log(`[citation:effect] try #${i} DOM probe`, {
foundEl: !!el,
classMarkCount,
allMarkCount,
usedFallback: !!el && classMarkCount === 0,
});
if (el) {
try {
el.scrollIntoView({ block: "center", behavior: "smooth" });
} catch {
el.scrollIntoView();
}
isHighlightActiveRef.current = true;
setHighlightResult("exact");
console.log(`[citation:effect] ✓ exact via candidate #${i} — atom released`);
// No auto-clear timer — the highlight is intentionally
// permanent until the user clicks inside the editor (see
// `handleEditorPointerDown`) or another dismissal trigger
// fires (doc switch, edit-mode toggle, panel unmount,
// next citation jump). Sticky Plate mode keeps the
// editor mounted after the atom clears.
setPendingHighlight(null);
return;
}
}
if (!cancelled) finishMiss();
};
void tryCandidates();
return () => {
cancelled = true;
};
}, [
isCitationTarget,
pending,
documentId,
editorDoc?.source_markdown,
editorDoc?.truncated,
isLocalFileMode,
isEditing,
isEditorReady,
maxLengthOverride,
clearCitationSearch,
setPendingHighlight,
]);
// Cleanup any active highlight on unmount.
useEffect(() => {
return () => clearCitationSearch();
}, [clearCitationSearch]);
// Toggling into edit mode swaps Plate out of readOnly. Clear the citation
// search so stale leaves don't linger in the editing surface.
useEffect(() => {
if (isEditing) {
clearCitationSearch();
setHighlightResult(null);
}
}, [isEditing, clearCitationSearch]);
useEffect(() => {
return () => {
if (copyResetTimeoutRef.current) {
@ -367,6 +693,15 @@ export function EditorPanelContent({
EDITABLE_DOCUMENT_TYPES.has(editorDoc.document_type ?? "")) &&
!isLargeDocument
: false;
// Use PlateEditor for any of:
// - Editable doc types (FILE/NOTE) — existing editing UX.
// - Active citation jump in flight (`isCitationTarget`) — covers the
// mount in the very first render where the atom is set but the
// sticky effect hasn't fired yet.
// - Sticky Plate mode latched on a previous citation jump — keeps
// the editor mounted (with its decorations) after the highlight
// effect clears the atom. Resets when the doc changes.
const renderInPlateEditor = isEditableType || isCitationTarget || stickyPlateMode;
const hasUnsavedChanges = editedMarkdown !== null;
const showDesktopHeader = !!onClose;
const showEditingActions = isEditableType && isEditing;
@ -381,6 +716,90 @@ export function EditorPanelContent({
setIsEditing(false);
}, [editorDoc?.source_markdown]);
	// Download the full document as a .md file via the backend endpoint.
	// Shared by both the large-document alert and the citation-miss alert.
	const handleDownloadMarkdown = useCallback(async () => {
		// Without both ids we can't build the endpoint URL — bail silently.
		if (!searchSpaceId || !documentId) return;
		setDownloading(true);
		try {
			const response = await authenticatedFetch(
				`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/search-spaces/${searchSpaceId}/documents/${documentId}/download-markdown`,
				{ method: "GET" }
			);
			if (!response.ok) throw new Error("Download failed");
			const blob = await response.blob();
			const url = URL.createObjectURL(blob);
			// Programmatic anchor click: the standard way to trigger a save
			// dialog for a blob without navigating away.
			const a = document.createElement("a");
			a.href = url;
			// Prefer the server-provided filename from Content-Disposition;
			// fall back to the document title.
			const disposition = response.headers.get("content-disposition");
			const match = disposition?.match(/filename="(.+)"/);
			a.download = match?.[1] ?? `${editorDoc?.title || "document"}.md`;
			document.body.appendChild(a);
			a.click();
			a.remove();
			// Release the object URL so the blob can be garbage-collected.
			URL.revokeObjectURL(url);
			toast.success("Download started");
		} catch {
			toast.error("Failed to download document");
		} finally {
			setDownloading(false);
		}
	}, [documentId, editorDoc?.title, searchSpaceId]);
// We no longer surface an "approximate" status — Plate's FindReplacePlugin
// either decorates an exact match or it doesn't, and the candidate snippet
// strategy (first sentence → first 8 words → full chunk) means we either
// land on the citation start or fall through to the miss alert.
const showMissAlert = isCitationTarget && highlightResult === "miss";
const citationAlerts = showMissAlert && (
<Alert variant="destructive" className="mb-4">
<FileQuestionMark className="size-4" />
<AlertDescription className="flex items-center justify-between gap-4">
<span>Cited section couldn&apos;t be located in this view.</span>
{editorDoc?.truncated && (
<Button
variant="outline"
size="sm"
className="relative shrink-0"
disabled={downloading}
onClick={handleDownloadMarkdown}
>
<span className={`flex items-center gap-1.5 ${downloading ? "opacity-0" : ""}`}>
<Download className="size-3.5" />
Download .md
</span>
{downloading && <Spinner size="sm" className="absolute" />}
</Button>
)}
</AlertDescription>
</Alert>
);
const largeDocAlert = isLargeDocument && !isLocalFileMode && editorDoc && (
<Alert className="mb-4">
<FileText className="size-4" />
<AlertDescription className="flex items-center justify-between gap-4">
<span>
This document is too large for the editor (
{Math.round((editorDoc.content_size_bytes ?? 0) / 1024 / 1024)}MB,{" "}
{editorDoc.chunk_count ?? 0} chunks). Showing a preview below.
</span>
<Button
variant="outline"
size="sm"
className="relative shrink-0"
disabled={downloading}
onClick={handleDownloadMarkdown}
>
<span className={`flex items-center gap-1.5 ${downloading ? "opacity-0" : ""}`}>
<Download className="size-3.5" />
Download .md
</span>
{downloading && <Spinner size="sm" className="absolute" />}
</Button>
</AlertDescription>
</Alert>
);
return (
<>
{showDesktopHeader ? (
@ -565,61 +984,6 @@ export function EditorPanelContent({
</p>
</div>
</div>
) : isLargeDocument && !isLocalFileMode ? (
<div className="h-full overflow-y-auto px-5 py-4">
<Alert className="mb-4">
<FileText className="size-4" />
<AlertDescription className="flex items-center justify-between gap-4">
<span>
This document is too large for the editor (
{Math.round((editorDoc.content_size_bytes ?? 0) / 1024 / 1024)}MB,{" "}
{editorDoc.chunk_count ?? 0} chunks). Showing a preview below.
</span>
<Button
variant="outline"
size="sm"
className="relative shrink-0"
disabled={downloading}
onClick={async () => {
setDownloading(true);
try {
if (!searchSpaceId || !documentId) {
throw new Error("Missing document context");
}
const response = await authenticatedFetch(
`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/search-spaces/${searchSpaceId}/documents/${documentId}/download-markdown`,
{ method: "GET" }
);
if (!response.ok) throw new Error("Download failed");
const blob = await response.blob();
const url = URL.createObjectURL(blob);
const a = document.createElement("a");
a.href = url;
const disposition = response.headers.get("content-disposition");
const match = disposition?.match(/filename="(.+)"/);
a.download = match?.[1] ?? `${editorDoc.title || "document"}.md`;
document.body.appendChild(a);
a.click();
a.remove();
URL.revokeObjectURL(url);
toast.success("Download started");
} catch {
toast.error("Failed to download document");
} finally {
setDownloading(false);
}
}}
>
<span className={`flex items-center gap-1.5 ${downloading ? "opacity-0" : ""}`}>
<Download className="size-3.5" />
Download .md
</span>
{downloading && <Spinner size="sm" className="absolute" />}
</Button>
</AlertDescription>
</Alert>
<MarkdownViewer content={editorDoc.source_markdown} />
</div>
) : editorRenderMode === "source_code" ? (
<div className="h-full overflow-hidden">
<SourceCodeEditor
@ -638,20 +1002,46 @@ export function EditorPanelContent({
}}
/>
</div>
) : isEditableType ? (
<PlateEditor
key={`${isLocalFileMode ? (localFilePath ?? "local-file") : documentId}-${isEditing ? "editing" : "viewing"}`}
preset="full"
markdown={editorDoc.source_markdown}
onMarkdownChange={handleMarkdownChange}
readOnly={!isEditing}
placeholder="Start writing..."
editorVariant="default"
allowModeToggle={false}
reserveToolbarSpace
defaultEditing={isEditing}
className="[&_[role=toolbar]]:!bg-sidebar"
/>
) : isLargeDocument && !isLocalFileMode && !isCitationTarget ? (
// Large doc, no active citation — fast Streamdown preview
// + download CTA. We only fall back to MarkdownViewer here
// because Plate is heavy on multi-MB docs and the user
// isn't waiting on a specific citation to render.
<div className="h-full overflow-y-auto px-5 py-4">
{largeDocAlert}
<MarkdownViewer content={editorDoc.source_markdown} />
</div>
) : renderInPlateEditor ? (
// Editable doc (FILE/NOTE) OR active citation jump (any
// doc type). The citation path uses Plate's
// FindReplacePlugin for native, decoration-based
// highlighting — see the citation-jump highlight effect
// above for how `editorRef` and `handleEditorReady` are
// wired.
<div className="flex h-full min-h-0 flex-col">
{(citationAlerts || (isLargeDocument && isCitationTarget && !isLocalFileMode)) && (
<div className="shrink-0 px-5 pt-4">
{isLargeDocument && isCitationTarget && largeDocAlert}
{citationAlerts}
</div>
)}
<div className="flex-1 min-h-0 overflow-hidden" onPointerDown={handleEditorPointerDown}>
<PlateEditor
key={`${isLocalFileMode ? (localFilePath ?? "local-file") : documentId}-${isEditing ? "editing" : "viewing"}`}
preset="full"
markdown={editorDoc.source_markdown}
onMarkdownChange={handleMarkdownChange}
readOnly={!isEditing}
placeholder="Start writing..."
editorVariant="default"
allowModeToggle={false}
reserveToolbarSpace
defaultEditing={isEditing}
className="[&_[role=toolbar]]:!bg-sidebar"
onEditorReady={handleEditorReady}
/>
</div>
</div>
) : (
<div className="h-full overflow-y-auto px-5 py-4">
<MarkdownViewer content={editorDoc.source_markdown} />

View file

@ -12,6 +12,12 @@ import { type EditorPreset, presetMap } from "@/components/editor/presets";
import { escapeMdxExpressions } from "@/components/editor/utils/escape-mdx";
import { Editor, EditorContainer } from "@/components/ui/editor";
/** Live editor instance returned by `usePlateEditor`. Exposed via the
* `onEditorReady` prop so callers (e.g. `EditorPanelContent`) can drive
 * plugin options imperatively — most notably setting
* `FindReplacePlugin`'s `search` option for citation-jump highlights. */
export type PlateEditorInstance = ReturnType<typeof usePlateEditor>;
export interface PlateEditorProps {
/** Markdown string to load as initial content */
markdown?: string;
@ -62,6 +68,15 @@ export interface PlateEditorProps {
* without modifying the core editor component.
*/
extraPlugins?: AnyPluginConfig[];
/**
* Called whenever the live editor instance (re)mounts, with `null` on
 * unmount. Used by callers that need to drive plugin options imperatively —
* e.g. `EditorPanelContent` setting `FindReplacePlugin`'s `search`
* option for citation-jump highlights. The callback is invoked exactly
* once per editor lifetime (the parent's `key` prop forces a fresh
* editor when needed, e.g. on edit-mode toggle).
*/
onEditorReady?: (editor: PlateEditorInstance | null) => void;
}
function PlateEditorContent({
@ -100,6 +115,7 @@ export function PlateEditor({
defaultEditing = false,
preset = "full",
extraPlugins = [],
onEditorReady,
}: PlateEditorProps) {
const lastMarkdownRef = useRef(markdown);
const lastHtmlRef = useRef(html);
@ -156,6 +172,21 @@ export function PlateEditor({
: undefined,
});
// Expose the live editor instance to imperative callers (e.g. citation
// jump highlights). We deliberately don't depend on `onEditorReady`
// itself in the cleanup closure — callers commonly pass an arrow that
// closes over a stable ref setter, but if they pass a freshly-bound
// callback per render, the `onEditorReady?.(editor)` re-fires which is
// idempotent for ref-style setters.
const onEditorReadyRef = useRef(onEditorReady);
useEffect(() => {
onEditorReadyRef.current = onEditorReady;
}, [onEditorReady]);
useEffect(() => {
onEditorReadyRef.current?.(editor);
return () => onEditorReadyRef.current?.(null);
}, [editor]);
// Update editor content when html prop changes externally
useEffect(() => {
if (html !== undefined && html !== lastHtmlRef.current) {

View file

@ -1,5 +1,6 @@
"use client";
import { FindReplacePlugin } from "@platejs/find-replace";
import type { AnyPluginConfig } from "platejs";
import { TrailingBlockPlugin } from "platejs";
@ -17,6 +18,30 @@ import { SelectionKit } from "@/components/editor/plugins/selection-kit";
import { SlashCommandKit } from "@/components/editor/plugins/slash-command-kit";
import { TableKit } from "@/components/editor/plugins/table-kit";
import { ToggleKit } from "@/components/editor/plugins/toggle-kit";
import { SearchHighlightLeaf } from "@/components/ui/search-highlight-node";
/**
* Citation-jump highlighter. Re-uses Plate's built-in `FindReplacePlugin`
* (decorate-only, no editing surface) to drive the "scroll-to-cited-text"
* UX in `EditorPanelContent`. We register it in every preset because:
* - Decorate is a no-op when `search` is empty (single getOptions() check
* per block), so cost is effectively zero for non-citation viewers.
* - Keeping it preset-agnostic means citations work whether the doc is
* opened in editable (`full`) or pure-viewer (`readonly`) modes.
*
* The parent component drives `setOption(FindReplacePlugin, 'search', ...)`
* + `editor.api.redecorate()` to trigger highlights, then queries the
* editor DOM for `.citation-highlight-leaf` to scroll the first match
* into view. (We can't use a `data-*` attribute here — Plate's
* `PlateLeaf` runs props through `useNodeAttributes`, which only forwards
* `attributes`, `className`, `ref`, `style`; arbitrary `data-*` props are
* silently dropped.) See `components/ui/search-highlight-node.tsx` for
* the leaf component and `CITATION_HIGHLIGHT_CLASS` constant.
*/
const CitationFindReplacePlugin = FindReplacePlugin.configure({
options: { search: "" },
render: { node: SearchHighlightLeaf },
});
/**
 * Full preset — every plugin kit enabled.
@ -38,6 +63,7 @@ export const fullPreset: AnyPluginConfig[] = [
...AutoformatKit,
...DndKit,
TrailingBlockPlugin,
CitationFindReplacePlugin,
];
/**
@ -52,6 +78,7 @@ export const minimalPreset: AnyPluginConfig[] = [
...LinkKit,
...AutoformatKit,
TrailingBlockPlugin,
CitationFindReplacePlugin,
];
/**
@ -68,6 +95,7 @@ export const readonlyPreset: AnyPluginConfig[] = [
...CalloutKit,
...ToggleKit,
...MathKit,
CitationFindReplacePlugin,
];
/** All available preset names */

View file

@ -1,719 +0,0 @@
"use client";
import { useQuery } from "@tanstack/react-query";
import {
BookOpen,
ChevronDown,
ChevronUp,
ExternalLink,
FileQuestionMark,
FileText,
Hash,
Loader2,
Sparkles,
X,
} from "lucide-react";
import { AnimatePresence, motion, useReducedMotion } from "motion/react";
import { useTranslations } from "next-intl";
import type React from "react";
import { forwardRef, memo, type ReactNode, useCallback, useEffect, useRef, useState } from "react";
import { createPortal } from "react-dom";
import { MarkdownViewer } from "@/components/markdown-viewer";
import { Badge } from "@/components/ui/badge";
import { Button } from "@/components/ui/button";
import { ScrollArea } from "@/components/ui/scroll-area";
import { Spinner } from "@/components/ui/spinner";
import type {
GetDocumentByChunkResponse,
GetSurfsenseDocsByChunkResponse,
} from "@/contracts/types/document.types";
import { documentsApiService } from "@/lib/apis/documents-api.service";
import { cacheKeys } from "@/lib/query-client/cache-keys";
import { cn } from "@/lib/utils";
type DocumentData = GetDocumentByChunkResponse | GetSurfsenseDocsByChunkResponse;
interface SourceDetailPanelProps {
	/** Whether the panel is visible; drives the AnimatePresence portal below. */
	open: boolean;
	/** Open/close callback (backdrop click, Escape key, close button). */
	onOpenChange: (open: boolean) => void;
	/** Id of the cited chunk; used to fetch the document and highlight it. */
	chunkId: number;
	/** Source/document type; also selects direct-render for web-search APIs. */
	sourceType: string;
	/** Fallback panel title when the fetched document has no title. */
	title: string;
	/** Fallback body text for direct-render web-search sources. */
	description?: string;
	/** External source URL; renders "Open Source" / "Open in Browser" buttons. */
	url?: string;
	/** Trigger/content rendered inline alongside the portal. */
	children?: ReactNode;
	/** When true, fetch via the Surfsense-docs chunk endpoint instead. */
	isDocsChunk?: boolean;
}
/**
 * Turn an UPPER_SNAKE document-type identifier (e.g. "LINEAR_CONNECTOR")
 * into a display label ("Linear Connector"). The first character of each
 * word is kept as-is; only the remainder is lowercased. Returns "" for
 * empty input.
 */
const formatDocumentType = (type: string) => {
	if (!type) return "";
	const pretty: string[] = [];
	for (const word of type.split("_")) {
		pretty.push(word.charAt(0) + word.slice(1).toLowerCase());
	}
	return pretty.join(" ");
};
// Chunk card component
// For large documents (>30 chunks), we disable animation to prevent layout shifts
// which break auto-scroll functionality
interface ChunkCardProps {
	/** Chunk id plus its markdown content. */
	chunk: { id: number; content: string };
	/** Index within the currently loaded chunk window (`data-chunk-index`). */
	localIndex: number;
	/** Absolute 1-based chunk number within the whole document. */
	chunkNumber: number;
	/** Total chunk count, for the "Chunk X of Y" label. */
	totalChunks: number;
	/** True when this is the cited chunk (highlight styling + badge). */
	isCited: boolean;
	// NOTE(review): the two props below are declared but not destructured by
	// ChunkCard's render, so they currently have no effect — confirm intent.
	isActive: boolean;
	disableLayoutAnimation?: boolean;
}
// Renders one chunk: a numbered header row, an optional "Cited Source"
// badge, and the chunk's markdown body. Memoized so paging in additional
// chunks does not re-render existing cards.
// NOTE(review): `isActive` and `disableLayoutAnimation` exist on
// ChunkCardProps but are not destructured here, so they never affect output.
const ChunkCard = memo(
	forwardRef<HTMLDivElement, ChunkCardProps>(
		({ chunk, localIndex, chunkNumber, totalChunks, isCited }, ref) => {
			// `data-chunk-index` below is the hook the panel's scroll logic
			// queries to find and center a chunk.
			return (
				<div
					ref={ref}
					data-chunk-index={localIndex}
					className={cn(
						"group relative rounded-2xl border-2 transition-all duration-300",
						isCited
							? "bg-linear-to-br from-primary/5 via-primary/10 to-primary/5 border-primary shadow-lg shadow-primary/10"
							: "bg-card border-border/50 hover:border-border hover:shadow-md"
					)}
				>
					{isCited && <div className="absolute inset-0 rounded-2xl bg-primary/5 blur-xl -z-10" />}
					<div className="flex items-center justify-between px-5 py-4 border-b border-border/50">
						<div className="flex items-center gap-3">
							<div
								className={cn(
									"flex items-center justify-center w-8 h-8 rounded-full text-sm font-semibold transition-colors",
									isCited
										? "bg-primary text-primary-foreground"
										: "bg-muted text-muted-foreground group-hover:bg-muted/80"
								)}
							>
								{chunkNumber}
							</div>
							<span className="text-sm text-muted-foreground">
								Chunk {chunkNumber} of {totalChunks}
							</span>
						</div>
						{isCited && (
							<Badge variant="default" className="gap-1.5 px-3 py-1">
								<Sparkles className="h-3 w-3" />
								Cited Source
							</Badge>
						)}
					</div>
					<div className="p-5 overflow-hidden">
						<MarkdownViewer content={chunk.content} maxLength={100_000} />
					</div>
				</div>
			);
		}
	)
);
ChunkCard.displayName = "ChunkCard";
/**
 * Full-screen modal panel showing the source document behind a cited chunk.
 *
 * Fetches the document around `chunkId` (window of 5 chunks each side via
 * `getDocumentByChunk`, or the Surfsense-docs endpoint when `isDocsChunk`),
 * lets the user page in earlier/later chunks in batches of 10, and
 * auto-scrolls to the cited chunk with several delayed corrective scrolls
 * while content renders. Rendered through a portal into `document.body`;
 * `children` are rendered inline as the trigger/content.
 */
export function SourceDetailPanel({
	open,
	onOpenChange,
	chunkId,
	sourceType,
	title,
	description,
	url,
	children,
	isDocsChunk = false,
}: SourceDetailPanelProps) {
	const t = useTranslations("dashboard");
	const scrollAreaRef = useRef<HTMLDivElement>(null);
	const hasScrolledRef = useRef(false); // Use ref to avoid stale closures
	// Pending auto-scroll timers; cleared on close/unmount.
	const scrollTimersRef = useRef<ReturnType<typeof setTimeout>[]>([]);
	const [activeChunkIndex, setActiveChunkIndex] = useState<number | null>(null);
	const [mounted, setMounted] = useState(false);
	const shouldReduceMotion = useReducedMotion();
	// Portal target only exists client-side; wait for mount before portaling.
	useEffect(() => {
		setMounted(true);
	}, []);
	// Fetch the document that contains `chunkId`. Only runs while the panel
	// is open; results are cached for 5 minutes.
	const {
		data: documentData,
		isLoading: isDocumentByChunkFetching,
		error: documentByChunkFetchingError,
	} = useQuery<DocumentData>({
		queryKey: isDocsChunk
			? cacheKeys.documents.byChunk(`doc-${chunkId}`)
			: cacheKeys.documents.byChunk(chunkId.toString()),
		queryFn: async () => {
			if (isDocsChunk) {
				return documentsApiService.getSurfsenseDocByChunk(chunkId);
			}
			return documentsApiService.getDocumentByChunk({ chunk_id: chunkId, chunk_window: 5 });
		},
		enabled: !!chunkId && open,
		staleTime: 5 * 60 * 1000,
	});
	// Total chunk count in the whole document (falls back to loaded count).
	const totalChunks =
		documentData && "total_chunks" in documentData
			? (documentData.total_chunks ?? documentData.chunks.length)
			: (documentData?.chunks?.length ?? 0);
	// Chunks paged in before/after the initially fetched window.
	const [beforeChunks, setBeforeChunks] = useState<
		Array<{ id: number; content: string; created_at: string }>
	>([]);
	const [afterChunks, setAfterChunks] = useState<
		Array<{ id: number; content: string; created_at: string }>
	>([]);
	const [loadingBefore, setLoadingBefore] = useState(false);
	const [loadingAfter, setLoadingAfter] = useState(false);
	// Drop paged-in chunks when the target chunk changes or the panel toggles.
	useEffect(() => {
		setBeforeChunks([]);
		setAfterChunks([]);
	}, [chunkId, open]);
	// Absolute index of the first initially-fetched chunk in the document.
	const chunkStartIndex =
		documentData && "chunk_start_index" in documentData ? (documentData.chunk_start_index ?? 0) : 0;
	const initialChunks = documentData?.chunks ?? [];
	const allChunks = [...beforeChunks, ...initialChunks, ...afterChunks];
	// Absolute [start, end) range of chunks currently displayed.
	const absoluteStart = chunkStartIndex - beforeChunks.length;
	const absoluteEnd = chunkStartIndex + initialChunks.length + afterChunks.length;
	const canLoadBefore = absoluteStart > 0;
	const canLoadAfter = absoluteEnd < totalChunks;
	// How many chunks each "load more" click pages in.
	const EXPAND_SIZE = 10;
	// Page in up to EXPAND_SIZE chunks before the current window, skipping
	// any ids already displayed. Best-effort: failures are logged only.
	const loadBefore = useCallback(async () => {
		if (!documentData || !("search_space_id" in documentData) || !canLoadBefore) return;
		setLoadingBefore(true);
		try {
			const count = Math.min(EXPAND_SIZE, absoluteStart);
			const result = await documentsApiService.getDocumentChunks({
				document_id: documentData.id,
				page: 0,
				page_size: count,
				start_offset: absoluteStart - count,
			});
			const existingIds = new Set(allChunks.map((c) => c.id));
			const newChunks = result.items
				.filter((c) => !existingIds.has(c.id))
				.map((c) => ({ id: c.id, content: c.content, created_at: c.created_at }));
			setBeforeChunks((prev) => [...newChunks, ...prev]);
		} catch (err) {
			console.error("Failed to load earlier chunks:", err);
		} finally {
			setLoadingBefore(false);
		}
	}, [documentData, absoluteStart, canLoadBefore, allChunks]);
	// Page in up to EXPAND_SIZE chunks after the current window (same
	// dedupe/error policy as loadBefore).
	const loadAfter = useCallback(async () => {
		if (!documentData || !("search_space_id" in documentData) || !canLoadAfter) return;
		setLoadingAfter(true);
		try {
			const result = await documentsApiService.getDocumentChunks({
				document_id: documentData.id,
				page: 0,
				page_size: EXPAND_SIZE,
				start_offset: absoluteEnd,
			});
			const existingIds = new Set(allChunks.map((c) => c.id));
			const newChunks = result.items
				.filter((c) => !existingIds.has(c.id))
				.map((c) => ({ id: c.id, content: c.content, created_at: c.created_at }));
			setAfterChunks((prev) => [...prev, ...newChunks]);
		} catch (err) {
			console.error("Failed to load later chunks:", err);
		} finally {
			setLoadingAfter(false);
		}
	}, [documentData, absoluteEnd, canLoadAfter, allChunks]);
	// Web-search providers have no stored document; render title/description
	// directly instead of fetching chunks.
	const isDirectRenderSource =
		sourceType === "TAVILY_API" ||
		sourceType === "LINKUP_API" ||
		sourceType === "SEARXNG_API" ||
		sourceType === "BAIDU_SEARCH_API";
	// Index of the cited chunk within `allChunks` (-1 while not loaded).
	const citedChunkIndex = allChunks.findIndex((chunk) => chunk.id === chunkId);
	// Simple scroll function that scrolls to a chunk by index
	const scrollToChunkByIndex = useCallback(
		(chunkIndex: number, smooth = true) => {
			const scrollContainer = scrollAreaRef.current;
			if (!scrollContainer) return;
			const viewport = scrollContainer.querySelector(
				"[data-radix-scroll-area-viewport]"
			) as HTMLElement | null;
			if (!viewport) return;
			const chunkElement = scrollContainer.querySelector(
				`[data-chunk-index="${chunkIndex}"]`
			) as HTMLElement | null;
			if (!chunkElement) return;
			// Get positions using getBoundingClientRect for accuracy
			const viewportRect = viewport.getBoundingClientRect();
			const chunkRect = chunkElement.getBoundingClientRect();
			// Calculate where to scroll to center the chunk
			const currentScrollTop = viewport.scrollTop;
			const chunkTopRelativeToViewport = chunkRect.top - viewportRect.top + currentScrollTop;
			const scrollTarget =
				chunkTopRelativeToViewport - viewportRect.height / 2 + chunkRect.height / 2;
			viewport.scrollTo({
				top: Math.max(0, scrollTarget),
				behavior: smooth && !shouldReduceMotion ? "smooth" : "auto",
			});
			setActiveChunkIndex(chunkIndex);
		},
		[shouldReduceMotion]
	);
	// Callback ref for the cited chunk - scrolls when the element mounts
	const citedChunkRefCallback = useCallback(
		(node: HTMLDivElement | null) => {
			if (node && !hasScrolledRef.current && open) {
				hasScrolledRef.current = true; // Mark immediately to prevent duplicate scrolls
				// Store the node reference for the delayed scroll
				const scrollToCitedChunk = () => {
					const scrollContainer = scrollAreaRef.current;
					if (!scrollContainer || !node.isConnected) return false;
					const viewport = scrollContainer.querySelector(
						"[data-radix-scroll-area-viewport]"
					) as HTMLElement | null;
					if (!viewport) return false;
					// Get positions
					const viewportRect = viewport.getBoundingClientRect();
					const chunkRect = node.getBoundingClientRect();
					// Calculate scroll position to center the chunk
					const currentScrollTop = viewport.scrollTop;
					const chunkTopRelativeToViewport = chunkRect.top - viewportRect.top + currentScrollTop;
					const scrollTarget =
						chunkTopRelativeToViewport - viewportRect.height / 2 + chunkRect.height / 2;
					viewport.scrollTo({
						top: Math.max(0, scrollTarget),
						behavior: "auto", // Instant scroll for initial positioning
					});
					return true;
				};
				// Scroll multiple times with delays to handle progressive content rendering
				// Each subsequent scroll will correct for any layout shifts
				const scrollAttempts = [50, 150, 300, 600, 1000];
				scrollAttempts.forEach((delay) => {
					scrollTimersRef.current.push(
						setTimeout(() => {
							scrollToCitedChunk();
						}, delay)
					);
				});
				// After final attempt, mark the cited chunk as active
				scrollTimersRef.current.push(
					setTimeout(
						() => {
							setActiveChunkIndex(citedChunkIndex);
						},
						scrollAttempts[scrollAttempts.length - 1] + 50
					)
				);
			}
		},
		[open, citedChunkIndex]
	);
	// Reset scroll state when panel closes
	useEffect(() => {
		if (!open) {
			scrollTimersRef.current.forEach(clearTimeout);
			scrollTimersRef.current = [];
			hasScrolledRef.current = false;
			setActiveChunkIndex(null);
		}
		return () => {
			scrollTimersRef.current.forEach(clearTimeout);
			scrollTimersRef.current = [];
		};
	}, [open]);
	// Handle escape key
	useEffect(() => {
		const handleEscape = (e: KeyboardEvent) => {
			if (e.key === "Escape" && open) {
				onOpenChange(false);
			}
		};
		window.addEventListener("keydown", handleEscape);
		return () => window.removeEventListener("keydown", handleEscape);
	}, [open, onOpenChange]);
	// Prevent body scroll when open
	useEffect(() => {
		if (open) {
			document.body.style.overflow = "hidden";
		} else {
			document.body.style.overflow = "";
		}
		return () => {
			document.body.style.overflow = "";
		};
	}, [open]);
	// Open the source URL in a new tab without triggering parent handlers.
	const handleUrlClick = (e: React.MouseEvent, clickUrl: string) => {
		e.preventDefault();
		e.stopPropagation();
		window.open(clickUrl, "_blank", "noopener,noreferrer");
	};
	const scrollToChunk = useCallback(
		(index: number) => {
			scrollToChunkByIndex(index, true);
		},
		[scrollToChunkByIndex]
	);
	const panelContent = (
		<AnimatePresence mode="wait">
			{open && (
				<>
					{/* Backdrop */}
					<motion.div
						key="backdrop"
						initial={{ opacity: 0 }}
						animate={{ opacity: 1 }}
						exit={{ opacity: 0 }}
						transition={{ duration: 0.2 }}
						className="fixed inset-0 z-50 bg-black/60 backdrop-blur-sm"
						onClick={() => onOpenChange(false)}
					/>
					{/* Panel */}
					<motion.div
						key="panel"
						initial={shouldReduceMotion ? { opacity: 0 } : { opacity: 0, scale: 0.95, y: 20 }}
						animate={{ opacity: 1, scale: 1, y: 0 }}
						exit={shouldReduceMotion ? { opacity: 0 } : { opacity: 0, scale: 0.95, y: 20 }}
						transition={{
							type: "spring",
							damping: 30,
							stiffness: 300,
						}}
						className="fixed inset-3 sm:inset-6 md:inset-10 lg:inset-16 z-50 flex flex-col bg-background rounded-3xl shadow-2xl border overflow-hidden"
					>
						{/* Header */}
						<motion.div
							initial={{ opacity: 0, y: -10 }}
							animate={{ opacity: 1, y: 0 }}
							transition={{ delay: 0.1 }}
							className="flex items-center justify-between px-6 py-5 border-b bg-linear-to-r from-muted/50 to-muted/30"
						>
							<div className="min-w-0 flex-1">
								<h2 className="text-xl font-semibold truncate">
									{documentData?.title || title || "Source Document"}
								</h2>
								<p className="text-sm text-muted-foreground mt-0.5">
									{documentData && "document_type" in documentData
										? formatDocumentType(documentData.document_type)
										: sourceType && formatDocumentType(sourceType)}
									{totalChunks > 0 && (
										<span className="ml-2">
											{totalChunks} chunk{totalChunks !== 1 ? "s" : ""}
											{allChunks.length < totalChunks && ` (showing ${allChunks.length})`}
										</span>
									)}
								</p>
							</div>
							<div className="flex items-center gap-3 shrink-0">
								{url && (
									<Button
										size="sm"
										variant="outline"
										onClick={(e) => handleUrlClick(e, url)}
										className="hidden sm:flex gap-2 rounded-xl"
									>
										<ExternalLink className="h-4 w-4" />
										Open Source
									</Button>
								)}
								<Button
									size="icon"
									variant="ghost"
									onClick={() => onOpenChange(false)}
									className="h-8 w-8 rounded-full"
								>
									<X className="h-4 w-4" />
									<span className="sr-only">Close</span>
								</Button>
							</div>
						</motion.div>
						{/* Loading State */}
						{!isDirectRenderSource && isDocumentByChunkFetching && (
							<div className="flex-1 flex items-center justify-center">
								<motion.div
									initial={{ opacity: 0, scale: 0.9 }}
									animate={{ opacity: 1, scale: 1 }}
									className="flex flex-col items-center gap-4"
								>
									<Spinner size="lg" />
									<p className="text-sm text-muted-foreground font-medium">
										{t("loading_document")}
									</p>
								</motion.div>
							</div>
						)}
						{/* Error State */}
						{!isDirectRenderSource && documentByChunkFetchingError && (
							<div className="flex-1 flex items-center justify-center">
								<motion.div
									initial={{ opacity: 0, scale: 0.9 }}
									animate={{ opacity: 1, scale: 1 }}
									className="flex flex-col items-center gap-4 text-center px-6"
								>
									<div className="w-20 h-20 rounded-full bg-muted/50 flex items-center justify-center">
										<FileQuestionMark className="h-10 w-10 text-muted-foreground" />
									</div>
									<div>
										<p className="font-semibold text-foreground text-lg">Document unavailable</p>
										<p className="text-sm text-muted-foreground mt-2 max-w-md">
											{documentByChunkFetchingError.message ||
												"An unexpected error occurred. Please try again."}
										</p>
									</div>
									<Button variant="outline" onClick={() => onOpenChange(false)} className="mt-2">
										Close Panel
									</Button>
								</motion.div>
							</div>
						)}
						{/* Direct render for web search providers */}
						{isDirectRenderSource && (
							<ScrollArea className="flex-1">
								<div className="p-6 max-w-3xl mx-auto">
									{url && (
										<Button
											size="default"
											variant="outline"
											onClick={(e) => handleUrlClick(e, url)}
											className="w-full mb-6 sm:hidden rounded-xl"
										>
											<ExternalLink className="mr-2 h-4 w-4" />
											Open in Browser
										</Button>
									)}
									<motion.div
										initial={{ opacity: 0, y: 10 }}
										animate={{ opacity: 1, y: 0 }}
										className="p-6 bg-muted/50 rounded-2xl border"
									>
										<h3 className="text-base font-semibold mb-4 flex items-center gap-2">
											<BookOpen className="h-4 w-4" />
											Source Information
										</h3>
										<div className="text-sm text-muted-foreground mb-3 font-medium">
											{title || "Untitled"}
										</div>
										<div className="text-sm text-foreground leading-relaxed">
											{description || "No content available"}
										</div>
									</motion.div>
								</div>
							</ScrollArea>
						)}
						{/* API-fetched document content */}
						{!isDirectRenderSource && documentData && (
							<div className="flex-1 flex overflow-hidden">
								{/* Chunk Navigation Sidebar */}
								{allChunks.length > 1 && (
									<motion.div
										initial={{ opacity: 0, x: -20 }}
										animate={{ opacity: 1, x: 0 }}
										transition={{ delay: 0.2 }}
										className="hidden lg:flex flex-col w-16 border-r bg-muted/10 overflow-hidden"
									>
										<ScrollArea className="flex-1 h-full">
											<div className="p-2 pt-3 flex flex-col gap-1.5">
												{allChunks.map((chunk, idx) => {
													const absNum = absoluteStart + idx + 1;
													const isCited = chunk.id === chunkId;
													const isActive = activeChunkIndex === idx;
													return (
														<motion.button
															key={chunk.id}
															type="button"
															onClick={() => scrollToChunk(idx)}
															initial={{ opacity: 0, scale: 0.8 }}
															animate={{ opacity: 1, scale: 1 }}
															transition={{ delay: Math.min(idx * 0.02, 0.2) }}
															className={cn(
																"relative w-11 h-9 mx-auto rounded-lg text-xs font-semibold transition-all duration-200 flex items-center justify-center",
																isCited
																	? "bg-primary text-primary-foreground shadow-md"
																	: isActive
																		? "bg-muted text-foreground"
																		: "bg-muted/50 text-muted-foreground hover:bg-muted hover:text-foreground"
															)}
															title={isCited ? `Chunk ${absNum} (Cited)` : `Chunk ${absNum}`}
														>
															{absNum}
															{isCited && (
																<span className="absolute -top-1.5 -right-1.5 flex items-center justify-center w-4 h-4 bg-primary rounded-full border-2 border-background shadow-sm">
																	<Sparkles className="h-2.5 w-2.5 text-primary-foreground" />
																</span>
															)}
														</motion.button>
													);
												})}
											</div>
										</ScrollArea>
									</motion.div>
								)}
								{/* Main Content */}
								<ScrollArea className="flex-1" ref={scrollAreaRef}>
									<div className="p-6 lg:p-8 max-w-4xl mx-auto space-y-6">
										{/* Document Metadata */}
										{"document_metadata" in documentData &&
											documentData.document_metadata &&
											Object.keys(documentData.document_metadata).length > 0 && (
												<motion.div
													initial={{ opacity: 0, y: 10 }}
													animate={{ opacity: 1, y: 0 }}
													transition={{ delay: 0.1 }}
													className="p-5 bg-muted/30 rounded-2xl border"
												>
													<h3 className="text-sm font-semibold mb-4 text-muted-foreground uppercase tracking-wider flex items-center gap-2">
														<FileText className="h-4 w-4" />
														Document Information
													</h3>
													<dl className="grid grid-cols-1 sm:grid-cols-2 gap-4 text-sm">
														{Object.entries(documentData.document_metadata).map(([key, value]) => (
															<div key={key} className="space-y-1">
																<dt className="font-medium text-muted-foreground capitalize text-xs">
																	{key.replace(/_/g, " ")}
																</dt>
																<dd className="text-foreground wrap-break-word">{String(value)}</dd>
															</div>
														))}
													</dl>
												</motion.div>
											)}
										{/* Chunks Header */}
										<div className="flex items-center justify-between pt-2">
											<h3 className="text-sm font-semibold text-muted-foreground uppercase tracking-wider flex items-center gap-2">
												<Hash className="h-4 w-4" />
												Chunks {absoluteStart + 1}{absoluteEnd} of {totalChunks}
											</h3>
											{citedChunkIndex !== -1 && (
												<Button
													variant="ghost"
													size="sm"
													onClick={() => scrollToChunk(citedChunkIndex)}
													className="gap-2 text-primary hover:text-primary"
												>
													<Sparkles className="h-3.5 w-3.5" />
													Jump to cited
												</Button>
											)}
										</div>
										{/* Load Earlier */}
										{canLoadBefore && (
											<div className="flex items-center justify-center">
												<Button
													variant="outline"
													size="sm"
													onClick={loadBefore}
													disabled={loadingBefore}
													className="gap-2"
												>
													{loadingBefore ? (
														<Loader2 className="h-3.5 w-3.5 animate-spin" />
													) : (
														<ChevronUp className="h-3.5 w-3.5" />
													)}
													{loadingBefore
														? "Loading..."
														: `Load ${Math.min(EXPAND_SIZE, absoluteStart)} earlier chunks`}
												</Button>
											</div>
										)}
										{/* Chunks */}
										<div className="space-y-4">
											{allChunks.map((chunk, idx) => {
												const isCited = chunk.id === chunkId;
												const chunkNumber = absoluteStart + idx + 1;
												return (
													<ChunkCard
														key={chunk.id}
														ref={isCited ? citedChunkRefCallback : undefined}
														chunk={chunk}
														localIndex={idx}
														chunkNumber={chunkNumber}
														totalChunks={totalChunks}
														isCited={isCited}
														isActive={activeChunkIndex === idx}
														disableLayoutAnimation={allChunks.length > 30}
													/>
												);
											})}
										</div>
										{/* Load Later */}
										{canLoadAfter && (
											<div className="flex items-center justify-center py-3">
												<Button
													variant="outline"
													size="sm"
													onClick={loadAfter}
													disabled={loadingAfter}
													className="gap-2"
												>
													{loadingAfter ? (
														<Loader2 className="h-3.5 w-3.5 animate-spin" />
													) : (
														<ChevronDown className="h-3.5 w-3.5" />
													)}
													{loadingAfter
														? "Loading..."
														: `Load ${Math.min(EXPAND_SIZE, totalChunks - absoluteEnd)} later chunks`}
												</Button>
											</div>
										)}
									</div>
								</ScrollArea>
							</div>
						)}
					</motion.div>
				</>
			)}
		</AnimatePresence>
	);
	// Before hydration there is no document.body to portal into.
	if (!mounted) return <>{children}</>;
	return (
		<>
			{children}
			{createPortal(panelContent, globalThis.document.body)}
		</>
	);
}

View file

@ -67,9 +67,6 @@ const DesktopShortcutsContent = dynamic(
import(
"@/app/dashboard/[search_space_id]/user-settings/components/DesktopShortcutsContent"
).then((m) => ({ default: m.DesktopShortcutsContent })),
import(
"@/app/dashboard/[search_space_id]/user-settings/components/DesktopShortcutsContent"
).then((m) => ({ default: m.DesktopShortcutsContent })),
{ ssr: false }
);
const MemoryContent = dynamic(

View file

@ -0,0 +1,45 @@
"use client";
import type { PlateLeafProps } from "platejs/react";
import { PlateLeaf } from "platejs/react";
/**
* Stable class name used to identify Plate-rendered citation highlight
* leaves in the DOM. We can't use a `data-*` attribute here — Plate's
* `PlateLeaf` runs its props through `useNodeAttributes`, which only
* forwards `attributes`, `className`, `ref`, and `style` to the rendered
* element; arbitrary `data-*` props are silently dropped (verified
* against `@platejs/core/dist/react/index.js` v52). So `className` is
* the only escape hatch that's guaranteed to survive into the DOM.
*/
export const CITATION_HIGHLIGHT_CLASS = "citation-highlight-leaf";
/**
* Leaf rendered for ranges decorated by `@platejs/find-replace`'s
* `FindReplacePlugin`. We re-purpose that plugin to drive the citation-jump
* highlight: when a citation is staged, the parent sets the plugin's `search`
* option to a snippet of the chunk text and Plate decorates every match with
* `searchHighlight: true`. This component renders those decorations as a
* `<mark>` tagged with `CITATION_HIGHLIGHT_CLASS` so the parent can:
* 1. Query the first match in DOM order to scroll it into view.
* 2. Detect the active-highlight state without a separate React ref.
*
* The highlight is **persistent** it does not auto-fade. The parent in
* `EditorPanelContent` clears it by setting the plugin's `search` option
* back to "" when one of: (a) the user clicks anywhere inside the editor,
* (b) the panel switches to a different document, (c) the user toggles
* into edit mode, (d) another citation jump is staged, (e) the panel
* unmounts. We use a brief entrance pulse (`citation-flash-in`, see
* `globals.css`) purely to draw the eye after `scrollIntoView` lands.
*/
export function SearchHighlightLeaf(props: PlateLeafProps) {
return (
<PlateLeaf
{...props}
as="mark"
className={`${CITATION_HIGHLIGHT_CLASS} bg-primary/15 ring-1 ring-primary/40 rounded-sm px-0.5 text-inherit animate-[citation-flash-in_400ms_ease-out]`}
>
{props.children}
</PlateLeaf>
);
}

View file

@ -0,0 +1,125 @@
/**
* Snippet generation for the citation-jump highlight, driven by Plate's
* `FindReplacePlugin`. The plugin runs `decorate` per-block and only matches
* within blocks whose children are all `Text` nodes (so it crosses inline
* marks like bold/italic but **not** block boundaries, and a block that
* contains even one inline element such as a link is silently skipped).
* That means a full chunk that spans heading + paragraph won't match as a
* single string we have to pick a shorter snippet that fits inside one
* rendered block.
*
* `buildCitationSearchCandidates` returns search strings ordered from
* "most-specific anchor" to "broadest fallback":
* 1. First sentence of the chunk (capped at `FIRST_SENTENCE_MAX`).
* 2. First `FIRST_PHRASE_WORDS` words.
 * 3. Each non-trivial line of the chunk, in source order — gives us a
* separate attempt for each rendered block, so a heading line with
* an inline link doesn't doom the whole jump.
* 4. Full chunk (only if it's already short enough to plausibly fit
* inside one block).
*
 * The caller tries each candidate in turn — set the plugin's `search`
* option, `editor.api.redecorate()`, then check the editor DOM for a
* `.citation-highlight-leaf` element. First candidate that produces one
* wins; subsequent candidates are skipped.
*/
const FIRST_SENTENCE_MAX = 120;
const FIRST_PHRASE_WORDS = 8;
const MIN_SNIPPET_LENGTH = 6;
const FULL_CHUNK_MAX = FIRST_SENTENCE_MAX * 2;
const MAX_LINE_CANDIDATES = 6;
const LINE_CANDIDATE_MAX = FIRST_SENTENCE_MAX;
/** Collapse every whitespace run to a single space and trim both ends. */
function normalizeWhitespace(input: string): string {
	const collapsed = input.replace(/\s+/g, " ");
	return collapsed.trim();
}
/**
 * Strip the markdown syntax that won't survive into the rendered editor's
 * plain text, so chunk text (raw source markdown from the indexer) can be
 * matched against the literal text values stored in Plate's Slate tree.
 *
 * Order matters: multi-char and "container" syntax is handled before
 * single-char emphasis, otherwise `**text**` collapses to `*text*` first.
 *
 * Heuristic only — this is not a full markdown parser; it just removes the
 * common markers (`**bold**`, `[text](url)`, `# headings`, `- list`, etc.)
 * that show up in connector-doc chunks and would break literal substring
 * search.
 */
export function stripMarkdownForMatch(input: string): string {
	// Fenced code blocks first: the function replacement re-inserts the
	// captured body verbatim.
	let text = input.replace(/```[a-z0-9_+-]*\n?([\s\S]*?)```/gi, (_, body: string) => body);
	// Remaining passes, applied strictly in order (see doc comment above).
	const passes: ReadonlyArray<readonly [RegExp, string]> = [
		[/<!--[\s\S]*?-->/g, " "],
		[/!\[([^\]]*)\]\([^)]*\)/g, "$1"],
		[/!\[([^\]]*)\]\[[^\]]*\]/g, "$1"],
		[/\[([^\]]+)\]\([^)]*\)/g, "$1"],
		[/\[([^\]]+)\]\[[^\]]*\]/g, "$1"],
		[/<((?:https?|mailto):[^>\s]+)>/g, "$1"],
		[/`+([^`\n]+?)`+/g, "$1"],
		[/(\*\*|__)([\s\S]+?)\1/g, "$2"],
		[/(?<!\w)([*_])([^*_\n]+?)\1(?!\w)/g, "$2"],
		[/~~([^~]+)~~/g, "$1"],
		[/^[ \t]{0,3}#{1,6}[ \t]+/gm, ""],
		[/^[ \t]{0,3}(?:=+|-+)[ \t]*$/gm, ""],
		[/^[ \t]{0,3}>+[ \t]?/gm, ""],
		[/^[ \t]*[-*+][ \t]+/gm, ""],
		[/^[ \t]*\d+\.[ \t]+/gm, ""],
		[/^[ \t]{0,3}(?:[-*_])(?:[ \t]*[-*_]){2,}[ \t]*$/gm, ""],
		[/^[ \t]*\|?(?:[ \t]*:?-+:?[ \t]*\|)+[ \t]*:?-+:?[ \t]*\|?[ \t]*$/gm, ""],
		[/\\([\\`*_{}[\]()#+\-.!~>])/g, "$1"],
	];
	for (const [pattern, replacement] of passes) {
		text = text.replace(pattern, replacement);
	}
	return text;
}
/**
 * Build an ordered, de-duplicated list of short search strings for locating a
 * citation's chunk text inside the rendered editor. Candidates are tried in
 * order: first sentence (or capped prefix), first-words phrase, up to
 * MAX_LINE_CANDIDATES per-line snippets, and finally the whole chunk when it
 * is short enough.
 */
export function buildCitationSearchCandidates(rawText: string): string[] {
  if (!rawText) return [];

  const stripped = stripMarkdownForMatch(rawText);
  const normalized = normalizeWhitespace(stripped);
  if (normalized.length < MIN_SNIPPET_LENGTH) return [];

  const candidates: string[] = [];
  const seen = new Set<string>();
  const addCandidate = (raw: string): void => {
    const candidate = normalizeWhitespace(raw);
    if (candidate.length < MIN_SNIPPET_LENGTH || seen.has(candidate)) return;
    seen.add(candidate);
    candidates.push(candidate);
  };

  // Leading sentence, capped; when no sentence terminator exists, fall back
  // to a flat character prefix (only worthwhile if the text overflows the cap).
  const sentenceHit = normalized.match(/^[^.!?]+[.!?]/);
  if (sentenceHit) {
    addCandidate(sentenceHit[0].slice(0, FIRST_SENTENCE_MAX));
  } else if (normalized.length > FIRST_SENTENCE_MAX) {
    addCandidate(normalized.slice(0, FIRST_SENTENCE_MAX));
  }

  // Short opening phrase — only meaningful when it truncates the text.
  const words = normalized.split(" ").filter(Boolean);
  if (words.length > FIRST_PHRASE_WORDS) {
    addCandidate(words.slice(0, FIRST_PHRASE_WORDS).join(" "));
  }

  // Per-line candidates: each chunk line roughly maps to one rendered block,
  // so each gets its own decorate attempt. That matters when the first line
  // is e.g. a heading containing a link — Plate's `FindReplacePlugin` skips
  // blocks whose children are not all text nodes.
  let linesTaken = 0;
  for (const rawLine of stripped.split(/\r?\n/)) {
    if (linesTaken >= MAX_LINE_CANDIDATES) break;
    const line = normalizeWhitespace(rawLine);
    if (line.length < MIN_SNIPPET_LENGTH) continue;
    addCandidate(line.slice(0, LINE_CANDIDATE_MAX));
    linesTaken++;
  }

  // Small chunks are also tried verbatim as a last resort.
  if (normalized.length <= FULL_CHUNK_MAX) {
    addCandidate(normalized);
  }

  return candidates;
}

View file

@ -36,6 +36,7 @@
"@platejs/code-block": "^52.0.11",
"@platejs/combobox": "^52.0.15",
"@platejs/dnd": "^52.0.11",
"@platejs/find-replace": "^52.3.10",
"@platejs/floating": "^52.0.11",
"@platejs/indent": "^52.0.11",
"@platejs/link": "^52.0.11",

View file

@ -53,6 +53,9 @@ importers:
'@platejs/dnd':
specifier: ^52.0.11
version: 52.0.11(platejs@52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4)))(react-dnd-html5-backend@16.0.1)(react-dnd@16.0.1(@types/node@20.19.33)(@types/react@19.2.14)(react@19.2.4))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)
'@platejs/find-replace':
specifier: ^52.3.10
version: 52.3.10(platejs@52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4)))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)
'@platejs/floating':
specifier: ^52.0.11
version: 52.0.11(platejs@52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4)))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)
@ -2827,6 +2830,13 @@ packages:
react-dnd-html5-backend: '>=14.0.0'
react-dom: '>=18.0.0'
'@platejs/find-replace@52.3.10':
resolution: {integrity: sha512-V/MOMMUYxHfEn/skd2+YO213xSATFDVsl8FzVzVRV/XaxwwVefH2EPD1lAVIvmYjennTVTTsHHtEI9K9iOsEaA==}
peerDependencies:
platejs: '>=52.0.11'
react: '>=18.0.0'
react-dom: '>=18.0.0'
'@platejs/floating@52.0.11':
resolution: {integrity: sha512-ApNpw4KWml+kuK+XTTpji+f/7GxTR4nRzlnfJMvGBrJpLPQ4elS5MABm3oUi81DZn+aub5HvsyH7UqCw7F76IA==}
peerDependencies:
@ -11105,6 +11115,13 @@ snapshots:
react-dnd-html5-backend: 16.0.1
react-dom: 19.2.4(react@19.2.4)
'@platejs/find-replace@52.3.10(platejs@52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4)))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)':
dependencies:
platejs: 52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4))
react: 19.2.4
react-compiler-runtime: 1.0.0(react@19.2.4)
react-dom: 19.2.4(react@19.2.4)
'@platejs/floating@52.0.11(platejs@52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4)))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)':
dependencies:
'@floating-ui/core': 1.7.4