feat: various UI fixes, prompt optimizations, and allowing duplicate docs

- Updated `content_hash` in the `Document` model to remove global uniqueness, allowing identical content across different paths.
- Enhanced `_create_document` function to handle path uniqueness and prevent session-poisoning from `IntegrityError`.
- Added detailed comments for clarity on the changes and their implications.
- Introduced new citation handling in the editor for improved user experience with citation jumps.
- Updated package dependencies in the frontend for better functionality.
2026-06-02 19:55:18 +02:00 · 2026-04-28 21:30:53 -07:00 · 2026-04-28 21:30:53 -07:00 · b9a66cb417
commit b9a66cb417
parent e6433f78c4
26 changed files with 1540 additions and 852 deletions
--- a/surfsense_backend/alembic/versions/133_drop_documents_content_hash_unique.py
+++ b/surfsense_backend/alembic/versions/133_drop_documents_content_hash_unique.py
@ -0,0 +1,107 @@
 """133_drop_documents_content_hash_unique
 Revision ID: 133
 Revises: 132
 Create Date: 2026-04-29
 Drop the global UNIQUE constraint on ``documents.content_hash`` so the
 new-chat agent's ``write_file`` flow can persist legitimate file copies
 (two paths, identical content) without hitting a constraint that mirrors
 no real filesystem semantic.
 Path uniqueness still lives on ``documents.unique_identifier_hash`` (per
 search space), which is the right invariant — exactly like an inode at a
 given path on a POSIX filesystem.
 The non-unique INDEX on ``content_hash`` is preserved so connector
 indexers' "have we seen this content before?" lookup
 (:func:`app.tasks.document_processors.base.check_duplicate_document`,
 which already uses ``.scalars().first()`` and is therefore tolerant of
 duplicates) stays cheap.
 """
 from __future__ import annotations
 from collections.abc import Sequence
 from sqlalchemy import inspect
 from alembic import op
 # Alembic revision identifiers: this migration is "133" and applies on
 # top of "132"; no branch labels or cross-branch dependencies are declared.
 revision: str = "133"
 down_revision: str | None = "132"
 branch_labels: str | Sequence[str] | None = None
 depends_on: str | Sequence[str] | None = None
 def _existing_constraint_names(bind, table: str) -> set[str]:
    """Return the names of the UNIQUE constraints reflected on ``table``.

    ``bind`` is the connection handed to SQLAlchemy's ``inspect``; every
    entry reported by ``get_unique_constraints`` contributes its ``name``.
    """
    names: set[str] = set()
    for constraint in inspect(bind).get_unique_constraints(table):
        names.add(constraint["name"])
    return names
 def _existing_index_names(bind, table: str) -> set[str]:
    """Return the names of the indexes reflected on ``table``.

    ``bind`` is the connection handed to SQLAlchemy's ``inspect``; every
    entry reported by ``get_indexes`` contributes its ``name``.
    """
    names: set[str] = set()
    for index in inspect(bind).get_indexes(table):
        names.add(index["name"])
    return names
 def upgrade() -> None:
    """Strip every uniqueness guarantee from ``documents.content_hash``.

    Uniqueness can be present in three shapes, and all of them are removed:

    * the named constraint ``uq_documents_content_hash``;
    * a unique index that carries that same name;
    * ``ix_documents_content_hash`` itself reflected as UNIQUE, which is
      what ``Column(..., index=True, unique=True)`` emits.

    A plain, non-unique ``ix_documents_content_hash`` is guaranteed to
    exist afterwards so lookups by hash stay indexed.
    """
    bind = op.get_bind()
    constraints = _existing_constraint_names(bind, "documents")
    if "uq_documents_content_hash" in constraints:
        op.drop_constraint(
            "uq_documents_content_hash", "documents", type_="unique"
        )
    # Reflect only after the constraint has been handled, and keep each
    # index's ``unique`` flag: the name alone cannot distinguish a unique
    # ``ix_documents_content_hash`` from the plain one we want to keep.
    unique_by_index = {
        idx["name"]: bool(idx.get("unique"))
        for idx in inspect(bind).get_indexes("documents")
    }
    # The constraint may also surface as a unique index of the same name.
    if "uq_documents_content_hash" in unique_by_index:
        op.drop_index("uq_documents_content_hash", table_name="documents")
    # A unique lookup index would keep rejecting duplicate content, so it
    # must be dropped and rebuilt instead of being treated as "already
    # present".
    if unique_by_index.get("ix_documents_content_hash"):
        op.drop_index("ix_documents_content_hash", table_name="documents")
        del unique_by_index["ix_documents_content_hash"]
    # Ensure the non-unique index is present for fast lookups.
    if "ix_documents_content_hash" not in unique_by_index:
        op.create_index(
            "ix_documents_content_hash",
            "documents",
            ["content_hash"],
            unique=False,
        )
 def downgrade() -> None:
    """Re-apply the UNIQUE constraint on ``documents.content_hash``.

    WARNING: this is destructive. Rows that share a ``content_hash`` with
    a lower-id row are deleted before the constraint is created, because
    the constraint cannot be added while duplicates exist.
    """
    bind = op.get_bind()
    # Re-applying UNIQUE cannot succeed while duplicates exist, and there
    # may now be legitimate ones (e.g. two NOTE documents that share
    # content because the user explicitly copied one to a new path).
    # This statement DOES delete user data: only the lowest-id row per
    # content_hash survives. The original comment here describes this as
    # the strategy revision 8 used when it first introduced the
    # constraint.
    op.execute(
        """
        DELETE FROM documents
        WHERE id NOT IN (
            SELECT MIN(id)
            FROM documents
            GROUP BY content_hash
        )
        """
    )
    # Keep the lookup index; create it only when it is missing instead of
    # dropping and rebuilding an index that is already in place.
    if "ix_documents_content_hash" not in _existing_index_names(bind, "documents"):
        op.create_index(
            "ix_documents_content_hash",
            "documents",
            ["content_hash"],
            unique=False,
        )
    op.create_unique_constraint(
        "uq_documents_content_hash", "documents", ["content_hash"]
    )
--- a/surfsense_backend/app/agents/new_chat/middleware/kb_persistence.py
+++ b/surfsense_backend/app/agents/new_chat/middleware/kb_persistence.py
@ -28,6 +28,7 @@ from langchain.agents.middleware import AgentMiddleware, AgentState
 from langchain_core.callbacks import dispatch_custom_event
 from langgraph.runtime import Runtime
 from sqlalchemy import delete, select
 from sqlalchemy.exc import IntegrityError
 from sqlalchemy.ext.asyncio import AsyncSession
 from app.agents.new_chat.filesystem_selection import FilesystemMode
@ -150,10 +151,11 @@ async def _create_document(
        virtual_path,
        search_space_id,
    )
-    # Guard against the unique_identifier_hash constraint: another row at the
+    # Filesystem-parity invariant: the only thing that *must* be unique is
-    # same virtual_path (this search space) already owns the hash. Callers are
+    # the path. Two notes can legitimately share content (e.g. ``cp a b``).
-    # expected to upsert via the wrapper, but this defends against bypasses
+    # Guard against the path-derived ``unique_identifier_hash`` constraint
-    # and gives a clean ValueError instead of a session-poisoning IntegrityError.
+    # so we surface a clean ValueError instead of letting the INSERT poison
    # the session with an IntegrityError.
    path_collision = await session.execute(
        select(Document.id).where(
            Document.search_space_id == search_space_id,
@ -165,17 +167,14 @@ async def _create_document(
            f"a document already exists at path '{virtual_path}' "
            "(unique_identifier_hash collision)"
        )
    # ``content_hash`` is intentionally NOT checked for uniqueness here.
    # In a real filesystem two files at different paths can hold identical
    # bytes, and the agent's ``write_file`` path needs that semantic to
    # support copy/duplicate operations. The hash remains useful as a
    # change-detection hint for connector indexers, which still consult it
    # via :func:`check_duplicate_document` but do so with a non-unique
    # lookup (``.first()``).
    content_hash = generate_content_hash(content, search_space_id)
-    content_collision = await session.execute(
-        select(Document.id).where(
-            Document.search_space_id == search_space_id,
-            Document.content_hash == content_hash,
-        )
-    )
-    if content_collision.scalar_one_or_none() is not None:
-        raise ValueError(
-            f"a document with identical content already exists for path '{virtual_path}'"
-        )
    doc = Document(
        title=title,
        document_type=DocumentType.NOTE,
@ -493,7 +492,14 @@ async def commit_staged_filesystem_state(
                            }
                        )
                else:
                    # Wrap each create in a SAVEPOINT so a residual
                    # ``IntegrityError`` (e.g. a deployment that hasn't run
                    # migration 133 yet, where ``documents.content_hash``
                    # still carries its legacy global UNIQUE constraint)
                    # rolls back only this one create instead of poisoning
                    # the whole turn's transaction.
                    try:
                        async with session.begin_nested():
                            new_doc = await _create_document(
                                session,
                                virtual_path=path,
@ -506,6 +512,23 @@ async def commit_staged_filesystem_state(
                            "kb_persistence: skipping %s create: %s", path, exc
                        )
                        continue
                    except IntegrityError as exc:
                        # The path-uniqueness check above already protected
                        # against ``unique_identifier_hash`` collisions, so
                        # the most likely culprit is the legacy
                        # ``ix_documents_content_hash`` UNIQUE constraint
                        # that migration 133 drops. Log loudly so operators
                        # know to run the migration; do NOT silently swallow.
                        msg = str(exc.orig) if exc.orig is not None else str(exc)
                        logger.error(
                            "kb_persistence: IntegrityError creating %s: %s. "
                            "If this mentions content_hash, run alembic "
                            "upgrade to apply migration 133 which drops the "
                            "global UNIQUE constraint on documents.content_hash.",
                            path,
                            msg,
                        )
                        continue
                    doc_id_by_path[path] = new_doc.id
                    committed_creates.append(
                        {
--- a/surfsense_backend/app/agents/new_chat/prompts/composer.py
+++ b/surfsense_backend/app/agents/new_chat/prompts/composer.py
@ -38,12 +38,38 @@ from app.db import ChatVisibility
 # Provider variant detection
 # -----------------------------------------------------------------------------
-ProviderVariant = str  # "anthropic" | "openai_reasoning" | "openai_classic" | "google" | "default"
+# String literal alias for the supported provider-specific prompt variants.
 # When adding a new variant, also drop a matching ``providers/<variant>.md``
 # file in this package and (if appropriate) extend the regex matchers below.
 #
 # Stylistic clusters mirror OpenCode's prompt-per-family layout but adapted
 # to SurfSense's "supplemental hints" architecture (each fragment is a
 # focused style nudge, NOT a full system prompt — the main prompt is
 # already assembled from base/ + tools/ + routing/).
 ProviderVariant = str
 # Known values:
 #   "anthropic"        — Claude family (XML-friendly, narrative todos)
 #   "openai_reasoning" — GPT-5 / o-series (channel-aware pragmatic)
 #   "openai_classic"   — GPT-4 family (autonomous persistence)
 #   "openai_codex"     — gpt-*-codex (code-purist, terse, file:line refs)
 #   "google"           — Gemini (formal, <3-line, numbered workflow)
 #   "kimi"             — Moonshot Kimi-K* (action-bias, parallel tools)
 #   "grok"             — xAI Grok (extreme-terse, one-word ok)
 #   "deepseek"         — DeepSeek V3 / R1 (terse, R1-aware reasoning)
 #   "default"          — fallback, no provider-specific block emitted
 # IMPORTANT: order of evaluation matters in :func:`detect_provider_variant`.
 # More specific patterns must come first (e.g. ``codex`` before
 # ``openai_reasoning`` because codex model ids contain ``gpt``).
 # Three Codex spellings: ``gpt-codex``, ``codex-mini`` and the versioned
 # ``gpt-<digits/dots>-codex`` form (e.g. ``gpt-5-codex``).
 _OPENAI_CODEX_RE = re.compile(r"\b(gpt-codex|codex-mini|gpt-[\d.]+-codex)\b", re.IGNORECASE)
 # No trailing ``\b``: these are prefix matches, so ids that continue after
 # ``gpt-5`` / ``o<digit>`` (``o1-preview``, ``o3-mini``) still hit.
 _OPENAI_REASONING_RE = re.compile(r"\b(gpt-5|o\d|o-)", re.IGNORECASE)
 # Prefix match as well, so ``gpt-4o-mini`` and ``gpt-4-turbo`` both hit.
 _OPENAI_CLASSIC_RE = re.compile(r"\bgpt-4", re.IGNORECASE)
 # The remaining families are matched on a whole-word vendor/model token.
 _ANTHROPIC_RE = re.compile(r"\bclaude\b", re.IGNORECASE)
 _GOOGLE_RE = re.compile(r"\bgemini\b", re.IGNORECASE)
 # ``kimi`` optionally followed by digits, dots or dashes, or the vendor
 # name ``moonshot``.
 _KIMI_RE = re.compile(r"\b(kimi[-\d.]*|moonshot)\b", re.IGNORECASE)
 _GROK_RE = re.compile(r"\bgrok\b", re.IGNORECASE)
 _DEEPSEEK_RE = re.compile(r"\bdeepseek\b", re.IGNORECASE)
 def detect_provider_variant(model_name: str | None) -> ProviderVariant:
@ -51,10 +77,17 @@ def detect_provider_variant(model_name: str | None) -> ProviderVariant:
    Heuristic match on the model id; returns ``"default"`` when nothing
    matches so the composer can fall back to the empty placeholder file.
    Order is significant: more-specific patterns are tried first so
    ``gpt-5-codex`` routes to ``"openai_codex"`` rather than
    ``"openai_reasoning"`` (mirrors OpenCode's
    ``packages/opencode/src/session/system.ts`` dispatch).
    """
    if not model_name:
        return "default"
    name = model_name.strip()
    if _OPENAI_CODEX_RE.search(name):
        return "openai_codex"
    if _OPENAI_REASONING_RE.search(name):
        return "openai_reasoning"
    if _OPENAI_CLASSIC_RE.search(name):
@ -63,6 +96,12 @@ def detect_provider_variant(model_name: str | None) -> ProviderVariant:
        return "anthropic"
    if _GOOGLE_RE.search(name):
        return "google"
    if _KIMI_RE.search(name):
        return "kimi"
    if _GROK_RE.search(name):
        return "grok"
    if _DEEPSEEK_RE.search(name):
        return "deepseek"
    return "default"
--- a/surfsense_backend/app/agents/new_chat/prompts/providers/anthropic.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/providers/anthropic.md
@ -1,5 +1,20 @@
 <provider_hints>
-You are running on an Anthropic Claude model. Use XML tags liberally to structure
+You are running on an Anthropic Claude model.
-intermediate reasoning when the task is complex. Prefer step-by-step plans inside
+
-`<thinking>` blocks before producing the final answer.
+Structured reasoning:
 - Use XML tags liberally to organise intermediate reasoning when a task is non-trivial. `<thinking>...</thinking>` blocks are encouraged before tool calls or before producing a complex final answer.
 - For multi-step requests, briefly outline a plan inside a `<plan>` block before issuing the first tool call.
 Professional objectivity:
 - Prioritise technical accuracy over validating the user's beliefs. Provide direct, factual guidance without unnecessary superlatives, praise, or emotional validation.
 - When uncertain, investigate (search the KB, fetch the page) rather than confirming the user's assumption.
 - Disagree with the user when the evidence warrants it; respectful correction beats false agreement.
 Task management:
 - For tasks with 3+ distinct steps use the todo / planning tool aggressively. Mark items in_progress before starting, completed immediately when finished — do not batch completions.
 - Narrate progress through the todo list itself, not through chatty status lines.
 Tool calls:
 - Run independent tool calls in parallel within one response. Sequence them only when a later call genuinely needs an earlier one's output.
 - Never chain bash-like commands with `;` or `&&` to "narrate" — use prose between tool calls instead.
 </provider_hints>
--- a/surfsense_backend/app/agents/new_chat/prompts/providers/deepseek.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/providers/deepseek.md
@ -0,0 +1,18 @@
 <provider_hints>
 You are running on a DeepSeek model (DeepSeek-V3 chat / DeepSeek-R1 reasoning).
 Reasoning hygiene (R1-aware):
 - If the model surfaces explicit `<think>` blocks, keep that internal scratch focused — do NOT restate the user's question inside it; jump straight to the analysis.
 - Never paste the contents of `<think>` into your final answer. Final answer should reflect only the conclusion, citations, and any user-facing rationale.
 - Do not let chain-of-thought leak into tool-call arguments — keep tool inputs minimal and structural.
 Output style:
 - Be concise. Default to a one-paragraph answer; expand only when the user asks for detail.
 - Don't open with sycophantic phrasing ("Great question", "Sure, here you go"). Lead with the answer or the next action.
 - For factual answers, cite once with `[citation:chunk_id]` and stop.
 Tool calls:
 - Issue independent tool calls in parallel within a single turn.
 - Prefer the knowledge-base search tools before any web-search; this model has strong recall but stale training data.
 - Don't fabricate file paths, chunk ids, or URLs — only use values returned by tools or provided by the user.
 </provider_hints>
--- a/surfsense_backend/app/agents/new_chat/prompts/providers/google.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/providers/google.md
@ -1,4 +1,20 @@
 <provider_hints>
-You are running on a Google Gemini model. Prefer concise, structured responses.
+You are running on a Google Gemini model.
-When using tools, follow the function-calling protocol and avoid verbose preludes.
+
 Output style:
 - Concise & direct. Aim for fewer than 3 lines of prose (excluding tool output, citations, and code/snippets) when the task allows.
 - No conversational filler — skip openers like "Okay, I will now…" and closers like "I have finished the changes…". Get straight to the action or answer.
 - Format with GitHub-flavoured Markdown; assume monospace rendering.
 - For one-line factual answers, just answer. No headers, no bullets.
 Workflow for non-trivial tasks (Understand → Plan → Act → Verify):
 1. **Understand:** read the user's request and the relevant KB / connector context. Use search and read tools (in parallel when independent) before assuming anything.
 2. **Plan:** when the task touches multiple steps, share an extremely concise plan first.
 3. **Act:** call the appropriate tools, strictly adhering to the prompts/routing already established for this agent.
 4. **Verify:** confirm with a follow-up read or search where it materially de-risks the answer.
 Discipline:
 - Do not take significant actions beyond the clear scope of the user's request without confirming first.
 - Do not assume a connector / tool / file exists — check (e.g. via `get_connected_accounts`) before referencing it.
 - Path arguments must be the exact strings returned by tools; do not synthesise file paths.
 </provider_hints>
--- a/surfsense_backend/app/agents/new_chat/prompts/providers/grok.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/providers/grok.md
@ -0,0 +1,17 @@
 <provider_hints>
 You are running on an xAI Grok model.
 Maximum terseness:
 - Answer in fewer than 4 lines unless the user asks for detail. One-word answers are best when they suffice.
 - No preamble ("The answer is", "Here's what I'll do"), no postamble ("Hope that helps", "Let me know"). Get straight to the answer.
 - Avoid restating the user's question.
 - For factual lookups inside the knowledge base, give the answer with a single `[citation:chunk_id]` and stop.
 Tool discipline:
 - When investigating, default to ONE tool call per assistant turn and wait for the result before deciding the next call (the read-only batches described in the next bullet are the only exception). Do not loop on the same tool with the same arguments — pick a result and act.
 - For obviously parallelizable read-only batches (multiple independent searches), one turn with several tool calls is fine — but never chain into a fishing expedition.
 Style:
 - No emojis unless the user asked. No nested bullets, no headers for short answers.
 - If you can't help, say so in 1-2 sentences without explaining "why this could lead to…".
 </provider_hints>
--- a/surfsense_backend/app/agents/new_chat/prompts/providers/kimi.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/providers/kimi.md
@ -0,0 +1,21 @@
 <provider_hints>
 You are running on a Moonshot Kimi model (Kimi-K1.5 / Kimi-K2 / Kimi-K2.5+).
 Action bias:
 - Default to taking action with tools rather than describing solutions in prose. If a tool can answer the question, call the tool.
 - Don't narrate routine reads, searches, or obvious next steps. Combine related progress into one short status line.
 - Be thorough in actions (test what you build, verify what you change). Be brief in explanations.
 Tool calls:
 - Output multiple non-interfering tool calls in a SINGLE response — parallelism is a major efficiency win on this model.
 - When the `task` tool is available, delegate focused subtasks to a subagent with full context (subagents don't inherit yours).
 - Don't apologise or pre-announce tool calls. The tool call itself is self-explanatory.
 Language:
 - Respond in the SAME language as the user's most recent turn unless explicitly instructed otherwise.
 Discipline:
 - Stay on track. Never give the user more than what they asked for.
 - Fact-check before stating anything as factual; don't fabricate citations.
 - Keep it stupidly simple. Don't overcomplicate.
 </provider_hints>
--- a/surfsense_backend/app/agents/new_chat/prompts/providers/openai_classic.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/providers/openai_classic.md
@ -1,5 +1,21 @@
 <provider_hints>
-You are running on a classic OpenAI chat model (GPT-4 family). Use direct
+You are running on a classic OpenAI chat model (GPT-4 family).
-function-calling for tools. When editing files, use the standard `edit_file`
+
-or `write_file` tools rather than diff-based patches.
+Persistence:
 - Keep going until the user's query is completely resolved before yielding back. Don't end the turn at "I would do X" — actually do X.
 - When you say "Next I will…" or "Now I will…", you MUST actually take that action in the same turn.
 - If a tool call fails, diagnose and try again with corrected arguments; do not surface the raw error and stop.
 Planning:
 - Plan extensively before each tool call and reflect briefly on the result of the previous call. For tasks with 3+ steps, use the todo / planning tool and mark items as `in_progress` / `completed` as you go.
 - Always announce the next action in ONE concise sentence before making a non-trivial tool call ("I'll search the KB for the migration spec.").
 Output style:
 - Conversational but professional. Plain prose for explanations, bullet points for findings, fenced code blocks (with language tags) for code.
 - Don't dump tool output verbatim — summarise the relevant lines.
 - Don't add a closing recap unless the user asked for one. After completing the work, just stop.
 Tool calls:
 - Issue independent tool calls in parallel within one response.
 - Use specialised tools over generic ones (e.g. KB search before web search; named connectors over MCP fallback).
 </provider_hints>
--- a/surfsense_backend/app/agents/new_chat/prompts/providers/openai_codex.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/providers/openai_codex.md
@ -0,0 +1,19 @@
 <provider_hints>
 You are running on an OpenAI Codex-class model (gpt-codex / codex-mini / gpt-*-codex).
 Output style:
 - Be concise. Don't dump fetched/searched content back at the user — reference paths or chunk ids instead.
 - Reference sources as `path:line` (or `chunk:<id>`) so they're clickable. Give every reference its own stand-alone path, even when the same source is cited more than once.
 - Prefer numbered lists (`1.`, `2.`, `3.`) when offering options the user can pick by replying with a single number.
 - Skip headers and heavy formatting for simple confirmations.
 - No emojis, no em-dashes, no nested bullets. Single-level lists only.
 Code & structured-output tasks:
 - Lead with a one-sentence explanation of the change before context. Don't open with "Summary:" — jump in.
 - Suggest natural next steps (run tests, diff review, commit) only when they're genuinely the next move.
 - For multi-line snippets use fenced code blocks with a language tag.
 Tool calls:
 - Run independent tool calls in parallel; chain only when later calls need earlier results.
 - Don't ask permission ("Should I proceed?") — proceed with the most reasonable default and state what you did.
 </provider_hints>
--- a/surfsense_backend/app/agents/new_chat/prompts/providers/openai_reasoning.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/providers/openai_reasoning.md
@ -1,5 +1,21 @@
 <provider_hints>
-You are running on an OpenAI reasoning model (o-series / GPT-5+). Be terse and
+You are running on an OpenAI reasoning model (GPT-5+ / o-series).
-direct in your responses. When editing files, prefer the `apply_patch` tool format
+
-where available. Avoid restating the user request before answering.
+Output style:
 - Be terse and direct. Don't restate the user's request before answering.
 - Don't begin with conversational openers ("Done!", "Got it", "Great question", "Sure thing"). Get to the answer or the action.
 - Match response complexity to the task: simple questions → one-line answer; substantial work → lead with the outcome, then context, then any next steps.
 - No nested bullets — keep lists flat (single level). For options the user can pick by replying with a number, use `1.` `2.` `3.`.
 - Use inline backticks for paths/commands/identifiers; fenced code blocks (with language tags) for multi-line snippets.
 Channels (for clients that support them):
 - `commentary` — short progress updates only when they add genuinely new information (a discovery, a tradeoff, a blocker, the start of a non-trivial step). Don't narrate routine reads or obvious next steps.
 - `final` — the completed response. Keep it self-contained; no "see above" / "see below" cross-references.
 Tool calls:
 - Parallelise independent tool calls in a single response (`multi_tool_use.parallel` where supported). Only sequence when a later call needs an earlier one's output.
 - Don't ask permission ("Should I proceed?", "Do you want me to…?"). Pick the most reasonable default, do it, and state what you did.
 Autonomy:
 - Persist until the task is fully resolved within the current turn whenever feasible. Don't stop at analysis when the user clearly wants the change applied.
 </provider_hints>
--- a/surfsense_backend/app/db.py
+++ b/surfsense_backend/app/db.py
@ -976,7 +976,15 @@ class Document(BaseModel, TimestampMixin):
    document_metadata = Column(JSON, nullable=True)
    content = Column(Text, nullable=False)
-    content_hash = Column(String, nullable=False, index=True, unique=True)
+    # ``content_hash`` is intentionally NOT globally unique. In a real
    # filesystem two files at different paths can hold identical bytes,
    # and the agent's ``write_file`` flow needs that semantic to support
    # copy / duplicate operations. Path uniqueness lives on
    # ``unique_identifier_hash`` (per search space). The hash remains
    # indexed because connector indexers consult it as a change-detection
    # / cross-source dedup hint via :func:`check_duplicate_document`.
    # See migration 133.
    content_hash = Column(String, nullable=False, index=True)
    unique_identifier_hash = Column(String, nullable=True, index=True, unique=True)
    embedding = Column(Vector(config.embedding_model_instance.dimension))
--- a/surfsense_backend/tests/unit/agents/new_chat/prompts/test_composer.py
+++ b/surfsense_backend/tests/unit/agents/new_chat/prompts/test_composer.py
@ -25,17 +25,33 @@ class TestProviderVariantDetection:
    @pytest.mark.parametrize(
        "model_name,expected",
        [
            # GPT-4 family routes to "classic" (autonomous-persistence style)
            ("openai:gpt-4o-mini", "openai_classic"),
            ("openai:gpt-4-turbo", "openai_classic"),
            # GPT-5 / o-series route to "reasoning" (channel-aware pragmatic)
            ("openai:gpt-5", "openai_reasoning"),
-            ("openai:gpt-5-codex", "openai_reasoning"),
            ("openai:o1-preview", "openai_reasoning"),
            ("openai:o3-mini", "openai_reasoning"),
            # Codex family beats reasoning (more specific). Mirrors OpenCode
            # ``system.ts`` — ``gpt-*-codex`` gets the code-purist prompt.
            ("openai:gpt-5-codex", "openai_codex"),
            ("openai:gpt-codex", "openai_codex"),
            ("openai:codex-mini", "openai_codex"),
            # Anthropic + Google
            ("anthropic:claude-3-5-sonnet", "anthropic"),
            ("anthropic/claude-opus-4", "anthropic"),
            ("google:gemini-2.0-flash", "google"),
            ("vertex:gemini-1.5-pro", "google"),
            # Newly-covered families
            ("moonshot:kimi-k2", "kimi"),
            ("openrouter:moonshot/kimi-k2.5", "kimi"),
            ("xai:grok-2", "grok"),
            ("openrouter:x-ai/grok-3", "grok"),
            ("openai:deepseek-v3", "deepseek"),
            ("deepseek:deepseek-r1", "deepseek"),
            # Unknown families fall back to default (no provider block emitted)
            ("groq:mixtral-8x7b", "default"),
            ("together:llama-3.1-70b", "default"),
            (None, "default"),
            ("", "default"),
        ],
@ -43,6 +59,16 @@ class TestProviderVariantDetection:
    def test_detection(self, model_name: str | None, expected: str) -> None:
        assert detect_provider_variant(model_name) == expected
    def test_codex_takes_precedence_over_reasoning(self) -> None:
        """Regression guard: ``gpt-5-codex`` must NOT match the generic
        ``gpt-5`` reasoning regex first. Codex is the more specialised
        prompt and mirrors OpenCode's dispatch order.

        Uses the module-level ``detect_provider_variant`` import, the same
        name ``test_detection`` relies on, instead of re-importing it
        inside the test body.
        """
        assert detect_provider_variant("openai:gpt-5-codex") == "openai_codex"
        assert detect_provider_variant("openai:gpt-5") == "openai_reasoning"
 class TestCompose:
    def test_default_prompt_has_required_blocks(self, fixed_today: datetime) -> None:
@ -149,6 +175,52 @@ class TestCompose:
        prompt = compose_system_prompt(today=fixed_today, model_name="custom:foo")
        assert "<provider_hints>" not in prompt
    @pytest.mark.parametrize(
        "model_name,expected_marker",
        [
            # Each marker is a phrase taken from the matching fragment file.
            # When a fragment is reworded so its marker disappears, change
            # the fragment and this table together, on purpose.
            ("openai:gpt-5-codex", "Codex-class"),
            ("openai:gpt-5", "OpenAI reasoning model"),
            ("openai:gpt-4o", "classic OpenAI chat model"),
            ("anthropic:claude-3-5-sonnet", "Anthropic Claude"),
            ("google:gemini-2.0-flash", "Google Gemini"),
            ("moonshot:kimi-k2", "Moonshot Kimi"),
            ("xai:grok-2", "xAI Grok"),
            ("deepseek:deepseek-r1", "DeepSeek"),
        ],
    )
    def test_each_known_variant_renders_with_its_marker(
        self,
        fixed_today: datetime,
        model_name: str,
        expected_marker: str,
    ) -> None:
        """Check that each supported model id renders its own hints block.

        The composed prompt must contain a ``<provider_hints>`` block and
        the marker phrase for that variant, which ties the dispatch logic
        to the fragment files on disk.
        """
        rendered = compose_system_prompt(today=fixed_today, model_name=model_name)
        no_block_message = (
            f"variant for {model_name!r} did not emit a provider_hints block; "
            "the corresponding providers/<variant>.md may be missing"
        )
        assert "<provider_hints>" in rendered, no_block_message
        no_marker_message = (
            f"variant for {model_name!r} emitted hints but lacked the "
            f"expected marker {expected_marker!r} — the fragment may have "
            "drifted from the dispatch table"
        )
        assert expected_marker in rendered, no_marker_message
    def test_provider_blocks_are_byte_stable_across_calls(
        self, fixed_today: datetime
    ) -> None:
        """Composing twice for one model id and date yields equal prompts."""
        first, second = (
            compose_system_prompt(today=fixed_today, model_name="moonshot:kimi-k2")
            for _ in range(2)
        )
        assert first == second
    def test_custom_system_instructions_override_default(
        self, fixed_today: datetime
    ) -> None:
--- a/surfsense_backend/tests/unit/middleware/test_kb_persistence_filesystem_parity.py
+++ b/surfsense_backend/tests/unit/middleware/test_kb_persistence_filesystem_parity.py
@ -0,0 +1,168 @@
 """Unit tests for kb_persistence filesystem-parity invariants.
 Specifically, these tests pin down that the agent-driven write_file flow
 treats path uniqueness — not content uniqueness — as the only hard
 invariant. This mirrors a real filesystem: ``cp a b`` produces two files
 with identical bytes living at different paths, and that should round-trip
 through :class:`KnowledgeBasePersistenceMiddleware` without losing the copy.
 """
 from __future__ import annotations
 from typing import Any
 from unittest.mock import AsyncMock, MagicMock
 import numpy as np
 import pytest
 from app.agents.new_chat.middleware import kb_persistence
 from app.db import Document
 class _FakeResult:
    """Tiny substitute for ``sqlalchemy.engine.Result`` with one canned value.

    Both accessors return the value supplied at construction, unchanged.
    """
    def __init__(self, value: Any = None) -> None:
        # The single value every accessor hands back.
        self._canned = value
    def scalar(self) -> Any:
        """Return the canned value."""
        return self._canned
    def scalar_one_or_none(self) -> Any:
        """Return the canned value (``None`` when nothing was supplied)."""
        return self._canned
 class _FakeSession:
    """Bare-bones AsyncSession double covering what ``_create_document`` uses.

    Objects passed to ``add`` / ``add_all`` are collected in ``added`` so
    tests can inspect them afterwards. ``execute`` is an ``AsyncMock`` that
    resolves to an empty ``_FakeResult`` unless a test replaces it, and
    ``flush`` hands out sequential ids to collected objects lacking one.
    """
    def __init__(self) -> None:
        self.added: list[Any] = []
        # Counter backing the ids handed out by the fake flush.
        self._next_id = 1
        self.execute = AsyncMock(return_value=_FakeResult(None))

        async def _assign_missing_ids() -> None:
            # Mimic ``await session.flush()`` populating primary keys: every
            # collected object whose ``id`` is absent or ``None`` gets the
            # next counter value.
            for obj in self.added:
                if getattr(obj, "id", None) is not None:
                    continue
                obj.id = self._next_id
                self._next_id += 1

        self.flush = AsyncMock(side_effect=_assign_missing_ids)
    def add(self, obj: Any) -> None:
        """Record a single object."""
        self.added.append(obj)
    def add_all(self, objs: list[Any]) -> None:
        """Record several objects, keeping their order."""
        for obj in objs:
            self.added.append(obj)
@pytest.fixture(autouse=True)
def _stub_embeddings_and_chunks(monkeypatch: pytest.MonkeyPatch) -> None:
    """Swap ``kb_persistence``'s embedding and chunking helpers for fakes.

    Applied automatically to every test in this module so that unit tests
    do not need to load the embedding model.
    """

    def _zero_vectors(texts):
        # One 8-dimensional float32 zero vector per input text.
        return [np.zeros(8, dtype=np.float32) for _ in texts]

    def _single_chunk(content):
        # Treat the whole content as exactly one chunk.
        return [content]

    monkeypatch.setattr(kb_persistence, "embed_texts", _zero_vectors)
    monkeypatch.setattr(kb_persistence, "chunk_text", _single_chunk)
@pytest.mark.asyncio
async def test_create_document_allows_identical_content_at_different_paths() -> None:
    """Core regression, modelled on ``cp /a/notes.md /b/notes-copy.md``.

    Two create calls with byte-for-byte identical content but different
    virtual paths must both succeed: the path, not the content, is the
    unique key.
    """
    session = _FakeSession()
    body = "# Same body\n\nIdentical content used by two different paths.\n"
    # Everything except the path is shared between the two create calls.
    shared_kwargs: dict[str, Any] = {
        "content": body,
        "search_space_id": 42,
        "created_by_id": "user-1",
    }

    original = await kb_persistence._create_document(
        session,  # type: ignore[arg-type]
        virtual_path="/documents/a/notes.md",
        **shared_kwargs,
    )
    assert isinstance(original, Document)
    assert original.title == "notes.md"

    # The copy carries the same bytes at a different path and must not
    # raise — that is the point of the filesystem-parity fix.
    duplicate = await kb_persistence._create_document(
        session,  # type: ignore[arg-type]
        virtual_path="/documents/b/notes-copy.md",
        **shared_kwargs,
    )
    assert isinstance(duplicate, Document)
    assert duplicate.title == "notes-copy.md"

    # Desired contract: equal ``content_hash``, distinct
    # ``unique_identifier_hash`` (one per path).
    assert original.content_hash == duplicate.content_hash
    assert original.unique_identifier_hash != duplicate.unique_identifier_hash
@pytest.mark.asyncio
async def test_create_document_still_rejects_path_collision() -> None:
    """Path uniqueness must stay a hard invariant.

    When the path-collision lookup reports an existing row, the create call
    has to raise ``ValueError`` with a message matching
    ``"already exists at path"``.
    """
    session = _FakeSession()
    # The path below has no folder parts, so the folder-hierarchy helper is
    # expected to be a no-op and the only SELECT issued is the
    # path-collision check. Making that SELECT report an existing document
    # id (99) trips the guard.
    existing_row = _FakeResult(value=99)
    session.execute = AsyncMock(return_value=existing_row)

    with pytest.raises(ValueError, match="already exists at path"):
        await kb_persistence._create_document(
            session,  # type: ignore[arg-type]
            virtual_path="/documents/notes.md",
            content="anything",
            search_space_id=42,
            created_by_id="user-1",
        )
@pytest.mark.asyncio
async def test_create_document_does_not_query_for_content_hash_collision() -> None:
    """Guard against the legacy content-hash pre-check coming back.

    The create flow is expected to issue exactly one ``execute`` — the
    path-collision SELECT — for a path without folder parts. Counting
    ``execute`` awaits is brittle but effective: if a refactor reintroduces
    a second (content-hash) SELECT, the count changes and this test fails
    loudly.

    The module's autouse fixture already stubs embeddings and chunking, so
    no additional fixture is requested here.
    """
    session = _FakeSession()
    await kb_persistence._create_document(
        session,  # type: ignore[arg-type]
        virtual_path="/documents/notes.md",
        content="hello",
        search_space_id=42,
        created_by_id="user-1",
    )
    # Path-collision SELECT only. No content_hash SELECT.
    execute_calls = session.execute.await_count
    assert execute_calls == 1, (
        f"Unexpected execute count {execute_calls}; "
        "did the legacy content_hash collision pre-check get re-added?"
    )
--- a/surfsense_web/app/globals.css
+++ b/surfsense_web/app/globals.css
@ -210,6 +210,27 @@ button {
 	}
 }
 /* Citation-jump highlight — entrance pulse only. The `SearchHighlightLeaf`
   (see components/ui/search-highlight-node.tsx) is otherwise statically
   tinted; this animation runs once on mount to draw the eye to the cited
   text after `scrollIntoView` lands. The highlight itself is permanent
   until the user clicks inside the editor (or another dismissal trigger
   fires in `EditorPanelContent`). */
@keyframes citation-flash-in {
 	/* Start invisible: no background tint and no ring. */
 	0% {
 		background-color: transparent;
 		box-shadow: 0 0 0 0 transparent;
 	}
 	/* Peak of the pulse: strongest tint (30% primary) with a 3px ring. */
 	40% {
 		background-color: color-mix(in oklab, var(--primary) 30%, transparent);
 		box-shadow: 0 0 0 3px color-mix(in oklab, var(--primary) 25%, transparent);
 	}
 	/* End of the animation: a softer 15% tint with a thin 1px ring. */
 	100% {
 		background-color: color-mix(in oklab, var(--primary) 15%, transparent);
 		box-shadow: 0 0 0 1px color-mix(in oklab, var(--primary) 40%, transparent);
 	}
 }
 /* Human-in-the-loop approval card animations */
@keyframes pulse-subtle {
 	0%,
--- a/surfsense_web/atoms/document-viewer/pending-chunk-highlight.atom.ts
+++ b/surfsense_web/atoms/document-viewer/pending-chunk-highlight.atom.ts
@ -0,0 +1,19 @@
 import { atom } from "jotai";
 /**
 * Cross-component handoff for citation jumps. Set by `InlineCitation` when a
 * numeric chunk badge is clicked (after the document has been resolved); read
 * by `EditorPanelContent` once the editor panel for the matching document
 * renders, so it can scroll to and softly highlight the cited chunk inside
 * the rendered markdown.
 *
 * Cleared by `EditorPanelContent` only after a terminal state — exact or
 * miss — has been reached, so that an escalation refetch (2MB preview →
 * 16MB) keeps the pending intent alive across the re-render.
 */
 export interface PendingChunkHighlight {
 	/** Id of the document that owns the cited chunk. */
 	documentId: number;
 	/** Id of the cited chunk (the number shown on the citation badge). */
 	chunkId: number;
 	/** Text of the cited chunk; the setter stores "" when no content is available. */
 	chunkText: string;
 }
 /** Holds the staged citation jump, or `null` when none is pending. */
 export const pendingChunkHighlightAtom = atom<PendingChunkHighlight | null>(null);
--- a/surfsense_web/components/assistant-ui/inline-citation.tsx
+++ b/surfsense_web/components/assistant-ui/inline-citation.tsx
@ -1,26 +1,45 @@
 "use client";
-import { FileText } from "lucide-react";
+import { useQuery, useQueryClient } from "@tanstack/react-query";
 import { useSetAtom } from "jotai";
 import { ExternalLink, FileText } from "lucide-react";
 import type { FC } from "react";
-import { useState } from "react";
+import { useCallback, useEffect, useRef, useState } from "react";
 import { toast } from "sonner";
 import { pendingChunkHighlightAtom } from "@/atoms/document-viewer/pending-chunk-highlight.atom";
 import { openEditorPanelAtom } from "@/atoms/editor/editor-panel.atom";
 import { useCitationMetadata } from "@/components/assistant-ui/citation-metadata-context";
-import { SourceDetailPanel } from "@/components/new-chat/source-detail-panel";
+import { MarkdownViewer } from "@/components/markdown-viewer";
 import { Citation } from "@/components/tool-ui/citation";
 import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover";
 import { Spinner } from "@/components/ui/spinner";
 import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
 import { documentsApiService } from "@/lib/apis/documents-api.service";
 import { cacheKeys } from "@/lib/query-client/cache-keys";
 interface InlineCitationProps {
 	chunkId: number;
 	isDocsChunk?: boolean;
 }
 const POPOVER_HOVER_CLOSE_DELAY_MS = 150;
 /**
- * Inline citation for knowledge-base chunks (numeric chunk IDs).
+ * Inline citation badge for knowledge-base chunks (numeric chunk IDs) and
- * Renders a clickable badge showing the actual chunk ID that opens the SourceDetailPanel.
+ * Surfsense documentation chunks (`isDocsChunk`). Negative chunk IDs render as
- * Negative chunk IDs indicate anonymous/synthetic uploads and render as a static badge.
+ * a static "doc" pill (anonymous/synthetic uploads).
 *
 * Numeric KB chunks: clicking resolves the parent document via
 * `getDocumentByChunk`, opens the document in the right side panel (alongside
 * the chat — does not replace it), and stages the cited chunk text in
 * `pendingChunkHighlightAtom` so `EditorPanelContent` can scroll to and softly
 * highlight it inside the rendered markdown.
 *
 * Surfsense docs chunks: rendered as a hover-controlled shadcn Popover that
 * lazily fetches and previews the cited chunk inline, since those docs aren't
 * indexed into the user's search space and have no tab to open.
 */
 export const InlineCitation: FC<InlineCitationProps> = ({ chunkId, isDocsChunk = false }) => {
 	const [isOpen, setIsOpen] = useState(false);
 	if (chunkId < 0) {
 		return (
 			<Tooltip>
@ -38,26 +57,185 @@ export const InlineCitation: FC<InlineCitationProps> = ({ chunkId, isDocsChunk =
 		);
 	}
 	if (isDocsChunk) {
 		return <SurfsenseDocCitation chunkId={chunkId} />;
 	}
 	return <NumericChunkCitation chunkId={chunkId} />;
 };
 const NumericChunkCitation: FC<{ chunkId: number }> = ({ chunkId }) => {
 	const queryClient = useQueryClient();
 	const setPendingHighlight = useSetAtom(pendingChunkHighlightAtom);
 	const openEditorPanel = useSetAtom(openEditorPanelAtom);
 	const [resolving, setResolving] = useState(false);
 	const handleClick = useCallback(async () => {
 		if (resolving) return;
 		setResolving(true);
 		console.log("[citation:click] start", { chunkId });
 		try {
 			const data = await queryClient.fetchQuery({
 				// Local key with explicit window. The shared `cacheKeys.documents.byChunk`
 				// is window-agnostic (latent footgun); namespace the call to avoid
 				// reusing a different-window cached result.
 				queryKey: ["documents", "by-chunk", chunkId, "w0"] as const,
 				queryFn: () =>
 					documentsApiService.getDocumentByChunk({ chunk_id: chunkId, chunk_window: 0 }),
 				staleTime: 5 * 60 * 1000,
 			});
 			const cited = data.chunks.find((c) => c.id === chunkId) ?? data.chunks[0];
 			console.log("[citation:click] fetched doc-by-chunk", {
 				docId: data.id,
 				docTitle: data.title,
 				chunksReturned: data.chunks.length,
 				citedChunkId: cited?.id,
 				citedChunkContentLen: cited?.content?.length ?? 0,
 				citedChunkPreview:
 					cited?.content && cited.content.length > 120
 						? `${cited.content.slice(0, 120)}…(+${cited.content.length - 120})`
 						: (cited?.content ?? ""),
 			});
 			// Stage the highlight BEFORE opening the panel so `EditorPanelContent`
 			// already sees the pending intent on its very first render — avoids a
 			// "fetch → render → no-pending → next-tick render with pending" race.
 			setPendingHighlight({
 				documentId: data.id,
 				chunkId,
 				chunkText: cited?.content ?? "",
 			});
 			openEditorPanel({
 				documentId: data.id,
 				searchSpaceId: data.search_space_id,
 				title: data.title,
 			});
 			console.log("[citation:click] staged highlight + opened editor panel", {
 				documentId: data.id,
 			});
 		} catch (err) {
 			console.warn("[citation:click] failed", err);
 			toast.error(err instanceof Error ? err.message : "Couldn't open cited document");
 		} finally {
 			setResolving(false);
 		}
 	}, [chunkId, openEditorPanel, queryClient, resolving, setPendingHighlight]);
 	return (
 		<SourceDetailPanel
 			open={isOpen}
 			onOpenChange={setIsOpen}
 			chunkId={chunkId}
 			sourceType={isDocsChunk ? "SURFSENSE_DOCS" : ""}
 			title={isDocsChunk ? "Surfsense Documentation" : "Source"}
 			description=""
 			url=""
 			isDocsChunk={isDocsChunk}
 		>
 		<button
 			type="button"
-				onClick={() => setIsOpen(true)}
+			onClick={handleClick}
-				className="ml-0.5 inline-flex h-5 min-w-5 cursor-pointer items-center justify-center rounded-md bg-muted/60 px-1.5 text-[11px] font-medium text-muted-foreground align-baseline shadow-sm transition-colors hover:bg-muted hover:text-foreground focus-visible:ring-ring focus-visible:ring-2 focus-visible:outline-none"
+			disabled={resolving}
 			className="ml-0.5 inline-flex h-5 min-w-5 cursor-pointer items-center justify-center rounded-md bg-muted/60 px-1.5 text-[11px] font-medium text-muted-foreground align-baseline shadow-sm transition-colors hover:bg-muted hover:text-foreground focus-visible:ring-ring focus-visible:ring-2 focus-visible:outline-none disabled:cursor-progress disabled:opacity-70"
 			title={`View source chunk #${chunkId}`}
 			aria-label={`Jump to cited chunk ${chunkId}`}
 		>
-				{chunkId}
+			{resolving ? <Spinner size="xs" /> : chunkId}
 		</button>
-		</SourceDetailPanel>
+	);
 };
 /**
 * Citation pill for Surfsense documentation chunks. Hovering, focusing or
 * clicking the pill shows a popover that lazily fetches the cited chunk and
 * previews it inline; leaving the pill or the popover hides it again after a
 * short delay.
 */
 const SurfsenseDocCitation: FC<{ chunkId: number }> = ({ chunkId }) => {
 	const [isOpen, setIsOpen] = useState(false);
 	// Handle of the pending "hide" timeout, if one is scheduled.
 	const hideTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
 	const clearHideTimer = useCallback(() => {
 		const timer = hideTimerRef.current;
 		if (!timer) return;
 		clearTimeout(timer);
 		hideTimerRef.current = null;
 	}, []);
 	const hideSoon = useCallback(() => {
 		clearHideTimer();
 		hideTimerRef.current = setTimeout(() => {
 			setIsOpen(false);
 			hideTimerRef.current = null;
 		}, POPOVER_HOVER_CLOSE_DELAY_MS);
 	}, [clearHideTimer]);
 	// Never leave a timer running after the component unmounts.
 	useEffect(() => {
 		return () => {
 			clearHideTimer();
 		};
 	}, [clearHideTimer]);
 	// The fetch only runs while the popover is open.
 	const { data, isLoading, error } = useQuery({
 		queryKey: cacheKeys.documents.byChunk(`doc-${chunkId}`),
 		queryFn: () => documentsApiService.getSurfsenseDocByChunk(chunkId),
 		enabled: isOpen,
 		staleTime: 5 * 60 * 1000,
 	});
 	// Prefer the chunk whose id matches; otherwise fall back to the first one.
 	const citedChunk = data
 		? (data.chunks.find((chunk) => chunk.id === chunkId) ?? data.chunks[0])
 		: undefined;
 	const chunkBody = citedChunk?.content;
 	const settled = !isLoading && !error;
 	// Shared by hover and focus: cancel any pending hide, then show.
 	const showNow = () => {
 		clearHideTimer();
 		setIsOpen(true);
 	};
 	return (
 		<Popover open={isOpen} onOpenChange={setIsOpen}>
 			<PopoverTrigger asChild>
 				<button
 					type="button"
 					onClick={() => setIsOpen((wasOpen) => !wasOpen)}
 					onMouseEnter={showNow}
 					onMouseLeave={hideSoon}
 					onFocus={showNow}
 					onBlur={hideSoon}
 					className="ml-0.5 inline-flex h-5 min-w-5 cursor-pointer items-center justify-center gap-0.5 rounded-md bg-primary/10 px-1.5 text-[11px] font-medium text-primary align-baseline shadow-sm transition-colors hover:bg-primary/15 focus-visible:ring-ring focus-visible:ring-2 focus-visible:outline-none"
 					aria-label={`Show Surfsense documentation chunk ${chunkId}`}
 					title="Surfsense documentation"
 				>
 					<FileText className="size-3" />
 					doc
 				</button>
 			</PopoverTrigger>
 			<PopoverContent
 				className="w-96 max-w-[calc(100vw-2rem)] p-0"
 				align="start"
 				sideOffset={6}
 				onMouseEnter={clearHideTimer}
 				onMouseLeave={hideSoon}
 				onOpenAutoFocus={(event) => event.preventDefault()}
 			>
 				<div className="flex items-center justify-between gap-2 border-b px-3 py-2">
 					<div className="min-w-0">
 						<p className="truncate text-sm font-medium">
 							{data?.title ?? "Surfsense documentation"}
 						</p>
 						<p className="text-[11px] text-muted-foreground">Chunk #{chunkId}</p>
 					</div>
 					{data?.source && (
 						<a
 							href={data.source}
 							target="_blank"
 							rel="noopener noreferrer"
 							className="inline-flex shrink-0 items-center gap-1 rounded-md px-2 py-1 text-[11px] font-medium text-primary hover:bg-primary/10"
 						>
 							<ExternalLink className="size-3" />
 							Open
 						</a>
 					)}
 				</div>
 				<div className="max-h-72 overflow-auto px-3 py-2 text-sm">
 					{isLoading && (
 						<div className="flex items-center gap-2 py-4 text-muted-foreground">
 							<Spinner size="xs" />
 							<span className="text-xs">Loading…</span>
 						</div>
 					)}
 					{error && (
 						<p className="py-4 text-xs text-destructive">
 							{error instanceof Error ? error.message : "Failed to load chunk"}
 						</p>
 					)}
 					{settled && chunkBody && <MarkdownViewer content={chunkBody} maxLength={1500} />}
 					{settled && !chunkBody && (
 						<p className="py-4 text-xs text-muted-foreground">No content available.</p>
 					)}
 				</div>
 			</PopoverContent>
 		</Popover>
 	);
 };
--- a/surfsense_web/components/editor-panel/editor-panel.tsx
+++ b/surfsense_web/components/editor-panel/editor-panel.tsx
@ -1,5 +1,6 @@
 "use client";
 import { FindReplacePlugin } from "@platejs/find-replace";
 import { useAtomValue, useSetAtom } from "jotai";
 import {
 	Check,
@ -14,17 +15,21 @@ import {
 import dynamic from "next/dynamic";
 import { useCallback, useEffect, useRef, useState } from "react";
 import { toast } from "sonner";
 import { pendingChunkHighlightAtom } from "@/atoms/document-viewer/pending-chunk-highlight.atom";
 import { closeEditorPanelAtom, editorPanelAtom } from "@/atoms/editor/editor-panel.atom";
 import { VersionHistoryButton } from "@/components/documents/version-history";
 import type { PlateEditorInstance } from "@/components/editor/plate-editor";
 import { SourceCodeEditor } from "@/components/editor/source-code-editor";
 import { MarkdownViewer } from "@/components/markdown-viewer";
 import { Alert, AlertDescription } from "@/components/ui/alert";
 import { Button } from "@/components/ui/button";
 import { Drawer, DrawerContent, DrawerHandle, DrawerTitle } from "@/components/ui/drawer";
 import { CITATION_HIGHLIGHT_CLASS } from "@/components/ui/search-highlight-node";
 import { Spinner } from "@/components/ui/spinner";
 import { useMediaQuery } from "@/hooks/use-media-query";
 import { useElectronAPI } from "@/hooks/use-platform";
 import { authenticatedFetch, getBearerToken, redirectToLogin } from "@/lib/auth-utils";
 import { buildCitationSearchCandidates } from "@/lib/citation-search";
 import { inferMonacoLanguageFromPath } from "@/lib/editor-language";
 const PlateEditor = dynamic(
@ -32,7 +37,10 @@ const PlateEditor = dynamic(
 	{ ssr: false, loading: () => <EditorPanelSkeleton /> }
 );
 type CitationHighlightStatus = "exact" | "miss";
 const LARGE_DOCUMENT_THRESHOLD = 2 * 1024 * 1024; // 2MB
 const CITATION_MAX_LENGTH = 16 * 1024 * 1024; // 16MB on-demand cap for citation jumps
 interface EditorContent {
 	document_id: number;
@ -136,6 +144,61 @@ export function EditorPanelContent({
 	const [displayTitle, setDisplayTitle] = useState(title || "Untitled");
 	const isLocalFileMode = kind === "local_file";
 	const editorRenderMode: EditorRenderMode = isLocalFileMode ? "source_code" : "rich_markdown";
 	// --- Citation-jump highlight wiring ----------------------------------
 	// `EditorPanelContent` is the consumer of `pendingChunkHighlightAtom`: when
 	// a citation badge is clicked, the badge stages `{documentId, chunkId,
 	// chunkText}` and opens this panel. We drive Plate's `FindReplacePlugin`
 	// (registered in every preset) to highlight the cited text natively via
 	// Slate decorations — no DOM walking, no Range gymnastics. The state
 	// machine below escalates the document fetch from 2MB → 16MB once if no
 	// candidate snippet matched in the preview, and surfaces miss outcomes
 	// via an inline alert.
 	const pending = useAtomValue(pendingChunkHighlightAtom);
 	const setPendingHighlight = useSetAtom(pendingChunkHighlightAtom);
 	const [fetchKey, setFetchKey] = useState(0);
 	const [maxLengthOverride, setMaxLengthOverride] = useState<number | null>(null);
 	const [highlightResult, setHighlightResult] = useState<CitationHighlightStatus | null>(null);
 	const editorRef = useRef<PlateEditorInstance | null>(null);
 	const escalatedForRef = useRef<number | null>(null);
 	const lastAppliedChunkIdRef = useRef<number | null>(null);
 	// Tracks whether a citation highlight is currently decorated in the
 	// editor. We use a ref (not state) because the click-to-dismiss handler
 	// runs in a stable callback that would otherwise close over stale state.
 	const isHighlightActiveRef = useRef(false);
 	// Once a citation jump targets this doc we have to keep `PlateEditor`
 	// mounted for the *rest of the doc session* — even after the highlight
 	// effect clears `pendingChunkHighlightAtom` (which it does as soon as
 	// the decoration is applied, so a follow-up citation on the same chunk
 	// can re-trigger). Without this latch, non-editable docs would re-render
 	// back into `MarkdownViewer` the instant `pending` is released, tearing
 	// down the Plate decorations and dropping the highlight after a frame.
 	const [stickyPlateMode, setStickyPlateMode] = useState(false);
 	// Drops the citation highlight: marks it inactive, then (when an editor
 	// instance is attached) empties the FindReplacePlugin search and
 	// redecorates. Failures are logged as warnings and otherwise ignored.
 	const clearCitationSearch = useCallback(() => {
 		isHighlightActiveRef.current = false;
 		const liveEditor = editorRef.current;
 		if (liveEditor) {
 			try {
 				liveEditor.setOption(FindReplacePlugin, "search", "");
 				liveEditor.api.redecorate();
 			} catch (err) {
 				console.warn("[EditorPanelContent] clearCitationSearch failed:", err);
 			}
 		}
 	}, []);
 	// Dismiss the highlight when the user interacts with the editor surface.
 	// `onPointerDown` fires before focus / selection changes so the click
 	// itself feels responsive — the highlight clears in the same event tick
 	// that places the cursor. No-op when nothing is highlighted, so we don't
 	// thrash `redecorate` on every click in normal editing.
 	const handleEditorPointerDown = useCallback(() => {
 		// Only act while a citation highlight is actually showing.
 		if (isHighlightActiveRef.current) {
 			clearCitationSearch();
 			setHighlightResult(null);
 		}
 	}, [clearCitationSearch]);
 	const isCitationTarget = !!pending && !isLocalFileMode && pending.documentId === documentId;
 	const resolveLocalVirtualPath = useCallback(
 		async (candidatePath: string): Promise<string> => {
 			if (!electronAPI?.getAgentFilesystemMounts) {
@ -155,6 +218,8 @@ export function EditorPanelContent({
 	const isLargeDocument = (editorDoc?.content_size_bytes ?? 0) > LARGE_DOCUMENT_THRESHOLD;
 	// `fetchKey` is an explicit re-fetch trigger (escalation bumps it to force
 	// a new request even when documentId/searchSpaceId haven't changed).
 	useEffect(() => {
 		const controller = new AbortController();
 		setIsLoading(true);
@ -166,6 +231,12 @@ export function EditorPanelContent({
 		setIsEditing(false);
 		initialLoadDone.current = false;
 		changeCountRef.current = 0;
 		// Clear any in-flight FindReplacePlugin search before the editor
 		// re-mounts on new content (a fresh editor key is generated below
 		// from documentId + isEditing, so the previous editor + its
 		// decorations are about to be discarded anyway, but we belt-and-
 		// brace here for the case where only `fetchKey` changed).
 		clearCitationSearch();
 		const doFetch = async () => {
 			try {
@ -210,7 +281,11 @@ export function EditorPanelContent({
 				const url = new URL(
 					`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/search-spaces/${searchSpaceId}/documents/${documentId}/editor-content`
 				);
-				url.searchParams.set("max_length", String(LARGE_DOCUMENT_THRESHOLD));
+				url.searchParams.set("max_length", String(maxLengthOverride ?? LARGE_DOCUMENT_THRESHOLD));
 				// `fetchKey` participates here so biome's noUnusedVariables sees it
 				// as consumed; bumping it forces a fresh request even when the URL
 				// is otherwise identical.
 				if (fetchKey > 0) url.searchParams.set("_n", String(fetchKey));
 				const response = await authenticatedFetch(url.toString(), { method: "GET" });
@ -256,8 +331,259 @@ export function EditorPanelContent({
 		resolveLocalVirtualPath,
 		searchSpaceId,
 		title,
 		fetchKey,
 		maxLengthOverride,
 		clearCitationSearch,
 	]);
 	// Reset citation-jump bookkeeping whenever the panel switches to a different
 	// document (or local file). Body only writes setters — the deps are the
 	// real triggers we want to react to.
 	// biome-ignore lint/correctness/useExhaustiveDependencies: documentId/localFilePath are intentional triggers.
 	useEffect(() => {
 		// Remove any decoration left over from the previous document.
 		clearCitationSearch();
 		// Forget per-chunk bookkeeping kept in refs.
 		lastAppliedChunkIdRef.current = null;
 		escalatedForRef.current = null;
 		// Sticky Plate mode does not carry over: the next doc starts in its
 		// preferred render mode (Plate for editable, MarkdownViewer for
 		// everything else) until/unless a citation jump targets it.
 		setStickyPlateMode(false);
 		// Back to the default fetch window and no highlight outcome.
 		setFetchKey(0);
 		setMaxLengthOverride(null);
 		setHighlightResult(null);
 	}, [documentId, localFilePath, clearCitationSearch]);
 	// Latch sticky Plate mode the first time a citation jump targets this
 	// doc. We keep it sticky for the remainder of this doc session so the
 	// highlight effect's `setPendingHighlight(null)` doesn't unmount the
 	// editor mid-flight (see comment on `stickyPlateMode` declaration).
 	useEffect(() => {
 		// Latch only; this effect never turns the mode off.
 		if (!isCitationTarget) return;
 		setStickyPlateMode(true);
 	}, [isCitationTarget]);
 	// `isEditorReady` is what `useEffect` actually depends on — `editorRef`
 	// is a ref so changes don't trigger re-runs. We flip this to `true` once
 	// `PlateEditor` calls back with its live editor instance (its
 	// `usePlateEditor` value-init runs synchronously, so by the time this
 	// flips true the markdown is already deserialized into the Slate tree).
 	const [isEditorReady, setIsEditorReady] = useState(false);
 	// Stores the editor instance handed over by `PlateEditor` (or null) in
 	// the ref and mirrors its presence into `isEditorReady` state.
 	const handleEditorReady = useCallback((editor: PlateEditorInstance | null) => {
 		const ready = !!editor;
 		console.log("[citation:editor] handleEditorReady", { ready });
 		editorRef.current = editor;
 		setIsEditorReady(ready);
 	}, []);
 	// --- Citation jump highlight effect -----------------------------------
 	// Drives Plate's FindReplacePlugin to highlight the cited chunk:
 	//   1. Build candidate snippets from the chunk text (first sentence,
 	//      first 8 words, full chunk if short). Plate's decorate runs per-
 	//      block and won't cross block boundaries, so the shorter
 	//      candidates exist to give us something that fits in one
 	//      paragraph / heading.
 	//   2. For each candidate: setOption('search', ...) → redecorate →
 	//      wait two animation frames for React to flush → query the editor
 	//      DOM for `.${CITATION_HIGHLIGHT_CLASS}`. First hit wins.
 	//
 	//      Why a className and not a `data-*` attribute? Plate's
 	//      `PlateLeaf` runs its props through `useNodeAttributes`, which
 	//      only forwards `attributes`, `className`, `ref`, and `style` —
 	//      arbitrary `data-*` attributes are silently dropped. `className`
 	//      is the only escape hatch guaranteed to survive into the DOM.
 	//   3. On hit: smooth-scroll the first match into view, mark the
 	//      highlight active (so a click inside the editor can dismiss it),
 	//      release the pending atom.
 	//   4. On terminal miss: if the doc was truncated and we haven't
 	//      escalated yet, bump the fetch's `max_length` to the citation
 	//      cap and re-fetch — the post-refetch render will re-run this
 	//      effect against the larger preview. Otherwise, release the
 	//      atom and show the miss alert.
 	useEffect(() => {
 		console.log("[citation:effect] fired", {
 			isCitationTarget,
 			pendingDocId: pending?.documentId,
 			pendingChunkId: pending?.chunkId,
 			pendingChunkTextLen: pending?.chunkText?.length,
 			documentId,
 			isLocalFileMode,
 			isEditing,
 			hasMarkdown: !!editorDoc?.source_markdown,
 			markdownLen: editorDoc?.source_markdown?.length,
 			truncated: editorDoc?.truncated,
 			isEditorReady,
 			editorRefSet: !!editorRef.current,
 			maxLengthOverride,
 		});
 		if (!isCitationTarget || !pending) {
 			console.log("[citation:effect] guard ✗ no citation target / no pending");
 			return;
 		}
 		if (isLocalFileMode || isEditing) {
 			console.log("[citation:effect] guard ✗ localFileMode/editing");
 			return;
 		}
 		if (!editorDoc?.source_markdown) {
 			console.log("[citation:effect] guard ✗ source_markdown not ready");
 			return;
 		}
 		if (!isEditorReady) {
 			console.log("[citation:effect] guard ✗ editor not ready yet");
 			return;
 		}
 		const editor = editorRef.current;
 		if (!editor) {
 			console.log("[citation:effect] guard ✗ editorRef.current is null");
 			return;
 		}
 		if (lastAppliedChunkIdRef.current !== pending.chunkId) {
 			lastAppliedChunkIdRef.current = pending.chunkId;
 		}
 		let cancelled = false;
 		// Terminal "no candidate matched" handling: reset the search, then
 		// either escalate the fetch once per chunk or report the miss.
 		const finishMiss = () => {
 			console.log("[citation:effect] terminal miss — no candidate matched");
 			try {
 				editor.setOption(FindReplacePlugin, "search", "");
 				editor.api.redecorate();
 			} catch (err) {
 				console.warn("[EditorPanelContent] reset search after miss failed:", err);
 			}
 			const currentMaxLength = maxLengthOverride ?? LARGE_DOCUMENT_THRESHOLD;
 			const alreadyEscalated = escalatedForRef.current === pending.chunkId;
 			const canEscalate =
 				editorDoc.truncated === true &&
 				currentMaxLength < CITATION_MAX_LENGTH &&
 				!alreadyEscalated;
 			console.log("[citation:effect] miss decision", {
 				truncated: editorDoc.truncated,
 				currentMaxLength,
 				canEscalate,
 			});
 			if (!canEscalate) {
 				setHighlightResult("miss");
 				setPendingHighlight(null);
 				return;
 			}
 			// Escalate: the atom stays set so the post-refetch render re-runs.
 			escalatedForRef.current = pending.chunkId;
 			setMaxLengthOverride(CITATION_MAX_LENGTH);
 			setFetchKey((k) => k + 1);
 		};
 		// Tries each candidate snippet in order: set it as the
 		// FindReplacePlugin search, redecorate, wait two animation frames,
 		// then probe the DOM for a rendered highlight. The first candidate
 		// that yields an element wins; if none does, `finishMiss` runs.
 		// Every await is followed by a `cancelled` check so a superseded
 		// effect run stops touching state.
 		const tryCandidates = async () => {
 			const candidates = buildCitationSearchCandidates(pending.chunkText);
 			console.log("[citation:effect] candidates built", {
 				count: candidates.length,
 				previews: candidates.map((c) => c.slice(0, 60)),
 			});
 			if (candidates.length === 0) {
 				if (!cancelled) finishMiss();
 				return;
 			}
 			// Resolve the editor's rendered DOM root via Slate's stable
 			// `[data-slate-editor="true"]` attribute (set by slate-react's
 			// `<Editable>`). Scoping queries to this root prevents
 			// `<mark>` elements rendered elsewhere on the page (e.g. chat
 			// search-highlight leaves in another mounted PlateEditor) from
 			// being mistaken for citation hits.
 			// NOTE(review): `document.querySelector` returns the FIRST
 			// matching element in the whole document. If more than one
 			// Slate editor is mounted, this may not be this panel's editor
 			// — confirm, or resolve the root from the editor instance.
 			const editorRoot = document.querySelector<HTMLElement>('[data-slate-editor="true"]');
 			console.log("[citation:effect] editor root", {
 				hasRoot: !!editorRoot,
 			});
 			// Falls back to the whole document when no editor root is found.
 			const root: ParentNode = editorRoot ?? document;
 			for (let i = 0; i < candidates.length; i++) {
 				const candidate = candidates[i];
 				if (cancelled) return;
 				try {
 					editor.setOption(FindReplacePlugin, "search", candidate);
 					editor.api.redecorate();
 					console.log(`[citation:effect] try #${i} setOption + redecorate`, {
 						len: candidate.length,
 						preview: candidate.slice(0, 80),
 					});
 				} catch (err) {
 					console.warn("[EditorPanelContent] setOption/redecorate failed:", err);
 					// Skip this candidate and move on to the next one.
 					continue;
 				}
 				// Two rAFs: first lets Slate flush its onChange, second lets
 				// React commit the decoration leaves into the DOM.
 				await new Promise<void>((resolve) =>
 					requestAnimationFrame(() => requestAnimationFrame(() => resolve()))
 				);
 				if (cancelled) return;
 				// Primary probe: by our stable class on the rendered <mark>.
 				let el = root.querySelector<HTMLElement>(`.${CITATION_HIGHLIGHT_CLASS}`);
 				const classMarkCount = root.querySelectorAll(`.${CITATION_HIGHLIGHT_CLASS}`).length;
 				// Diagnostic fallback: any <mark> inside the editor root.
 				// If we ever see allMarks > 0 but classMarkCount === 0,
 				// the className was stripped again and we need to revisit
 				// `useNodeAttributes` filtering.
 				const allMarkCount = root.querySelectorAll("mark").length;
 				if (!el && allMarkCount > 0) {
 					el = root.querySelector<HTMLElement>("mark");
 				}
 				console.log(`[citation:effect] try #${i} DOM probe`, {
 					foundEl: !!el,
 					classMarkCount,
 					allMarkCount,
 					usedFallback: !!el && classMarkCount === 0,
 				});
 				if (el) {
 					// Retry with a plain scroll if the options form throws.
 					try {
 						el.scrollIntoView({ block: "center", behavior: "smooth" });
 					} catch {
 						el.scrollIntoView();
 					}
 					isHighlightActiveRef.current = true;
 					setHighlightResult("exact");
 					console.log(`[citation:effect] ✓ exact via candidate #${i} — atom released`);
 					// No auto-clear timer — the highlight is intentionally
 					// permanent until the user clicks inside the editor (see
 					// `handleEditorPointerDown`) or another dismissal trigger
 					// fires (doc switch, edit-mode toggle, panel unmount,
 					// next citation jump). Sticky Plate mode keeps the
 					// editor mounted after the atom clears.
 					setPendingHighlight(null);
 					return;
 				}
 			}
 			// Every candidate was tried without producing a highlight.
 			if (!cancelled) finishMiss();
 		};
 		void tryCandidates();
 		return () => {
 			cancelled = true;
 		};
 	}, [
 		isCitationTarget,
 		pending,
 		documentId,
 		editorDoc?.source_markdown,
 		editorDoc?.truncated,
 		isLocalFileMode,
 		isEditing,
 		isEditorReady,
 		maxLengthOverride,
 		clearCitationSearch,
 		setPendingHighlight,
 	]);
 	// Cleanup any active highlight on unmount.
 	useEffect(() => {
 		// Nothing to do on mount; the returned teardown clears the highlight.
 		return () => {
 			clearCitationSearch();
 		};
 	}, [clearCitationSearch]);
 	// Toggling into edit mode swaps Plate out of readOnly. Clear the citation
 	// search so stale leaves don't linger in the editing surface.
 	useEffect(() => {
 		// Leaving edit mode needs no cleanup; only entering it does.
 		if (!isEditing) return;
 		clearCitationSearch();
 		setHighlightResult(null);
 	}, [isEditing, clearCitationSearch]);
 	useEffect(() => {
 		return () => {
 			if (copyResetTimeoutRef.current) {
@ -367,6 +693,15 @@ export function EditorPanelContent({
 				EDITABLE_DOCUMENT_TYPES.has(editorDoc.document_type ?? "")) &&
 			!isLargeDocument
 		: false;
 	// Use PlateEditor for any of:
 	//   - Editable doc types (FILE/NOTE) — existing editing UX.
 	//   - Active citation jump in flight (`isCitationTarget`) — covers the
 	//     mount in the very first render where the atom is set but the
 	//     sticky effect hasn't fired yet.
 	//   - Sticky Plate mode latched on a previous citation jump — keeps
 	//     the editor mounted (with its decorations) after the highlight
 	//     effect clears the atom. Resets when the doc changes.
 	//
 	// NOTE(review): in the render chain the large-document preview branch
 	// (`isLargeDocument && !isLocalFileMode && !isCitationTarget`) is tested
 	// before this flag, so for large docs `stickyPlateMode` alone does not
 	// appear to keep Plate mounted once `isCitationTarget` goes false —
 	// confirm that is intended.
 	const renderInPlateEditor = isEditableType || isCitationTarget || stickyPlateMode;
 	const hasUnsavedChanges = editedMarkdown !== null;
 	const showDesktopHeader = !!onClose;
 	const showEditingActions = isEditableType && isEditing;
@ -381,6 +716,90 @@ export function EditorPanelContent({
 		setIsEditing(false);
 	}, [editorDoc?.source_markdown]);
 	/**
 	 * Fetch the current document's markdown from the backend
 	 * `download-markdown` endpoint and hand it to the browser as a file
 	 * download via a temporary object URL and a synthetic `<a>` click.
 	 *
 	 * Does nothing when `searchSpaceId` or `documentId` is missing. Any
 	 * failure (non-OK response, rejected fetch/blob) is reported with an
 	 * error toast; the `downloading` flag is always reset in `finally`.
 	 */
 	const handleDownloadMarkdown = useCallback(async () => {
 		if (!searchSpaceId || !documentId) return;
 		setDownloading(true);
 		try {
 			const response = await authenticatedFetch(
 				`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/search-spaces/${searchSpaceId}/documents/${documentId}/download-markdown`,
 				{ method: "GET" }
 			);
 			if (!response.ok) throw new Error("Download failed");
 			const blob = await response.blob();
 			const url = URL.createObjectURL(blob);
 			const a = document.createElement("a");
 			a.href = url;
 			const disposition = response.headers.get("content-disposition");
 			// `[^"]+` instead of a greedy `.+`: stop at the closing quote of the
 			// filename so any further quoted parameter in the header cannot
 			// bleed into the suggested file name.
 			const match = disposition?.match(/filename="([^"]+)"/);
 			// Fall back to the document title when the header carries no name.
 			a.download = match?.[1] ?? `${editorDoc?.title || "document"}.md`;
 			document.body.appendChild(a);
 			a.click();
 			a.remove();
 			URL.revokeObjectURL(url);
 			toast.success("Download started");
 		} catch {
 			toast.error("Failed to download document");
 		} finally {
 			setDownloading(false);
 		}
 	}, [documentId, editorDoc?.title, searchSpaceId]);
 	// We no longer surface an "approximate" status — Plate's FindReplacePlugin
 	// either decorates an exact match or it doesn't, and the candidate snippet
 	// strategy (first sentence → first 8 words → full chunk) means we either
 	// land on the citation start or fall through to the miss alert.
 	//
 	// `showMissAlert` is true only while a citation jump targets this panel
 	// AND the highlight attempt reported "miss".
 	const showMissAlert = isCitationTarget && highlightResult === "miss";
 	// Destructive alert rendered when the cited text could not be found.
 	// Evaluates to `false` when there is nothing to show, so callers can use
 	// it both as a JSX child and in a truthiness check. The "Download .md"
 	// button is offered only when the loaded document is flagged `truncated`.
 	const citationAlerts = showMissAlert && (
 		<Alert variant="destructive" className="mb-4">
 			<FileQuestionMark className="size-4" />
 			<AlertDescription className="flex items-center justify-between gap-4">
 				<span>Cited section couldn&apos;t be located in this view.</span>
 				{editorDoc?.truncated && (
 					<Button
 						variant="outline"
 						size="sm"
 						className="relative shrink-0"
 						disabled={downloading}
 						onClick={handleDownloadMarkdown}
 					>
 						<span className={`flex items-center gap-1.5 ${downloading ? "opacity-0" : ""}`}>
 							<Download className="size-3.5" />
 							Download .md
 						</span>
 						{downloading && <Spinner size="sm" className="absolute" />}
 					</Button>
 				)}
 			</AlertDescription>
 		</Alert>
 	);
 	// Informational banner for documents too large for the editor. Only built
 	// for non-local documents once `editorDoc` is loaded; otherwise the
 	// expression short-circuits to a falsy value and renders nothing. Shows
 	// the size rounded to whole MB plus the chunk count, and a "Download .md"
 	// button wired to `handleDownloadMarkdown` (label hidden and spinner
 	// overlaid while `downloading` is true).
 	const largeDocAlert = isLargeDocument && !isLocalFileMode && editorDoc && (
 		<Alert className="mb-4">
 			<FileText className="size-4" />
 			<AlertDescription className="flex items-center justify-between gap-4">
 				<span>
 					This document is too large for the editor (
 					{Math.round((editorDoc.content_size_bytes ?? 0) / 1024 / 1024)}MB,{" "}
 					{editorDoc.chunk_count ?? 0} chunks). Showing a preview below.
 				</span>
 				<Button
 					variant="outline"
 					size="sm"
 					className="relative shrink-0"
 					disabled={downloading}
 					onClick={handleDownloadMarkdown}
 				>
 					<span className={`flex items-center gap-1.5 ${downloading ? "opacity-0" : ""}`}>
 						<Download className="size-3.5" />
 						Download .md
 					</span>
 					{downloading && <Spinner size="sm" className="absolute" />}
 				</Button>
 			</AlertDescription>
 		</Alert>
 	);
 	return (
 		<>
 			{showDesktopHeader ? (
@ -565,61 +984,6 @@ export function EditorPanelContent({
 							</p>
 						</div>
 					</div>
 				) : isLargeDocument && !isLocalFileMode ? (
 					<div className="h-full overflow-y-auto px-5 py-4">
 						<Alert className="mb-4">
 							<FileText className="size-4" />
 							<AlertDescription className="flex items-center justify-between gap-4">
 								<span>
 									This document is too large for the editor (
 									{Math.round((editorDoc.content_size_bytes ?? 0) / 1024 / 1024)}MB,{" "}
 									{editorDoc.chunk_count ?? 0} chunks). Showing a preview below.
 								</span>
 								<Button
 									variant="outline"
 									size="sm"
 									className="relative shrink-0"
 									disabled={downloading}
 									onClick={async () => {
 										setDownloading(true);
 										try {
 											if (!searchSpaceId || !documentId) {
 												throw new Error("Missing document context");
 											}
 											const response = await authenticatedFetch(
 												`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/search-spaces/${searchSpaceId}/documents/${documentId}/download-markdown`,
 												{ method: "GET" }
 											);
 											if (!response.ok) throw new Error("Download failed");
 											const blob = await response.blob();
 											const url = URL.createObjectURL(blob);
 											const a = document.createElement("a");
 											a.href = url;
 											const disposition = response.headers.get("content-disposition");
 											const match = disposition?.match(/filename="(.+)"/);
 											a.download = match?.[1] ?? `${editorDoc.title || "document"}.md`;
 											document.body.appendChild(a);
 											a.click();
 											a.remove();
 											URL.revokeObjectURL(url);
 											toast.success("Download started");
 										} catch {
 											toast.error("Failed to download document");
 										} finally {
 											setDownloading(false);
 										}
 									}}
 								>
 									<span className={`flex items-center gap-1.5 ${downloading ? "opacity-0" : ""}`}>
 										<Download className="size-3.5" />
 										Download .md
 									</span>
 									{downloading && <Spinner size="sm" className="absolute" />}
 								</Button>
 							</AlertDescription>
 						</Alert>
 						<MarkdownViewer content={editorDoc.source_markdown} />
 					</div>
 				) : editorRenderMode === "source_code" ? (
 					<div className="h-full overflow-hidden">
 						<SourceCodeEditor
@ -638,7 +1002,30 @@ export function EditorPanelContent({
 							}}
 						/>
 					</div>
-				) : isEditableType ? (
+				) : isLargeDocument && !isLocalFileMode && !isCitationTarget ? (
 					// Large doc, no active citation — fast Streamdown preview
 					// + download CTA. We only fall back to MarkdownViewer here
 					// because Plate is heavy on multi-MB docs and the user
 					// isn't waiting on a specific citation to render.
 					<div className="h-full overflow-y-auto px-5 py-4">
 						{largeDocAlert}
 						<MarkdownViewer content={editorDoc.source_markdown} />
 					</div>
 				) : renderInPlateEditor ? (
 					// Editable doc (FILE/NOTE) OR active citation jump (any
 					// doc type). The citation path uses Plate's
 					// FindReplacePlugin for native, decoration-based
 					// highlighting — see the citation-jump highlight effect
 					// above for how `editorRef` and `handleEditorReady` are
 					// wired.
 					<div className="flex h-full min-h-0 flex-col">
 						{(citationAlerts || (isLargeDocument && isCitationTarget && !isLocalFileMode)) && (
 							<div className="shrink-0 px-5 pt-4">
 								{isLargeDocument && isCitationTarget && largeDocAlert}
 								{citationAlerts}
 							</div>
 						)}
 						<div className="flex-1 min-h-0 overflow-hidden" onPointerDown={handleEditorPointerDown}>
 							<PlateEditor
 								key={`${isLocalFileMode ? (localFilePath ?? "local-file") : documentId}-${isEditing ? "editing" : "viewing"}`}
 								preset="full"
@ -651,7 +1038,10 @@ export function EditorPanelContent({
 								reserveToolbarSpace
 								defaultEditing={isEditing}
 								className="[&_[role=toolbar]]:!bg-sidebar"
 								onEditorReady={handleEditorReady}
 							/>
 						</div>
 					</div>
 				) : (
 					<div className="h-full overflow-y-auto px-5 py-4">
 						<MarkdownViewer content={editorDoc.source_markdown} />
--- a/surfsense_web/components/editor/plate-editor.tsx
+++ b/surfsense_web/components/editor/plate-editor.tsx
@ -12,6 +12,12 @@ import { type EditorPreset, presetMap } from "@/components/editor/presets";
 import { escapeMdxExpressions } from "@/components/editor/utils/escape-mdx";
 import { Editor, EditorContainer } from "@/components/ui/editor";
 /** Live editor instance returned by `usePlateEditor`. Exposed via the
 * `onEditorReady` prop so callers (e.g. `EditorPanelContent`) can drive
 * plugin options imperatively — most notably setting
 * `FindReplacePlugin`'s `search` option for citation-jump highlights. */
 export type PlateEditorInstance = ReturnType<typeof usePlateEditor>;
 export interface PlateEditorProps {
 	/** Markdown string to load as initial content */
 	markdown?: string;
@ -62,6 +68,15 @@ export interface PlateEditorProps {
 	 * without modifying the core editor component.
 	 */
 	extraPlugins?: AnyPluginConfig[];
 	/**
 	 * Called whenever the live editor instance (re)mounts, with `null` on
 	 * unmount. Used by callers that need to drive plugin options imperatively
 	 * — e.g. `EditorPanelContent` setting `FindReplacePlugin`'s `search`
 	 * option for citation-jump highlights. The callback is invoked exactly
 	 * once per editor lifetime (the parent's `key` prop forces a fresh
 	 * editor when needed, e.g. on edit-mode toggle).
 	 */
 	onEditorReady?: (editor: PlateEditorInstance | null) => void;
 }
 function PlateEditorContent({
@ -100,6 +115,7 @@ export function PlateEditor({
 	defaultEditing = false,
 	preset = "full",
 	extraPlugins = [],
 	onEditorReady,
 }: PlateEditorProps) {
 	const lastMarkdownRef = useRef(markdown);
 	const lastHtmlRef = useRef(html);
@ -156,6 +172,21 @@ export function PlateEditor({
 				: undefined,
 	});
 	// Expose the live editor instance to imperative callers (e.g. citation
 	// jump highlights). The latest `onEditorReady` is mirrored into a ref so
 	// the announce effect below depends only on `editor`: a parent that
 	// passes a freshly-bound callback on every render does NOT cause the
 	// callback to re-fire. The cleanup reads the ref when it runs, so it
 	// notifies whichever callback is current at that time with `null`.
 	const onEditorReadyRef = useRef(onEditorReady);
 	// Keep the ref pointing at the most recent callback prop.
 	useEffect(() => {
 		onEditorReadyRef.current = onEditorReady;
 	}, [onEditorReady]);
 	// Announce the editor when it becomes available; announce `null` when this
 	// effect is torn down (editor identity change or unmount).
 	useEffect(() => {
 		onEditorReadyRef.current?.(editor);
 		return () => onEditorReadyRef.current?.(null);
 	}, [editor]);
 	// Update editor content when html prop changes externally
 	useEffect(() => {
 		if (html !== undefined && html !== lastHtmlRef.current) {
--- a/surfsense_web/components/editor/presets.ts
+++ b/surfsense_web/components/editor/presets.ts
@ -1,5 +1,6 @@
 "use client";
 import { FindReplacePlugin } from "@platejs/find-replace";
 import type { AnyPluginConfig } from "platejs";
 import { TrailingBlockPlugin } from "platejs";
@ -17,6 +18,30 @@ import { SelectionKit } from "@/components/editor/plugins/selection-kit";
 import { SlashCommandKit } from "@/components/editor/plugins/slash-command-kit";
 import { TableKit } from "@/components/editor/plugins/table-kit";
 import { ToggleKit } from "@/components/editor/plugins/toggle-kit";
 import { SearchHighlightLeaf } from "@/components/ui/search-highlight-node";
 /**
 * Citation-jump highlighter. Re-uses Plate's built-in `FindReplacePlugin`
 * (decorate-only, no editing surface) to drive the "scroll-to-cited-text"
 * UX in `EditorPanelContent`. We register it in every preset because:
 *   - Decorate is a no-op when `search` is empty (single getOptions() check
 *     per block), so cost is effectively zero for non-citation viewers.
 *   - Keeping it preset-agnostic means citations work whether the doc is
 *     opened in editable (`full`) or pure-viewer (`readonly`) modes.
 *
 * The parent component drives `setOption(FindReplacePlugin, 'search', ...)`
 * + `editor.api.redecorate()` to trigger highlights, then queries the
 * editor DOM for `.citation-highlight-leaf` to scroll the first match
 * into view. (We can't use a `data-*` attribute here — Plate's
 * `PlateLeaf` runs props through `useNodeAttributes`, which only forwards
 * `attributes`, `className`, `ref`, `style`; arbitrary `data-*` props are
 * silently dropped.) See `components/ui/search-highlight-node.tsx` for
 * the leaf component and `CITATION_HIGHLIGHT_CLASS` constant.
 *
 * The configuration below starts with an empty `search` string and
 * renders matched leaves with `SearchHighlightLeaf`.
 */
 const CitationFindReplacePlugin = FindReplacePlugin.configure({
 	options: { search: "" },
 	render: { node: SearchHighlightLeaf },
 });
 /**
 * Full preset – every plugin kit enabled.
@ -38,6 +63,7 @@ export const fullPreset: AnyPluginConfig[] = [
 	...AutoformatKit,
 	...DndKit,
 	TrailingBlockPlugin,
 	CitationFindReplacePlugin,
 ];
 /**
@ -52,6 +78,7 @@ export const minimalPreset: AnyPluginConfig[] = [
 	...LinkKit,
 	...AutoformatKit,
 	TrailingBlockPlugin,
 	CitationFindReplacePlugin,
 ];
 /**
@ -68,6 +95,7 @@ export const readonlyPreset: AnyPluginConfig[] = [
 	...CalloutKit,
 	...ToggleKit,
 	...MathKit,
 	CitationFindReplacePlugin,
 ];
 /** All available preset names */
--- a/surfsense_web/components/new-chat/source-detail-panel.tsx
+++ b/surfsense_web/components/new-chat/source-detail-panel.tsx
@ -1,719 +0,0 @@
 "use client";
 import { useQuery } from "@tanstack/react-query";
 import {
 	BookOpen,
 	ChevronDown,
 	ChevronUp,
 	ExternalLink,
 	FileQuestionMark,
 	FileText,
 	Hash,
 	Loader2,
 	Sparkles,
 	X,
 } from "lucide-react";
 import { AnimatePresence, motion, useReducedMotion } from "motion/react";
 import { useTranslations } from "next-intl";
 import type React from "react";
 import { forwardRef, memo, type ReactNode, useCallback, useEffect, useRef, useState } from "react";
 import { createPortal } from "react-dom";
 import { MarkdownViewer } from "@/components/markdown-viewer";
 import { Badge } from "@/components/ui/badge";
 import { Button } from "@/components/ui/button";
 import { ScrollArea } from "@/components/ui/scroll-area";
 import { Spinner } from "@/components/ui/spinner";
 import type {
 	GetDocumentByChunkResponse,
 	GetSurfsenseDocsByChunkResponse,
 } from "@/contracts/types/document.types";
 import { documentsApiService } from "@/lib/apis/documents-api.service";
 import { cacheKeys } from "@/lib/query-client/cache-keys";
 import { cn } from "@/lib/utils";
 type DocumentData = GetDocumentByChunkResponse | GetSurfsenseDocsByChunkResponse;
 /** Props accepted by `SourceDetailPanel`. */
 interface SourceDetailPanelProps {
 	/** Whether the panel is shown; the document query is also gated on it. */
 	open: boolean;
 	/** Called with `false` on Escape, backdrop click, or a close button. */
 	onOpenChange: (open: boolean) => void;
 	/** Id of the cited chunk; used to fetch the document and mark the cited chunk. */
 	chunkId: number;
 	/**
 	 * Source/connector type. `TAVILY_API`, `LINKUP_API`, `SEARXNG_API` and
 	 * `BAIDU_SEARCH_API` are rendered directly from `title`/`description`
 	 * instead of the fetched document view.
 	 */
 	sourceType: string;
 	/** Fallback heading when the fetched document has no title. */
 	title: string;
 	/** Body text shown for direct-render (web search) sources. */
 	description?: string;
 	/** When set, an "open source" button opens it in a new tab. */
 	url?: string;
 	// NOTE(review): `children` is destructured by the component but no use
 	// is visible in this portion of the file — confirm where it is rendered.
 	children?: ReactNode;
 	/** Fetch via `getSurfsenseDocByChunk` instead of `getDocumentByChunk`. Defaults to `false`. */
 	isDocsChunk?: boolean;
 }
 /**
 * Turn an underscore-separated type identifier (e.g. `GOOGLE_DRIVE_FILE`)
 * into a human-readable, title-cased label (`Google Drive File`).
 *
 * Returns an empty string for empty input. Each word is normalized to
 * "first letter upper-case, rest lower-case" regardless of the input's
 * casing, so `crawled_url` and `CRAWLED_URL` both yield `Crawled Url`.
 */
 const formatDocumentType = (type: string) => {
 	if (!type) return "";
 	return type
 		.split("_")
 		// Upper-case the initial explicitly; previously it was passed through
 		// untouched, so lower/mixed-case input produced uneven labels.
 		.map((word) => word.charAt(0).toUpperCase() + word.slice(1).toLowerCase())
 		.join(" ");
 };
 // Chunk card component: renders one document chunk as a bordered card with
 // a numbered header ("Chunk N of M"), a "Cited Source" badge and accent
 // styling when `isCited`, and the chunk body through `MarkdownViewer`.
 // The forwarded ref and `data-chunk-index` attribute are attached to the
 // card's root element.
 //
 // NOTE(review): `isActive` and `disableLayoutAnimation` are declared on the
 // props but are not read by the component below. The earlier remark that
 // animation is disabled for large documents (>30 chunks) to avoid layout
 // shifts breaking auto-scroll is not implemented in this block — confirm
 // whether these props are still needed.
 interface ChunkCardProps {
 	chunk: { id: number; content: string };
 	localIndex: number;
 	chunkNumber: number;
 	totalChunks: number;
 	isCited: boolean;
 	isActive: boolean;
 	disableLayoutAnimation?: boolean;
 }
 const ChunkCard = memo(
 	forwardRef<HTMLDivElement, ChunkCardProps>(
 		({ chunk, localIndex, chunkNumber, totalChunks, isCited }, ref) => {
 			return (
 				<div
 					ref={ref}
 					data-chunk-index={localIndex}
 					className={cn(
 						"group relative rounded-2xl border-2 transition-all duration-300",
 						isCited
 							? "bg-linear-to-br from-primary/5 via-primary/10 to-primary/5 border-primary shadow-lg shadow-primary/10"
 							: "bg-card border-border/50 hover:border-border hover:shadow-md"
 					)}
 				>
 					{isCited && <div className="absolute inset-0 rounded-2xl bg-primary/5 blur-xl -z-10" />}
 					<div className="flex items-center justify-between px-5 py-4 border-b border-border/50">
 						<div className="flex items-center gap-3">
 							<div
 								className={cn(
 									"flex items-center justify-center w-8 h-8 rounded-full text-sm font-semibold transition-colors",
 									isCited
 										? "bg-primary text-primary-foreground"
 										: "bg-muted text-muted-foreground group-hover:bg-muted/80"
 								)}
 							>
 								{chunkNumber}
 							</div>
 							<span className="text-sm text-muted-foreground">
 								Chunk {chunkNumber} of {totalChunks}
 							</span>
 						</div>
 						{isCited && (
 							<Badge variant="default" className="gap-1.5 px-3 py-1">
 								<Sparkles className="h-3 w-3" />
 								Cited Source
 							</Badge>
 						)}
 					</div>
 					<div className="p-5 overflow-hidden">
 						<MarkdownViewer content={chunk.content} maxLength={100_000} />
 					</div>
 				</div>
 			);
 		}
 	)
 );
 // Explicit display name for the memo/forwardRef-wrapped component.
 ChunkCard.displayName = "ChunkCard";
 export function SourceDetailPanel({
 	open,
 	onOpenChange,
 	chunkId,
 	sourceType,
 	title,
 	description,
 	url,
 	children,
 	isDocsChunk = false,
 }: SourceDetailPanelProps) {
 	const t = useTranslations("dashboard");
 	const scrollAreaRef = useRef<HTMLDivElement>(null);
 	const hasScrolledRef = useRef(false); // Use ref to avoid stale closures
 	const scrollTimersRef = useRef<ReturnType<typeof setTimeout>[]>([]);
 	const [activeChunkIndex, setActiveChunkIndex] = useState<number | null>(null);
 	const [mounted, setMounted] = useState(false);
 	const shouldReduceMotion = useReducedMotion();
 	useEffect(() => {
 		setMounted(true);
 	}, []);
 	const {
 		data: documentData,
 		isLoading: isDocumentByChunkFetching,
 		error: documentByChunkFetchingError,
 	} = useQuery<DocumentData>({
 		queryKey: isDocsChunk
 			? cacheKeys.documents.byChunk(`doc-${chunkId}`)
 			: cacheKeys.documents.byChunk(chunkId.toString()),
 		queryFn: async () => {
 			if (isDocsChunk) {
 				return documentsApiService.getSurfsenseDocByChunk(chunkId);
 			}
 			return documentsApiService.getDocumentByChunk({ chunk_id: chunkId, chunk_window: 5 });
 		},
 		enabled: !!chunkId && open,
 		staleTime: 5 * 60 * 1000,
 	});
 	const totalChunks =
 		documentData && "total_chunks" in documentData
 			? (documentData.total_chunks ?? documentData.chunks.length)
 			: (documentData?.chunks?.length ?? 0);
 	const [beforeChunks, setBeforeChunks] = useState<
 		Array<{ id: number; content: string; created_at: string }>
 	>([]);
 	const [afterChunks, setAfterChunks] = useState<
 		Array<{ id: number; content: string; created_at: string }>
 	>([]);
 	const [loadingBefore, setLoadingBefore] = useState(false);
 	const [loadingAfter, setLoadingAfter] = useState(false);
 	useEffect(() => {
 		setBeforeChunks([]);
 		setAfterChunks([]);
 	}, [chunkId, open]);
 	const chunkStartIndex =
 		documentData && "chunk_start_index" in documentData ? (documentData.chunk_start_index ?? 0) : 0;
 	const initialChunks = documentData?.chunks ?? [];
 	const allChunks = [...beforeChunks, ...initialChunks, ...afterChunks];
 	const absoluteStart = chunkStartIndex - beforeChunks.length;
 	const absoluteEnd = chunkStartIndex + initialChunks.length + afterChunks.length;
 	const canLoadBefore = absoluteStart > 0;
 	const canLoadAfter = absoluteEnd < totalChunks;
 	const EXPAND_SIZE = 10;
 	const loadBefore = useCallback(async () => {
 		if (!documentData || !("search_space_id" in documentData) || !canLoadBefore) return;
 		setLoadingBefore(true);
 		try {
 			const count = Math.min(EXPAND_SIZE, absoluteStart);
 			const result = await documentsApiService.getDocumentChunks({
 				document_id: documentData.id,
 				page: 0,
 				page_size: count,
 				start_offset: absoluteStart - count,
 			});
 			const existingIds = new Set(allChunks.map((c) => c.id));
 			const newChunks = result.items
 				.filter((c) => !existingIds.has(c.id))
 				.map((c) => ({ id: c.id, content: c.content, created_at: c.created_at }));
 			setBeforeChunks((prev) => [...newChunks, ...prev]);
 		} catch (err) {
 			console.error("Failed to load earlier chunks:", err);
 		} finally {
 			setLoadingBefore(false);
 		}
 	}, [documentData, absoluteStart, canLoadBefore, allChunks]);
 	const loadAfter = useCallback(async () => {
 		if (!documentData || !("search_space_id" in documentData) || !canLoadAfter) return;
 		setLoadingAfter(true);
 		try {
 			const result = await documentsApiService.getDocumentChunks({
 				document_id: documentData.id,
 				page: 0,
 				page_size: EXPAND_SIZE,
 				start_offset: absoluteEnd,
 			});
 			const existingIds = new Set(allChunks.map((c) => c.id));
 			const newChunks = result.items
 				.filter((c) => !existingIds.has(c.id))
 				.map((c) => ({ id: c.id, content: c.content, created_at: c.created_at }));
 			setAfterChunks((prev) => [...prev, ...newChunks]);
 		} catch (err) {
 			console.error("Failed to load later chunks:", err);
 		} finally {
 			setLoadingAfter(false);
 		}
 	}, [documentData, absoluteEnd, canLoadAfter, allChunks]);
 	const isDirectRenderSource =
 		sourceType === "TAVILY_API" ||
 		sourceType === "LINKUP_API" ||
 		sourceType === "SEARXNG_API" ||
 		sourceType === "BAIDU_SEARCH_API";
 	const citedChunkIndex = allChunks.findIndex((chunk) => chunk.id === chunkId);
 	// Simple scroll function that scrolls to a chunk by index
 	const scrollToChunkByIndex = useCallback(
 		(chunkIndex: number, smooth = true) => {
 			const scrollContainer = scrollAreaRef.current;
 			if (!scrollContainer) return;
 			const viewport = scrollContainer.querySelector(
 				"[data-radix-scroll-area-viewport]"
 			) as HTMLElement | null;
 			if (!viewport) return;
 			const chunkElement = scrollContainer.querySelector(
 				`[data-chunk-index="${chunkIndex}"]`
 			) as HTMLElement | null;
 			if (!chunkElement) return;
 			// Get positions using getBoundingClientRect for accuracy
 			const viewportRect = viewport.getBoundingClientRect();
 			const chunkRect = chunkElement.getBoundingClientRect();
 			// Calculate where to scroll to center the chunk
 			const currentScrollTop = viewport.scrollTop;
 			const chunkTopRelativeToViewport = chunkRect.top - viewportRect.top + currentScrollTop;
 			const scrollTarget =
 				chunkTopRelativeToViewport - viewportRect.height / 2 + chunkRect.height / 2;
 			viewport.scrollTo({
 				top: Math.max(0, scrollTarget),
 				behavior: smooth && !shouldReduceMotion ? "smooth" : "auto",
 			});
 			setActiveChunkIndex(chunkIndex);
 		},
 		[shouldReduceMotion]
 	);
 	// Callback ref for the cited chunk - scrolls when the element mounts
 	const citedChunkRefCallback = useCallback(
 		(node: HTMLDivElement | null) => {
 			if (node && !hasScrolledRef.current && open) {
 				hasScrolledRef.current = true; // Mark immediately to prevent duplicate scrolls
 				// Store the node reference for the delayed scroll
 				const scrollToCitedChunk = () => {
 					const scrollContainer = scrollAreaRef.current;
 					if (!scrollContainer || !node.isConnected) return false;
 					const viewport = scrollContainer.querySelector(
 						"[data-radix-scroll-area-viewport]"
 					) as HTMLElement | null;
 					if (!viewport) return false;
 					// Get positions
 					const viewportRect = viewport.getBoundingClientRect();
 					const chunkRect = node.getBoundingClientRect();
 					// Calculate scroll position to center the chunk
 					const currentScrollTop = viewport.scrollTop;
 					const chunkTopRelativeToViewport = chunkRect.top - viewportRect.top + currentScrollTop;
 					const scrollTarget =
 						chunkTopRelativeToViewport - viewportRect.height / 2 + chunkRect.height / 2;
 					viewport.scrollTo({
 						top: Math.max(0, scrollTarget),
 						behavior: "auto", // Instant scroll for initial positioning
 					});
 					return true;
 				};
 				// Scroll multiple times with delays to handle progressive content rendering
 				// Each subsequent scroll will correct for any layout shifts
 				const scrollAttempts = [50, 150, 300, 600, 1000];
 				scrollAttempts.forEach((delay) => {
 					scrollTimersRef.current.push(
 						setTimeout(() => {
 							scrollToCitedChunk();
 						}, delay)
 					);
 				});
 				// After final attempt, mark the cited chunk as active
 				scrollTimersRef.current.push(
 					setTimeout(
 						() => {
 							setActiveChunkIndex(citedChunkIndex);
 						},
 						scrollAttempts[scrollAttempts.length - 1] + 50
 					)
 				);
 			}
 		},
 		[open, citedChunkIndex]
 	);
 	// Reset scroll state when panel closes
 	useEffect(() => {
 		if (!open) {
 			scrollTimersRef.current.forEach(clearTimeout);
 			scrollTimersRef.current = [];
 			hasScrolledRef.current = false;
 			setActiveChunkIndex(null);
 		}
 		return () => {
 			scrollTimersRef.current.forEach(clearTimeout);
 			scrollTimersRef.current = [];
 		};
 	}, [open]);
 	// Handle escape key
 	useEffect(() => {
 		const handleEscape = (e: KeyboardEvent) => {
 			if (e.key === "Escape" && open) {
 				onOpenChange(false);
 			}
 		};
 		window.addEventListener("keydown", handleEscape);
 		return () => window.removeEventListener("keydown", handleEscape);
 	}, [open, onOpenChange]);
 	// Prevent body scroll when open
 	useEffect(() => {
 		if (open) {
 			document.body.style.overflow = "hidden";
 		} else {
 			document.body.style.overflow = "";
 		}
 		return () => {
 			document.body.style.overflow = "";
 		};
 	}, [open]);
 	const handleUrlClick = (e: React.MouseEvent, clickUrl: string) => {
 		e.preventDefault();
 		e.stopPropagation();
 		window.open(clickUrl, "_blank", "noopener,noreferrer");
 	};
 	const scrollToChunk = useCallback(
 		(index: number) => {
 			scrollToChunkByIndex(index, true);
 		},
 		[scrollToChunkByIndex]
 	);
 	const panelContent = (
 		<AnimatePresence mode="wait">
 			{open && (
 				<>
 					{/* Backdrop */}
 					<motion.div
 						key="backdrop"
 						initial={{ opacity: 0 }}
 						animate={{ opacity: 1 }}
 						exit={{ opacity: 0 }}
 						transition={{ duration: 0.2 }}
 						className="fixed inset-0 z-50 bg-black/60 backdrop-blur-sm"
 						onClick={() => onOpenChange(false)}
 					/>
 					{/* Panel */}
 					<motion.div
 						key="panel"
 						initial={shouldReduceMotion ? { opacity: 0 } : { opacity: 0, scale: 0.95, y: 20 }}
 						animate={{ opacity: 1, scale: 1, y: 0 }}
 						exit={shouldReduceMotion ? { opacity: 0 } : { opacity: 0, scale: 0.95, y: 20 }}
 						transition={{
 							type: "spring",
 							damping: 30,
 							stiffness: 300,
 						}}
 						className="fixed inset-3 sm:inset-6 md:inset-10 lg:inset-16 z-50 flex flex-col bg-background rounded-3xl shadow-2xl border overflow-hidden"
 					>
 						{/* Header */}
 						<motion.div
 							initial={{ opacity: 0, y: -10 }}
 							animate={{ opacity: 1, y: 0 }}
 							transition={{ delay: 0.1 }}
 							className="flex items-center justify-between px-6 py-5 border-b bg-linear-to-r from-muted/50 to-muted/30"
 						>
 							<div className="min-w-0 flex-1">
 								<h2 className="text-xl font-semibold truncate">
 									{documentData?.title || title || "Source Document"}
 								</h2>
 								<p className="text-sm text-muted-foreground mt-0.5">
 									{documentData && "document_type" in documentData
 										? formatDocumentType(documentData.document_type)
 										: sourceType && formatDocumentType(sourceType)}
 									{totalChunks > 0 && (
 										<span className="ml-2">
 											• {totalChunks} chunk{totalChunks !== 1 ? "s" : ""}
 											{allChunks.length < totalChunks && ` (showing ${allChunks.length})`}
 										</span>
 									)}
 								</p>
 							</div>
 							<div className="flex items-center gap-3 shrink-0">
 								{url && (
 									<Button
 										size="sm"
 										variant="outline"
 										onClick={(e) => handleUrlClick(e, url)}
 										className="hidden sm:flex gap-2 rounded-xl"
 									>
 										<ExternalLink className="h-4 w-4" />
 										Open Source
 									</Button>
 								)}
 								<Button
 									size="icon"
 									variant="ghost"
 									onClick={() => onOpenChange(false)}
 									className="h-8 w-8 rounded-full"
 								>
 									<X className="h-4 w-4" />
 									<span className="sr-only">Close</span>
 								</Button>
 							</div>
 						</motion.div>
 						{/* Loading State */}
 						{!isDirectRenderSource && isDocumentByChunkFetching && (
 							<div className="flex-1 flex items-center justify-center">
 								<motion.div
 									initial={{ opacity: 0, scale: 0.9 }}
 									animate={{ opacity: 1, scale: 1 }}
 									className="flex flex-col items-center gap-4"
 								>
 									<Spinner size="lg" />
 									<p className="text-sm text-muted-foreground font-medium">
 										{t("loading_document")}
 									</p>
 								</motion.div>
 							</div>
 						)}
 						{/* Error State */}
 						{!isDirectRenderSource && documentByChunkFetchingError && (
 							<div className="flex-1 flex items-center justify-center">
 								<motion.div
 									initial={{ opacity: 0, scale: 0.9 }}
 									animate={{ opacity: 1, scale: 1 }}
 									className="flex flex-col items-center gap-4 text-center px-6"
 								>
 									<div className="w-20 h-20 rounded-full bg-muted/50 flex items-center justify-center">
 										<FileQuestionMark className="h-10 w-10 text-muted-foreground" />
 									</div>
 									<div>
 										<p className="font-semibold text-foreground text-lg">Document unavailable</p>
 										<p className="text-sm text-muted-foreground mt-2 max-w-md">
 											{documentByChunkFetchingError.message ||
 												"An unexpected error occurred. Please try again."}
 										</p>
 									</div>
 									<Button variant="outline" onClick={() => onOpenChange(false)} className="mt-2">
 										Close Panel
 									</Button>
 								</motion.div>
 							</div>
 						)}
 						{/* Direct render for web search providers */}
 						{isDirectRenderSource && (
 							<ScrollArea className="flex-1">
 								<div className="p-6 max-w-3xl mx-auto">
 									{url && (
 										<Button
 											size="default"
 											variant="outline"
 											onClick={(e) => handleUrlClick(e, url)}
 											className="w-full mb-6 sm:hidden rounded-xl"
 										>
 											<ExternalLink className="mr-2 h-4 w-4" />
 											Open in Browser
 										</Button>
 									)}
 									<motion.div
 										initial={{ opacity: 0, y: 10 }}
 										animate={{ opacity: 1, y: 0 }}
 										className="p-6 bg-muted/50 rounded-2xl border"
 									>
 										<h3 className="text-base font-semibold mb-4 flex items-center gap-2">
 											<BookOpen className="h-4 w-4" />
 											Source Information
 										</h3>
 										<div className="text-sm text-muted-foreground mb-3 font-medium">
 											{title || "Untitled"}
 										</div>
 										<div className="text-sm text-foreground leading-relaxed">
 											{description || "No content available"}
 										</div>
 									</motion.div>
 								</div>
 							</ScrollArea>
 						)}
 						{/* API-fetched document content */}
 						{!isDirectRenderSource && documentData && (
 							<div className="flex-1 flex overflow-hidden">
 								{/* Chunk Navigation Sidebar */}
 								{allChunks.length > 1 && (
 									<motion.div
 										initial={{ opacity: 0, x: -20 }}
 										animate={{ opacity: 1, x: 0 }}
 										transition={{ delay: 0.2 }}
 										className="hidden lg:flex flex-col w-16 border-r bg-muted/10 overflow-hidden"
 									>
 										<ScrollArea className="flex-1 h-full">
 											<div className="p-2 pt-3 flex flex-col gap-1.5">
 												{allChunks.map((chunk, idx) => {
 													const absNum = absoluteStart + idx + 1;
 													const isCited = chunk.id === chunkId;
 													const isActive = activeChunkIndex === idx;
 													return (
 														<motion.button
 															key={chunk.id}
 															type="button"
 															onClick={() => scrollToChunk(idx)}
 															initial={{ opacity: 0, scale: 0.8 }}
 															animate={{ opacity: 1, scale: 1 }}
 															transition={{ delay: Math.min(idx * 0.02, 0.2) }}
 															className={cn(
 																"relative w-11 h-9 mx-auto rounded-lg text-xs font-semibold transition-all duration-200 flex items-center justify-center",
 																isCited
 																	? "bg-primary text-primary-foreground shadow-md"
 																	: isActive
 																		? "bg-muted text-foreground"
 																		: "bg-muted/50 text-muted-foreground hover:bg-muted hover:text-foreground"
 															)}
 															title={isCited ? `Chunk ${absNum} (Cited)` : `Chunk ${absNum}`}
 														>
 															{absNum}
 															{isCited && (
 																<span className="absolute -top-1.5 -right-1.5 flex items-center justify-center w-4 h-4 bg-primary rounded-full border-2 border-background shadow-sm">
 																	<Sparkles className="h-2.5 w-2.5 text-primary-foreground" />
 																</span>
 															)}
 														</motion.button>
 													);
 												})}
 											</div>
 										</ScrollArea>
 									</motion.div>
 								)}
 								{/* Main Content */}
 								<ScrollArea className="flex-1" ref={scrollAreaRef}>
 									<div className="p-6 lg:p-8 max-w-4xl mx-auto space-y-6">
 										{/* Document Metadata */}
 										{"document_metadata" in documentData &&
 											documentData.document_metadata &&
 											Object.keys(documentData.document_metadata).length > 0 && (
 												<motion.div
 													initial={{ opacity: 0, y: 10 }}
 													animate={{ opacity: 1, y: 0 }}
 													transition={{ delay: 0.1 }}
 													className="p-5 bg-muted/30 rounded-2xl border"
 												>
 													<h3 className="text-sm font-semibold mb-4 text-muted-foreground uppercase tracking-wider flex items-center gap-2">
 														<FileText className="h-4 w-4" />
 														Document Information
 													</h3>
 													<dl className="grid grid-cols-1 sm:grid-cols-2 gap-4 text-sm">
 														{Object.entries(documentData.document_metadata).map(([key, value]) => (
 															<div key={key} className="space-y-1">
 																<dt className="font-medium text-muted-foreground capitalize text-xs">
 																	{key.replace(/_/g, " ")}
 																</dt>
 																<dd className="text-foreground wrap-break-word">{String(value)}</dd>
 															</div>
 														))}
 													</dl>
 												</motion.div>
 											)}
 										{/* Chunks Header */}
 										<div className="flex items-center justify-between pt-2">
 											<h3 className="text-sm font-semibold text-muted-foreground uppercase tracking-wider flex items-center gap-2">
 												<Hash className="h-4 w-4" />
 												Chunks {absoluteStart + 1}–{absoluteEnd} of {totalChunks}
 											</h3>
 											{citedChunkIndex !== -1 && (
 												<Button
 													variant="ghost"
 													size="sm"
 													onClick={() => scrollToChunk(citedChunkIndex)}
 													className="gap-2 text-primary hover:text-primary"
 												>
 													<Sparkles className="h-3.5 w-3.5" />
 													Jump to cited
 												</Button>
 											)}
 										</div>
 										{/* Load Earlier */}
 										{canLoadBefore && (
 											<div className="flex items-center justify-center">
 												<Button
 													variant="outline"
 													size="sm"
 													onClick={loadBefore}
 													disabled={loadingBefore}
 													className="gap-2"
 												>
 													{loadingBefore ? (
 														<Loader2 className="h-3.5 w-3.5 animate-spin" />
 													) : (
 														<ChevronUp className="h-3.5 w-3.5" />
 													)}
 													{loadingBefore
 														? "Loading..."
 														: `Load ${Math.min(EXPAND_SIZE, absoluteStart)} earlier chunks`}
 												</Button>
 											</div>
 										)}
 										{/* Chunks */}
 										<div className="space-y-4">
 											{allChunks.map((chunk, idx) => {
 												const isCited = chunk.id === chunkId;
 												const chunkNumber = absoluteStart + idx + 1;
 												return (
 													<ChunkCard
 														key={chunk.id}
 														ref={isCited ? citedChunkRefCallback : undefined}
 														chunk={chunk}
 														localIndex={idx}
 														chunkNumber={chunkNumber}
 														totalChunks={totalChunks}
 														isCited={isCited}
 														isActive={activeChunkIndex === idx}
 														disableLayoutAnimation={allChunks.length > 30}
 													/>
 												);
 											})}
 										</div>
 										{/* Load Later */}
 										{canLoadAfter && (
 											<div className="flex items-center justify-center py-3">
 												<Button
 													variant="outline"
 													size="sm"
 													onClick={loadAfter}
 													disabled={loadingAfter}
 													className="gap-2"
 												>
 													{loadingAfter ? (
 														<Loader2 className="h-3.5 w-3.5 animate-spin" />
 													) : (
 														<ChevronDown className="h-3.5 w-3.5" />
 													)}
 													{loadingAfter
 														? "Loading..."
 														: `Load ${Math.min(EXPAND_SIZE, totalChunks - absoluteEnd)} later chunks`}
 												</Button>
 											</div>
 										)}
 									</div>
 								</ScrollArea>
 							</div>
 						)}
 					</motion.div>
 				</>
 			)}
 		</AnimatePresence>
 	);
 	if (!mounted) return <>{children}</>;
 	return (
 		<>
 			{children}
 			{createPortal(panelContent, globalThis.document.body)}
 		</>
 	);
 }
--- a/surfsense_web/components/settings/user-settings-dialog.tsx
+++ b/surfsense_web/components/settings/user-settings-dialog.tsx
@ -67,9 +67,6 @@ const DesktopShortcutsContent = dynamic(
 		import(
 			"@/app/dashboard/[search_space_id]/user-settings/components/DesktopShortcutsContent"
 		).then((m) => ({ default: m.DesktopShortcutsContent })),
 		import(
 			"@/app/dashboard/[search_space_id]/user-settings/components/DesktopShortcutsContent"
 		).then((m) => ({ default: m.DesktopShortcutsContent })),
 	{ ssr: false }
 );
 const MemoryContent = dynamic(
--- a/surfsense_web/components/ui/search-highlight-node.tsx
+++ b/surfsense_web/components/ui/search-highlight-node.tsx
@ -0,0 +1,45 @@
 "use client";
 import type { PlateLeafProps } from "platejs/react";
 import { PlateLeaf } from "platejs/react";
 /**
 * Stable class name used to identify Plate-rendered citation highlight
 * leaves in the DOM. We can't use a `data-*` attribute here — Plate's
 * `PlateLeaf` runs its props through `useNodeAttributes`, which only
 * forwards `attributes`, `className`, `ref`, and `style` to the rendered
 * element; arbitrary `data-*` props are silently dropped (verified
 * against `@platejs/core/dist/react/index.js` v52). So `className` is
 * the only escape hatch that's guaranteed to survive into the DOM.
 */
 export const CITATION_HIGHLIGHT_CLASS = "citation-highlight-leaf";
 /**
 * Leaf rendered for ranges decorated by `@platejs/find-replace`'s
 * `FindReplacePlugin`. We re-purpose that plugin to drive the citation-jump
 * highlight: when a citation is staged, the parent sets the plugin's `search`
 * option to a snippet of the chunk text and Plate decorates every match with
 * `searchHighlight: true`. This component renders those decorations as a
 * `<mark>` tagged with `CITATION_HIGHLIGHT_CLASS` so the parent can:
 *   1. Query the first match in DOM order to scroll it into view.
 *   2. Detect the active-highlight state without a separate React ref.
 *
 * The highlight is **persistent** — it does not auto-fade. The parent in
 * `EditorPanelContent` clears it by setting the plugin's `search` option
 * back to "" when one of: (a) the user clicks anywhere inside the editor,
 * (b) the panel switches to a different document, (c) the user toggles
 * into edit mode, (d) another citation jump is staged, (e) the panel
 * unmounts. We use a brief entrance pulse (`citation-flash-in`, see
 * `globals.css`) purely to draw the eye after `scrollIntoView` lands.
 */
 export function SearchHighlightLeaf(props: PlateLeafProps) {
 	// Marker class first, then the visual utilities for the highlight and
 	// its entrance animation.
 	const markClassName = `${CITATION_HIGHLIGHT_CLASS} bg-primary/15 ring-1 ring-primary/40 rounded-sm px-0.5 text-inherit animate-[citation-flash-in_400ms_ease-out]`;
 	const { children } = props;
 	// `as` and `className` come after the spread so they take precedence
 	// over any same-named values carried in `props`.
 	return (
 		<PlateLeaf {...props} as="mark" className={markClassName}>
 			{children}
 		</PlateLeaf>
 	);
 }
--- a/surfsense_web/lib/citation-search.ts
+++ b/surfsense_web/lib/citation-search.ts
@ -0,0 +1,125 @@
 /**
 * Snippet generation for the citation-jump highlight, driven by Plate's
 * `FindReplacePlugin`. The plugin runs `decorate` per-block and only matches
 * within blocks whose children are all `Text` nodes (so it crosses inline
 * marks like bold/italic but **not** block boundaries, and a block that
 * contains even one inline element such as a link is silently skipped).
 * That means a full chunk that spans heading + paragraph won't match as a
 * single string — we have to pick a shorter snippet that fits inside one
 * rendered block.
 *
 * `buildCitationSearchCandidates` returns search strings ordered from
 * "most-specific anchor" to "broadest fallback":
 *   1. First sentence of the chunk (capped at `FIRST_SENTENCE_MAX`).
 *   2. First `FIRST_PHRASE_WORDS` words.
 *   3. Each non-trivial line of the chunk, in source order — gives us a
 *      separate attempt for each rendered block, so a heading line with
 *      an inline link doesn't doom the whole jump.
 *   4. Full chunk (only if it's already short enough to plausibly fit
 *      inside one block).
 *
 * The caller tries each candidate in turn — set the plugin's `search`
 * option, `editor.api.redecorate()`, then check the editor DOM for a
 * `.citation-highlight-leaf` element. First candidate that produces one
 * wins; subsequent candidates are skipped.
 */
 // Character cap for the first-sentence candidate, and for the prefix used
 // instead when the text has no sentence terminator.
 const FIRST_SENTENCE_MAX = 120;
 // Number of leading words that make up the "first phrase" candidate.
 const FIRST_PHRASE_WORDS = 8;
 // Inputs and candidates shorter than this many characters are discarded.
 const MIN_SNIPPET_LENGTH = 6;
 // The whole normalized chunk is offered as a candidate only when its length
 // does not exceed this value.
 const FULL_CHUNK_MAX = FIRST_SENTENCE_MAX * 2;
 // Cap on how many chunk lines contribute per-line candidates.
 const MAX_LINE_CANDIDATES = 6;
 // Character cap applied to each per-line candidate.
 const LINE_CANDIDATE_MAX = FIRST_SENTENCE_MAX;
 /**
 * Collapse every run of whitespace in `input` to a single space and drop
 * leading/trailing whitespace.
 */
 function normalizeWhitespace(input: string): string {
 	// Splitting on whitespace runs leaves empty tokens only at the edges;
 	// dropping them and re-joining is the same as collapse-then-trim.
 	const tokens = input.split(/\s+/).filter((token) => token.length > 0);
 	return tokens.join(" ");
 }
 /**
 * Strip the markdown syntax that won't survive into the rendered editor's
 * plain text, so the chunk text (which comes back from the indexer as raw
 * source markdown) can be matched against the literal text values stored
 * in Plate's Slate tree.
 *
 * Order matters: handle multi-char and "container" syntax before single-
 * char emphasis, otherwise `**text**` collapses to `*text*` first.
 *
 * Heuristic only — we don't aim to be a full markdown parser, just to
 * remove the common markers (`**bold**`, `[text](url)`, `# headings`,
 * `- list`, etc.) that show up in connector-doc chunks and would break
 * literal substring search.
 */
 export function stripMarkdownForMatch(input: string): string {
 	let s = input;
 	// Fenced code blocks: keep the body, drop the fences and the language tag.
 	s = s.replace(/```[a-z0-9_+-]*\n?([\s\S]*?)```/gi, (_, body: string) => body);
 	// HTML comments are replaced by a single space.
 	s = s.replace(/<!--[\s\S]*?-->/g, " ");
 	// Thematic breaks (`***`, `* * *`, `- - -`, `___`). These MUST be removed
 	// before any inline or list-marker handling: otherwise the bold rule
 	// pairs `***` with the opener of a later `**bold**` span, the emphasis
 	// rule rewrites `* * *`, and the bullet rule eats the first marker of
 	// `- - -`, leaving stray markers that break literal matching.
 	s = s.replace(/^[ \t]{0,3}(?:[-*_])(?:[ \t]*[-*_]){2,}[ \t]*$/gm, "");
 	// Images (inline and reference style): keep the alt text.
 	s = s.replace(/!\[([^\]]*)\]\([^)]*\)/g, "$1");
 	s = s.replace(/!\[([^\]]*)\]\[[^\]]*\]/g, "$1");
 	// Links (inline and reference style): keep the link text.
 	s = s.replace(/\[([^\]]+)\]\([^)]*\)/g, "$1");
 	s = s.replace(/\[([^\]]+)\]\[[^\]]*\]/g, "$1");
 	// Autolinks `<https://…>` / `<mailto:…>`: drop the angle brackets.
 	s = s.replace(/<((?:https?|mailto):[^>\s]+)>/g, "$1");
 	// Inline code: drop the backticks.
 	s = s.replace(/`+([^`\n]+?)`+/g, "$1");
 	// Bold (`**` / `__`) before single-character emphasis, so `**text**`
 	// is not first reduced to `*text*`.
 	s = s.replace(/(\*\*|__)([\s\S]+?)\1/g, "$2");
 	// Emphasis (`*` / `_`); the lookarounds leave intraword markers such as
 	// `snake_case_name` untouched.
 	s = s.replace(/(?<!\w)([*_])([^*_\n]+?)\1(?!\w)/g, "$2");
 	// Strikethrough.
 	s = s.replace(/~~([^~]+)~~/g, "$1");
 	// ATX heading markers (`#` … `######`).
 	s = s.replace(/^[ \t]{0,3}#{1,6}[ \t]+/gm, "");
 	// Setext heading underlines (`===` / `---`).
 	s = s.replace(/^[ \t]{0,3}(?:=+|-+)[ \t]*$/gm, "");
 	// Blockquote markers.
 	s = s.replace(/^[ \t]{0,3}>+[ \t]?/gm, "");
 	// Bullet and ordered list markers.
 	s = s.replace(/^[ \t]*[-*+][ \t]+/gm, "");
 	s = s.replace(/^[ \t]*\d+\.[ \t]+/gm, "");
 	// Table delimiter rows (`| --- | :-: |`).
 	s = s.replace(/^[ \t]*\|?(?:[ \t]*:?-+:?[ \t]*\|)+[ \t]*:?-+:?[ \t]*\|?[ \t]*$/gm, "");
 	// Backslash escapes: keep the escaped character. Done last so escaped
 	// markers are not mistaken for real syntax by the rules above.
 	s = s.replace(/\\([\\`*_{}[\]()#+\-.!~>])/g, "$1");
 	return s;
 }
 /**
 * Build the ordered list of search strings to try for a citation jump.
 *
 * The chunk is markdown-stripped and whitespace-normalized, then candidates
 * are emitted in this order: first sentence (or a leading prefix when there
 * is no sentence terminator and the text is long), first few words, up to
 * `MAX_LINE_CANDIDATES` individual lines, and finally the whole chunk when
 * it is short enough. Candidates are de-duplicated and anything shorter
 * than `MIN_SNIPPET_LENGTH` is dropped.
 *
 * @param rawText Raw chunk text (may contain markdown).
 * @returns Candidate search strings, most specific first; empty when the
 *   input is empty or too short after normalization.
 */
 export function buildCitationSearchCandidates(rawText: string): string[] {
 	if (!rawText) return [];
 	const stripped = stripMarkdownForMatch(rawText);
 	const normalized = normalizeWhitespace(stripped);
 	if (normalized.length < MIN_SNIPPET_LENGTH) return [];
 	const out: string[] = [];
 	const seen = new Set<string>();
 	// Appends a normalized candidate; returns true only when it was new and
 	// long enough to actually be added.
 	const push = (s: string): boolean => {
 		const t = normalizeWhitespace(s);
 		if (t.length < MIN_SNIPPET_LENGTH || seen.has(t)) return false;
 		seen.add(t);
 		out.push(t);
 		return true;
 	};
 	const sentenceMatch = normalized.match(/^[^.!?]+[.!?]/);
 	if (sentenceMatch) {
 		push(sentenceMatch[0].slice(0, FIRST_SENTENCE_MAX));
 	} else if (normalized.length > FIRST_SENTENCE_MAX) {
 		push(normalized.slice(0, FIRST_SENTENCE_MAX));
 	}
 	const words = normalized.split(" ").filter(Boolean);
 	if (words.length > FIRST_PHRASE_WORDS) {
 		push(words.slice(0, FIRST_PHRASE_WORDS).join(" "));
 	}
 	// Per-line candidates: each chunk line is roughly one block in the
 	// rendered editor. Trying them in order gives us a separate decorate
 	// attempt for each block, which matters when the first line is a
 	// heading containing a link (Plate's `FindReplacePlugin` will skip
 	// any block whose children aren't all text nodes).
 	let lineCount = 0;
 	for (const line of stripped.split(/\r?\n/)) {
 		if (lineCount >= MAX_LINE_CANDIDATES) break;
 		const trimmed = normalizeWhitespace(line);
 		if (trimmed.length < MIN_SNIPPET_LENGTH) continue;
 		// Only lines that produced a new candidate consume the budget; a
 		// line that merely repeats an earlier candidate must not crowd out
 		// later, distinct lines.
 		if (push(trimmed.slice(0, LINE_CANDIDATE_MAX))) {
 			lineCount++;
 		}
 	}
 	if (normalized.length <= FULL_CHUNK_MAX) {
 		push(normalized);
 	}
 	return out;
 }
--- a/surfsense_web/package.json
+++ b/surfsense_web/package.json
@ -36,6 +36,7 @@
 		"@platejs/code-block": "^52.0.11",
 		"@platejs/combobox": "^52.0.15",
 		"@platejs/dnd": "^52.0.11",
 		"@platejs/find-replace": "^52.3.10",
 		"@platejs/floating": "^52.0.11",
 		"@platejs/indent": "^52.0.11",
 		"@platejs/link": "^52.0.11",
--- a/surfsense_web/pnpm-lock.yaml
+++ b/surfsense_web/pnpm-lock.yaml
@ -53,6 +53,9 @@ importers:
      '@platejs/dnd':
        specifier: ^52.0.11
        version: 52.0.11(platejs@52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4)))(react-dnd-html5-backend@16.0.1)(react-dnd@16.0.1(@types/node@20.19.33)(@types/react@19.2.14)(react@19.2.4))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)
      '@platejs/find-replace':
        specifier: ^52.3.10
        version: 52.3.10(platejs@52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4)))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)
      '@platejs/floating':
        specifier: ^52.0.11
        version: 52.0.11(platejs@52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4)))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)
@ -2827,6 +2830,13 @@ packages:
      react-dnd-html5-backend: '>=14.0.0'
      react-dom: '>=18.0.0'
  '@platejs/find-replace@52.3.10':
    resolution: {integrity: sha512-V/MOMMUYxHfEn/skd2+YO213xSATFDVsl8FzVzVRV/XaxwwVefH2EPD1lAVIvmYjennTVTTsHHtEI9K9iOsEaA==}
    peerDependencies:
      platejs: '>=52.0.11'
      react: '>=18.0.0'
      react-dom: '>=18.0.0'
  '@platejs/floating@52.0.11':
    resolution: {integrity: sha512-ApNpw4KWml+kuK+XTTpji+f/7GxTR4nRzlnfJMvGBrJpLPQ4elS5MABm3oUi81DZn+aub5HvsyH7UqCw7F76IA==}
    peerDependencies:
@ -11105,6 +11115,13 @@ snapshots:
      react-dnd-html5-backend: 16.0.1
      react-dom: 19.2.4(react@19.2.4)
  '@platejs/find-replace@52.3.10(platejs@52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4)))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)':
    dependencies:
      platejs: 52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4))
      react: 19.2.4
      react-compiler-runtime: 1.0.0(react@19.2.4)
      react-dom: 19.2.4(react@19.2.4)
  '@platejs/floating@52.0.11(platejs@52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4)))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)':
    dependencies:
      '@floating-ui/core': 1.7.4