From b9a66cb417d04bd445b6be1a7838a2278ae3cefe Mon Sep 17 00:00:00 2001 From: "DESKTOP-RTLN3BA\\$punk" Date: Tue, 28 Apr 2026 21:30:53 -0700 Subject: [PATCH] feat: various UI fixes, prompt optimizations, and allowing duplicate docs - Updated `content_hash` in the `Document` model to remove global uniqueness, allowing identical content across different paths. - Enhanced `_create_document` function to handle path uniqueness and prevent session-poisoning from `IntegrityError`. - Added detailed comments for clarity on the changes and their implications. - Introduced new citation handling in the editor for improved user experience with citation jumps. - Updated package dependencies in the frontend for better functionality. --- .../133_drop_documents_content_hash_unique.py | 107 +++ .../new_chat/middleware/kb_persistence.py | 65 +- .../app/agents/new_chat/prompts/composer.py | 41 +- .../new_chat/prompts/providers/anthropic.md | 21 +- .../new_chat/prompts/providers/deepseek.md | 18 + .../new_chat/prompts/providers/google.md | 20 +- .../agents/new_chat/prompts/providers/grok.md | 17 + .../agents/new_chat/prompts/providers/kimi.md | 21 + .../prompts/providers/openai_classic.md | 22 +- .../prompts/providers/openai_codex.md | 19 + .../prompts/providers/openai_reasoning.md | 22 +- surfsense_backend/app/db.py | 10 +- .../agents/new_chat/prompts/test_composer.py | 74 +- .../test_kb_persistence_filesystem_parity.py | 168 ++++ surfsense_web/app/globals.css | 21 + .../pending-chunk-highlight.atom.ts | 19 + .../assistant-ui/inline-citation.tsx | 228 +++++- .../components/editor-panel/editor-panel.tsx | 530 +++++++++++-- .../components/editor/plate-editor.tsx | 31 + surfsense_web/components/editor/presets.ts | 28 + .../new-chat/source-detail-panel.tsx | 719 ------------------ .../settings/user-settings-dialog.tsx | 3 - .../components/ui/search-highlight-node.tsx | 45 ++ surfsense_web/lib/citation-search.ts | 125 +++ surfsense_web/package.json | 1 + surfsense_web/pnpm-lock.yaml | 17 + 
26 files changed, 1540 insertions(+), 852 deletions(-) create mode 100644 surfsense_backend/alembic/versions/133_drop_documents_content_hash_unique.py create mode 100644 surfsense_backend/app/agents/new_chat/prompts/providers/deepseek.md create mode 100644 surfsense_backend/app/agents/new_chat/prompts/providers/grok.md create mode 100644 surfsense_backend/app/agents/new_chat/prompts/providers/kimi.md create mode 100644 surfsense_backend/app/agents/new_chat/prompts/providers/openai_codex.md create mode 100644 surfsense_backend/tests/unit/middleware/test_kb_persistence_filesystem_parity.py create mode 100644 surfsense_web/atoms/document-viewer/pending-chunk-highlight.atom.ts delete mode 100644 surfsense_web/components/new-chat/source-detail-panel.tsx create mode 100644 surfsense_web/components/ui/search-highlight-node.tsx create mode 100644 surfsense_web/lib/citation-search.ts diff --git a/surfsense_backend/alembic/versions/133_drop_documents_content_hash_unique.py b/surfsense_backend/alembic/versions/133_drop_documents_content_hash_unique.py new file mode 100644 index 000000000..88c3e203f --- /dev/null +++ b/surfsense_backend/alembic/versions/133_drop_documents_content_hash_unique.py @@ -0,0 +1,107 @@ +"""133_drop_documents_content_hash_unique + +Revision ID: 133 +Revises: 132 +Create Date: 2026-04-29 + +Drop the global UNIQUE constraint on ``documents.content_hash`` so the +new-chat agent's ``write_file`` flow can persist legitimate file copies +(two paths, identical content) without hitting a constraint that mirrors +no real filesystem semantic. + +Path uniqueness still lives on ``documents.unique_identifier_hash`` (per +search space), which is the right invariant — exactly like an inode at a +given path on a POSIX filesystem. + +The non-unique INDEX on ``content_hash`` is preserved so connector +indexers' "have we seen this content before?" 
lookup +(:func:`app.tasks.document_processors.base.check_duplicate_document`, +which already uses ``.scalars().first()`` and is therefore tolerant of +duplicates) stays cheap. +""" + +from __future__ import annotations + +from collections.abc import Sequence + +from sqlalchemy import inspect + +from alembic import op + +revision: str = "133" +down_revision: str | None = "132" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def _existing_constraint_names(bind, table: str) -> set[str]: + inspector = inspect(bind) + return {c["name"] for c in inspector.get_unique_constraints(table)} + + +def _existing_index_names(bind, table: str) -> set[str]: + inspector = inspect(bind) + return {i["name"] for i in inspector.get_indexes(table)} + + +def upgrade() -> None: + bind = op.get_bind() + + # Both the named UniqueConstraint (added in revision 8) and the + # implicit-unique-index variant SQLAlchemy may emit need draining. + constraints = _existing_constraint_names(bind, "documents") + if "uq_documents_content_hash" in constraints: + op.drop_constraint( + "uq_documents_content_hash", "documents", type_="unique" + ) + + indexes = _existing_index_names(bind, "documents") + # Some Postgres versions surface the unique constraint via a unique + # index of the same name; check for that too. + for idx_name in ("uq_documents_content_hash",): + if idx_name in indexes: + op.drop_index(idx_name, table_name="documents") + + # Ensure the non-unique index is present for fast lookups. + if "ix_documents_content_hash" not in indexes: + op.create_index( + "ix_documents_content_hash", + "documents", + ["content_hash"], + unique=False, + ) + + +def downgrade() -> None: + bind = op.get_bind() + + # Re-applying UNIQUE is destructive: there may now be legitimate + # duplicates (e.g. two NOTE documents that share content because the + # user explicitly copied one to a new path). 
To avoid the migration + # silently deleting user data, we keep only the lowest-id row per + # content_hash — same strategy revision 8 used when first introducing + # the constraint. + op.execute( + """ + DELETE FROM documents + WHERE id NOT IN ( + SELECT MIN(id) + FROM documents + GROUP BY content_hash + ) + """ + ) + + indexes = _existing_index_names(bind, "documents") + if "ix_documents_content_hash" in indexes: + op.drop_index("ix_documents_content_hash", table_name="documents") + + op.create_index( + "ix_documents_content_hash", + "documents", + ["content_hash"], + unique=False, + ) + op.create_unique_constraint( + "uq_documents_content_hash", "documents", ["content_hash"] + ) diff --git a/surfsense_backend/app/agents/new_chat/middleware/kb_persistence.py b/surfsense_backend/app/agents/new_chat/middleware/kb_persistence.py index 5682977d9..378b83950 100644 --- a/surfsense_backend/app/agents/new_chat/middleware/kb_persistence.py +++ b/surfsense_backend/app/agents/new_chat/middleware/kb_persistence.py @@ -28,6 +28,7 @@ from langchain.agents.middleware import AgentMiddleware, AgentState from langchain_core.callbacks import dispatch_custom_event from langgraph.runtime import Runtime from sqlalchemy import delete, select +from sqlalchemy.exc import IntegrityError from sqlalchemy.ext.asyncio import AsyncSession from app.agents.new_chat.filesystem_selection import FilesystemMode @@ -150,10 +151,11 @@ async def _create_document( virtual_path, search_space_id, ) - # Guard against the unique_identifier_hash constraint: another row at the - # same virtual_path (this search space) already owns the hash. Callers are - # expected to upsert via the wrapper, but this defends against bypasses - # and gives a clean ValueError instead of a session-poisoning IntegrityError. + # Filesystem-parity invariant: the only thing that *must* be unique is + # the path. Two notes can legitimately share content (e.g. ``cp a b``). 
+ # Guard against the path-derived ``unique_identifier_hash`` constraint + # so we surface a clean ValueError instead of letting the INSERT poison + # the session with an IntegrityError. path_collision = await session.execute( select(Document.id).where( Document.search_space_id == search_space_id, @@ -165,17 +167,14 @@ async def _create_document( f"a document already exists at path '{virtual_path}' " "(unique_identifier_hash collision)" ) + # ``content_hash`` is intentionally NOT checked for uniqueness here. + # In a real filesystem two files at different paths can hold identical + # bytes, and the agent's ``write_file`` path needs that semantic to + # support copy/duplicate operations. The hash remains useful as a + # change-detection hint for connector indexers, which still consult it + # via :func:`check_duplicate_document` but do so with a non-unique + # lookup (``.first()``). content_hash = generate_content_hash(content, search_space_id) - content_collision = await session.execute( - select(Document.id).where( - Document.search_space_id == search_space_id, - Document.content_hash == content_hash, - ) - ) - if content_collision.scalar_one_or_none() is not None: - raise ValueError( - f"a document with identical content already exists for path '{virtual_path}'" - ) doc = Document( title=title, document_type=DocumentType.NOTE, @@ -493,19 +492,43 @@ async def commit_staged_filesystem_state( } ) else: + # Wrap each create in a SAVEPOINT so a residual + # ``IntegrityError`` (e.g. a deployment that hasn't run + # migration 133 yet, where ``documents.content_hash`` + # still carries its legacy global UNIQUE constraint) + # rolls back only this one create instead of poisoning + # the whole turn's transaction. 
try: - new_doc = await _create_document( - session, - virtual_path=path, - content=content, - search_space_id=search_space_id, - created_by_id=created_by_id, - ) + async with session.begin_nested(): + new_doc = await _create_document( + session, + virtual_path=path, + content=content, + search_space_id=search_space_id, + created_by_id=created_by_id, + ) except ValueError as exc: logger.warning( "kb_persistence: skipping %s create: %s", path, exc ) continue + except IntegrityError as exc: + # The path-uniqueness check above already protected + # against ``unique_identifier_hash`` collisions, so + # the most likely culprit is the legacy + # ``ix_documents_content_hash`` UNIQUE constraint + # that migration 133 drops. Log loudly so operators + # know to run the migration; do NOT silently swallow. + msg = str(exc.orig) if exc.orig is not None else str(exc) + logger.error( + "kb_persistence: IntegrityError creating %s: %s. " + "If this mentions content_hash, run alembic " + "upgrade to apply migration 133 which drops the " + "global UNIQUE constraint on documents.content_hash.", + path, + msg, + ) + continue doc_id_by_path[path] = new_doc.id committed_creates.append( { diff --git a/surfsense_backend/app/agents/new_chat/prompts/composer.py b/surfsense_backend/app/agents/new_chat/prompts/composer.py index 44060f75f..bad033490 100644 --- a/surfsense_backend/app/agents/new_chat/prompts/composer.py +++ b/surfsense_backend/app/agents/new_chat/prompts/composer.py @@ -38,12 +38,38 @@ from app.db import ChatVisibility # Provider variant detection # ----------------------------------------------------------------------------- -ProviderVariant = str # "anthropic" | "openai_reasoning" | "openai_classic" | "google" | "default" +# String literal alias for the supported provider-specific prompt variants. +# When adding a new variant, also drop a matching ``providers/.md`` +# file in this package and (if appropriate) extend the regex matchers below. 
+# +# Stylistic clusters mirror OpenCode's prompt-per-family layout but adapted +# to SurfSense's "supplemental hints" architecture (each fragment is a +# focused style nudge, NOT a full system prompt — the main prompt is +# already assembled from base/ + tools/ + routing/). +ProviderVariant = str +# Known values: +# "anthropic" — Claude family (XML-friendly, narrative todos) +# "openai_reasoning" — GPT-5 / o-series (channel-aware pragmatic) +# "openai_classic" — GPT-4 family (autonomous persistence) +# "openai_codex" — gpt-*-codex (code-purist, terse, file:line refs) +# "google" — Gemini (formal, <3-line, numbered workflow) +# "kimi" — Moonshot Kimi-K* (action-bias, parallel tools) +# "grok" — xAI Grok (extreme-terse, one-word ok) +# "deepseek" — DeepSeek V3 / R1 (terse, R1-aware reasoning) +# "default" — fallback, no provider-specific block emitted +# IMPORTANT: order of evaluation matters in :func:`detect_provider_variant`. +# More specific patterns must come first (e.g. ``codex`` before +# ``openai_reasoning`` because codex model ids contain ``gpt``). + +_OPENAI_CODEX_RE = re.compile(r"\b(gpt-codex|codex-mini|gpt-[\d.]+-codex)\b", re.IGNORECASE) _OPENAI_REASONING_RE = re.compile(r"\b(gpt-5|o\d|o-)", re.IGNORECASE) _OPENAI_CLASSIC_RE = re.compile(r"\bgpt-4", re.IGNORECASE) _ANTHROPIC_RE = re.compile(r"\bclaude\b", re.IGNORECASE) _GOOGLE_RE = re.compile(r"\bgemini\b", re.IGNORECASE) +_KIMI_RE = re.compile(r"\b(kimi[-\d.]*|moonshot)\b", re.IGNORECASE) +_GROK_RE = re.compile(r"\bgrok\b", re.IGNORECASE) +_DEEPSEEK_RE = re.compile(r"\bdeepseek\b", re.IGNORECASE) def detect_provider_variant(model_name: str | None) -> ProviderVariant: @@ -51,10 +77,17 @@ def detect_provider_variant(model_name: str | None) -> ProviderVariant: Heuristic match on the model id; returns ``"default"`` when nothing matches so the composer can fall back to the empty placeholder file. 
+ + Order is significant: more-specific patterns are tried first so + ``gpt-5-codex`` routes to ``"openai_codex"`` rather than + ``"openai_reasoning"`` (mirrors OpenCode's + ``packages/opencode/src/session/system.ts`` dispatch). """ if not model_name: return "default" name = model_name.strip() + if _OPENAI_CODEX_RE.search(name): + return "openai_codex" if _OPENAI_REASONING_RE.search(name): return "openai_reasoning" if _OPENAI_CLASSIC_RE.search(name): @@ -63,6 +96,12 @@ def detect_provider_variant(model_name: str | None) -> ProviderVariant: return "anthropic" if _GOOGLE_RE.search(name): return "google" + if _KIMI_RE.search(name): + return "kimi" + if _GROK_RE.search(name): + return "grok" + if _DEEPSEEK_RE.search(name): + return "deepseek" return "default" diff --git a/surfsense_backend/app/agents/new_chat/prompts/providers/anthropic.md b/surfsense_backend/app/agents/new_chat/prompts/providers/anthropic.md index 6e22ef265..f574da541 100644 --- a/surfsense_backend/app/agents/new_chat/prompts/providers/anthropic.md +++ b/surfsense_backend/app/agents/new_chat/prompts/providers/anthropic.md @@ -1,5 +1,20 @@ -You are running on an Anthropic Claude model. Use XML tags liberally to structure -intermediate reasoning when the task is complex. Prefer step-by-step plans inside -`` blocks before producing the final answer. +You are running on an Anthropic Claude model. + +Structured reasoning: +- Use XML tags liberally to organise intermediate reasoning when a task is non-trivial. `...` blocks are encouraged before tool calls or before producing a complex final answer. +- For multi-step requests, briefly outline a plan inside a `` block before issuing the first tool call. + +Professional objectivity: +- Prioritise technical accuracy over validating the user's beliefs. Provide direct, factual guidance without unnecessary superlatives, praise, or emotional validation. +- When uncertain, investigate (search the KB, fetch the page) rather than confirming the user's assumption. 
+- Disagree with the user when the evidence warrants it; respectful correction beats false agreement. + +Task management: +- For tasks with 3+ distinct steps use the todo / planning tool aggressively. Mark items in_progress before starting, completed immediately when finished — do not batch completions. +- Narrate progress through the todo list itself, not through chatty status lines. + +Tool calls: +- Run independent tool calls in parallel within one response. Sequence them only when a later call genuinely needs an earlier one's output. +- Never chain bash-like commands with `;` or `&&` to "narrate" — use prose between tool calls instead. diff --git a/surfsense_backend/app/agents/new_chat/prompts/providers/deepseek.md b/surfsense_backend/app/agents/new_chat/prompts/providers/deepseek.md new file mode 100644 index 000000000..8acf008ca --- /dev/null +++ b/surfsense_backend/app/agents/new_chat/prompts/providers/deepseek.md @@ -0,0 +1,18 @@ + +You are running on a DeepSeek model (DeepSeek-V3 chat / DeepSeek-R1 reasoning). + +Reasoning hygiene (R1-aware): +- If the model surfaces explicit `` blocks, keep that internal scratch focused — do NOT restate the user's question inside it; jump straight to the analysis. +- Never paste the contents of `` into your final answer. Final answer should reflect only the conclusion, citations, and any user-facing rationale. +- Do not let chain-of-thought leak into tool-call arguments — keep tool inputs minimal and structural. + +Output style: +- Be concise. Default to a one-paragraph answer; expand only when the user asks for detail. +- Don't open with sycophantic phrasing ("Great question", "Sure, here you go"). Lead with the answer or the next action. +- For factual answers, cite once with `[citation:chunk_id]` and stop. + +Tool calls: +- Issue independent tool calls in parallel within a single turn. +- Prefer the knowledge-base search tools before any web-search; this model has strong recall but stale training data. 
+- Don't fabricate file paths, chunk ids, or URLs — only use values returned by tools or provided by the user. + diff --git a/surfsense_backend/app/agents/new_chat/prompts/providers/google.md b/surfsense_backend/app/agents/new_chat/prompts/providers/google.md index 4b31a8388..cac3b328b 100644 --- a/surfsense_backend/app/agents/new_chat/prompts/providers/google.md +++ b/surfsense_backend/app/agents/new_chat/prompts/providers/google.md @@ -1,4 +1,20 @@ -You are running on a Google Gemini model. Prefer concise, structured responses. -When using tools, follow the function-calling protocol and avoid verbose preludes. +You are running on a Google Gemini model. + +Output style: +- Concise & direct. Aim for fewer than 3 lines of prose (excluding tool output, citations, and code/snippets) when the task allows. +- No conversational filler — skip openers like "Okay, I will now…" and closers like "I have finished the changes…". Get straight to the action or answer. +- Format with GitHub-flavoured Markdown; assume monospace rendering. +- For one-line factual answers, just answer. No headers, no bullets. + +Workflow for non-trivial tasks (Understand → Plan → Act → Verify): +1. **Understand:** read the user's request and the relevant KB / connector context. Use search and read tools (in parallel when independent) before assuming anything. +2. **Plan:** when the task touches multiple steps, share an extremely concise plan first. +3. **Act:** call the appropriate tools, strictly adhering to the prompts/routing already established for this agent. +4. **Verify:** confirm with a follow-up read or search where it materially de-risks the answer. + +Discipline: +- Do not take significant actions beyond the clear scope of the user's request without confirming first. +- Do not assume a connector / tool / file exists — check (e.g. via `get_connected_accounts`) before referencing it. +- Path arguments must be the exact strings returned by tools; do not synthesise file paths. 
diff --git a/surfsense_backend/app/agents/new_chat/prompts/providers/grok.md b/surfsense_backend/app/agents/new_chat/prompts/providers/grok.md new file mode 100644 index 000000000..95b8fcc14 --- /dev/null +++ b/surfsense_backend/app/agents/new_chat/prompts/providers/grok.md @@ -0,0 +1,17 @@ + +You are running on an xAI Grok model. + +Maximum terseness: +- Answer in fewer than 4 lines unless the user asks for detail. One-word answers are best when they suffice. +- No preamble ("The answer is", "Here's what I'll do"), no postamble ("Hope that helps", "Let me know"). Get straight to the answer. +- Avoid restating the user's question. +- For factual lookups inside the knowledge base, give the answer with a single `[citation:chunk_id]` and stop. + +Tool discipline: +- Use exactly ONE tool per assistant turn when investigating; wait for the result before deciding the next call. Do not loop on the same tool with the same arguments — pick a result and act. +- For obviously parallelizable read-only batches (multiple independent searches), one turn with several tool calls is fine — but never chain into a fishing expedition. + +Style: +- No emojis unless the user asked. No nested bullets, no headers for short answers. +- If you can't help, say so in 1-2 sentences without explaining "why this could lead to…". + diff --git a/surfsense_backend/app/agents/new_chat/prompts/providers/kimi.md b/surfsense_backend/app/agents/new_chat/prompts/providers/kimi.md new file mode 100644 index 000000000..c3c11ad5e --- /dev/null +++ b/surfsense_backend/app/agents/new_chat/prompts/providers/kimi.md @@ -0,0 +1,21 @@ + +You are running on a Moonshot Kimi model (Kimi-K1.5 / Kimi-K2 / Kimi-K2.5+). + +Action bias: +- Default to taking action with tools rather than describing solutions in prose. If a tool can answer the question, call the tool. +- Don't narrate routine reads, searches, or obvious next steps. Combine related progress into one short status line. 
+- Be thorough in actions (test what you build, verify what you change). Be brief in explanations. + +Tool calls: +- Output multiple non-interfering tool calls in a SINGLE response — parallelism is a major efficiency win on this model. +- When the `task` tool is available, delegate focused subtasks to a subagent with full context (subagents don't inherit yours). +- Don't apologise or pre-announce tool calls. The tool call itself is self-explanatory. + +Language: +- Respond in the SAME language as the user's most recent turn unless explicitly instructed otherwise. + +Discipline: +- Stay on track. Never give the user more than what they asked for. +- Fact-check before stating anything as factual; don't fabricate citations. +- Keep it stupidly simple. Don't overcomplicate. + diff --git a/surfsense_backend/app/agents/new_chat/prompts/providers/openai_classic.md b/surfsense_backend/app/agents/new_chat/prompts/providers/openai_classic.md index 7ea4366c4..9128609e0 100644 --- a/surfsense_backend/app/agents/new_chat/prompts/providers/openai_classic.md +++ b/surfsense_backend/app/agents/new_chat/prompts/providers/openai_classic.md @@ -1,5 +1,21 @@ -You are running on a classic OpenAI chat model (GPT-4 family). Use direct -function-calling for tools. When editing files, use the standard `edit_file` -or `write_file` tools rather than diff-based patches. +You are running on a classic OpenAI chat model (GPT-4 family). + +Persistence: +- Keep going until the user's query is completely resolved before yielding back. Don't end the turn at "I would do X" — actually do X. +- When you say "Next I will…" or "Now I will…", you MUST actually take that action in the same turn. +- If a tool call fails, diagnose and try again with corrected arguments; do not surface the raw error and stop. + +Planning: +- Plan extensively before each tool call and reflect briefly on the result of the previous call. 
For tasks with 3+ steps, use the todo / planning tool and mark items as `in_progress` / `completed` as you go. +- Always announce the next action in ONE concise sentence before making a non-trivial tool call ("I'll search the KB for the migration spec."). + +Output style: +- Conversational but professional. Plain prose for explanations, bullet points for findings, fenced code blocks (with language tags) for code. +- Don't dump tool output verbatim — summarise the relevant lines. +- Don't add a closing recap unless the user asked for one. After completing the work, just stop. + +Tool calls: +- Issue independent tool calls in parallel within one response. +- Use specialised tools over generic ones (e.g. KB search before web search; named connectors over MCP fallback). diff --git a/surfsense_backend/app/agents/new_chat/prompts/providers/openai_codex.md b/surfsense_backend/app/agents/new_chat/prompts/providers/openai_codex.md new file mode 100644 index 000000000..6167d4b06 --- /dev/null +++ b/surfsense_backend/app/agents/new_chat/prompts/providers/openai_codex.md @@ -0,0 +1,19 @@ + +You are running on an OpenAI Codex-class model (gpt-codex / codex-mini / gpt-*-codex). + +Output style: +- Be concise. Don't dump fetched/searched content back at the user — reference paths or chunk ids instead. +- Reference sources as `path:line` (or `chunk:`) so they're clickable. Stand-alone paths per reference, even when repeated. +- Prefer numbered lists (`1.`, `2.`, `3.`) when offering options the user can pick by replying with a single number. +- Skip headers and heavy formatting for simple confirmations. +- No emojis, no em-dashes, no nested bullets. Single-level lists only. + +Code & structured-output tasks: +- Lead with a one-sentence explanation of the change before context. Don't open with "Summary:" — jump in. +- Suggest natural next steps (run tests, diff review, commit) only when they're genuinely the next move. 
+- For multi-line snippets use fenced code blocks with a language tag. + +Tool calls: +- Run independent tool calls in parallel; chain only when later calls need earlier results. +- Don't ask permission ("Should I proceed?") — proceed with the most reasonable default and state what you did. + diff --git a/surfsense_backend/app/agents/new_chat/prompts/providers/openai_reasoning.md b/surfsense_backend/app/agents/new_chat/prompts/providers/openai_reasoning.md index 935d3f207..dd7a61536 100644 --- a/surfsense_backend/app/agents/new_chat/prompts/providers/openai_reasoning.md +++ b/surfsense_backend/app/agents/new_chat/prompts/providers/openai_reasoning.md @@ -1,5 +1,21 @@ -You are running on an OpenAI reasoning model (o-series / GPT-5+). Be terse and -direct in your responses. When editing files, prefer the `apply_patch` tool format -where available. Avoid restating the user request before answering. +You are running on an OpenAI reasoning model (GPT-5+ / o-series). + +Output style: +- Be terse and direct. Don't restate the user's request before answering. +- Don't begin with conversational openers ("Done!", "Got it", "Great question", "Sure thing"). Get to the answer or the action. +- Match response complexity to the task: simple questions → one-line answer; substantial work → lead with the outcome, then context, then any next steps. +- No nested bullets — keep lists flat (single level). For options the user can pick by replying with a number, use `1.` `2.` `3.`. +- Use inline backticks for paths/commands/identifiers; fenced code blocks (with language tags) for multi-line snippets. + +Channels (for clients that support them): +- `commentary` — short progress updates only when they add genuinely new information (a discovery, a tradeoff, a blocker, the start of a non-trivial step). Don't narrate routine reads or obvious next steps. +- `final` — the completed response. Keep it self-contained; no "see above" / "see below" cross-references. 
+ +Tool calls: +- Parallelise independent tool calls in a single response (`multi_tool_use.parallel` where supported). Only sequence when a later call needs an earlier one's output. +- Don't ask permission ("Should I proceed?", "Do you want me to…?"). Pick the most reasonable default, do it, and state what you did. + +Autonomy: +- Persist until the task is fully resolved within the current turn whenever feasible. Don't stop at analysis when the user clearly wants the change applied. diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index fcd342d29..75342a8e1 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -976,7 +976,15 @@ class Document(BaseModel, TimestampMixin): document_metadata = Column(JSON, nullable=True) content = Column(Text, nullable=False) - content_hash = Column(String, nullable=False, index=True, unique=True) + # ``content_hash`` is intentionally NOT globally unique. In a real + # filesystem two files at different paths can hold identical bytes, + # and the agent's ``write_file`` flow needs that semantic to support + # copy / duplicate operations. Path uniqueness lives on + # ``unique_identifier_hash`` (per search space). The hash remains + # indexed because connector indexers consult it as a change-detection + # / cross-source dedup hint via :func:`check_duplicate_document`. + # See migration 133. 
+ content_hash = Column(String, nullable=False, index=True) unique_identifier_hash = Column(String, nullable=True, index=True, unique=True) embedding = Column(Vector(config.embedding_model_instance.dimension)) diff --git a/surfsense_backend/tests/unit/agents/new_chat/prompts/test_composer.py b/surfsense_backend/tests/unit/agents/new_chat/prompts/test_composer.py index d35b7aa8b..d08bbc8cf 100644 --- a/surfsense_backend/tests/unit/agents/new_chat/prompts/test_composer.py +++ b/surfsense_backend/tests/unit/agents/new_chat/prompts/test_composer.py @@ -25,17 +25,33 @@ class TestProviderVariantDetection: @pytest.mark.parametrize( "model_name,expected", [ + # GPT-4 family routes to "classic" (autonomous-persistence style) ("openai:gpt-4o-mini", "openai_classic"), ("openai:gpt-4-turbo", "openai_classic"), + # GPT-5 / o-series route to "reasoning" (channel-aware pragmatic) ("openai:gpt-5", "openai_reasoning"), - ("openai:gpt-5-codex", "openai_reasoning"), ("openai:o1-preview", "openai_reasoning"), ("openai:o3-mini", "openai_reasoning"), + # Codex family beats reasoning (more specific). Mirrors OpenCode + # ``system.ts`` — ``gpt-*-codex`` gets the code-purist prompt. 
+ ("openai:gpt-5-codex", "openai_codex"), + ("openai:gpt-codex", "openai_codex"), + ("openai:codex-mini", "openai_codex"), + # Anthropic + Google ("anthropic:claude-3-5-sonnet", "anthropic"), ("anthropic/claude-opus-4", "anthropic"), ("google:gemini-2.0-flash", "google"), ("vertex:gemini-1.5-pro", "google"), + # Newly-covered families + ("moonshot:kimi-k2", "kimi"), + ("openrouter:moonshot/kimi-k2.5", "kimi"), + ("xai:grok-2", "grok"), + ("openrouter:x-ai/grok-3", "grok"), + ("openai:deepseek-v3", "deepseek"), + ("deepseek:deepseek-r1", "deepseek"), + # Unknown families fall back to default (no provider block emitted) ("groq:mixtral-8x7b", "default"), + ("together:llama-3.1-70b", "default"), (None, "default"), ("", "default"), ], @@ -43,6 +59,16 @@ class TestProviderVariantDetection: def test_detection(self, model_name: str | None, expected: str) -> None: assert detect_provider_variant(model_name) == expected + def test_codex_takes_precedence_over_reasoning(self) -> None: + """Regression guard: ``gpt-5-codex`` must NOT match the generic + ``gpt-5`` reasoning regex first. Codex is the more specialised + prompt and mirrors OpenCode's dispatch order. + """ + from app.agents.new_chat.prompts.composer import detect_provider_variant + + assert detect_provider_variant("openai:gpt-5-codex") == "openai_codex" + assert detect_provider_variant("openai:gpt-5") == "openai_reasoning" + class TestCompose: def test_default_prompt_has_required_blocks(self, fixed_today: datetime) -> None: @@ -149,6 +175,52 @@ class TestCompose: prompt = compose_system_prompt(today=fixed_today, model_name="custom:foo") assert "" not in prompt + @pytest.mark.parametrize( + "model_name,expected_marker", + [ + # Each marker is a unique-ish phrase from the corresponding fragment. + # If a fragment is renamed/rewritten such that the marker is gone, + # update both the fragment and this test deliberately. 
+ ("openai:gpt-5-codex", "Codex-class"), + ("openai:gpt-5", "OpenAI reasoning model"), + ("openai:gpt-4o", "classic OpenAI chat model"), + ("anthropic:claude-3-5-sonnet", "Anthropic Claude"), + ("google:gemini-2.0-flash", "Google Gemini"), + ("moonshot:kimi-k2", "Moonshot Kimi"), + ("xai:grok-2", "xAI Grok"), + ("deepseek:deepseek-r1", "DeepSeek"), + ], + ) + def test_each_known_variant_renders_with_its_marker( + self, + fixed_today: datetime, + model_name: str, + expected_marker: str, + ) -> None: + """Every supported variant must produce a ```` block + containing its identifying marker. This pins the dispatch + the + on-disk fragments together so a missing/renamed file is caught + immediately. + """ + prompt = compose_system_prompt(today=fixed_today, model_name=model_name) + assert "" in prompt, ( + f"variant for {model_name!r} did not emit a provider_hints block; " + "the corresponding providers/.md may be missing" + ) + assert expected_marker in prompt, ( + f"variant for {model_name!r} emitted hints but lacked the " + f"expected marker {expected_marker!r} — the fragment may have " + "drifted from the dispatch table" + ) + + def test_provider_blocks_are_byte_stable_across_calls( + self, fixed_today: datetime + ) -> None: + """Cache-stability guard: same model id → byte-identical prompt.""" + a = compose_system_prompt(today=fixed_today, model_name="moonshot:kimi-k2") + b = compose_system_prompt(today=fixed_today, model_name="moonshot:kimi-k2") + assert a == b + def test_custom_system_instructions_override_default( self, fixed_today: datetime ) -> None: diff --git a/surfsense_backend/tests/unit/middleware/test_kb_persistence_filesystem_parity.py b/surfsense_backend/tests/unit/middleware/test_kb_persistence_filesystem_parity.py new file mode 100644 index 000000000..8b464d48d --- /dev/null +++ b/surfsense_backend/tests/unit/middleware/test_kb_persistence_filesystem_parity.py @@ -0,0 +1,168 @@ +"""Unit tests for kb_persistence filesystem-parity invariants. 
+ +Specifically, these tests pin down that the agent-driven write_file flow +treats path uniqueness — not content uniqueness — as the only hard +invariant. This mirrors a real filesystem: ``cp a b`` produces two files +with identical bytes living at different paths, and that should round-trip +through :class:`KnowledgeBasePersistenceMiddleware` without losing the copy. +""" + +from __future__ import annotations + +from typing import Any +from unittest.mock import AsyncMock, MagicMock + +import numpy as np +import pytest + +from app.agents.new_chat.middleware import kb_persistence +from app.db import Document + + +class _FakeResult: + """Minimal stand-in for ``sqlalchemy.engine.Result``.""" + + def __init__(self, value: Any = None) -> None: + self._value = value + + def scalar_one_or_none(self) -> Any: + return self._value + + def scalar(self) -> Any: + return self._value + + +class _FakeSession: + """Minimal AsyncSession stand-in scoped to ``_create_document`` needs. + + Records every ``add`` so we can assert against the resulting Documents + and Chunks. ``execute`` always returns "no row" by default — i.e. no + folder hierarchy preexists and no path collision exists. Tests that + want a path collision can override that on a per-call basis. + """ + + def __init__(self) -> None: + self.added: list[Any] = [] + self.execute = AsyncMock(return_value=_FakeResult(None)) + self.flush = AsyncMock() + + # Simulate ``await session.flush()`` assigning an id to the doc; + # we increment a counter so each Document gets a unique id. 
+ self._next_id = 1 + + async def _flush_assigning_ids() -> None: + for obj in self.added: + if getattr(obj, "id", None) is None: + obj.id = self._next_id + self._next_id += 1 + + self.flush.side_effect = _flush_assigning_ids + + def add(self, obj: Any) -> None: + self.added.append(obj) + + def add_all(self, objs: list[Any]) -> None: + self.added.extend(objs) + + +@pytest.fixture(autouse=True) +def _stub_embeddings_and_chunks(monkeypatch: pytest.MonkeyPatch) -> None: + """Avoid loading the embedding model in unit tests.""" + monkeypatch.setattr( + kb_persistence, + "embed_texts", + lambda texts: [np.zeros(8, dtype=np.float32) for _ in texts], + ) + monkeypatch.setattr(kb_persistence, "chunk_text", lambda content: [content]) + + +@pytest.mark.asyncio +async def test_create_document_allows_identical_content_at_different_paths() -> None: + """The core regression: ``cp /a/notes.md /b/notes-copy.md``. + + Both create calls must succeed even though the bytes are byte-for-byte + identical, because path is the only filesystem-style unique key. + """ + session = _FakeSession() + content = "# Same body\n\nIdentical content used by two different paths.\n" + + first = await kb_persistence._create_document( + session, # type: ignore[arg-type] + virtual_path="/documents/a/notes.md", + content=content, + search_space_id=42, + created_by_id="user-1", + ) + assert isinstance(first, Document) + assert first.title == "notes.md" + + # Second create with byte-identical content at a different path should + # not raise — that's the whole point of the filesystem-parity fix. + second = await kb_persistence._create_document( + session, # type: ignore[arg-type] + virtual_path="/documents/b/notes-copy.md", + content=content, + search_space_id=42, + created_by_id="user-1", + ) + assert isinstance(second, Document) + assert second.title == "notes-copy.md" + + # Both rows share the same content_hash but live at distinct paths + # (distinct ``unique_identifier_hash``). 
That's the desired contract. + assert first.content_hash == second.content_hash + assert first.unique_identifier_hash != second.unique_identifier_hash + + +@pytest.mark.asyncio +async def test_create_document_still_rejects_path_collision() -> None: + """Path uniqueness remains the hard invariant. + + If ``unique_identifier_hash`` already points at an existing row in + the same search space, the create call must raise ``ValueError`` + with a clear message — matching the behavior the commit loop relies + on to upsert via the existing-row code path. + """ + session = _FakeSession() + + # Path with no folder parts so ``_ensure_folder_hierarchy`` is a + # no-op and the only SELECT executed is the path-collision check. + # That SELECT returns an existing doc id, triggering the guard. + session.execute = AsyncMock(return_value=_FakeResult(value=99)) + + with pytest.raises(ValueError, match="already exists at path"): + await kb_persistence._create_document( + session, # type: ignore[arg-type] + virtual_path="/documents/notes.md", + content="anything", + search_space_id=42, + created_by_id="user-1", + ) + + +@pytest.mark.asyncio +async def test_create_document_does_not_query_for_content_hash_collision( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Regression guard: the legacy second SELECT (content_hash collision + pre-check) must be gone. Counting ``execute`` calls is a brittle but + effective way to lock that in. + + The current flow runs exactly one ``execute`` for the path-collision + SELECT (no folder parts in this path → ``_ensure_folder_hierarchy`` + short-circuits). If a future refactor reintroduces a content-hash + SELECT, this test will fail loud. + """ + session = _FakeSession() + await kb_persistence._create_document( + session, # type: ignore[arg-type] + virtual_path="/documents/notes.md", + content="hello", + search_space_id=42, + created_by_id="user-1", + ) + # Path-collision SELECT only. No content_hash SELECT. 
+ assert session.execute.await_count == 1, ( + f"Unexpected execute count {session.execute.await_count}; " + "did the legacy content_hash collision pre-check get re-added?" + ) diff --git a/surfsense_web/app/globals.css b/surfsense_web/app/globals.css index a37ddb8f3..f54bc2197 100644 --- a/surfsense_web/app/globals.css +++ b/surfsense_web/app/globals.css @@ -210,6 +210,27 @@ button { } } +/* Citation-jump highlight — entrance pulse only. The `SearchHighlightLeaf` + (see components/ui/search-highlight-node.tsx) is otherwise statically + tinted; this animation runs once on mount to draw the eye to the cited + text after `scrollIntoView` lands. The highlight itself is permanent + until the user clicks inside the editor (or another dismissal trigger + fires in `EditorPanelContent`). */ +@keyframes citation-flash-in { + 0% { + background-color: transparent; + box-shadow: 0 0 0 0 transparent; + } + 40% { + background-color: color-mix(in oklab, var(--primary) 30%, transparent); + box-shadow: 0 0 0 3px color-mix(in oklab, var(--primary) 25%, transparent); + } + 100% { + background-color: color-mix(in oklab, var(--primary) 15%, transparent); + box-shadow: 0 0 0 1px color-mix(in oklab, var(--primary) 40%, transparent); + } +} + /* Human-in-the-loop approval card animations */ @keyframes pulse-subtle { 0%, diff --git a/surfsense_web/atoms/document-viewer/pending-chunk-highlight.atom.ts b/surfsense_web/atoms/document-viewer/pending-chunk-highlight.atom.ts new file mode 100644 index 000000000..a3f8357e8 --- /dev/null +++ b/surfsense_web/atoms/document-viewer/pending-chunk-highlight.atom.ts @@ -0,0 +1,19 @@ +import { atom } from "jotai"; + +/** + * Cross-component handoff for citation jumps. Set by `InlineCitation` when a + * numeric chunk badge is clicked (after the document has been resolved); read + * by `DocumentTabContent` once the matching document tab mounts so it can + * scroll to and softly highlight the cited chunk inside the rendered markdown. 
+ * + * Cleared by `DocumentTabContent` only after a terminal state — exact / + * approximate / miss — has been reached, so that an escalation refetch (2MB + * preview → 16MB) keeps the pending intent alive across the re-render. + */ +export interface PendingChunkHighlight { + documentId: number; + chunkId: number; + chunkText: string; +} + +export const pendingChunkHighlightAtom = atom(null); diff --git a/surfsense_web/components/assistant-ui/inline-citation.tsx b/surfsense_web/components/assistant-ui/inline-citation.tsx index eb4bd9af8..ae8d434a8 100644 --- a/surfsense_web/components/assistant-ui/inline-citation.tsx +++ b/surfsense_web/components/assistant-ui/inline-citation.tsx @@ -1,26 +1,45 @@ "use client"; -import { FileText } from "lucide-react"; +import { useQuery, useQueryClient } from "@tanstack/react-query"; +import { useSetAtom } from "jotai"; +import { ExternalLink, FileText } from "lucide-react"; import type { FC } from "react"; -import { useState } from "react"; +import { useCallback, useEffect, useRef, useState } from "react"; +import { toast } from "sonner"; +import { pendingChunkHighlightAtom } from "@/atoms/document-viewer/pending-chunk-highlight.atom"; +import { openEditorPanelAtom } from "@/atoms/editor/editor-panel.atom"; import { useCitationMetadata } from "@/components/assistant-ui/citation-metadata-context"; -import { SourceDetailPanel } from "@/components/new-chat/source-detail-panel"; +import { MarkdownViewer } from "@/components/markdown-viewer"; import { Citation } from "@/components/tool-ui/citation"; +import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover"; +import { Spinner } from "@/components/ui/spinner"; import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"; +import { documentsApiService } from "@/lib/apis/documents-api.service"; +import { cacheKeys } from "@/lib/query-client/cache-keys"; interface InlineCitationProps { chunkId: number; isDocsChunk?: boolean; } +const 
POPOVER_HOVER_CLOSE_DELAY_MS = 150; + /** - * Inline citation for knowledge-base chunks (numeric chunk IDs). - * Renders a clickable badge showing the actual chunk ID that opens the SourceDetailPanel. - * Negative chunk IDs indicate anonymous/synthetic uploads and render as a static badge. + * Inline citation badge for knowledge-base chunks (numeric chunk IDs) and + * Surfsense documentation chunks (`isDocsChunk`). Negative chunk IDs render as + * a static "doc" pill (anonymous/synthetic uploads). + * + * Numeric KB chunks: clicking resolves the parent document via + * `getDocumentByChunk`, opens the document in the right side panel (alongside + * the chat — does not replace it), and stages the cited chunk text in + * `pendingChunkHighlightAtom` so `EditorPanelContent` can scroll to and softly + * highlight it inside the rendered markdown. + * + * Surfsense docs chunks: rendered as a hover-controlled shadcn Popover that + * lazily fetches and previews the cited chunk inline, since those docs aren't + * indexed into the user's search space and have no tab to open. */ export const InlineCitation: FC = ({ chunkId, isDocsChunk = false }) => { - const [isOpen, setIsOpen] = useState(false); - if (chunkId < 0) { return ( @@ -38,26 +57,185 @@ export const InlineCitation: FC = ({ chunkId, isDocsChunk = ); } + if (isDocsChunk) { + return ; + } + + return ; +}; + +const NumericChunkCitation: FC<{ chunkId: number }> = ({ chunkId }) => { + const queryClient = useQueryClient(); + const setPendingHighlight = useSetAtom(pendingChunkHighlightAtom); + const openEditorPanel = useSetAtom(openEditorPanelAtom); + const [resolving, setResolving] = useState(false); + + const handleClick = useCallback(async () => { + if (resolving) return; + setResolving(true); + console.log("[citation:click] start", { chunkId }); + try { + const data = await queryClient.fetchQuery({ + // Local key with explicit window. 
The shared `cacheKeys.documents.byChunk` + // is window-agnostic (latent footgun); namespace the call to avoid + // reusing a different-window cached result. + queryKey: ["documents", "by-chunk", chunkId, "w0"] as const, + queryFn: () => + documentsApiService.getDocumentByChunk({ chunk_id: chunkId, chunk_window: 0 }), + staleTime: 5 * 60 * 1000, + }); + const cited = data.chunks.find((c) => c.id === chunkId) ?? data.chunks[0]; + console.log("[citation:click] fetched doc-by-chunk", { + docId: data.id, + docTitle: data.title, + chunksReturned: data.chunks.length, + citedChunkId: cited?.id, + citedChunkContentLen: cited?.content?.length ?? 0, + citedChunkPreview: + cited?.content && cited.content.length > 120 + ? `${cited.content.slice(0, 120)}…(+${cited.content.length - 120})` + : (cited?.content ?? ""), + }); + // Stage the highlight BEFORE opening the panel so `EditorPanelContent` + // already sees the pending intent on its very first render — avoids a + // "fetch → render → no-pending → next-tick render with pending" race. + setPendingHighlight({ + documentId: data.id, + chunkId, + chunkText: cited?.content ?? "", + }); + openEditorPanel({ + documentId: data.id, + searchSpaceId: data.search_space_id, + title: data.title, + }); + console.log("[citation:click] staged highlight + opened editor panel", { + documentId: data.id, + }); + } catch (err) { + console.warn("[citation:click] failed", err); + toast.error(err instanceof Error ? 
err.message : "Couldn't open cited document"); + } finally { + setResolving(false); + } + }, [chunkId, openEditorPanel, queryClient, resolving, setPendingHighlight]); + return ( - - + ); +}; + +const SurfsenseDocCitation: FC<{ chunkId: number }> = ({ chunkId }) => { + const [open, setOpen] = useState(false); + const closeTimerRef = useRef | null>(null); + + const cancelClose = useCallback(() => { + if (closeTimerRef.current) { + clearTimeout(closeTimerRef.current); + closeTimerRef.current = null; + } + }, []); + + const scheduleClose = useCallback(() => { + cancelClose(); + closeTimerRef.current = setTimeout(() => { + setOpen(false); + closeTimerRef.current = null; + }, POPOVER_HOVER_CLOSE_DELAY_MS); + }, [cancelClose]); + + useEffect(() => () => cancelClose(), [cancelClose]); + + const { data, isLoading, error } = useQuery({ + queryKey: cacheKeys.documents.byChunk(`doc-${chunkId}`), + queryFn: () => documentsApiService.getSurfsenseDocByChunk(chunkId), + enabled: open, + staleTime: 5 * 60 * 1000, + }); + + const citedChunk = data?.chunks.find((c) => c.id === chunkId) ?? data?.chunks[0]; + + return ( + + + + + e.preventDefault()} > - {chunkId} - - +
+
+

+ {data?.title ?? "Surfsense documentation"} +

+

Chunk #{chunkId}

+
+ {data?.source && ( + + + Open + + )} +
+
+ {isLoading && ( +
+ + Loading… +
+ )} + {error && ( +

+ {error instanceof Error ? error.message : "Failed to load chunk"} +

+ )} + {!isLoading && !error && citedChunk?.content && ( + + )} + {!isLoading && !error && !citedChunk?.content && ( +

No content available.

+ )} +
+ + ); }; diff --git a/surfsense_web/components/editor-panel/editor-panel.tsx b/surfsense_web/components/editor-panel/editor-panel.tsx index 3b69ae6e0..0c4e9485b 100644 --- a/surfsense_web/components/editor-panel/editor-panel.tsx +++ b/surfsense_web/components/editor-panel/editor-panel.tsx @@ -1,5 +1,6 @@ "use client"; +import { FindReplacePlugin } from "@platejs/find-replace"; import { useAtomValue, useSetAtom } from "jotai"; import { Check, @@ -14,17 +15,21 @@ import { import dynamic from "next/dynamic"; import { useCallback, useEffect, useRef, useState } from "react"; import { toast } from "sonner"; +import { pendingChunkHighlightAtom } from "@/atoms/document-viewer/pending-chunk-highlight.atom"; import { closeEditorPanelAtom, editorPanelAtom } from "@/atoms/editor/editor-panel.atom"; import { VersionHistoryButton } from "@/components/documents/version-history"; +import type { PlateEditorInstance } from "@/components/editor/plate-editor"; import { SourceCodeEditor } from "@/components/editor/source-code-editor"; import { MarkdownViewer } from "@/components/markdown-viewer"; import { Alert, AlertDescription } from "@/components/ui/alert"; import { Button } from "@/components/ui/button"; import { Drawer, DrawerContent, DrawerHandle, DrawerTitle } from "@/components/ui/drawer"; +import { CITATION_HIGHLIGHT_CLASS } from "@/components/ui/search-highlight-node"; import { Spinner } from "@/components/ui/spinner"; import { useMediaQuery } from "@/hooks/use-media-query"; import { useElectronAPI } from "@/hooks/use-platform"; import { authenticatedFetch, getBearerToken, redirectToLogin } from "@/lib/auth-utils"; +import { buildCitationSearchCandidates } from "@/lib/citation-search"; import { inferMonacoLanguageFromPath } from "@/lib/editor-language"; const PlateEditor = dynamic( @@ -32,7 +37,10 @@ const PlateEditor = dynamic( { ssr: false, loading: () => } ); +type CitationHighlightStatus = "exact" | "miss"; + const LARGE_DOCUMENT_THRESHOLD = 2 * 1024 * 1024; // 2MB 
+const CITATION_MAX_LENGTH = 16 * 1024 * 1024; // 16MB on-demand cap for citation jumps interface EditorContent { document_id: number; @@ -136,6 +144,61 @@ export function EditorPanelContent({ const [displayTitle, setDisplayTitle] = useState(title || "Untitled"); const isLocalFileMode = kind === "local_file"; const editorRenderMode: EditorRenderMode = isLocalFileMode ? "source_code" : "rich_markdown"; + + // --- Citation-jump highlight wiring ---------------------------------- + // `EditorPanelContent` is the consumer of `pendingChunkHighlightAtom`: when + // a citation badge is clicked, the badge stages `{documentId, chunkId, + // chunkText}` and opens this panel. We drive Plate's `FindReplacePlugin` + // (registered in every preset) to highlight the cited text natively via + // Slate decorations — no DOM walking, no Range gymnastics. The state + // machine below escalates the document fetch from 2MB → 16MB once if no + // candidate snippet matched in the preview, and surfaces miss outcomes + // via an inline alert. + const pending = useAtomValue(pendingChunkHighlightAtom); + const setPendingHighlight = useSetAtom(pendingChunkHighlightAtom); + const [fetchKey, setFetchKey] = useState(0); + const [maxLengthOverride, setMaxLengthOverride] = useState(null); + const [highlightResult, setHighlightResult] = useState(null); + const editorRef = useRef(null); + const escalatedForRef = useRef(null); + const lastAppliedChunkIdRef = useRef(null); + // Tracks whether a citation highlight is currently decorated in the + // editor. We use a ref (not state) because the click-to-dismiss handler + // runs in a stable callback that would otherwise close over stale state. 
+ const isHighlightActiveRef = useRef(false); + // Once a citation jump targets this doc we have to keep `PlateEditor` + // mounted for the *rest of the doc session* — even after the highlight + // effect clears `pendingChunkHighlightAtom` (which it does as soon as + // the decoration is applied, so a follow-up citation on the same chunk + // can re-trigger). Without this latch, non-editable docs would re-render + // back into `MarkdownViewer` the instant `pending` is released, tearing + // down the Plate decorations and dropping the highlight after a frame. + const [stickyPlateMode, setStickyPlateMode] = useState(false); + + const clearCitationSearch = useCallback(() => { + isHighlightActiveRef.current = false; + const editor = editorRef.current; + if (!editor) return; + try { + editor.setOption(FindReplacePlugin, "search", ""); + editor.api.redecorate(); + } catch (err) { + console.warn("[EditorPanelContent] clearCitationSearch failed:", err); + } + }, []); + + // Dismiss the highlight when the user interacts with the editor surface. + // `onPointerDown` fires before focus / selection changes so the click + // itself feels responsive — the highlight clears in the same event tick + // that places the cursor. No-op when nothing is highlighted, so we don't + // thrash `redecorate` on every click in normal editing. + const handleEditorPointerDown = useCallback(() => { + if (!isHighlightActiveRef.current) return; + clearCitationSearch(); + setHighlightResult(null); + }, [clearCitationSearch]); + + const isCitationTarget = !!pending && !isLocalFileMode && pending.documentId === documentId; const resolveLocalVirtualPath = useCallback( async (candidatePath: string): Promise => { if (!electronAPI?.getAgentFilesystemMounts) { @@ -155,6 +218,8 @@ export function EditorPanelContent({ const isLargeDocument = (editorDoc?.content_size_bytes ?? 
0) > LARGE_DOCUMENT_THRESHOLD; + // `fetchKey` is an explicit re-fetch trigger (escalation bumps it to force + // a new request even when documentId/searchSpaceId haven't changed). useEffect(() => { const controller = new AbortController(); setIsLoading(true); @@ -166,6 +231,12 @@ export function EditorPanelContent({ setIsEditing(false); initialLoadDone.current = false; changeCountRef.current = 0; + // Clear any in-flight FindReplacePlugin search before the editor + // re-mounts on new content (a fresh editor key is generated below + // from documentId + isEditing, so the previous editor + its + // decorations are about to be discarded anyway, but we belt-and- + // brace here for the case where only `fetchKey` changed). + clearCitationSearch(); const doFetch = async () => { try { @@ -210,7 +281,11 @@ export function EditorPanelContent({ const url = new URL( `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/search-spaces/${searchSpaceId}/documents/${documentId}/editor-content` ); - url.searchParams.set("max_length", String(LARGE_DOCUMENT_THRESHOLD)); + url.searchParams.set("max_length", String(maxLengthOverride ?? LARGE_DOCUMENT_THRESHOLD)); + // `fetchKey` participates here so biome's noUnusedVariables sees it + // as consumed; bumping it forces a fresh request even when the URL + // is otherwise identical. + if (fetchKey > 0) url.searchParams.set("_n", String(fetchKey)); const response = await authenticatedFetch(url.toString(), { method: "GET" }); @@ -256,8 +331,259 @@ export function EditorPanelContent({ resolveLocalVirtualPath, searchSpaceId, title, + fetchKey, + maxLengthOverride, + clearCitationSearch, ]); + // Reset citation-jump bookkeeping whenever the panel switches to a different + // document (or local file). Body only writes setters — the deps are the + // real triggers we want to react to. + // biome-ignore lint/correctness/useExhaustiveDependencies: documentId/localFilePath are intentional triggers. 
+ useEffect(() => { + clearCitationSearch(); + escalatedForRef.current = null; + lastAppliedChunkIdRef.current = null; + setHighlightResult(null); + setMaxLengthOverride(null); + setFetchKey(0); + // Drop sticky Plate mode when the panel moves to a different doc + // — the next doc starts in its preferred render mode (Plate for + // editable, MarkdownViewer for everything else) until/unless a + // citation jump targets it. + setStickyPlateMode(false); + }, [documentId, localFilePath, clearCitationSearch]); + + // Latch sticky Plate mode the first time a citation jump targets this + // doc. We keep it sticky for the remainder of this doc session so the + // highlight effect's `setPendingHighlight(null)` doesn't unmount the + // editor mid-flight (see comment on `stickyPlateMode` declaration). + useEffect(() => { + if (isCitationTarget) setStickyPlateMode(true); + }, [isCitationTarget]); + + // `isEditorReady` is what `useEffect` actually depends on — `editorRef` + // is a ref so changes don't trigger re-runs. We flip this to `true` once + // `PlateEditor` calls back with its live editor instance (its + // `usePlateEditor` value-init runs synchronously, so by the time this + // flips true the markdown is already deserialized into the Slate tree). + const [isEditorReady, setIsEditorReady] = useState(false); + const handleEditorReady = useCallback((editor: PlateEditorInstance | null) => { + console.log("[citation:editor] handleEditorReady", { ready: !!editor }); + editorRef.current = editor; + setIsEditorReady(!!editor); + }, []); + + // --- Citation jump highlight effect ----------------------------------- + // Drives Plate's FindReplacePlugin to highlight the cited chunk: + // 1. Build candidate snippets from the chunk text (first sentence, + // first 8 words, full chunk if short). Plate's decorate runs per- + // block and won't cross block boundaries, so the shorter + // candidates exist to give us something that fits in one + // paragraph / heading. + // 2. 
For each candidate: setOption('search', ...) → redecorate → + // wait two animation frames for React to flush → query the editor + // DOM for `.${CITATION_HIGHLIGHT_CLASS}`. First hit wins. + // + // Why a className and not a `data-*` attribute? Plate's + // `PlateLeaf` runs its props through `useNodeAttributes`, which + // only forwards `attributes`, `className`, `ref`, and `style` — + // arbitrary `data-*` attributes are silently dropped. `className` + // is the only escape hatch guaranteed to survive into the DOM. + // 3. On hit: smooth-scroll the first match into view, mark the + // highlight active (so a click inside the editor can dismiss it), + // release the pending atom. + // 4. On terminal miss: if the doc was truncated and we haven't + // escalated yet, bump the fetch's `max_length` to the citation + // cap and re-fetch — the post-refetch render will re-run this + // effect against the larger preview. Otherwise, release the + // atom and show the miss alert. + useEffect(() => { + console.log("[citation:effect] fired", { + isCitationTarget, + pendingDocId: pending?.documentId, + pendingChunkId: pending?.chunkId, + pendingChunkTextLen: pending?.chunkText?.length, + documentId, + isLocalFileMode, + isEditing, + hasMarkdown: !!editorDoc?.source_markdown, + markdownLen: editorDoc?.source_markdown?.length, + truncated: editorDoc?.truncated, + isEditorReady, + editorRefSet: !!editorRef.current, + maxLengthOverride, + }); + if (!isCitationTarget || !pending) { + console.log("[citation:effect] guard ✗ no citation target / no pending"); + return; + } + if (isLocalFileMode || isEditing) { + console.log("[citation:effect] guard ✗ localFileMode/editing"); + return; + } + if (!editorDoc?.source_markdown) { + console.log("[citation:effect] guard ✗ source_markdown not ready"); + return; + } + if (!isEditorReady) { + console.log("[citation:effect] guard ✗ editor not ready yet"); + return; + } + const editor = editorRef.current; + if (!editor) { + 
console.log("[citation:effect] guard ✗ editorRef.current is null"); + return; + } + + if (lastAppliedChunkIdRef.current !== pending.chunkId) { + lastAppliedChunkIdRef.current = pending.chunkId; + } + + let cancelled = false; + + const finishMiss = () => { + console.log("[citation:effect] terminal miss — no candidate matched"); + try { + editor.setOption(FindReplacePlugin, "search", ""); + editor.api.redecorate(); + } catch (err) { + console.warn("[EditorPanelContent] reset search after miss failed:", err); + } + const canEscalate = + editorDoc.truncated === true && + (maxLengthOverride ?? LARGE_DOCUMENT_THRESHOLD) < CITATION_MAX_LENGTH && + escalatedForRef.current !== pending.chunkId; + console.log("[citation:effect] miss decision", { + truncated: editorDoc.truncated, + currentMaxLength: maxLengthOverride ?? LARGE_DOCUMENT_THRESHOLD, + canEscalate, + }); + if (canEscalate) { + escalatedForRef.current = pending.chunkId; + setMaxLengthOverride(CITATION_MAX_LENGTH); + setFetchKey((k) => k + 1); + // Keep the atom set so the post-refetch render re-runs. + return; + } + setHighlightResult("miss"); + setPendingHighlight(null); + }; + + const tryCandidates = async () => { + const candidates = buildCitationSearchCandidates(pending.chunkText); + console.log("[citation:effect] candidates built", { + count: candidates.length, + previews: candidates.map((c) => c.slice(0, 60)), + }); + if (candidates.length === 0) { + if (!cancelled) finishMiss(); + return; + } + // Resolve the editor's rendered DOM root via Slate's stable + // `[data-slate-editor="true"]` attribute (set by slate-react's + // ``). Scoping queries to this root prevents + // `` elements rendered elsewhere on the page (e.g. chat + // search-highlight leaves in another mounted PlateEditor) from + // being mistaken for citation hits. 
+ const editorRoot = document.querySelector('[data-slate-editor="true"]'); + console.log("[citation:effect] editor root", { + hasRoot: !!editorRoot, + }); + const root: ParentNode = editorRoot ?? document; + + for (let i = 0; i < candidates.length; i++) { + const candidate = candidates[i]; + if (cancelled) return; + try { + editor.setOption(FindReplacePlugin, "search", candidate); + editor.api.redecorate(); + console.log(`[citation:effect] try #${i} setOption + redecorate`, { + len: candidate.length, + preview: candidate.slice(0, 80), + }); + } catch (err) { + console.warn("[EditorPanelContent] setOption/redecorate failed:", err); + continue; + } + // Two rAFs: first lets Slate flush its onChange, second lets + // React commit the decoration leaves into the DOM. + await new Promise((resolve) => + requestAnimationFrame(() => requestAnimationFrame(() => resolve())) + ); + if (cancelled) return; + // Primary probe: by our stable class on the rendered . + let el = root.querySelector(`.${CITATION_HIGHLIGHT_CLASS}`); + const classMarkCount = root.querySelectorAll(`.${CITATION_HIGHLIGHT_CLASS}`).length; + // Diagnostic fallback: any inside the editor root. + // If we ever see allMarks > 0 but classMarkCount === 0, + // the className was stripped again and we need to revisit + // `useNodeAttributes` filtering. 
+ const allMarkCount = root.querySelectorAll("mark").length; + if (!el && allMarkCount > 0) { + el = root.querySelector("mark"); + } + console.log(`[citation:effect] try #${i} DOM probe`, { + foundEl: !!el, + classMarkCount, + allMarkCount, + usedFallback: !!el && classMarkCount === 0, + }); + if (el) { + try { + el.scrollIntoView({ block: "center", behavior: "smooth" }); + } catch { + el.scrollIntoView(); + } + isHighlightActiveRef.current = true; + setHighlightResult("exact"); + console.log(`[citation:effect] ✓ exact via candidate #${i} — atom released`); + // No auto-clear timer — the highlight is intentionally + // permanent until the user clicks inside the editor (see + // `handleEditorPointerDown`) or another dismissal trigger + // fires (doc switch, edit-mode toggle, panel unmount, + // next citation jump). Sticky Plate mode keeps the + // editor mounted after the atom clears. + setPendingHighlight(null); + return; + } + } + if (!cancelled) finishMiss(); + }; + + void tryCandidates(); + + return () => { + cancelled = true; + }; + }, [ + isCitationTarget, + pending, + documentId, + editorDoc?.source_markdown, + editorDoc?.truncated, + isLocalFileMode, + isEditing, + isEditorReady, + maxLengthOverride, + clearCitationSearch, + setPendingHighlight, + ]); + + // Cleanup any active highlight on unmount. + useEffect(() => { + return () => clearCitationSearch(); + }, [clearCitationSearch]); + + // Toggling into edit mode swaps Plate out of readOnly. Clear the citation + // search so stale leaves don't linger in the editing surface. + useEffect(() => { + if (isEditing) { + clearCitationSearch(); + setHighlightResult(null); + } + }, [isEditing, clearCitationSearch]); + useEffect(() => { return () => { if (copyResetTimeoutRef.current) { @@ -367,6 +693,15 @@ export function EditorPanelContent({ EDITABLE_DOCUMENT_TYPES.has(editorDoc.document_type ?? 
"")) && !isLargeDocument : false; + // Use PlateEditor for any of: + // - Editable doc types (FILE/NOTE) — existing editing UX. + // - Active citation jump in flight (`isCitationTarget`) — covers the + // mount in the very first render where the atom is set but the + // sticky effect hasn't fired yet. + // - Sticky Plate mode latched on a previous citation jump — keeps + // the editor mounted (with its decorations) after the highlight + // effect clears the atom. Resets when the doc changes. + const renderInPlateEditor = isEditableType || isCitationTarget || stickyPlateMode; const hasUnsavedChanges = editedMarkdown !== null; const showDesktopHeader = !!onClose; const showEditingActions = isEditableType && isEditing; @@ -381,6 +716,90 @@ export function EditorPanelContent({ setIsEditing(false); }, [editorDoc?.source_markdown]); + const handleDownloadMarkdown = useCallback(async () => { + if (!searchSpaceId || !documentId) return; + setDownloading(true); + try { + const response = await authenticatedFetch( + `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/search-spaces/${searchSpaceId}/documents/${documentId}/download-markdown`, + { method: "GET" } + ); + if (!response.ok) throw new Error("Download failed"); + const blob = await response.blob(); + const url = URL.createObjectURL(blob); + const a = document.createElement("a"); + a.href = url; + const disposition = response.headers.get("content-disposition"); + const match = disposition?.match(/filename="(.+)"/); + a.download = match?.[1] ?? 
`${editorDoc?.title || "document"}.md`; + document.body.appendChild(a); + a.click(); + a.remove(); + URL.revokeObjectURL(url); + toast.success("Download started"); + } catch { + toast.error("Failed to download document"); + } finally { + setDownloading(false); + } + }, [documentId, editorDoc?.title, searchSpaceId]); + + // We no longer surface an "approximate" status — Plate's FindReplacePlugin + // either decorates an exact match or it doesn't, and the candidate snippet + // strategy (first sentence → first 8 words → full chunk) means we either + // land on the citation start or fall through to the miss alert. + const showMissAlert = isCitationTarget && highlightResult === "miss"; + + const citationAlerts = showMissAlert && ( + + + + Cited section couldn't be located in this view. + {editorDoc?.truncated && ( + + )} + + + ); + + const largeDocAlert = isLargeDocument && !isLocalFileMode && editorDoc && ( + + + + + This document is too large for the editor ( + {Math.round((editorDoc.content_size_bytes ?? 0) / 1024 / 1024)}MB,{" "} + {editorDoc.chunk_count ?? 0} chunks). Showing a preview below. + + + + + ); + return ( <> {showDesktopHeader ? ( @@ -565,61 +984,6 @@ export function EditorPanelContent({

- ) : isLargeDocument && !isLocalFileMode ? ( -
- - - - - This document is too large for the editor ( - {Math.round((editorDoc.content_size_bytes ?? 0) / 1024 / 1024)}MB,{" "} - {editorDoc.chunk_count ?? 0} chunks). Showing a preview below. - - - - - -
) : editorRenderMode === "source_code" ? (
- ) : isEditableType ? ( - + ) : isLargeDocument && !isLocalFileMode && !isCitationTarget ? ( + // Large doc, no active citation — fast Streamdown preview + // + download CTA. We only fall back to MarkdownViewer here + // because Plate is heavy on multi-MB docs and the user + // isn't waiting on a specific citation to render. +
+ {largeDocAlert} + +
+ ) : renderInPlateEditor ? ( + // Editable doc (FILE/NOTE) OR active citation jump (any + // doc type). The citation path uses Plate's + // FindReplacePlugin for native, decoration-based + // highlighting — see the citation-jump highlight effect + // above for how `editorRef` and `handleEditorReady` are + // wired. +
+ {(citationAlerts || (isLargeDocument && isCitationTarget && !isLocalFileMode)) && ( +
+ {isLargeDocument && isCitationTarget && largeDocAlert} + {citationAlerts} +
+ )} +
+ +
+
) : (
diff --git a/surfsense_web/components/editor/plate-editor.tsx b/surfsense_web/components/editor/plate-editor.tsx
index 481a420fb..eef18ef6a 100644
--- a/surfsense_web/components/editor/plate-editor.tsx
+++ b/surfsense_web/components/editor/plate-editor.tsx
@@ -12,6 +12,12 @@ import { type EditorPreset, presetMap } from "@/components/editor/presets";
 import { escapeMdxExpressions } from "@/components/editor/utils/escape-mdx";
 import { Editor, EditorContainer } from "@/components/ui/editor";
 
+/** Live editor instance returned by `usePlateEditor`. Exposed via the
+ * `onEditorReady` prop so callers (e.g. `EditorPanelContent`) can drive
+ * plugin options imperatively — most notably setting
+ * `FindReplacePlugin`'s `search` option for citation-jump highlights. */
+export type PlateEditorInstance = ReturnType<typeof usePlateEditor>;
+
 export interface PlateEditorProps {
 	/** Markdown string to load as initial content */
 	markdown?: string;
@@ -62,6 +68,15 @@ export interface PlateEditorProps {
 	 * without modifying the core editor component.
 	 */
 	extraPlugins?: AnyPluginConfig[];
+	/**
+	 * Called whenever the live editor instance (re)mounts, with `null` on
+	 * unmount. Used by callers that need to drive plugin options imperatively
+	 * — e.g. `EditorPanelContent` setting `FindReplacePlugin`'s `search`
+	 * option for citation-jump highlights. The callback is invoked exactly
+	 * once per editor lifetime (the parent's `key` prop forces a fresh
+	 * editor when needed, e.g. on edit-mode toggle).
+	 */
+	onEditorReady?: (editor: PlateEditorInstance | null) => void;
 }
 
 function PlateEditorContent({
@@ -100,6 +115,7 @@ export function PlateEditor({
 	defaultEditing = false,
 	preset = "full",
 	extraPlugins = [],
+	onEditorReady,
 }: PlateEditorProps) {
 	const lastMarkdownRef = useRef(markdown);
 	const lastHtmlRef = useRef(html);
@@ -156,6 +172,21 @@
 			: undefined,
 	});
 
+	// Expose the live editor instance to imperative callers (e.g. citation
+	// jump highlights).
We deliberately don't depend on `onEditorReady` + // itself in the cleanup closure — callers commonly pass an arrow that + // closes over a stable ref setter, but if they pass a freshly-bound + // callback per render, the `onEditorReady?.(editor)` re-fires which is + // idempotent for ref-style setters. + const onEditorReadyRef = useRef(onEditorReady); + useEffect(() => { + onEditorReadyRef.current = onEditorReady; + }, [onEditorReady]); + useEffect(() => { + onEditorReadyRef.current?.(editor); + return () => onEditorReadyRef.current?.(null); + }, [editor]); + // Update editor content when html prop changes externally useEffect(() => { if (html !== undefined && html !== lastHtmlRef.current) { diff --git a/surfsense_web/components/editor/presets.ts b/surfsense_web/components/editor/presets.ts index c207b5e56..49f53ecf1 100644 --- a/surfsense_web/components/editor/presets.ts +++ b/surfsense_web/components/editor/presets.ts @@ -1,5 +1,6 @@ "use client"; +import { FindReplacePlugin } from "@platejs/find-replace"; import type { AnyPluginConfig } from "platejs"; import { TrailingBlockPlugin } from "platejs"; @@ -17,6 +18,30 @@ import { SelectionKit } from "@/components/editor/plugins/selection-kit"; import { SlashCommandKit } from "@/components/editor/plugins/slash-command-kit"; import { TableKit } from "@/components/editor/plugins/table-kit"; import { ToggleKit } from "@/components/editor/plugins/toggle-kit"; +import { SearchHighlightLeaf } from "@/components/ui/search-highlight-node"; + +/** + * Citation-jump highlighter. Re-uses Plate's built-in `FindReplacePlugin` + * (decorate-only, no editing surface) to drive the "scroll-to-cited-text" + * UX in `EditorPanelContent`. We register it in every preset because: + * - Decorate is a no-op when `search` is empty (single getOptions() check + * per block), so cost is effectively zero for non-citation viewers. 
+ * - Keeping it preset-agnostic means citations work whether the doc is + * opened in editable (`full`) or pure-viewer (`readonly`) modes. + * + * The parent component drives `setOption(FindReplacePlugin, 'search', ...)` + * + `editor.api.redecorate()` to trigger highlights, then queries the + * editor DOM for `.citation-highlight-leaf` to scroll the first match + * into view. (We can't use a `data-*` attribute here — Plate's + * `PlateLeaf` runs props through `useNodeAttributes`, which only forwards + * `attributes`, `className`, `ref`, `style`; arbitrary `data-*` props are + * silently dropped.) See `components/ui/search-highlight-node.tsx` for + * the leaf component and `CITATION_HIGHLIGHT_CLASS` constant. + */ +const CitationFindReplacePlugin = FindReplacePlugin.configure({ + options: { search: "" }, + render: { node: SearchHighlightLeaf }, +}); /** * Full preset – every plugin kit enabled. @@ -38,6 +63,7 @@ export const fullPreset: AnyPluginConfig[] = [ ...AutoformatKit, ...DndKit, TrailingBlockPlugin, + CitationFindReplacePlugin, ]; /** @@ -52,6 +78,7 @@ export const minimalPreset: AnyPluginConfig[] = [ ...LinkKit, ...AutoformatKit, TrailingBlockPlugin, + CitationFindReplacePlugin, ]; /** @@ -68,6 +95,7 @@ export const readonlyPreset: AnyPluginConfig[] = [ ...CalloutKit, ...ToggleKit, ...MathKit, + CitationFindReplacePlugin, ]; /** All available preset names */ diff --git a/surfsense_web/components/new-chat/source-detail-panel.tsx b/surfsense_web/components/new-chat/source-detail-panel.tsx deleted file mode 100644 index aded206c7..000000000 --- a/surfsense_web/components/new-chat/source-detail-panel.tsx +++ /dev/null @@ -1,719 +0,0 @@ -"use client"; - -import { useQuery } from "@tanstack/react-query"; -import { - BookOpen, - ChevronDown, - ChevronUp, - ExternalLink, - FileQuestionMark, - FileText, - Hash, - Loader2, - Sparkles, - X, -} from "lucide-react"; -import { AnimatePresence, motion, useReducedMotion } from "motion/react"; -import { useTranslations } 
from "next-intl"; -import type React from "react"; -import { forwardRef, memo, type ReactNode, useCallback, useEffect, useRef, useState } from "react"; -import { createPortal } from "react-dom"; -import { MarkdownViewer } from "@/components/markdown-viewer"; -import { Badge } from "@/components/ui/badge"; -import { Button } from "@/components/ui/button"; -import { ScrollArea } from "@/components/ui/scroll-area"; -import { Spinner } from "@/components/ui/spinner"; -import type { - GetDocumentByChunkResponse, - GetSurfsenseDocsByChunkResponse, -} from "@/contracts/types/document.types"; -import { documentsApiService } from "@/lib/apis/documents-api.service"; -import { cacheKeys } from "@/lib/query-client/cache-keys"; -import { cn } from "@/lib/utils"; - -type DocumentData = GetDocumentByChunkResponse | GetSurfsenseDocsByChunkResponse; - -interface SourceDetailPanelProps { - open: boolean; - onOpenChange: (open: boolean) => void; - chunkId: number; - sourceType: string; - title: string; - description?: string; - url?: string; - children?: ReactNode; - isDocsChunk?: boolean; -} - -const formatDocumentType = (type: string) => { - if (!type) return ""; - return type - .split("_") - .map((word) => word.charAt(0) + word.slice(1).toLowerCase()) - .join(" "); -}; - -// Chunk card component -// For large documents (>30 chunks), we disable animation to prevent layout shifts -// which break auto-scroll functionality -interface ChunkCardProps { - chunk: { id: number; content: string }; - localIndex: number; - chunkNumber: number; - totalChunks: number; - isCited: boolean; - isActive: boolean; - disableLayoutAnimation?: boolean; -} - -const ChunkCard = memo( - forwardRef( - ({ chunk, localIndex, chunkNumber, totalChunks, isCited }, ref) => { - return ( -
- {isCited &&
} - -
-
-
- {chunkNumber} -
- - Chunk {chunkNumber} of {totalChunks} - -
- {isCited && ( - - - Cited Source - - )} -
- -
- -
-
- ); - } - ) -); -ChunkCard.displayName = "ChunkCard"; - -export function SourceDetailPanel({ - open, - onOpenChange, - chunkId, - sourceType, - title, - description, - url, - children, - isDocsChunk = false, -}: SourceDetailPanelProps) { - const t = useTranslations("dashboard"); - const scrollAreaRef = useRef(null); - const hasScrolledRef = useRef(false); // Use ref to avoid stale closures - const scrollTimersRef = useRef[]>([]); - const [activeChunkIndex, setActiveChunkIndex] = useState(null); - const [mounted, setMounted] = useState(false); - const shouldReduceMotion = useReducedMotion(); - - useEffect(() => { - setMounted(true); - }, []); - - const { - data: documentData, - isLoading: isDocumentByChunkFetching, - error: documentByChunkFetchingError, - } = useQuery({ - queryKey: isDocsChunk - ? cacheKeys.documents.byChunk(`doc-${chunkId}`) - : cacheKeys.documents.byChunk(chunkId.toString()), - queryFn: async () => { - if (isDocsChunk) { - return documentsApiService.getSurfsenseDocByChunk(chunkId); - } - return documentsApiService.getDocumentByChunk({ chunk_id: chunkId, chunk_window: 5 }); - }, - enabled: !!chunkId && open, - staleTime: 5 * 60 * 1000, - }); - - const totalChunks = - documentData && "total_chunks" in documentData - ? (documentData.total_chunks ?? documentData.chunks.length) - : (documentData?.chunks?.length ?? 0); - const [beforeChunks, setBeforeChunks] = useState< - Array<{ id: number; content: string; created_at: string }> - >([]); - const [afterChunks, setAfterChunks] = useState< - Array<{ id: number; content: string; created_at: string }> - >([]); - const [loadingBefore, setLoadingBefore] = useState(false); - const [loadingAfter, setLoadingAfter] = useState(false); - - useEffect(() => { - setBeforeChunks([]); - setAfterChunks([]); - }, [chunkId, open]); - - const chunkStartIndex = - documentData && "chunk_start_index" in documentData ? (documentData.chunk_start_index ?? 0) : 0; - const initialChunks = documentData?.chunks ?? 
[]; - const allChunks = [...beforeChunks, ...initialChunks, ...afterChunks]; - const absoluteStart = chunkStartIndex - beforeChunks.length; - const absoluteEnd = chunkStartIndex + initialChunks.length + afterChunks.length; - const canLoadBefore = absoluteStart > 0; - const canLoadAfter = absoluteEnd < totalChunks; - - const EXPAND_SIZE = 10; - - const loadBefore = useCallback(async () => { - if (!documentData || !("search_space_id" in documentData) || !canLoadBefore) return; - setLoadingBefore(true); - try { - const count = Math.min(EXPAND_SIZE, absoluteStart); - const result = await documentsApiService.getDocumentChunks({ - document_id: documentData.id, - page: 0, - page_size: count, - start_offset: absoluteStart - count, - }); - const existingIds = new Set(allChunks.map((c) => c.id)); - const newChunks = result.items - .filter((c) => !existingIds.has(c.id)) - .map((c) => ({ id: c.id, content: c.content, created_at: c.created_at })); - setBeforeChunks((prev) => [...newChunks, ...prev]); - } catch (err) { - console.error("Failed to load earlier chunks:", err); - } finally { - setLoadingBefore(false); - } - }, [documentData, absoluteStart, canLoadBefore, allChunks]); - - const loadAfter = useCallback(async () => { - if (!documentData || !("search_space_id" in documentData) || !canLoadAfter) return; - setLoadingAfter(true); - try { - const result = await documentsApiService.getDocumentChunks({ - document_id: documentData.id, - page: 0, - page_size: EXPAND_SIZE, - start_offset: absoluteEnd, - }); - const existingIds = new Set(allChunks.map((c) => c.id)); - const newChunks = result.items - .filter((c) => !existingIds.has(c.id)) - .map((c) => ({ id: c.id, content: c.content, created_at: c.created_at })); - setAfterChunks((prev) => [...prev, ...newChunks]); - } catch (err) { - console.error("Failed to load later chunks:", err); - } finally { - setLoadingAfter(false); - } - }, [documentData, absoluteEnd, canLoadAfter, allChunks]); - - const isDirectRenderSource = - 
sourceType === "TAVILY_API" || - sourceType === "LINKUP_API" || - sourceType === "SEARXNG_API" || - sourceType === "BAIDU_SEARCH_API"; - - const citedChunkIndex = allChunks.findIndex((chunk) => chunk.id === chunkId); - - // Simple scroll function that scrolls to a chunk by index - const scrollToChunkByIndex = useCallback( - (chunkIndex: number, smooth = true) => { - const scrollContainer = scrollAreaRef.current; - if (!scrollContainer) return; - - const viewport = scrollContainer.querySelector( - "[data-radix-scroll-area-viewport]" - ) as HTMLElement | null; - if (!viewport) return; - - const chunkElement = scrollContainer.querySelector( - `[data-chunk-index="${chunkIndex}"]` - ) as HTMLElement | null; - if (!chunkElement) return; - - // Get positions using getBoundingClientRect for accuracy - const viewportRect = viewport.getBoundingClientRect(); - const chunkRect = chunkElement.getBoundingClientRect(); - - // Calculate where to scroll to center the chunk - const currentScrollTop = viewport.scrollTop; - const chunkTopRelativeToViewport = chunkRect.top - viewportRect.top + currentScrollTop; - const scrollTarget = - chunkTopRelativeToViewport - viewportRect.height / 2 + chunkRect.height / 2; - - viewport.scrollTo({ - top: Math.max(0, scrollTarget), - behavior: smooth && !shouldReduceMotion ? 
"smooth" : "auto", - }); - - setActiveChunkIndex(chunkIndex); - }, - [shouldReduceMotion] - ); - - // Callback ref for the cited chunk - scrolls when the element mounts - const citedChunkRefCallback = useCallback( - (node: HTMLDivElement | null) => { - if (node && !hasScrolledRef.current && open) { - hasScrolledRef.current = true; // Mark immediately to prevent duplicate scrolls - - // Store the node reference for the delayed scroll - const scrollToCitedChunk = () => { - const scrollContainer = scrollAreaRef.current; - if (!scrollContainer || !node.isConnected) return false; - - const viewport = scrollContainer.querySelector( - "[data-radix-scroll-area-viewport]" - ) as HTMLElement | null; - if (!viewport) return false; - - // Get positions - const viewportRect = viewport.getBoundingClientRect(); - const chunkRect = node.getBoundingClientRect(); - - // Calculate scroll position to center the chunk - const currentScrollTop = viewport.scrollTop; - const chunkTopRelativeToViewport = chunkRect.top - viewportRect.top + currentScrollTop; - const scrollTarget = - chunkTopRelativeToViewport - viewportRect.height / 2 + chunkRect.height / 2; - - viewport.scrollTo({ - top: Math.max(0, scrollTarget), - behavior: "auto", // Instant scroll for initial positioning - }); - - return true; - }; - - // Scroll multiple times with delays to handle progressive content rendering - // Each subsequent scroll will correct for any layout shifts - const scrollAttempts = [50, 150, 300, 600, 1000]; - - scrollAttempts.forEach((delay) => { - scrollTimersRef.current.push( - setTimeout(() => { - scrollToCitedChunk(); - }, delay) - ); - }); - - // After final attempt, mark the cited chunk as active - scrollTimersRef.current.push( - setTimeout( - () => { - setActiveChunkIndex(citedChunkIndex); - }, - scrollAttempts[scrollAttempts.length - 1] + 50 - ) - ); - } - }, - [open, citedChunkIndex] - ); - - // Reset scroll state when panel closes - useEffect(() => { - if (!open) { - 
scrollTimersRef.current.forEach(clearTimeout); - scrollTimersRef.current = []; - hasScrolledRef.current = false; - setActiveChunkIndex(null); - } - return () => { - scrollTimersRef.current.forEach(clearTimeout); - scrollTimersRef.current = []; - }; - }, [open]); - - // Handle escape key - useEffect(() => { - const handleEscape = (e: KeyboardEvent) => { - if (e.key === "Escape" && open) { - onOpenChange(false); - } - }; - window.addEventListener("keydown", handleEscape); - return () => window.removeEventListener("keydown", handleEscape); - }, [open, onOpenChange]); - - // Prevent body scroll when open - useEffect(() => { - if (open) { - document.body.style.overflow = "hidden"; - } else { - document.body.style.overflow = ""; - } - return () => { - document.body.style.overflow = ""; - }; - }, [open]); - - const handleUrlClick = (e: React.MouseEvent, clickUrl: string) => { - e.preventDefault(); - e.stopPropagation(); - window.open(clickUrl, "_blank", "noopener,noreferrer"); - }; - - const scrollToChunk = useCallback( - (index: number) => { - scrollToChunkByIndex(index, true); - }, - [scrollToChunkByIndex] - ); - - const panelContent = ( - - {open && ( - <> - {/* Backdrop */} - onOpenChange(false)} - /> - - {/* Panel */} - - {/* Header */} - -
-

- {documentData?.title || title || "Source Document"} -

-

- {documentData && "document_type" in documentData - ? formatDocumentType(documentData.document_type) - : sourceType && formatDocumentType(sourceType)} - {totalChunks > 0 && ( - - • {totalChunks} chunk{totalChunks !== 1 ? "s" : ""} - {allChunks.length < totalChunks && ` (showing ${allChunks.length})`} - - )} -

-
-
- {url && ( - - )} - -
-
- - {/* Loading State */} - {!isDirectRenderSource && isDocumentByChunkFetching && ( -
- - -

- {t("loading_document")} -

-
-
- )} - - {/* Error State */} - {!isDirectRenderSource && documentByChunkFetchingError && ( -
- -
- -
-
-

Document unavailable

-

- {documentByChunkFetchingError.message || - "An unexpected error occurred. Please try again."} -

-
- -
-
- )} - - {/* Direct render for web search providers */} - {isDirectRenderSource && ( - -
- {url && ( - - )} - -

- - Source Information -

-
- {title || "Untitled"} -
-
- {description || "No content available"} -
-
-
-
- )} - - {/* API-fetched document content */} - {!isDirectRenderSource && documentData && ( -
- {/* Chunk Navigation Sidebar */} - {allChunks.length > 1 && ( - - -
- {allChunks.map((chunk, idx) => { - const absNum = absoluteStart + idx + 1; - const isCited = chunk.id === chunkId; - const isActive = activeChunkIndex === idx; - return ( - scrollToChunk(idx)} - initial={{ opacity: 0, scale: 0.8 }} - animate={{ opacity: 1, scale: 1 }} - transition={{ delay: Math.min(idx * 0.02, 0.2) }} - className={cn( - "relative w-11 h-9 mx-auto rounded-lg text-xs font-semibold transition-all duration-200 flex items-center justify-center", - isCited - ? "bg-primary text-primary-foreground shadow-md" - : isActive - ? "bg-muted text-foreground" - : "bg-muted/50 text-muted-foreground hover:bg-muted hover:text-foreground" - )} - title={isCited ? `Chunk ${absNum} (Cited)` : `Chunk ${absNum}`} - > - {absNum} - {isCited && ( - - - - )} - - ); - })} -
-
-
- )} - - {/* Main Content */} - -
- {/* Document Metadata */} - {"document_metadata" in documentData && - documentData.document_metadata && - Object.keys(documentData.document_metadata).length > 0 && ( - -

- - Document Information -

-
- {Object.entries(documentData.document_metadata).map(([key, value]) => ( -
-
- {key.replace(/_/g, " ")} -
-
{String(value)}
-
- ))} -
-
- )} - - {/* Chunks Header */} -
-

- - Chunks {absoluteStart + 1}–{absoluteEnd} of {totalChunks} -

- {citedChunkIndex !== -1 && ( - - )} -
- - {/* Load Earlier */} - {canLoadBefore && ( -
- -
- )} - - {/* Chunks */} -
- {allChunks.map((chunk, idx) => { - const isCited = chunk.id === chunkId; - const chunkNumber = absoluteStart + idx + 1; - return ( - 30} - /> - ); - })} -
- - {/* Load Later */} - {canLoadAfter && ( -
- -
- )} -
-
-
- )} -
- - )} -
- ); - - if (!mounted) return <>{children}; - - return ( - <> - {children} - {createPortal(panelContent, globalThis.document.body)} - - ); -} diff --git a/surfsense_web/components/settings/user-settings-dialog.tsx b/surfsense_web/components/settings/user-settings-dialog.tsx index 7352a82ee..a04ce16dd 100644 --- a/surfsense_web/components/settings/user-settings-dialog.tsx +++ b/surfsense_web/components/settings/user-settings-dialog.tsx @@ -67,9 +67,6 @@ const DesktopShortcutsContent = dynamic( import( "@/app/dashboard/[search_space_id]/user-settings/components/DesktopShortcutsContent" ).then((m) => ({ default: m.DesktopShortcutsContent })), - import( - "@/app/dashboard/[search_space_id]/user-settings/components/DesktopShortcutsContent" - ).then((m) => ({ default: m.DesktopShortcutsContent })), { ssr: false } ); const MemoryContent = dynamic( diff --git a/surfsense_web/components/ui/search-highlight-node.tsx b/surfsense_web/components/ui/search-highlight-node.tsx new file mode 100644 index 000000000..e3f316cce --- /dev/null +++ b/surfsense_web/components/ui/search-highlight-node.tsx @@ -0,0 +1,45 @@ +"use client"; + +import type { PlateLeafProps } from "platejs/react"; +import { PlateLeaf } from "platejs/react"; + +/** + * Stable class name used to identify Plate-rendered citation highlight + * leaves in the DOM. We can't use a `data-*` attribute here — Plate's + * `PlateLeaf` runs its props through `useNodeAttributes`, which only + * forwards `attributes`, `className`, `ref`, and `style` to the rendered + * element; arbitrary `data-*` props are silently dropped (verified + * against `@platejs/core/dist/react/index.js` v52). So `className` is + * the only escape hatch that's guaranteed to survive into the DOM. + */ +export const CITATION_HIGHLIGHT_CLASS = "citation-highlight-leaf"; + +/** + * Leaf rendered for ranges decorated by `@platejs/find-replace`'s + * `FindReplacePlugin`. 
We re-purpose that plugin to drive the citation-jump + * highlight: when a citation is staged, the parent sets the plugin's `search` + * option to a snippet of the chunk text and Plate decorates every match with + * `searchHighlight: true`. This component renders those decorations as a + * `` tagged with `CITATION_HIGHLIGHT_CLASS` so the parent can: + * 1. Query the first match in DOM order to scroll it into view. + * 2. Detect the active-highlight state without a separate React ref. + * + * The highlight is **persistent** — it does not auto-fade. The parent in + * `EditorPanelContent` clears it by setting the plugin's `search` option + * back to "" when one of: (a) the user clicks anywhere inside the editor, + * (b) the panel switches to a different document, (c) the user toggles + * into edit mode, (d) another citation jump is staged, (e) the panel + * unmounts. We use a brief entrance pulse (`citation-flash-in`, see + * `globals.css`) purely to draw the eye after `scrollIntoView` lands. + */ +export function SearchHighlightLeaf(props: PlateLeafProps) { + return ( + + {props.children} + + ); +} diff --git a/surfsense_web/lib/citation-search.ts b/surfsense_web/lib/citation-search.ts new file mode 100644 index 000000000..f80f13076 --- /dev/null +++ b/surfsense_web/lib/citation-search.ts @@ -0,0 +1,125 @@ +/** + * Snippet generation for the citation-jump highlight, driven by Plate's + * `FindReplacePlugin`. The plugin runs `decorate` per-block and only matches + * within blocks whose children are all `Text` nodes (so it crosses inline + * marks like bold/italic but **not** block boundaries, and a block that + * contains even one inline element such as a link is silently skipped). + * That means a full chunk that spans heading + paragraph won't match as a + * single string — we have to pick a shorter snippet that fits inside one + * rendered block. 
+ * + * `buildCitationSearchCandidates` returns search strings ordered from + * "most-specific anchor" to "broadest fallback": + * 1. First sentence of the chunk (capped at `FIRST_SENTENCE_MAX`). + * 2. First `FIRST_PHRASE_WORDS` words. + * 3. Each non-trivial line of the chunk, in source order — gives us a + * separate attempt for each rendered block, so a heading line with + * an inline link doesn't doom the whole jump. + * 4. Full chunk (only if it's already short enough to plausibly fit + * inside one block). + * + * The caller tries each candidate in turn — set the plugin's `search` + * option, `editor.api.redecorate()`, then check the editor DOM for a + * `.citation-highlight-leaf` element. First candidate that produces one + * wins; subsequent candidates are skipped. + */ + +const FIRST_SENTENCE_MAX = 120; +const FIRST_PHRASE_WORDS = 8; +const MIN_SNIPPET_LENGTH = 6; +const FULL_CHUNK_MAX = FIRST_SENTENCE_MAX * 2; +const MAX_LINE_CANDIDATES = 6; +const LINE_CANDIDATE_MAX = FIRST_SENTENCE_MAX; + +function normalizeWhitespace(input: string): string { + return input.replace(/\s+/g, " ").trim(); +} + +/** + * Strip the markdown syntax that won't survive into the rendered editor's + * plain text, so the chunk text (which comes back from the indexer as raw + * source markdown) can be matched against the literal text values stored + * in Plate's Slate tree. + * + * Order matters: handle multi-char and "container" syntax before single- + * char emphasis, otherwise `**text**` collapses to `*text*` first. + * + * Heuristic only — we don't aim to be a full markdown parser, just to + * remove the common markers (`**bold**`, `[text](url)`, `# headings`, + * `- list`, etc.) that show up in connector-doc chunks and would break + * literal substring search. 
+ */ +export function stripMarkdownForMatch(input: string): string { + let s = input; + s = s.replace(/```[a-z0-9_+-]*\n?([\s\S]*?)```/gi, (_, body: string) => body); + s = s.replace(//g, " "); + s = s.replace(/!\[([^\]]*)\]\([^)]*\)/g, "$1"); + s = s.replace(/!\[([^\]]*)\]\[[^\]]*\]/g, "$1"); + s = s.replace(/\[([^\]]+)\]\([^)]*\)/g, "$1"); + s = s.replace(/\[([^\]]+)\]\[[^\]]*\]/g, "$1"); + s = s.replace(/<((?:https?|mailto):[^>\s]+)>/g, "$1"); + s = s.replace(/`+([^`\n]+?)`+/g, "$1"); + s = s.replace(/(\*\*|__)([\s\S]+?)\1/g, "$2"); + s = s.replace(/(?+[ \t]?/gm, ""); + s = s.replace(/^[ \t]*[-*+][ \t]+/gm, ""); + s = s.replace(/^[ \t]*\d+\.[ \t]+/gm, ""); + s = s.replace(/^[ \t]{0,3}(?:[-*_])(?:[ \t]*[-*_]){2,}[ \t]*$/gm, ""); + s = s.replace(/^[ \t]*\|?(?:[ \t]*:?-+:?[ \t]*\|)+[ \t]*:?-+:?[ \t]*\|?[ \t]*$/gm, ""); + s = s.replace(/\\([\\`*_{}[\]()#+\-.!~>])/g, "$1"); + return s; +} + +export function buildCitationSearchCandidates(rawText: string): string[] { + if (!rawText) return []; + const stripped = stripMarkdownForMatch(rawText); + const normalized = normalizeWhitespace(stripped); + if (normalized.length < MIN_SNIPPET_LENGTH) return []; + + const out: string[] = []; + const seen = new Set(); + const push = (s: string) => { + const t = normalizeWhitespace(s); + if (t.length >= MIN_SNIPPET_LENGTH && !seen.has(t)) { + out.push(t); + seen.add(t); + } + }; + + const sentenceMatch = normalized.match(/^[^.!?]+[.!?]/); + if (sentenceMatch) { + const sentence = sentenceMatch[0]; + push(sentence.length > FIRST_SENTENCE_MAX ? sentence.slice(0, FIRST_SENTENCE_MAX) : sentence); + } else if (normalized.length > FIRST_SENTENCE_MAX) { + push(normalized.slice(0, FIRST_SENTENCE_MAX)); + } + + const words = normalized.split(" ").filter(Boolean); + if (words.length > FIRST_PHRASE_WORDS) { + push(words.slice(0, FIRST_PHRASE_WORDS).join(" ")); + } + + // Per-line candidates: each chunk line is roughly one block in the + // rendered editor. 
Trying them in order gives us a separate decorate + // attempt for each block, which matters when the first line is a + // heading containing a link (Plate's `FindReplacePlugin` will skip + // any block whose children aren't all text nodes). + const rawLines = stripped.split(/\r?\n/); + let lineCount = 0; + for (const line of rawLines) { + if (lineCount >= MAX_LINE_CANDIDATES) break; + const trimmed = normalizeWhitespace(line); + if (trimmed.length < MIN_SNIPPET_LENGTH) continue; + push(trimmed.length > LINE_CANDIDATE_MAX ? trimmed.slice(0, LINE_CANDIDATE_MAX) : trimmed); + lineCount++; + } + + if (normalized.length <= FULL_CHUNK_MAX) { + push(normalized); + } + + return out; +} diff --git a/surfsense_web/package.json b/surfsense_web/package.json index 41175daeb..665490e4f 100644 --- a/surfsense_web/package.json +++ b/surfsense_web/package.json @@ -36,6 +36,7 @@ "@platejs/code-block": "^52.0.11", "@platejs/combobox": "^52.0.15", "@platejs/dnd": "^52.0.11", + "@platejs/find-replace": "^52.3.10", "@platejs/floating": "^52.0.11", "@platejs/indent": "^52.0.11", "@platejs/link": "^52.0.11", diff --git a/surfsense_web/pnpm-lock.yaml b/surfsense_web/pnpm-lock.yaml index b1730e842..a1a7bea12 100644 --- a/surfsense_web/pnpm-lock.yaml +++ b/surfsense_web/pnpm-lock.yaml @@ -53,6 +53,9 @@ importers: '@platejs/dnd': specifier: ^52.0.11 version: 52.0.11(platejs@52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4)))(react-dnd-html5-backend@16.0.1)(react-dnd@16.0.1(@types/node@20.19.33)(@types/react@19.2.14)(react@19.2.4))(react-dom@19.2.4(react@19.2.4))(react@19.2.4) + '@platejs/find-replace': + specifier: ^52.3.10 + version: 52.3.10(platejs@52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4)))(react-dom@19.2.4(react@19.2.4))(react@19.2.4) '@platejs/floating': specifier: ^52.0.11 
version: 52.0.11(platejs@52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4)))(react-dom@19.2.4(react@19.2.4))(react@19.2.4) @@ -2827,6 +2830,13 @@ packages: react-dnd-html5-backend: '>=14.0.0' react-dom: '>=18.0.0' + '@platejs/find-replace@52.3.10': + resolution: {integrity: sha512-V/MOMMUYxHfEn/skd2+YO213xSATFDVsl8FzVzVRV/XaxwwVefH2EPD1lAVIvmYjennTVTTsHHtEI9K9iOsEaA==} + peerDependencies: + platejs: '>=52.0.11' + react: '>=18.0.0' + react-dom: '>=18.0.0' + '@platejs/floating@52.0.11': resolution: {integrity: sha512-ApNpw4KWml+kuK+XTTpji+f/7GxTR4nRzlnfJMvGBrJpLPQ4elS5MABm3oUi81DZn+aub5HvsyH7UqCw7F76IA==} peerDependencies: @@ -11105,6 +11115,13 @@ snapshots: react-dnd-html5-backend: 16.0.1 react-dom: 19.2.4(react@19.2.4) + '@platejs/find-replace@52.3.10(platejs@52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4)))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)': + dependencies: + platejs: 52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4)) + react: 19.2.4 + react-compiler-runtime: 1.0.0(react@19.2.4) + react-dom: 19.2.4(react@19.2.4) + '@platejs/floating@52.0.11(platejs@52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4)))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)': dependencies: '@floating-ui/core': 1.7.4