feat: various UI fixes, prompt optimizations, and allowing duplicate docs

- Updated `content_hash` in the `Document` model to remove global uniqueness, allowing identical content across different paths.
- Enhanced `_create_document` function to handle path uniqueness and prevent session-poisoning from `IntegrityError`.
- Added detailed comments for clarity on the changes and their implications.
- Introduced new citation handling in the editor for improved user experience with citation jumps.
- Updated package dependencies in the frontend for better functionality.
This commit is contained in:
DESKTOP-RTLN3BA\$punk 2026-04-28 21:30:53 -07:00
parent e6433f78c4
commit b9a66cb417
26 changed files with 1540 additions and 852 deletions

View file

@ -0,0 +1,107 @@
"""133_drop_documents_content_hash_unique
Revision ID: 133
Revises: 132
Create Date: 2026-04-29
Drop the global UNIQUE constraint on ``documents.content_hash`` so the
new-chat agent's ``write_file`` flow can persist legitimate file copies
(two paths, identical content) without hitting a constraint that mirrors
no real filesystem semantic.
Path uniqueness still lives on ``documents.unique_identifier_hash`` (per
search space), which is the right invariant — exactly like an inode at a
given path on a POSIX filesystem.
The non-unique INDEX on ``content_hash`` is preserved so connector
indexers' "have we seen this content before?" lookup
(:func:`app.tasks.document_processors.base.check_duplicate_document`,
which already uses ``.scalars().first()`` and is therefore tolerant of
duplicates) stays cheap.
"""
from __future__ import annotations
from collections.abc import Sequence
from sqlalchemy import inspect
from alembic import op
# Alembic linkage for this migration step: applies on top of revision 132.
revision: str = "133"
down_revision: str | None = "132"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def _existing_constraint_names(bind, table: str) -> set[str]:
    """Return the names of every UNIQUE constraint currently on *table*.

    Used to make the migration idempotent: DDL is only issued for
    constraints that actually exist on this database.
    """
    found = inspect(bind).get_unique_constraints(table)
    return {constraint["name"] for constraint in found}
def _existing_index_names(bind, table: str) -> set[str]:
    """Return the names of every index currently on *table*.

    Mirrors :func:`_existing_constraint_names` so ``upgrade``/``downgrade``
    can guard each DDL statement against the live schema.
    """
    found = inspect(bind).get_indexes(table)
    return {index["name"] for index in found}
def upgrade() -> None:
    """Drop the global UNIQUE on ``documents.content_hash``.

    Handles both representations the old constraint may take on Postgres
    (a named UniqueConstraint and a same-named unique index) and then
    makes sure the plain non-unique lookup index is in place.
    """
    bind = op.get_bind()

    # Both the named UniqueConstraint (added in revision 8) and the
    # implicit-unique-index variant SQLAlchemy may emit need draining.
    if "uq_documents_content_hash" in _existing_constraint_names(bind, "documents"):
        op.drop_constraint(
            "uq_documents_content_hash", "documents", type_="unique"
        )

    index_names = _existing_index_names(bind, "documents")

    # Some Postgres versions surface the unique constraint via a unique
    # index of the same name; check for that too.
    if "uq_documents_content_hash" in index_names:
        op.drop_index("uq_documents_content_hash", table_name="documents")

    # Ensure the non-unique index is present for fast lookups.
    if "ix_documents_content_hash" not in index_names:
        op.create_index(
            "ix_documents_content_hash",
            "documents",
            ["content_hash"],
            unique=False,
        )
def downgrade() -> None:
    """Restore the legacy global UNIQUE constraint on ``content_hash``.

    WARNING: destructive by necessity. Duplicate-content rows that became
    legal after the upgrade are deleted (the lowest-id row per
    ``content_hash`` survives) before the constraint can be re-created.
    """
    bind = op.get_bind()
    # Re-applying UNIQUE is destructive: there may now be legitimate
    # duplicates (e.g. two NOTE documents that share content because the
    # user explicitly copied one to a new path). To avoid the migration
    # silently deleting user data, we keep only the lowest-id row per
    # content_hash — same strategy revision 8 used when first introducing
    # the constraint.
    op.execute(
        """
        DELETE FROM documents
        WHERE id NOT IN (
            SELECT MIN(id)
            FROM documents
            GROUP BY content_hash
        )
        """
    )
    # Recreate the pre-133 shape: drop the plain index if present, then
    # emit a fresh non-unique index plus the named UNIQUE constraint.
    indexes = _existing_index_names(bind, "documents")
    if "ix_documents_content_hash" in indexes:
        op.drop_index("ix_documents_content_hash", table_name="documents")
    op.create_index(
        "ix_documents_content_hash",
        "documents",
        ["content_hash"],
        unique=False,
    )
    op.create_unique_constraint(
        "uq_documents_content_hash", "documents", ["content_hash"]
    )

View file

@ -28,6 +28,7 @@ from langchain.agents.middleware import AgentMiddleware, AgentState
from langchain_core.callbacks import dispatch_custom_event
from langgraph.runtime import Runtime
from sqlalchemy import delete, select
from sqlalchemy.exc import IntegrityError
from sqlalchemy.ext.asyncio import AsyncSession
from app.agents.new_chat.filesystem_selection import FilesystemMode
@ -150,10 +151,11 @@ async def _create_document(
virtual_path,
search_space_id,
)
# Guard against the unique_identifier_hash constraint: another row at the
# same virtual_path (this search space) already owns the hash. Callers are
# expected to upsert via the wrapper, but this defends against bypasses
# and gives a clean ValueError instead of a session-poisoning IntegrityError.
# Filesystem-parity invariant: the only thing that *must* be unique is
# the path. Two notes can legitimately share content (e.g. ``cp a b``).
# Guard against the path-derived ``unique_identifier_hash`` constraint
# so we surface a clean ValueError instead of letting the INSERT poison
# the session with an IntegrityError.
path_collision = await session.execute(
select(Document.id).where(
Document.search_space_id == search_space_id,
@ -165,17 +167,14 @@ async def _create_document(
f"a document already exists at path '{virtual_path}' "
"(unique_identifier_hash collision)"
)
# ``content_hash`` is intentionally NOT checked for uniqueness here.
# In a real filesystem two files at different paths can hold identical
# bytes, and the agent's ``write_file`` path needs that semantic to
# support copy/duplicate operations. The hash remains useful as a
# change-detection hint for connector indexers, which still consult it
# via :func:`check_duplicate_document` but do so with a non-unique
# lookup (``.first()``).
content_hash = generate_content_hash(content, search_space_id)
content_collision = await session.execute(
select(Document.id).where(
Document.search_space_id == search_space_id,
Document.content_hash == content_hash,
)
)
if content_collision.scalar_one_or_none() is not None:
raise ValueError(
f"a document with identical content already exists for path '{virtual_path}'"
)
doc = Document(
title=title,
document_type=DocumentType.NOTE,
@ -493,19 +492,43 @@ async def commit_staged_filesystem_state(
}
)
else:
# Wrap each create in a SAVEPOINT so a residual
# ``IntegrityError`` (e.g. a deployment that hasn't run
# migration 133 yet, where ``documents.content_hash``
# still carries its legacy global UNIQUE constraint)
# rolls back only this one create instead of poisoning
# the whole turn's transaction.
try:
new_doc = await _create_document(
session,
virtual_path=path,
content=content,
search_space_id=search_space_id,
created_by_id=created_by_id,
)
async with session.begin_nested():
new_doc = await _create_document(
session,
virtual_path=path,
content=content,
search_space_id=search_space_id,
created_by_id=created_by_id,
)
except ValueError as exc:
logger.warning(
"kb_persistence: skipping %s create: %s", path, exc
)
continue
except IntegrityError as exc:
# The path-uniqueness check above already protected
# against ``unique_identifier_hash`` collisions, so
# the most likely culprit is the legacy
# ``ix_documents_content_hash`` UNIQUE constraint
# that migration 133 drops. Log loudly so operators
# know to run the migration; do NOT silently swallow.
msg = str(exc.orig) if exc.orig is not None else str(exc)
logger.error(
"kb_persistence: IntegrityError creating %s: %s. "
"If this mentions content_hash, run alembic "
"upgrade to apply migration 133 which drops the "
"global UNIQUE constraint on documents.content_hash.",
path,
msg,
)
continue
doc_id_by_path[path] = new_doc.id
committed_creates.append(
{

View file

@ -38,12 +38,38 @@ from app.db import ChatVisibility
# Provider variant detection
# -----------------------------------------------------------------------------
ProviderVariant = str # "anthropic" | "openai_reasoning" | "openai_classic" | "google" | "default"
# String literal alias for the supported provider-specific prompt variants.
# When adding a new variant, also drop a matching ``providers/<variant>.md``
# file in this package and (if appropriate) extend the regex matchers below.
#
# Stylistic clusters mirror OpenCode's prompt-per-family layout but adapted
# to SurfSense's "supplemental hints" architecture (each fragment is a
# focused style nudge, NOT a full system prompt — the main prompt is
# already assembled from base/ + tools/ + routing/).
ProviderVariant = str
# Known values:
# "anthropic" — Claude family (XML-friendly, narrative todos)
# "openai_reasoning" — GPT-5 / o-series (channel-aware pragmatic)
# "openai_classic" — GPT-4 family (autonomous persistence)
# "openai_codex" — gpt-*-codex (code-purist, terse, file:line refs)
# "google" — Gemini (formal, <3-line, numbered workflow)
# "kimi" — Moonshot Kimi-K* (action-bias, parallel tools)
# "grok" — xAI Grok (extreme-terse, one-word ok)
# "deepseek" — DeepSeek V3 / R1 (terse, R1-aware reasoning)
# "default" — fallback, no provider-specific block emitted
# IMPORTANT: order of evaluation matters in :func:`detect_provider_variant`.
# More specific patterns must come first (e.g. ``codex`` before
# ``openai_reasoning`` because codex model ids contain ``gpt``).
_OPENAI_CODEX_RE = re.compile(r"\b(gpt-codex|codex-mini|gpt-[\d.]+-codex)\b", re.IGNORECASE)
_OPENAI_REASONING_RE = re.compile(r"\b(gpt-5|o\d|o-)", re.IGNORECASE)
_OPENAI_CLASSIC_RE = re.compile(r"\bgpt-4", re.IGNORECASE)
_ANTHROPIC_RE = re.compile(r"\bclaude\b", re.IGNORECASE)
_GOOGLE_RE = re.compile(r"\bgemini\b", re.IGNORECASE)
_KIMI_RE = re.compile(r"\b(kimi[-\d.]*|moonshot)\b", re.IGNORECASE)
_GROK_RE = re.compile(r"\bgrok\b", re.IGNORECASE)
_DEEPSEEK_RE = re.compile(r"\bdeepseek\b", re.IGNORECASE)
def detect_provider_variant(model_name: str | None) -> ProviderVariant:
@ -51,10 +77,17 @@ def detect_provider_variant(model_name: str | None) -> ProviderVariant:
Heuristic match on the model id; returns ``"default"`` when nothing
matches so the composer can fall back to the empty placeholder file.
Order is significant: more-specific patterns are tried first so
``gpt-5-codex`` routes to ``"openai_codex"`` rather than
``"openai_reasoning"`` (mirrors OpenCode's
``packages/opencode/src/session/system.ts`` dispatch).
"""
if not model_name:
return "default"
name = model_name.strip()
if _OPENAI_CODEX_RE.search(name):
return "openai_codex"
if _OPENAI_REASONING_RE.search(name):
return "openai_reasoning"
if _OPENAI_CLASSIC_RE.search(name):
@ -63,6 +96,12 @@ def detect_provider_variant(model_name: str | None) -> ProviderVariant:
return "anthropic"
if _GOOGLE_RE.search(name):
return "google"
if _KIMI_RE.search(name):
return "kimi"
if _GROK_RE.search(name):
return "grok"
if _DEEPSEEK_RE.search(name):
return "deepseek"
return "default"

View file

@ -1,5 +1,20 @@
<provider_hints>
You are running on an Anthropic Claude model. Use XML tags liberally to structure
intermediate reasoning when the task is complex. Prefer step-by-step plans inside
`<thinking>` blocks before producing the final answer.
You are running on an Anthropic Claude model.
Structured reasoning:
- Use XML tags liberally to organise intermediate reasoning when a task is non-trivial. `<thinking>...</thinking>` blocks are encouraged before tool calls or before producing a complex final answer.
- For multi-step requests, briefly outline a plan inside a `<plan>` block before issuing the first tool call.
Professional objectivity:
- Prioritise technical accuracy over validating the user's beliefs. Provide direct, factual guidance without unnecessary superlatives, praise, or emotional validation.
- When uncertain, investigate (search the KB, fetch the page) rather than confirming the user's assumption.
- Disagree with the user when the evidence warrants it; respectful correction beats false agreement.
Task management:
- For tasks with 3+ distinct steps use the todo / planning tool aggressively. Mark items in_progress before starting, completed immediately when finished — do not batch completions.
- Narrate progress through the todo list itself, not through chatty status lines.
Tool calls:
- Run independent tool calls in parallel within one response. Sequence them only when a later call genuinely needs an earlier one's output.
- Never chain bash-like commands with `;` or `&&` to "narrate" — use prose between tool calls instead.
</provider_hints>

View file

@ -0,0 +1,18 @@
<provider_hints>
You are running on a DeepSeek model (DeepSeek-V3 chat / DeepSeek-R1 reasoning).
Reasoning hygiene (R1-aware):
- If the model surfaces explicit `<think>` blocks, keep that internal scratch focused — do NOT restate the user's question inside it; jump straight to the analysis.
- Never paste the contents of `<think>` into your final answer. Final answer should reflect only the conclusion, citations, and any user-facing rationale.
- Do not let chain-of-thought leak into tool-call arguments — keep tool inputs minimal and structural.
Output style:
- Be concise. Default to a one-paragraph answer; expand only when the user asks for detail.
- Don't open with sycophantic phrasing ("Great question", "Sure, here you go"). Lead with the answer or the next action.
- For factual answers, cite once with `[citation:chunk_id]` and stop.
Tool calls:
- Issue independent tool calls in parallel within a single turn.
- Prefer the knowledge-base search tools before any web-search; this model has strong recall but stale training data.
- Don't fabricate file paths, chunk ids, or URLs — only use values returned by tools or provided by the user.
</provider_hints>

View file

@ -1,4 +1,20 @@
<provider_hints>
You are running on a Google Gemini model. Prefer concise, structured responses.
When using tools, follow the function-calling protocol and avoid verbose preludes.
You are running on a Google Gemini model.
Output style:
- Concise & direct. Aim for fewer than 3 lines of prose (excluding tool output, citations, and code/snippets) when the task allows.
- No conversational filler — skip openers like "Okay, I will now…" and closers like "I have finished the changes…". Get straight to the action or answer.
- Format with GitHub-flavoured Markdown; assume monospace rendering.
- For one-line factual answers, just answer. No headers, no bullets.
Workflow for non-trivial tasks (Understand → Plan → Act → Verify):
1. **Understand:** read the user's request and the relevant KB / connector context. Use search and read tools (in parallel when independent) before assuming anything.
2. **Plan:** when the task touches multiple steps, share an extremely concise plan first.
3. **Act:** call the appropriate tools, strictly adhering to the prompts/routing already established for this agent.
4. **Verify:** confirm with a follow-up read or search where it materially de-risks the answer.
Discipline:
- Do not take significant actions beyond the clear scope of the user's request without confirming first.
- Do not assume a connector / tool / file exists — check (e.g. via `get_connected_accounts`) before referencing it.
- Path arguments must be the exact strings returned by tools; do not synthesise file paths.
</provider_hints>

View file

@ -0,0 +1,17 @@
<provider_hints>
You are running on an xAI Grok model.
Maximum terseness:
- Answer in fewer than 4 lines unless the user asks for detail. One-word answers are best when they suffice.
- No preamble ("The answer is", "Here's what I'll do"), no postamble ("Hope that helps", "Let me know"). Get straight to the answer.
- Avoid restating the user's question.
- For factual lookups inside the knowledge base, give the answer with a single `[citation:chunk_id]` and stop.
Tool discipline:
- Use exactly ONE tool per assistant turn when investigating; wait for the result before deciding the next call. Do not loop on the same tool with the same arguments — pick a result and act.
- For obviously parallelizable read-only batches (multiple independent searches), one turn with several tool calls is fine — but never chain into a fishing expedition.
Style:
- No emojis unless the user asked. No nested bullets, no headers for short answers.
- If you can't help, say so in 1-2 sentences without explaining "why this could lead to…".
</provider_hints>

View file

@ -0,0 +1,21 @@
<provider_hints>
You are running on a Moonshot Kimi model (Kimi-K1.5 / Kimi-K2 / Kimi-K2.5+).
Action bias:
- Default to taking action with tools rather than describing solutions in prose. If a tool can answer the question, call the tool.
- Don't narrate routine reads, searches, or obvious next steps. Combine related progress into one short status line.
- Be thorough in actions (test what you build, verify what you change). Be brief in explanations.
Tool calls:
- Output multiple non-interfering tool calls in a SINGLE response — parallelism is a major efficiency win on this model.
- When the `task` tool is available, delegate focused subtasks to a subagent with full context (subagents don't inherit yours).
- Don't apologise or pre-announce tool calls. The tool call itself is self-explanatory.
Language:
- Respond in the SAME language as the user's most recent turn unless explicitly instructed otherwise.
Discipline:
- Stay on track. Never give the user more than what they asked for.
- Fact-check before stating anything as factual; don't fabricate citations.
- Keep it stupidly simple. Don't overcomplicate.
</provider_hints>

View file

@ -1,5 +1,21 @@
<provider_hints>
You are running on a classic OpenAI chat model (GPT-4 family). Use direct
function-calling for tools. When editing files, use the standard `edit_file`
or `write_file` tools rather than diff-based patches.
You are running on a classic OpenAI chat model (GPT-4 family).
Persistence:
- Keep going until the user's query is completely resolved before yielding back. Don't end the turn at "I would do X" — actually do X.
- When you say "Next I will…" or "Now I will…", you MUST actually take that action in the same turn.
- If a tool call fails, diagnose and try again with corrected arguments; do not surface the raw error and stop.
Planning:
- Plan extensively before each tool call and reflect briefly on the result of the previous call. For tasks with 3+ steps, use the todo / planning tool and mark items as `in_progress` / `completed` as you go.
- Always announce the next action in ONE concise sentence before making a non-trivial tool call ("I'll search the KB for the migration spec.").
Output style:
- Conversational but professional. Plain prose for explanations, bullet points for findings, fenced code blocks (with language tags) for code.
- Don't dump tool output verbatim — summarise the relevant lines.
- Don't add a closing recap unless the user asked for one. After completing the work, just stop.
Tool calls:
- Issue independent tool calls in parallel within one response.
- Use specialised tools over generic ones (e.g. KB search before web search; named connectors over MCP fallback).
</provider_hints>

View file

@ -0,0 +1,19 @@
<provider_hints>
You are running on an OpenAI Codex-class model (gpt-codex / codex-mini / gpt-*-codex).
Output style:
- Be concise. Don't dump fetched/searched content back at the user — reference paths or chunk ids instead.
- Reference sources as `path:line` (or `chunk:<id>`) so they're clickable. Stand-alone paths per reference, even when repeated.
- Prefer numbered lists (`1.`, `2.`, `3.`) when offering options the user can pick by replying with a single number.
- Skip headers and heavy formatting for simple confirmations.
- No emojis, no em-dashes, no nested bullets. Single-level lists only.
Code & structured-output tasks:
- Lead with a one-sentence explanation of the change before context. Don't open with "Summary:" — jump in.
- Suggest natural next steps (run tests, diff review, commit) only when they're genuinely the next move.
- For multi-line snippets use fenced code blocks with a language tag.
Tool calls:
- Run independent tool calls in parallel; chain only when later calls need earlier results.
- Don't ask permission ("Should I proceed?") — proceed with the most reasonable default and state what you did.
</provider_hints>

View file

@ -1,5 +1,21 @@
<provider_hints>
You are running on an OpenAI reasoning model (o-series / GPT-5+). Be terse and
direct in your responses. When editing files, prefer the `apply_patch` tool format
where available. Avoid restating the user request before answering.
You are running on an OpenAI reasoning model (GPT-5+ / o-series).
Output style:
- Be terse and direct. Don't restate the user's request before answering.
- Don't begin with conversational openers ("Done!", "Got it", "Great question", "Sure thing"). Get to the answer or the action.
- Match response complexity to the task: simple questions → one-line answer; substantial work → lead with the outcome, then context, then any next steps.
- No nested bullets — keep lists flat (single level). For options the user can pick by replying with a number, use `1.` `2.` `3.`.
- Use inline backticks for paths/commands/identifiers; fenced code blocks (with language tags) for multi-line snippets.
Channels (for clients that support them):
- `commentary` — short progress updates only when they add genuinely new information (a discovery, a tradeoff, a blocker, the start of a non-trivial step). Don't narrate routine reads or obvious next steps.
- `final` — the completed response. Keep it self-contained; no "see above" / "see below" cross-references.
Tool calls:
- Parallelise independent tool calls in a single response (`multi_tool_use.parallel` where supported). Only sequence when a later call needs an earlier one's output.
- Don't ask permission ("Should I proceed?", "Do you want me to…?"). Pick the most reasonable default, do it, and state what you did.
Autonomy:
- Persist until the task is fully resolved within the current turn whenever feasible. Don't stop at analysis when the user clearly wants the change applied.
</provider_hints>

View file

@ -976,7 +976,15 @@ class Document(BaseModel, TimestampMixin):
document_metadata = Column(JSON, nullable=True)
content = Column(Text, nullable=False)
content_hash = Column(String, nullable=False, index=True, unique=True)
# ``content_hash`` is intentionally NOT globally unique. In a real
# filesystem two files at different paths can hold identical bytes,
# and the agent's ``write_file`` flow needs that semantic to support
# copy / duplicate operations. Path uniqueness lives on
# ``unique_identifier_hash`` (per search space). The hash remains
# indexed because connector indexers consult it as a change-detection
# / cross-source dedup hint via :func:`check_duplicate_document`.
# See migration 133.
content_hash = Column(String, nullable=False, index=True)
unique_identifier_hash = Column(String, nullable=True, index=True, unique=True)
embedding = Column(Vector(config.embedding_model_instance.dimension))

View file

@ -25,17 +25,33 @@ class TestProviderVariantDetection:
@pytest.mark.parametrize(
"model_name,expected",
[
# GPT-4 family routes to "classic" (autonomous-persistence style)
("openai:gpt-4o-mini", "openai_classic"),
("openai:gpt-4-turbo", "openai_classic"),
# GPT-5 / o-series route to "reasoning" (channel-aware pragmatic)
("openai:gpt-5", "openai_reasoning"),
("openai:gpt-5-codex", "openai_reasoning"),
("openai:o1-preview", "openai_reasoning"),
("openai:o3-mini", "openai_reasoning"),
# Codex family beats reasoning (more specific). Mirrors OpenCode
# ``system.ts`` — ``gpt-*-codex`` gets the code-purist prompt.
("openai:gpt-5-codex", "openai_codex"),
("openai:gpt-codex", "openai_codex"),
("openai:codex-mini", "openai_codex"),
# Anthropic + Google
("anthropic:claude-3-5-sonnet", "anthropic"),
("anthropic/claude-opus-4", "anthropic"),
("google:gemini-2.0-flash", "google"),
("vertex:gemini-1.5-pro", "google"),
# Newly-covered families
("moonshot:kimi-k2", "kimi"),
("openrouter:moonshot/kimi-k2.5", "kimi"),
("xai:grok-2", "grok"),
("openrouter:x-ai/grok-3", "grok"),
("openai:deepseek-v3", "deepseek"),
("deepseek:deepseek-r1", "deepseek"),
# Unknown families fall back to default (no provider block emitted)
("groq:mixtral-8x7b", "default"),
("together:llama-3.1-70b", "default"),
(None, "default"),
("", "default"),
],
@ -43,6 +59,16 @@ class TestProviderVariantDetection:
def test_detection(self, model_name: str | None, expected: str) -> None:
assert detect_provider_variant(model_name) == expected
def test_codex_takes_precedence_over_reasoning(self) -> None:
"""Regression guard: ``gpt-5-codex`` must NOT match the generic
``gpt-5`` reasoning regex first. Codex is the more specialised
prompt and mirrors OpenCode's dispatch order.
"""
from app.agents.new_chat.prompts.composer import detect_provider_variant
assert detect_provider_variant("openai:gpt-5-codex") == "openai_codex"
assert detect_provider_variant("openai:gpt-5") == "openai_reasoning"
class TestCompose:
def test_default_prompt_has_required_blocks(self, fixed_today: datetime) -> None:
@ -149,6 +175,52 @@ class TestCompose:
prompt = compose_system_prompt(today=fixed_today, model_name="custom:foo")
assert "<provider_hints>" not in prompt
@pytest.mark.parametrize(
"model_name,expected_marker",
[
# Each marker is a unique-ish phrase from the corresponding fragment.
# If a fragment is renamed/rewritten such that the marker is gone,
# update both the fragment and this test deliberately.
("openai:gpt-5-codex", "Codex-class"),
("openai:gpt-5", "OpenAI reasoning model"),
("openai:gpt-4o", "classic OpenAI chat model"),
("anthropic:claude-3-5-sonnet", "Anthropic Claude"),
("google:gemini-2.0-flash", "Google Gemini"),
("moonshot:kimi-k2", "Moonshot Kimi"),
("xai:grok-2", "xAI Grok"),
("deepseek:deepseek-r1", "DeepSeek"),
],
)
def test_each_known_variant_renders_with_its_marker(
self,
fixed_today: datetime,
model_name: str,
expected_marker: str,
) -> None:
"""Every supported variant must produce a ``<provider_hints>`` block
containing its identifying marker. This pins the dispatch + the
on-disk fragments together so a missing/renamed file is caught
immediately.
"""
prompt = compose_system_prompt(today=fixed_today, model_name=model_name)
assert "<provider_hints>" in prompt, (
f"variant for {model_name!r} did not emit a provider_hints block; "
"the corresponding providers/<variant>.md may be missing"
)
assert expected_marker in prompt, (
f"variant for {model_name!r} emitted hints but lacked the "
f"expected marker {expected_marker!r} — the fragment may have "
"drifted from the dispatch table"
)
def test_provider_blocks_are_byte_stable_across_calls(
self, fixed_today: datetime
) -> None:
"""Cache-stability guard: same model id → byte-identical prompt."""
a = compose_system_prompt(today=fixed_today, model_name="moonshot:kimi-k2")
b = compose_system_prompt(today=fixed_today, model_name="moonshot:kimi-k2")
assert a == b
def test_custom_system_instructions_override_default(
self, fixed_today: datetime
) -> None:

View file

@ -0,0 +1,168 @@
"""Unit tests for kb_persistence filesystem-parity invariants.
Specifically, these tests pin down that the agent-driven write_file flow
treats path uniqueness — not content uniqueness — as the only hard
invariant. This mirrors a real filesystem: ``cp a b`` produces two files
with identical bytes living at different paths, and that should round-trip
through :class:`KnowledgeBasePersistenceMiddleware` without losing the copy.
"""
from __future__ import annotations
from typing import Any
from unittest.mock import AsyncMock, MagicMock
import numpy as np
import pytest
from app.agents.new_chat.middleware import kb_persistence
from app.db import Document
class _FakeResult:
    """Tiny double for ``sqlalchemy.engine.Result``.

    Both scalar accessors hand back whatever single value the test seeded
    the result with; ``None`` plays the role of "no row matched".
    """

    def __init__(self, value: Any = None) -> None:
        # The one value every accessor returns.
        self._seeded = value

    def scalar_one_or_none(self) -> Any:
        """Mimic ``Result.scalar_one_or_none``: seeded value or ``None``."""
        return self._seeded

    def scalar(self) -> Any:
        """Mimic ``Result.scalar``: same seeded value."""
        return self._seeded
class _FakeSession:
    """Minimal AsyncSession stand-in scoped to ``_create_document`` needs.

    Every ``add``/``add_all`` is recorded so tests can assert against the
    resulting Documents and Chunks. By default ``execute`` answers every
    query with "no row" — i.e. no folder hierarchy preexists and no path
    collision exists; a test that wants a collision overrides ``execute``
    on a per-call basis.
    """

    def __init__(self) -> None:
        self.added: list[Any] = []
        # Counter backing the fake primary-key assignment done on flush.
        self._next_id = 1
        self.execute = AsyncMock(return_value=_FakeResult(None))
        # ``await session.flush()`` assigns ids exactly like the ORM would:
        # every added object still lacking an id receives the next counter.
        self.flush = AsyncMock(side_effect=self._assign_ids_on_flush)

    async def _assign_ids_on_flush(self) -> None:
        # Only objects without an id get one; re-flushing is a no-op for
        # rows that were already "persisted".
        for pending in self.added:
            if getattr(pending, "id", None) is None:
                pending.id = self._next_id
                self._next_id += 1

    def add(self, obj: Any) -> None:
        self.added.append(obj)

    def add_all(self, objs: list[Any]) -> None:
        self.added.extend(objs)
@pytest.fixture(autouse=True)
def _stub_embeddings_and_chunks(monkeypatch: pytest.MonkeyPatch) -> None:
    """Keep these unit tests hermetic: stub embedding + chunking.

    Loading the real embedding model is slow and needs weights on disk;
    instead every text embeds to an 8-dim zero vector and every document
    "chunks" into a single chunk holding the whole content.
    """

    def _zero_vectors(texts):
        return [np.zeros(8, dtype=np.float32) for _ in texts]

    def _single_chunk(content):
        return [content]

    monkeypatch.setattr(kb_persistence, "embed_texts", _zero_vectors)
    monkeypatch.setattr(kb_persistence, "chunk_text", _single_chunk)
@pytest.mark.asyncio
async def test_create_document_allows_identical_content_at_different_paths() -> None:
    """The core regression: ``cp /a/notes.md /b/notes-copy.md``.

    Both create calls must succeed even though the bytes are byte-for-byte
    identical, because path is the only filesystem-style unique key.
    """
    session = _FakeSession()
    content = "# Same body\n\nIdentical content used by two different paths.\n"

    # First write: the "original" file lands without interference.
    first = await kb_persistence._create_document(
        session,  # type: ignore[arg-type]
        virtual_path="/documents/a/notes.md",
        content=content,
        search_space_id=42,
        created_by_id="user-1",
    )
    assert isinstance(first, Document)
    assert first.title == "notes.md"

    # Second create with byte-identical content at a different path should
    # not raise — that's the whole point of the filesystem-parity fix.
    second = await kb_persistence._create_document(
        session,  # type: ignore[arg-type]
        virtual_path="/documents/b/notes-copy.md",
        content=content,
        search_space_id=42,
        created_by_id="user-1",
    )
    assert isinstance(second, Document)
    assert second.title == "notes-copy.md"

    # Both rows share the same content_hash but live at distinct paths
    # (distinct ``unique_identifier_hash``). That's the desired contract.
    assert first.content_hash == second.content_hash
    assert first.unique_identifier_hash != second.unique_identifier_hash
@pytest.mark.asyncio
async def test_create_document_still_rejects_path_collision() -> None:
    """Path uniqueness remains the hard invariant.

    If ``unique_identifier_hash`` already points at an existing row in
    the same search space, the create call must raise ``ValueError``
    with a clear message matching the behavior the commit loop relies
    on to upsert via the existing-row code path.
    """
    session = _FakeSession()
    # Path with no folder parts so ``_ensure_folder_hierarchy`` is a
    # no-op and the only SELECT executed is the path-collision check.
    # That SELECT returns an existing doc id, triggering the guard.
    session.execute = AsyncMock(return_value=_FakeResult(value=99))

    # The guard must surface a clean ValueError (not a session-poisoning
    # IntegrityError) and must name the colliding path in its message.
    with pytest.raises(ValueError, match="already exists at path"):
        await kb_persistence._create_document(
            session,  # type: ignore[arg-type]
            virtual_path="/documents/notes.md",
            content="anything",
            search_space_id=42,
            created_by_id="user-1",
        )
@pytest.mark.asyncio
async def test_create_document_does_not_query_for_content_hash_collision(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Regression guard: the legacy second SELECT (content_hash collision
    pre-check) must be gone. Counting ``execute`` calls is a brittle but
    effective way to lock that in.

    The current flow runs exactly one ``execute`` for the path-collision
    SELECT (no folder parts in this path, so ``_ensure_folder_hierarchy``
    short-circuits). If a future refactor reintroduces a content-hash
    SELECT, this test will fail loud.
    """
    # NOTE: ``monkeypatch`` is unused directly here; the autouse fixture
    # already stubs embeddings/chunking for every test in this module.
    session = _FakeSession()
    await kb_persistence._create_document(
        session,  # type: ignore[arg-type]
        virtual_path="/documents/notes.md",
        content="hello",
        search_space_id=42,
        created_by_id="user-1",
    )

    # Path-collision SELECT only. No content_hash SELECT.
    assert session.execute.await_count == 1, (
        f"Unexpected execute count {session.execute.await_count}; "
        "did the legacy content_hash collision pre-check get re-added?"
    )

View file

@ -210,6 +210,27 @@ button {
}
}
/* Citation-jump highlight entrance pulse only. The `SearchHighlightLeaf`
(see components/ui/search-highlight-node.tsx) is otherwise statically
tinted; this animation runs once on mount to draw the eye to the cited
text after `scrollIntoView` lands. The highlight itself is permanent
until the user clicks inside the editor (or another dismissal trigger
fires in `EditorPanelContent`). */
@keyframes citation-flash-in {
	/* Start fully invisible so the pulse reads as an entrance, not a flicker. */
	0% {
		background-color: transparent;
		box-shadow: 0 0 0 0 transparent;
	}
	/* Peak of the pulse: strongest tint plus a 3px ring to draw the eye. */
	40% {
		background-color: color-mix(in oklab, var(--primary) 30%, transparent);
		box-shadow: 0 0 0 3px color-mix(in oklab, var(--primary) 25%, transparent);
	}
	/* Settle into the persistent resting tint (kept until dismissal —
	   see the comment above this keyframes block). */
	100% {
		background-color: color-mix(in oklab, var(--primary) 15%, transparent);
		box-shadow: 0 0 0 1px color-mix(in oklab, var(--primary) 40%, transparent);
	}
}
/* Human-in-the-loop approval card animations */
@keyframes pulse-subtle {
0%,

View file

@ -0,0 +1,19 @@
import { atom } from "jotai";
/**
* Cross-component handoff for citation jumps. Set by `InlineCitation` when a
* numeric chunk badge is clicked (after the document has been resolved); read
* by `DocumentTabContent` once the matching document tab mounts so it can
* scroll to and softly highlight the cited chunk inside the rendered markdown.
*
 * Cleared by `DocumentTabContent` only after a terminal state — exact /
 * approximate / miss — has been reached, so that an escalation refetch (2MB
 * preview → 16MB) keeps the pending intent alive across the re-render.
*/
export interface PendingChunkHighlight {
	/** Id of the document that contains the cited chunk. */
	documentId: number;
	/** Id of the cited chunk itself. */
	chunkId: number;
	/** Raw chunk text used to locate/highlight the citation in the rendered doc. */
	chunkText: string;
}

/** Null when no citation jump is pending. */
export const pendingChunkHighlightAtom = atom<PendingChunkHighlight | null>(null);

View file

@ -1,26 +1,45 @@
"use client";
import { FileText } from "lucide-react";
import { useQuery, useQueryClient } from "@tanstack/react-query";
import { useSetAtom } from "jotai";
import { ExternalLink, FileText } from "lucide-react";
import type { FC } from "react";
import { useState } from "react";
import { useCallback, useEffect, useRef, useState } from "react";
import { toast } from "sonner";
import { pendingChunkHighlightAtom } from "@/atoms/document-viewer/pending-chunk-highlight.atom";
import { openEditorPanelAtom } from "@/atoms/editor/editor-panel.atom";
import { useCitationMetadata } from "@/components/assistant-ui/citation-metadata-context";
import { SourceDetailPanel } from "@/components/new-chat/source-detail-panel";
import { MarkdownViewer } from "@/components/markdown-viewer";
import { Citation } from "@/components/tool-ui/citation";
import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover";
import { Spinner } from "@/components/ui/spinner";
import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
import { documentsApiService } from "@/lib/apis/documents-api.service";
import { cacheKeys } from "@/lib/query-client/cache-keys";
interface InlineCitationProps {
chunkId: number;
isDocsChunk?: boolean;
}
const POPOVER_HOVER_CLOSE_DELAY_MS = 150;
/**
* Inline citation for knowledge-base chunks (numeric chunk IDs).
* Renders a clickable badge showing the actual chunk ID that opens the SourceDetailPanel.
* Negative chunk IDs indicate anonymous/synthetic uploads and render as a static badge.
* Inline citation badge for knowledge-base chunks (numeric chunk IDs) and
* Surfsense documentation chunks (`isDocsChunk`). Negative chunk IDs render as
* a static "doc" pill (anonymous/synthetic uploads).
*
* Numeric KB chunks: clicking resolves the parent document via
 * `getDocumentByChunk`, opens the document in the right side panel (alongside
 * the chat — it does not replace it), and stages the cited chunk text in
* `pendingChunkHighlightAtom` so `EditorPanelContent` can scroll to and softly
* highlight it inside the rendered markdown.
*
* Surfsense docs chunks: rendered as a hover-controlled shadcn Popover that
* lazily fetches and previews the cited chunk inline, since those docs aren't
* indexed into the user's search space and have no tab to open.
*/
export const InlineCitation: FC<InlineCitationProps> = ({ chunkId, isDocsChunk = false }) => {
const [isOpen, setIsOpen] = useState(false);
if (chunkId < 0) {
return (
<Tooltip>
@ -38,26 +57,185 @@ export const InlineCitation: FC<InlineCitationProps> = ({ chunkId, isDocsChunk =
);
}
if (isDocsChunk) {
return <SurfsenseDocCitation chunkId={chunkId} />;
}
return <NumericChunkCitation chunkId={chunkId} />;
};
const NumericChunkCitation: FC<{ chunkId: number }> = ({ chunkId }) => {
const queryClient = useQueryClient();
const setPendingHighlight = useSetAtom(pendingChunkHighlightAtom);
const openEditorPanel = useSetAtom(openEditorPanelAtom);
const [resolving, setResolving] = useState(false);
const handleClick = useCallback(async () => {
if (resolving) return;
setResolving(true);
console.log("[citation:click] start", { chunkId });
try {
const data = await queryClient.fetchQuery({
// Local key with explicit window. The shared `cacheKeys.documents.byChunk`
// is window-agnostic (latent footgun); namespace the call to avoid
// reusing a different-window cached result.
queryKey: ["documents", "by-chunk", chunkId, "w0"] as const,
queryFn: () =>
documentsApiService.getDocumentByChunk({ chunk_id: chunkId, chunk_window: 0 }),
staleTime: 5 * 60 * 1000,
});
const cited = data.chunks.find((c) => c.id === chunkId) ?? data.chunks[0];
console.log("[citation:click] fetched doc-by-chunk", {
docId: data.id,
docTitle: data.title,
chunksReturned: data.chunks.length,
citedChunkId: cited?.id,
citedChunkContentLen: cited?.content?.length ?? 0,
citedChunkPreview:
cited?.content && cited.content.length > 120
? `${cited.content.slice(0, 120)}…(+${cited.content.length - 120})`
: (cited?.content ?? ""),
});
// Stage the highlight BEFORE opening the panel so `EditorPanelContent`
// already sees the pending intent on its very first render — avoids a
// "fetch → render → no-pending → next-tick render with pending" race.
setPendingHighlight({
documentId: data.id,
chunkId,
chunkText: cited?.content ?? "",
});
openEditorPanel({
documentId: data.id,
searchSpaceId: data.search_space_id,
title: data.title,
});
console.log("[citation:click] staged highlight + opened editor panel", {
documentId: data.id,
});
} catch (err) {
console.warn("[citation:click] failed", err);
toast.error(err instanceof Error ? err.message : "Couldn't open cited document");
} finally {
setResolving(false);
}
}, [chunkId, openEditorPanel, queryClient, resolving, setPendingHighlight]);
return (
<SourceDetailPanel
open={isOpen}
onOpenChange={setIsOpen}
chunkId={chunkId}
sourceType={isDocsChunk ? "SURFSENSE_DOCS" : ""}
title={isDocsChunk ? "Surfsense Documentation" : "Source"}
description=""
url=""
isDocsChunk={isDocsChunk}
<button
type="button"
onClick={handleClick}
disabled={resolving}
className="ml-0.5 inline-flex h-5 min-w-5 cursor-pointer items-center justify-center rounded-md bg-muted/60 px-1.5 text-[11px] font-medium text-muted-foreground align-baseline shadow-sm transition-colors hover:bg-muted hover:text-foreground focus-visible:ring-ring focus-visible:ring-2 focus-visible:outline-none disabled:cursor-progress disabled:opacity-70"
title={`View source chunk #${chunkId}`}
aria-label={`Jump to cited chunk ${chunkId}`}
>
<button
type="button"
onClick={() => setIsOpen(true)}
className="ml-0.5 inline-flex h-5 min-w-5 cursor-pointer items-center justify-center rounded-md bg-muted/60 px-1.5 text-[11px] font-medium text-muted-foreground align-baseline shadow-sm transition-colors hover:bg-muted hover:text-foreground focus-visible:ring-ring focus-visible:ring-2 focus-visible:outline-none"
title={`View source chunk #${chunkId}`}
{resolving ? <Spinner size="xs" /> : chunkId}
</button>
);
};
const SurfsenseDocCitation: FC<{ chunkId: number }> = ({ chunkId }) => {
const [open, setOpen] = useState(false);
const closeTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
const cancelClose = useCallback(() => {
if (closeTimerRef.current) {
clearTimeout(closeTimerRef.current);
closeTimerRef.current = null;
}
}, []);
const scheduleClose = useCallback(() => {
cancelClose();
closeTimerRef.current = setTimeout(() => {
setOpen(false);
closeTimerRef.current = null;
}, POPOVER_HOVER_CLOSE_DELAY_MS);
}, [cancelClose]);
useEffect(() => () => cancelClose(), [cancelClose]);
const { data, isLoading, error } = useQuery({
queryKey: cacheKeys.documents.byChunk(`doc-${chunkId}`),
queryFn: () => documentsApiService.getSurfsenseDocByChunk(chunkId),
enabled: open,
staleTime: 5 * 60 * 1000,
});
const citedChunk = data?.chunks.find((c) => c.id === chunkId) ?? data?.chunks[0];
return (
<Popover open={open} onOpenChange={setOpen}>
<PopoverTrigger asChild>
<button
type="button"
onClick={() => setOpen((prev) => !prev)}
onMouseEnter={() => {
cancelClose();
setOpen(true);
}}
onMouseLeave={scheduleClose}
onFocus={() => {
cancelClose();
setOpen(true);
}}
onBlur={scheduleClose}
className="ml-0.5 inline-flex h-5 min-w-5 cursor-pointer items-center justify-center gap-0.5 rounded-md bg-primary/10 px-1.5 text-[11px] font-medium text-primary align-baseline shadow-sm transition-colors hover:bg-primary/15 focus-visible:ring-ring focus-visible:ring-2 focus-visible:outline-none"
aria-label={`Show Surfsense documentation chunk ${chunkId}`}
title="Surfsense documentation"
>
<FileText className="size-3" />
doc
</button>
</PopoverTrigger>
<PopoverContent
className="w-96 max-w-[calc(100vw-2rem)] p-0"
align="start"
sideOffset={6}
onMouseEnter={cancelClose}
onMouseLeave={scheduleClose}
onOpenAutoFocus={(e) => e.preventDefault()}
>
{chunkId}
</button>
</SourceDetailPanel>
<div className="flex items-center justify-between gap-2 border-b px-3 py-2">
<div className="min-w-0">
<p className="truncate text-sm font-medium">
{data?.title ?? "Surfsense documentation"}
</p>
<p className="text-[11px] text-muted-foreground">Chunk #{chunkId}</p>
</div>
{data?.source && (
<a
href={data.source}
target="_blank"
rel="noopener noreferrer"
className="inline-flex shrink-0 items-center gap-1 rounded-md px-2 py-1 text-[11px] font-medium text-primary hover:bg-primary/10"
>
<ExternalLink className="size-3" />
Open
</a>
)}
</div>
<div className="max-h-72 overflow-auto px-3 py-2 text-sm">
{isLoading && (
<div className="flex items-center gap-2 py-4 text-muted-foreground">
<Spinner size="xs" />
<span className="text-xs">Loading</span>
</div>
)}
{error && (
<p className="py-4 text-xs text-destructive">
{error instanceof Error ? error.message : "Failed to load chunk"}
</p>
)}
{!isLoading && !error && citedChunk?.content && (
<MarkdownViewer content={citedChunk.content} maxLength={1500} />
)}
{!isLoading && !error && !citedChunk?.content && (
<p className="py-4 text-xs text-muted-foreground">No content available.</p>
)}
</div>
</PopoverContent>
</Popover>
);
};

View file

@ -1,5 +1,6 @@
"use client";
import { FindReplacePlugin } from "@platejs/find-replace";
import { useAtomValue, useSetAtom } from "jotai";
import {
Check,
@ -14,17 +15,21 @@ import {
import dynamic from "next/dynamic";
import { useCallback, useEffect, useRef, useState } from "react";
import { toast } from "sonner";
import { pendingChunkHighlightAtom } from "@/atoms/document-viewer/pending-chunk-highlight.atom";
import { closeEditorPanelAtom, editorPanelAtom } from "@/atoms/editor/editor-panel.atom";
import { VersionHistoryButton } from "@/components/documents/version-history";
import type { PlateEditorInstance } from "@/components/editor/plate-editor";
import { SourceCodeEditor } from "@/components/editor/source-code-editor";
import { MarkdownViewer } from "@/components/markdown-viewer";
import { Alert, AlertDescription } from "@/components/ui/alert";
import { Button } from "@/components/ui/button";
import { Drawer, DrawerContent, DrawerHandle, DrawerTitle } from "@/components/ui/drawer";
import { CITATION_HIGHLIGHT_CLASS } from "@/components/ui/search-highlight-node";
import { Spinner } from "@/components/ui/spinner";
import { useMediaQuery } from "@/hooks/use-media-query";
import { useElectronAPI } from "@/hooks/use-platform";
import { authenticatedFetch, getBearerToken, redirectToLogin } from "@/lib/auth-utils";
import { buildCitationSearchCandidates } from "@/lib/citation-search";
import { inferMonacoLanguageFromPath } from "@/lib/editor-language";
const PlateEditor = dynamic(
@ -32,7 +37,10 @@ const PlateEditor = dynamic(
{ ssr: false, loading: () => <EditorPanelSkeleton /> }
);
type CitationHighlightStatus = "exact" | "miss";
const LARGE_DOCUMENT_THRESHOLD = 2 * 1024 * 1024; // 2MB
const CITATION_MAX_LENGTH = 16 * 1024 * 1024; // 16MB on-demand cap for citation jumps
interface EditorContent {
document_id: number;
@ -136,6 +144,61 @@ export function EditorPanelContent({
const [displayTitle, setDisplayTitle] = useState(title || "Untitled");
const isLocalFileMode = kind === "local_file";
const editorRenderMode: EditorRenderMode = isLocalFileMode ? "source_code" : "rich_markdown";
// --- Citation-jump highlight wiring ----------------------------------
// `EditorPanelContent` is the consumer of `pendingChunkHighlightAtom`: when
// a citation badge is clicked, the badge stages `{documentId, chunkId,
// chunkText}` and opens this panel. We drive Plate's `FindReplacePlugin`
// (registered in every preset) to highlight the cited text natively via
// Slate decorations — no DOM walking, no Range gymnastics. The state
// machine below escalates the document fetch from 2MB → 16MB once if no
// candidate snippet matched in the preview, and surfaces miss outcomes
// via an inline alert.
const pending = useAtomValue(pendingChunkHighlightAtom);
const setPendingHighlight = useSetAtom(pendingChunkHighlightAtom);
const [fetchKey, setFetchKey] = useState(0);
const [maxLengthOverride, setMaxLengthOverride] = useState<number | null>(null);
const [highlightResult, setHighlightResult] = useState<CitationHighlightStatus | null>(null);
const editorRef = useRef<PlateEditorInstance | null>(null);
const escalatedForRef = useRef<number | null>(null);
const lastAppliedChunkIdRef = useRef<number | null>(null);
// Tracks whether a citation highlight is currently decorated in the
// editor. We use a ref (not state) because the click-to-dismiss handler
// runs in a stable callback that would otherwise close over stale state.
const isHighlightActiveRef = useRef(false);
// Once a citation jump targets this doc we have to keep `PlateEditor`
// mounted for the *rest of the doc session* — even after the highlight
// effect clears `pendingChunkHighlightAtom` (which it does as soon as
// the decoration is applied, so a follow-up citation on the same chunk
// can re-trigger). Without this latch, non-editable docs would re-render
// back into `MarkdownViewer` the instant `pending` is released, tearing
// down the Plate decorations and dropping the highlight after a frame.
const [stickyPlateMode, setStickyPlateMode] = useState(false);
const clearCitationSearch = useCallback(() => {
isHighlightActiveRef.current = false;
const editor = editorRef.current;
if (!editor) return;
try {
editor.setOption(FindReplacePlugin, "search", "");
editor.api.redecorate();
} catch (err) {
console.warn("[EditorPanelContent] clearCitationSearch failed:", err);
}
}, []);
// Dismiss the highlight when the user interacts with the editor surface.
// `onPointerDown` fires before focus / selection changes so the click
// itself feels responsive — the highlight clears in the same event tick
// that places the cursor. No-op when nothing is highlighted, so we don't
// thrash `redecorate` on every click in normal editing.
const handleEditorPointerDown = useCallback(() => {
if (!isHighlightActiveRef.current) return;
clearCitationSearch();
setHighlightResult(null);
}, [clearCitationSearch]);
const isCitationTarget = !!pending && !isLocalFileMode && pending.documentId === documentId;
const resolveLocalVirtualPath = useCallback(
async (candidatePath: string): Promise<string> => {
if (!electronAPI?.getAgentFilesystemMounts) {
@ -155,6 +218,8 @@ export function EditorPanelContent({
const isLargeDocument = (editorDoc?.content_size_bytes ?? 0) > LARGE_DOCUMENT_THRESHOLD;
// `fetchKey` is an explicit re-fetch trigger (escalation bumps it to force
// a new request even when documentId/searchSpaceId haven't changed).
useEffect(() => {
const controller = new AbortController();
setIsLoading(true);
@ -166,6 +231,12 @@ export function EditorPanelContent({
setIsEditing(false);
initialLoadDone.current = false;
changeCountRef.current = 0;
// Clear any in-flight FindReplacePlugin search before the editor
// re-mounts on new content (a fresh editor key is generated below
// from documentId + isEditing, so the previous editor + its
// decorations are about to be discarded anyway, but we belt-and-
// brace here for the case where only `fetchKey` changed).
clearCitationSearch();
const doFetch = async () => {
try {
@ -210,7 +281,11 @@ export function EditorPanelContent({
const url = new URL(
`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/search-spaces/${searchSpaceId}/documents/${documentId}/editor-content`
);
url.searchParams.set("max_length", String(LARGE_DOCUMENT_THRESHOLD));
url.searchParams.set("max_length", String(maxLengthOverride ?? LARGE_DOCUMENT_THRESHOLD));
// `fetchKey` participates here so biome's noUnusedVariables sees it
// as consumed; bumping it forces a fresh request even when the URL
// is otherwise identical.
if (fetchKey > 0) url.searchParams.set("_n", String(fetchKey));
const response = await authenticatedFetch(url.toString(), { method: "GET" });
@ -256,8 +331,259 @@ export function EditorPanelContent({
resolveLocalVirtualPath,
searchSpaceId,
title,
fetchKey,
maxLengthOverride,
clearCitationSearch,
]);
// Reset citation-jump bookkeeping whenever the panel switches to a different
// document (or local file). Body only writes setters — the deps are the
// real triggers we want to react to.
// biome-ignore lint/correctness/useExhaustiveDependencies: documentId/localFilePath are intentional triggers.
useEffect(() => {
clearCitationSearch();
escalatedForRef.current = null;
lastAppliedChunkIdRef.current = null;
setHighlightResult(null);
setMaxLengthOverride(null);
setFetchKey(0);
// Drop sticky Plate mode when the panel moves to a different doc
// — the next doc starts in its preferred render mode (Plate for
// editable, MarkdownViewer for everything else) until/unless a
// citation jump targets it.
setStickyPlateMode(false);
}, [documentId, localFilePath, clearCitationSearch]);
// Latch sticky Plate mode the first time a citation jump targets this
// doc. We keep it sticky for the remainder of this doc session so the
// highlight effect's `setPendingHighlight(null)` doesn't unmount the
// editor mid-flight (see comment on `stickyPlateMode` declaration).
useEffect(() => {
if (isCitationTarget) setStickyPlateMode(true);
}, [isCitationTarget]);
// `isEditorReady` is what `useEffect` actually depends on — `editorRef`
// is a ref so changes don't trigger re-runs. We flip this to `true` once
// `PlateEditor` calls back with its live editor instance (its
// `usePlateEditor` value-init runs synchronously, so by the time this
// flips true the markdown is already deserialized into the Slate tree).
const [isEditorReady, setIsEditorReady] = useState(false);
const handleEditorReady = useCallback((editor: PlateEditorInstance | null) => {
console.log("[citation:editor] handleEditorReady", { ready: !!editor });
editorRef.current = editor;
setIsEditorReady(!!editor);
}, []);
// --- Citation jump highlight effect -----------------------------------
// Drives Plate's FindReplacePlugin to highlight the cited chunk:
// 1. Build candidate snippets from the chunk text (first sentence,
// first 8 words, full chunk if short). Plate's decorate runs per-
// block and won't cross block boundaries, so the shorter
// candidates exist to give us something that fits in one
// paragraph / heading.
// 2. For each candidate: setOption('search', ...) → redecorate →
// wait two animation frames for React to flush → query the editor
// DOM for `.${CITATION_HIGHLIGHT_CLASS}`. First hit wins.
//
// Why a className and not a `data-*` attribute? Plate's
// `PlateLeaf` runs its props through `useNodeAttributes`, which
// only forwards `attributes`, `className`, `ref`, and `style` —
// arbitrary `data-*` attributes are silently dropped. `className`
// is the only escape hatch guaranteed to survive into the DOM.
// 3. On hit: smooth-scroll the first match into view, mark the
// highlight active (so a click inside the editor can dismiss it),
// release the pending atom.
// 4. On terminal miss: if the doc was truncated and we haven't
// escalated yet, bump the fetch's `max_length` to the citation
// cap and re-fetch — the post-refetch render will re-run this
// effect against the larger preview. Otherwise, release the
// atom and show the miss alert.
useEffect(() => {
console.log("[citation:effect] fired", {
isCitationTarget,
pendingDocId: pending?.documentId,
pendingChunkId: pending?.chunkId,
pendingChunkTextLen: pending?.chunkText?.length,
documentId,
isLocalFileMode,
isEditing,
hasMarkdown: !!editorDoc?.source_markdown,
markdownLen: editorDoc?.source_markdown?.length,
truncated: editorDoc?.truncated,
isEditorReady,
editorRefSet: !!editorRef.current,
maxLengthOverride,
});
if (!isCitationTarget || !pending) {
console.log("[citation:effect] guard ✗ no citation target / no pending");
return;
}
if (isLocalFileMode || isEditing) {
console.log("[citation:effect] guard ✗ localFileMode/editing");
return;
}
if (!editorDoc?.source_markdown) {
console.log("[citation:effect] guard ✗ source_markdown not ready");
return;
}
if (!isEditorReady) {
console.log("[citation:effect] guard ✗ editor not ready yet");
return;
}
const editor = editorRef.current;
if (!editor) {
console.log("[citation:effect] guard ✗ editorRef.current is null");
return;
}
if (lastAppliedChunkIdRef.current !== pending.chunkId) {
lastAppliedChunkIdRef.current = pending.chunkId;
}
let cancelled = false;
const finishMiss = () => {
console.log("[citation:effect] terminal miss — no candidate matched");
try {
editor.setOption(FindReplacePlugin, "search", "");
editor.api.redecorate();
} catch (err) {
console.warn("[EditorPanelContent] reset search after miss failed:", err);
}
const canEscalate =
editorDoc.truncated === true &&
(maxLengthOverride ?? LARGE_DOCUMENT_THRESHOLD) < CITATION_MAX_LENGTH &&
escalatedForRef.current !== pending.chunkId;
console.log("[citation:effect] miss decision", {
truncated: editorDoc.truncated,
currentMaxLength: maxLengthOverride ?? LARGE_DOCUMENT_THRESHOLD,
canEscalate,
});
if (canEscalate) {
escalatedForRef.current = pending.chunkId;
setMaxLengthOverride(CITATION_MAX_LENGTH);
setFetchKey((k) => k + 1);
// Keep the atom set so the post-refetch render re-runs.
return;
}
setHighlightResult("miss");
setPendingHighlight(null);
};
const tryCandidates = async () => {
const candidates = buildCitationSearchCandidates(pending.chunkText);
console.log("[citation:effect] candidates built", {
count: candidates.length,
previews: candidates.map((c) => c.slice(0, 60)),
});
if (candidates.length === 0) {
if (!cancelled) finishMiss();
return;
}
// Resolve the editor's rendered DOM root via Slate's stable
// `[data-slate-editor="true"]` attribute (set by slate-react's
// `<Editable>`). Scoping queries to this root prevents
// `<mark>` elements rendered elsewhere on the page (e.g. chat
// search-highlight leaves in another mounted PlateEditor) from
// being mistaken for citation hits.
const editorRoot = document.querySelector<HTMLElement>('[data-slate-editor="true"]');
console.log("[citation:effect] editor root", {
hasRoot: !!editorRoot,
});
const root: ParentNode = editorRoot ?? document;
for (let i = 0; i < candidates.length; i++) {
const candidate = candidates[i];
if (cancelled) return;
try {
editor.setOption(FindReplacePlugin, "search", candidate);
editor.api.redecorate();
console.log(`[citation:effect] try #${i} setOption + redecorate`, {
len: candidate.length,
preview: candidate.slice(0, 80),
});
} catch (err) {
console.warn("[EditorPanelContent] setOption/redecorate failed:", err);
continue;
}
// Two rAFs: first lets Slate flush its onChange, second lets
// React commit the decoration leaves into the DOM.
await new Promise<void>((resolve) =>
requestAnimationFrame(() => requestAnimationFrame(() => resolve()))
);
if (cancelled) return;
// Primary probe: by our stable class on the rendered <mark>.
let el = root.querySelector<HTMLElement>(`.${CITATION_HIGHLIGHT_CLASS}`);
const classMarkCount = root.querySelectorAll(`.${CITATION_HIGHLIGHT_CLASS}`).length;
// Diagnostic fallback: any <mark> inside the editor root.
// If we ever see allMarks > 0 but classMarkCount === 0,
// the className was stripped again and we need to revisit
// `useNodeAttributes` filtering.
const allMarkCount = root.querySelectorAll("mark").length;
if (!el && allMarkCount > 0) {
el = root.querySelector<HTMLElement>("mark");
}
console.log(`[citation:effect] try #${i} DOM probe`, {
foundEl: !!el,
classMarkCount,
allMarkCount,
usedFallback: !!el && classMarkCount === 0,
});
if (el) {
try {
el.scrollIntoView({ block: "center", behavior: "smooth" });
} catch {
el.scrollIntoView();
}
isHighlightActiveRef.current = true;
setHighlightResult("exact");
console.log(`[citation:effect] ✓ exact via candidate #${i} — atom released`);
// No auto-clear timer — the highlight is intentionally
// permanent until the user clicks inside the editor (see
// `handleEditorPointerDown`) or another dismissal trigger
// fires (doc switch, edit-mode toggle, panel unmount,
// next citation jump). Sticky Plate mode keeps the
// editor mounted after the atom clears.
setPendingHighlight(null);
return;
}
}
if (!cancelled) finishMiss();
};
void tryCandidates();
return () => {
cancelled = true;
};
}, [
isCitationTarget,
pending,
documentId,
editorDoc?.source_markdown,
editorDoc?.truncated,
isLocalFileMode,
isEditing,
isEditorReady,
maxLengthOverride,
clearCitationSearch,
setPendingHighlight,
]);
// Cleanup any active highlight on unmount.
useEffect(() => {
return () => clearCitationSearch();
}, [clearCitationSearch]);
// Toggling into edit mode swaps Plate out of readOnly. Clear the citation
// search so stale leaves don't linger in the editing surface.
useEffect(() => {
if (isEditing) {
clearCitationSearch();
setHighlightResult(null);
}
}, [isEditing, clearCitationSearch]);
useEffect(() => {
return () => {
if (copyResetTimeoutRef.current) {
@ -367,6 +693,15 @@ export function EditorPanelContent({
EDITABLE_DOCUMENT_TYPES.has(editorDoc.document_type ?? "")) &&
!isLargeDocument
: false;
// Use PlateEditor for any of:
// - Editable doc types (FILE/NOTE) — existing editing UX.
// - Active citation jump in flight (`isCitationTarget`) — covers the
// mount in the very first render where the atom is set but the
// sticky effect hasn't fired yet.
// - Sticky Plate mode latched on a previous citation jump — keeps
// the editor mounted (with its decorations) after the highlight
// effect clears the atom. Resets when the doc changes.
const renderInPlateEditor = isEditableType || isCitationTarget || stickyPlateMode;
const hasUnsavedChanges = editedMarkdown !== null;
const showDesktopHeader = !!onClose;
const showEditingActions = isEditableType && isEditing;
@ -381,6 +716,90 @@ export function EditorPanelContent({
setIsEditing(false);
}, [editorDoc?.source_markdown]);
	// Download the full document as a .md file via the backend endpoint.
	// Shared by both the large-document alert and the citation-miss alert.
	const handleDownloadMarkdown = useCallback(async () => {
		// Without both ids we can't build the endpoint URL — bail silently.
		if (!searchSpaceId || !documentId) return;
		setDownloading(true);
		try {
			const response = await authenticatedFetch(
				`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/search-spaces/${searchSpaceId}/documents/${documentId}/download-markdown`,
				{ method: "GET" }
			);
			if (!response.ok) throw new Error("Download failed");
			const blob = await response.blob();
			const url = URL.createObjectURL(blob);
			// Programmatic anchor click: the standard way to trigger a save
			// dialog for a blob without navigating away.
			const a = document.createElement("a");
			a.href = url;
			// Prefer the server-provided filename from Content-Disposition;
			// fall back to the document title.
			const disposition = response.headers.get("content-disposition");
			const match = disposition?.match(/filename="(.+)"/);
			a.download = match?.[1] ?? `${editorDoc?.title || "document"}.md`;
			document.body.appendChild(a);
			a.click();
			a.remove();
			// Release the object URL so the blob can be garbage-collected.
			URL.revokeObjectURL(url);
			toast.success("Download started");
		} catch {
			toast.error("Failed to download document");
		} finally {
			setDownloading(false);
		}
	}, [documentId, editorDoc?.title, searchSpaceId]);
// We no longer surface an "approximate" status — Plate's FindReplacePlugin
// either decorates an exact match or it doesn't, and the candidate snippet
// strategy (first sentence → first 8 words → full chunk) means we either
// land on the citation start or fall through to the miss alert.
const showMissAlert = isCitationTarget && highlightResult === "miss";
const citationAlerts = showMissAlert && (
<Alert variant="destructive" className="mb-4">
<FileQuestionMark className="size-4" />
<AlertDescription className="flex items-center justify-between gap-4">
<span>Cited section couldn&apos;t be located in this view.</span>
{editorDoc?.truncated && (
<Button
variant="outline"
size="sm"
className="relative shrink-0"
disabled={downloading}
onClick={handleDownloadMarkdown}
>
<span className={`flex items-center gap-1.5 ${downloading ? "opacity-0" : ""}`}>
<Download className="size-3.5" />
Download .md
</span>
{downloading && <Spinner size="sm" className="absolute" />}
</Button>
)}
</AlertDescription>
</Alert>
);
const largeDocAlert = isLargeDocument && !isLocalFileMode && editorDoc && (
<Alert className="mb-4">
<FileText className="size-4" />
<AlertDescription className="flex items-center justify-between gap-4">
<span>
This document is too large for the editor (
{Math.round((editorDoc.content_size_bytes ?? 0) / 1024 / 1024)}MB,{" "}
{editorDoc.chunk_count ?? 0} chunks). Showing a preview below.
</span>
<Button
variant="outline"
size="sm"
className="relative shrink-0"
disabled={downloading}
onClick={handleDownloadMarkdown}
>
<span className={`flex items-center gap-1.5 ${downloading ? "opacity-0" : ""}`}>
<Download className="size-3.5" />
Download .md
</span>
{downloading && <Spinner size="sm" className="absolute" />}
</Button>
</AlertDescription>
</Alert>
);
return (
<>
{showDesktopHeader ? (
@ -565,61 +984,6 @@ export function EditorPanelContent({
</p>
</div>
</div>
) : isLargeDocument && !isLocalFileMode ? (
<div className="h-full overflow-y-auto px-5 py-4">
<Alert className="mb-4">
<FileText className="size-4" />
<AlertDescription className="flex items-center justify-between gap-4">
<span>
This document is too large for the editor (
{Math.round((editorDoc.content_size_bytes ?? 0) / 1024 / 1024)}MB,{" "}
{editorDoc.chunk_count ?? 0} chunks). Showing a preview below.
</span>
<Button
variant="outline"
size="sm"
className="relative shrink-0"
disabled={downloading}
onClick={async () => {
setDownloading(true);
try {
if (!searchSpaceId || !documentId) {
throw new Error("Missing document context");
}
const response = await authenticatedFetch(
`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/search-spaces/${searchSpaceId}/documents/${documentId}/download-markdown`,
{ method: "GET" }
);
if (!response.ok) throw new Error("Download failed");
const blob = await response.blob();
const url = URL.createObjectURL(blob);
const a = document.createElement("a");
a.href = url;
const disposition = response.headers.get("content-disposition");
const match = disposition?.match(/filename="(.+)"/);
a.download = match?.[1] ?? `${editorDoc.title || "document"}.md`;
document.body.appendChild(a);
a.click();
a.remove();
URL.revokeObjectURL(url);
toast.success("Download started");
} catch {
toast.error("Failed to download document");
} finally {
setDownloading(false);
}
}}
>
<span className={`flex items-center gap-1.5 ${downloading ? "opacity-0" : ""}`}>
<Download className="size-3.5" />
Download .md
</span>
{downloading && <Spinner size="sm" className="absolute" />}
</Button>
</AlertDescription>
</Alert>
<MarkdownViewer content={editorDoc.source_markdown} />
</div>
) : editorRenderMode === "source_code" ? (
<div className="h-full overflow-hidden">
<SourceCodeEditor
@ -638,20 +1002,46 @@ export function EditorPanelContent({
}}
/>
</div>
) : isEditableType ? (
<PlateEditor
key={`${isLocalFileMode ? (localFilePath ?? "local-file") : documentId}-${isEditing ? "editing" : "viewing"}`}
preset="full"
markdown={editorDoc.source_markdown}
onMarkdownChange={handleMarkdownChange}
readOnly={!isEditing}
placeholder="Start writing..."
editorVariant="default"
allowModeToggle={false}
reserveToolbarSpace
defaultEditing={isEditing}
className="[&_[role=toolbar]]:!bg-sidebar"
/>
) : isLargeDocument && !isLocalFileMode && !isCitationTarget ? (
// Large doc, no active citation — fast Streamdown preview
// + download CTA. We only fall back to MarkdownViewer here
// because Plate is heavy on multi-MB docs and the user
// isn't waiting on a specific citation to render.
<div className="h-full overflow-y-auto px-5 py-4">
{largeDocAlert}
<MarkdownViewer content={editorDoc.source_markdown} />
</div>
) : renderInPlateEditor ? (
// Editable doc (FILE/NOTE) OR active citation jump (any
// doc type). The citation path uses Plate's
// FindReplacePlugin for native, decoration-based
// highlighting — see the citation-jump highlight effect
// above for how `editorRef` and `handleEditorReady` are
// wired.
<div className="flex h-full min-h-0 flex-col">
{(citationAlerts || (isLargeDocument && isCitationTarget && !isLocalFileMode)) && (
<div className="shrink-0 px-5 pt-4">
{isLargeDocument && isCitationTarget && largeDocAlert}
{citationAlerts}
</div>
)}
<div className="flex-1 min-h-0 overflow-hidden" onPointerDown={handleEditorPointerDown}>
<PlateEditor
key={`${isLocalFileMode ? (localFilePath ?? "local-file") : documentId}-${isEditing ? "editing" : "viewing"}`}
preset="full"
markdown={editorDoc.source_markdown}
onMarkdownChange={handleMarkdownChange}
readOnly={!isEditing}
placeholder="Start writing..."
editorVariant="default"
allowModeToggle={false}
reserveToolbarSpace
defaultEditing={isEditing}
className="[&_[role=toolbar]]:!bg-sidebar"
onEditorReady={handleEditorReady}
/>
</div>
</div>
) : (
<div className="h-full overflow-y-auto px-5 py-4">
<MarkdownViewer content={editorDoc.source_markdown} />

View file

@ -12,6 +12,12 @@ import { type EditorPreset, presetMap } from "@/components/editor/presets";
import { escapeMdxExpressions } from "@/components/editor/utils/escape-mdx";
import { Editor, EditorContainer } from "@/components/ui/editor";
/** Live editor instance returned by `usePlateEditor`. Exposed via the
* `onEditorReady` prop so callers (e.g. `EditorPanelContent`) can drive
 * plugin options imperatively — most notably setting
* `FindReplacePlugin`'s `search` option for citation-jump highlights. */
export type PlateEditorInstance = ReturnType<typeof usePlateEditor>;
export interface PlateEditorProps {
/** Markdown string to load as initial content */
markdown?: string;
@ -62,6 +68,15 @@ export interface PlateEditorProps {
* without modifying the core editor component.
*/
extraPlugins?: AnyPluginConfig[];
/**
* Called whenever the live editor instance (re)mounts, with `null` on
 * unmount. Used by callers that need to drive plugin options imperatively —
* e.g. `EditorPanelContent` setting `FindReplacePlugin`'s `search`
* option for citation-jump highlights. The callback is invoked exactly
* once per editor lifetime (the parent's `key` prop forces a fresh
* editor when needed, e.g. on edit-mode toggle).
*/
onEditorReady?: (editor: PlateEditorInstance | null) => void;
}
function PlateEditorContent({
@ -100,6 +115,7 @@ export function PlateEditor({
defaultEditing = false,
preset = "full",
extraPlugins = [],
onEditorReady,
}: PlateEditorProps) {
const lastMarkdownRef = useRef(markdown);
const lastHtmlRef = useRef(html);
@ -156,6 +172,21 @@ export function PlateEditor({
: undefined,
});
// Expose the live editor instance to imperative callers (e.g. citation
// jump highlights). We deliberately don't depend on `onEditorReady`
// itself in the cleanup closure — callers commonly pass an arrow that
// closes over a stable ref setter, but if they pass a freshly-bound
// callback per render, the `onEditorReady?.(editor)` re-fires which is
// idempotent for ref-style setters.
const onEditorReadyRef = useRef(onEditorReady);
useEffect(() => {
onEditorReadyRef.current = onEditorReady;
}, [onEditorReady]);
useEffect(() => {
onEditorReadyRef.current?.(editor);
return () => onEditorReadyRef.current?.(null);
}, [editor]);
// Update editor content when html prop changes externally
useEffect(() => {
if (html !== undefined && html !== lastHtmlRef.current) {

View file

@ -1,5 +1,6 @@
"use client";
import { FindReplacePlugin } from "@platejs/find-replace";
import type { AnyPluginConfig } from "platejs";
import { TrailingBlockPlugin } from "platejs";
@ -17,6 +18,30 @@ import { SelectionKit } from "@/components/editor/plugins/selection-kit";
import { SlashCommandKit } from "@/components/editor/plugins/slash-command-kit";
import { TableKit } from "@/components/editor/plugins/table-kit";
import { ToggleKit } from "@/components/editor/plugins/toggle-kit";
import { SearchHighlightLeaf } from "@/components/ui/search-highlight-node";
/**
* Citation-jump highlighter. Re-uses Plate's built-in `FindReplacePlugin`
* (decorate-only, no editing surface) to drive the "scroll-to-cited-text"
* UX in `EditorPanelContent`. We register it in every preset because:
* - Decorate is a no-op when `search` is empty (single getOptions() check
* per block), so cost is effectively zero for non-citation viewers.
* - Keeping it preset-agnostic means citations work whether the doc is
* opened in editable (`full`) or pure-viewer (`readonly`) modes.
*
* The parent component drives `setOption(FindReplacePlugin, 'search', ...)`
* + `editor.api.redecorate()` to trigger highlights, then queries the
* editor DOM for `.citation-highlight-leaf` to scroll the first match
* into view. (We can't use a `data-*` attribute here — Plate's
* `PlateLeaf` runs props through `useNodeAttributes`, which only forwards
* `attributes`, `className`, `ref`, `style`; arbitrary `data-*` props are
* silently dropped.) See `components/ui/search-highlight-node.tsx` for
* the leaf component and `CITATION_HIGHLIGHT_CLASS` constant.
*/
const CitationFindReplacePlugin = FindReplacePlugin.configure({
options: { search: "" },
render: { node: SearchHighlightLeaf },
});
/**
 * Full preset — every plugin kit enabled.
@ -38,6 +63,7 @@ export const fullPreset: AnyPluginConfig[] = [
...AutoformatKit,
...DndKit,
TrailingBlockPlugin,
CitationFindReplacePlugin,
];
/**
@ -52,6 +78,7 @@ export const minimalPreset: AnyPluginConfig[] = [
...LinkKit,
...AutoformatKit,
TrailingBlockPlugin,
CitationFindReplacePlugin,
];
/**
@ -68,6 +95,7 @@ export const readonlyPreset: AnyPluginConfig[] = [
...CalloutKit,
...ToggleKit,
...MathKit,
CitationFindReplacePlugin,
];
/** All available preset names */

View file

@ -1,719 +0,0 @@
"use client";
import { useQuery } from "@tanstack/react-query";
import {
BookOpen,
ChevronDown,
ChevronUp,
ExternalLink,
FileQuestionMark,
FileText,
Hash,
Loader2,
Sparkles,
X,
} from "lucide-react";
import { AnimatePresence, motion, useReducedMotion } from "motion/react";
import { useTranslations } from "next-intl";
import type React from "react";
import { forwardRef, memo, type ReactNode, useCallback, useEffect, useRef, useState } from "react";
import { createPortal } from "react-dom";
import { MarkdownViewer } from "@/components/markdown-viewer";
import { Badge } from "@/components/ui/badge";
import { Button } from "@/components/ui/button";
import { ScrollArea } from "@/components/ui/scroll-area";
import { Spinner } from "@/components/ui/spinner";
import type {
GetDocumentByChunkResponse,
GetSurfsenseDocsByChunkResponse,
} from "@/contracts/types/document.types";
import { documentsApiService } from "@/lib/apis/documents-api.service";
import { cacheKeys } from "@/lib/query-client/cache-keys";
import { cn } from "@/lib/utils";
type DocumentData = GetDocumentByChunkResponse | GetSurfsenseDocsByChunkResponse;
interface SourceDetailPanelProps {
	/** Whether the panel is visible; drives the AnimatePresence portal below. */
	open: boolean;
	/** Open/close callback (backdrop click, Escape key, close button). */
	onOpenChange: (open: boolean) => void;
	/** Id of the cited chunk; used to fetch the document and highlight it. */
	chunkId: number;
	/** Source/document type; also selects direct-render for web-search APIs. */
	sourceType: string;
	/** Fallback panel title when the fetched document has no title. */
	title: string;
	/** Fallback body text for direct-render web-search sources. */
	description?: string;
	/** External source URL; renders "Open Source" / "Open in Browser" buttons. */
	url?: string;
	/** Trigger/content rendered inline alongside the portal. */
	children?: ReactNode;
	/** When true, fetch via the Surfsense-docs chunk endpoint instead. */
	isDocsChunk?: boolean;
}
/**
 * Turn an UPPER_SNAKE document-type identifier (e.g. "LINEAR_CONNECTOR")
 * into a display label ("Linear Connector"). The first character of each
 * word is kept as-is; only the remainder is lowercased. Returns "" for
 * empty input.
 */
const formatDocumentType = (type: string) => {
	if (!type) return "";
	const pretty: string[] = [];
	for (const word of type.split("_")) {
		pretty.push(word.charAt(0) + word.slice(1).toLowerCase());
	}
	return pretty.join(" ");
};
// Chunk card component
// For large documents (>30 chunks), we disable animation to prevent layout shifts
// which break auto-scroll functionality
interface ChunkCardProps {
	/** Chunk id plus its markdown content. */
	chunk: { id: number; content: string };
	/** Index within the currently loaded chunk window (`data-chunk-index`). */
	localIndex: number;
	/** Absolute 1-based chunk number within the whole document. */
	chunkNumber: number;
	/** Total chunk count, for the "Chunk X of Y" label. */
	totalChunks: number;
	/** True when this is the cited chunk (highlight styling + badge). */
	isCited: boolean;
	// NOTE(review): the two props below are declared but not destructured by
	// ChunkCard's render, so they currently have no effect — confirm intent.
	isActive: boolean;
	disableLayoutAnimation?: boolean;
}
// Renders one chunk: a numbered header row, an optional "Cited Source"
// badge, and the chunk's markdown body. Memoized so paging in additional
// chunks does not re-render existing cards.
// NOTE(review): `isActive` and `disableLayoutAnimation` exist on
// ChunkCardProps but are not destructured here, so they never affect output.
const ChunkCard = memo(
	forwardRef<HTMLDivElement, ChunkCardProps>(
		({ chunk, localIndex, chunkNumber, totalChunks, isCited }, ref) => {
			// `data-chunk-index` below is the hook the panel's scroll logic
			// queries to find and center a chunk.
			return (
				<div
					ref={ref}
					data-chunk-index={localIndex}
					className={cn(
						"group relative rounded-2xl border-2 transition-all duration-300",
						isCited
							? "bg-linear-to-br from-primary/5 via-primary/10 to-primary/5 border-primary shadow-lg shadow-primary/10"
							: "bg-card border-border/50 hover:border-border hover:shadow-md"
					)}
				>
					{isCited && <div className="absolute inset-0 rounded-2xl bg-primary/5 blur-xl -z-10" />}
					<div className="flex items-center justify-between px-5 py-4 border-b border-border/50">
						<div className="flex items-center gap-3">
							<div
								className={cn(
									"flex items-center justify-center w-8 h-8 rounded-full text-sm font-semibold transition-colors",
									isCited
										? "bg-primary text-primary-foreground"
										: "bg-muted text-muted-foreground group-hover:bg-muted/80"
								)}
							>
								{chunkNumber}
							</div>
							<span className="text-sm text-muted-foreground">
								Chunk {chunkNumber} of {totalChunks}
							</span>
						</div>
						{isCited && (
							<Badge variant="default" className="gap-1.5 px-3 py-1">
								<Sparkles className="h-3 w-3" />
								Cited Source
							</Badge>
						)}
					</div>
					<div className="p-5 overflow-hidden">
						<MarkdownViewer content={chunk.content} maxLength={100_000} />
					</div>
				</div>
			);
		}
	)
);
ChunkCard.displayName = "ChunkCard";
/**
 * Full-screen modal panel showing the source document behind a cited chunk.
 *
 * Fetches the document around `chunkId` (window of 5 chunks each side via
 * `getDocumentByChunk`, or the Surfsense-docs endpoint when `isDocsChunk`),
 * lets the user page in earlier/later chunks in batches of 10, and
 * auto-scrolls to the cited chunk with several delayed corrective scrolls
 * while content renders. Rendered through a portal into `document.body`;
 * `children` are rendered inline as the trigger/content.
 */
export function SourceDetailPanel({
	open,
	onOpenChange,
	chunkId,
	sourceType,
	title,
	description,
	url,
	children,
	isDocsChunk = false,
}: SourceDetailPanelProps) {
	const t = useTranslations("dashboard");
	const scrollAreaRef = useRef<HTMLDivElement>(null);
	const hasScrolledRef = useRef(false); // Use ref to avoid stale closures
	// Pending auto-scroll timers; cleared on close/unmount.
	const scrollTimersRef = useRef<ReturnType<typeof setTimeout>[]>([]);
	const [activeChunkIndex, setActiveChunkIndex] = useState<number | null>(null);
	const [mounted, setMounted] = useState(false);
	const shouldReduceMotion = useReducedMotion();
	// Portal target only exists client-side; wait for mount before portaling.
	useEffect(() => {
		setMounted(true);
	}, []);
	// Fetch the document that contains `chunkId`. Only runs while the panel
	// is open; results are cached for 5 minutes.
	const {
		data: documentData,
		isLoading: isDocumentByChunkFetching,
		error: documentByChunkFetchingError,
	} = useQuery<DocumentData>({
		queryKey: isDocsChunk
			? cacheKeys.documents.byChunk(`doc-${chunkId}`)
			: cacheKeys.documents.byChunk(chunkId.toString()),
		queryFn: async () => {
			if (isDocsChunk) {
				return documentsApiService.getSurfsenseDocByChunk(chunkId);
			}
			return documentsApiService.getDocumentByChunk({ chunk_id: chunkId, chunk_window: 5 });
		},
		enabled: !!chunkId && open,
		staleTime: 5 * 60 * 1000,
	});
	// Total chunk count in the whole document (falls back to loaded count).
	const totalChunks =
		documentData && "total_chunks" in documentData
			? (documentData.total_chunks ?? documentData.chunks.length)
			: (documentData?.chunks?.length ?? 0);
	// Chunks paged in before/after the initially fetched window.
	const [beforeChunks, setBeforeChunks] = useState<
		Array<{ id: number; content: string; created_at: string }>
	>([]);
	const [afterChunks, setAfterChunks] = useState<
		Array<{ id: number; content: string; created_at: string }>
	>([]);
	const [loadingBefore, setLoadingBefore] = useState(false);
	const [loadingAfter, setLoadingAfter] = useState(false);
	// Drop paged-in chunks when the target chunk changes or the panel toggles.
	useEffect(() => {
		setBeforeChunks([]);
		setAfterChunks([]);
	}, [chunkId, open]);
	// Absolute index of the first initially-fetched chunk in the document.
	const chunkStartIndex =
		documentData && "chunk_start_index" in documentData ? (documentData.chunk_start_index ?? 0) : 0;
	const initialChunks = documentData?.chunks ?? [];
	const allChunks = [...beforeChunks, ...initialChunks, ...afterChunks];
	// Absolute [start, end) range of chunks currently displayed.
	const absoluteStart = chunkStartIndex - beforeChunks.length;
	const absoluteEnd = chunkStartIndex + initialChunks.length + afterChunks.length;
	const canLoadBefore = absoluteStart > 0;
	const canLoadAfter = absoluteEnd < totalChunks;
	// How many chunks each "load more" click pages in.
	const EXPAND_SIZE = 10;
	// Page in up to EXPAND_SIZE chunks before the current window, skipping
	// any ids already displayed. Best-effort: failures are logged only.
	const loadBefore = useCallback(async () => {
		if (!documentData || !("search_space_id" in documentData) || !canLoadBefore) return;
		setLoadingBefore(true);
		try {
			const count = Math.min(EXPAND_SIZE, absoluteStart);
			const result = await documentsApiService.getDocumentChunks({
				document_id: documentData.id,
				page: 0,
				page_size: count,
				start_offset: absoluteStart - count,
			});
			const existingIds = new Set(allChunks.map((c) => c.id));
			const newChunks = result.items
				.filter((c) => !existingIds.has(c.id))
				.map((c) => ({ id: c.id, content: c.content, created_at: c.created_at }));
			setBeforeChunks((prev) => [...newChunks, ...prev]);
		} catch (err) {
			console.error("Failed to load earlier chunks:", err);
		} finally {
			setLoadingBefore(false);
		}
	}, [documentData, absoluteStart, canLoadBefore, allChunks]);
	// Page in up to EXPAND_SIZE chunks after the current window (same
	// dedupe/error policy as loadBefore).
	const loadAfter = useCallback(async () => {
		if (!documentData || !("search_space_id" in documentData) || !canLoadAfter) return;
		setLoadingAfter(true);
		try {
			const result = await documentsApiService.getDocumentChunks({
				document_id: documentData.id,
				page: 0,
				page_size: EXPAND_SIZE,
				start_offset: absoluteEnd,
			});
			const existingIds = new Set(allChunks.map((c) => c.id));
			const newChunks = result.items
				.filter((c) => !existingIds.has(c.id))
				.map((c) => ({ id: c.id, content: c.content, created_at: c.created_at }));
			setAfterChunks((prev) => [...prev, ...newChunks]);
		} catch (err) {
			console.error("Failed to load later chunks:", err);
		} finally {
			setLoadingAfter(false);
		}
	}, [documentData, absoluteEnd, canLoadAfter, allChunks]);
	// Web-search providers have no stored document; render title/description
	// directly instead of fetching chunks.
	const isDirectRenderSource =
		sourceType === "TAVILY_API" ||
		sourceType === "LINKUP_API" ||
		sourceType === "SEARXNG_API" ||
		sourceType === "BAIDU_SEARCH_API";
	// Index of the cited chunk within `allChunks` (-1 while not loaded).
	const citedChunkIndex = allChunks.findIndex((chunk) => chunk.id === chunkId);
	// Simple scroll function that scrolls to a chunk by index
	const scrollToChunkByIndex = useCallback(
		(chunkIndex: number, smooth = true) => {
			const scrollContainer = scrollAreaRef.current;
			if (!scrollContainer) return;
			const viewport = scrollContainer.querySelector(
				"[data-radix-scroll-area-viewport]"
			) as HTMLElement | null;
			if (!viewport) return;
			const chunkElement = scrollContainer.querySelector(
				`[data-chunk-index="${chunkIndex}"]`
			) as HTMLElement | null;
			if (!chunkElement) return;
			// Get positions using getBoundingClientRect for accuracy
			const viewportRect = viewport.getBoundingClientRect();
			const chunkRect = chunkElement.getBoundingClientRect();
			// Calculate where to scroll to center the chunk
			const currentScrollTop = viewport.scrollTop;
			const chunkTopRelativeToViewport = chunkRect.top - viewportRect.top + currentScrollTop;
			const scrollTarget =
				chunkTopRelativeToViewport - viewportRect.height / 2 + chunkRect.height / 2;
			viewport.scrollTo({
				top: Math.max(0, scrollTarget),
				behavior: smooth && !shouldReduceMotion ? "smooth" : "auto",
			});
			setActiveChunkIndex(chunkIndex);
		},
		[shouldReduceMotion]
	);
	// Callback ref for the cited chunk - scrolls when the element mounts
	const citedChunkRefCallback = useCallback(
		(node: HTMLDivElement | null) => {
			if (node && !hasScrolledRef.current && open) {
				hasScrolledRef.current = true; // Mark immediately to prevent duplicate scrolls
				// Store the node reference for the delayed scroll
				const scrollToCitedChunk = () => {
					const scrollContainer = scrollAreaRef.current;
					if (!scrollContainer || !node.isConnected) return false;
					const viewport = scrollContainer.querySelector(
						"[data-radix-scroll-area-viewport]"
					) as HTMLElement | null;
					if (!viewport) return false;
					// Get positions
					const viewportRect = viewport.getBoundingClientRect();
					const chunkRect = node.getBoundingClientRect();
					// Calculate scroll position to center the chunk
					const currentScrollTop = viewport.scrollTop;
					const chunkTopRelativeToViewport = chunkRect.top - viewportRect.top + currentScrollTop;
					const scrollTarget =
						chunkTopRelativeToViewport - viewportRect.height / 2 + chunkRect.height / 2;
					viewport.scrollTo({
						top: Math.max(0, scrollTarget),
						behavior: "auto", // Instant scroll for initial positioning
					});
					return true;
				};
				// Scroll multiple times with delays to handle progressive content rendering
				// Each subsequent scroll will correct for any layout shifts
				const scrollAttempts = [50, 150, 300, 600, 1000];
				scrollAttempts.forEach((delay) => {
					scrollTimersRef.current.push(
						setTimeout(() => {
							scrollToCitedChunk();
						}, delay)
					);
				});
				// After final attempt, mark the cited chunk as active
				scrollTimersRef.current.push(
					setTimeout(
						() => {
							setActiveChunkIndex(citedChunkIndex);
						},
						scrollAttempts[scrollAttempts.length - 1] + 50
					)
				);
			}
		},
		[open, citedChunkIndex]
	);
	// Reset scroll state when panel closes
	useEffect(() => {
		if (!open) {
			scrollTimersRef.current.forEach(clearTimeout);
			scrollTimersRef.current = [];
			hasScrolledRef.current = false;
			setActiveChunkIndex(null);
		}
		return () => {
			scrollTimersRef.current.forEach(clearTimeout);
			scrollTimersRef.current = [];
		};
	}, [open]);
	// Handle escape key
	useEffect(() => {
		const handleEscape = (e: KeyboardEvent) => {
			if (e.key === "Escape" && open) {
				onOpenChange(false);
			}
		};
		window.addEventListener("keydown", handleEscape);
		return () => window.removeEventListener("keydown", handleEscape);
	}, [open, onOpenChange]);
	// Prevent body scroll when open
	useEffect(() => {
		if (open) {
			document.body.style.overflow = "hidden";
		} else {
			document.body.style.overflow = "";
		}
		return () => {
			document.body.style.overflow = "";
		};
	}, [open]);
	// Open the source URL in a new tab without triggering parent handlers.
	const handleUrlClick = (e: React.MouseEvent, clickUrl: string) => {
		e.preventDefault();
		e.stopPropagation();
		window.open(clickUrl, "_blank", "noopener,noreferrer");
	};
	const scrollToChunk = useCallback(
		(index: number) => {
			scrollToChunkByIndex(index, true);
		},
		[scrollToChunkByIndex]
	);
	const panelContent = (
		<AnimatePresence mode="wait">
			{open && (
				<>
					{/* Backdrop */}
					<motion.div
						key="backdrop"
						initial={{ opacity: 0 }}
						animate={{ opacity: 1 }}
						exit={{ opacity: 0 }}
						transition={{ duration: 0.2 }}
						className="fixed inset-0 z-50 bg-black/60 backdrop-blur-sm"
						onClick={() => onOpenChange(false)}
					/>
					{/* Panel */}
					<motion.div
						key="panel"
						initial={shouldReduceMotion ? { opacity: 0 } : { opacity: 0, scale: 0.95, y: 20 }}
						animate={{ opacity: 1, scale: 1, y: 0 }}
						exit={shouldReduceMotion ? { opacity: 0 } : { opacity: 0, scale: 0.95, y: 20 }}
						transition={{
							type: "spring",
							damping: 30,
							stiffness: 300,
						}}
						className="fixed inset-3 sm:inset-6 md:inset-10 lg:inset-16 z-50 flex flex-col bg-background rounded-3xl shadow-2xl border overflow-hidden"
					>
						{/* Header */}
						<motion.div
							initial={{ opacity: 0, y: -10 }}
							animate={{ opacity: 1, y: 0 }}
							transition={{ delay: 0.1 }}
							className="flex items-center justify-between px-6 py-5 border-b bg-linear-to-r from-muted/50 to-muted/30"
						>
							<div className="min-w-0 flex-1">
								<h2 className="text-xl font-semibold truncate">
									{documentData?.title || title || "Source Document"}
								</h2>
								<p className="text-sm text-muted-foreground mt-0.5">
									{documentData && "document_type" in documentData
										? formatDocumentType(documentData.document_type)
										: sourceType && formatDocumentType(sourceType)}
									{totalChunks > 0 && (
										<span className="ml-2">
											{totalChunks} chunk{totalChunks !== 1 ? "s" : ""}
											{allChunks.length < totalChunks && ` (showing ${allChunks.length})`}
										</span>
									)}
								</p>
							</div>
							<div className="flex items-center gap-3 shrink-0">
								{url && (
									<Button
										size="sm"
										variant="outline"
										onClick={(e) => handleUrlClick(e, url)}
										className="hidden sm:flex gap-2 rounded-xl"
									>
										<ExternalLink className="h-4 w-4" />
										Open Source
									</Button>
								)}
								<Button
									size="icon"
									variant="ghost"
									onClick={() => onOpenChange(false)}
									className="h-8 w-8 rounded-full"
								>
									<X className="h-4 w-4" />
									<span className="sr-only">Close</span>
								</Button>
							</div>
						</motion.div>
						{/* Loading State */}
						{!isDirectRenderSource && isDocumentByChunkFetching && (
							<div className="flex-1 flex items-center justify-center">
								<motion.div
									initial={{ opacity: 0, scale: 0.9 }}
									animate={{ opacity: 1, scale: 1 }}
									className="flex flex-col items-center gap-4"
								>
									<Spinner size="lg" />
									<p className="text-sm text-muted-foreground font-medium">
										{t("loading_document")}
									</p>
								</motion.div>
							</div>
						)}
						{/* Error State */}
						{!isDirectRenderSource && documentByChunkFetchingError && (
							<div className="flex-1 flex items-center justify-center">
								<motion.div
									initial={{ opacity: 0, scale: 0.9 }}
									animate={{ opacity: 1, scale: 1 }}
									className="flex flex-col items-center gap-4 text-center px-6"
								>
									<div className="w-20 h-20 rounded-full bg-muted/50 flex items-center justify-center">
										<FileQuestionMark className="h-10 w-10 text-muted-foreground" />
									</div>
									<div>
										<p className="font-semibold text-foreground text-lg">Document unavailable</p>
										<p className="text-sm text-muted-foreground mt-2 max-w-md">
											{documentByChunkFetchingError.message ||
												"An unexpected error occurred. Please try again."}
										</p>
									</div>
									<Button variant="outline" onClick={() => onOpenChange(false)} className="mt-2">
										Close Panel
									</Button>
								</motion.div>
							</div>
						)}
						{/* Direct render for web search providers */}
						{isDirectRenderSource && (
							<ScrollArea className="flex-1">
								<div className="p-6 max-w-3xl mx-auto">
									{url && (
										<Button
											size="default"
											variant="outline"
											onClick={(e) => handleUrlClick(e, url)}
											className="w-full mb-6 sm:hidden rounded-xl"
										>
											<ExternalLink className="mr-2 h-4 w-4" />
											Open in Browser
										</Button>
									)}
									<motion.div
										initial={{ opacity: 0, y: 10 }}
										animate={{ opacity: 1, y: 0 }}
										className="p-6 bg-muted/50 rounded-2xl border"
									>
										<h3 className="text-base font-semibold mb-4 flex items-center gap-2">
											<BookOpen className="h-4 w-4" />
											Source Information
										</h3>
										<div className="text-sm text-muted-foreground mb-3 font-medium">
											{title || "Untitled"}
										</div>
										<div className="text-sm text-foreground leading-relaxed">
											{description || "No content available"}
										</div>
									</motion.div>
								</div>
							</ScrollArea>
						)}
						{/* API-fetched document content */}
						{!isDirectRenderSource && documentData && (
							<div className="flex-1 flex overflow-hidden">
								{/* Chunk Navigation Sidebar */}
								{allChunks.length > 1 && (
									<motion.div
										initial={{ opacity: 0, x: -20 }}
										animate={{ opacity: 1, x: 0 }}
										transition={{ delay: 0.2 }}
										className="hidden lg:flex flex-col w-16 border-r bg-muted/10 overflow-hidden"
									>
										<ScrollArea className="flex-1 h-full">
											<div className="p-2 pt-3 flex flex-col gap-1.5">
												{allChunks.map((chunk, idx) => {
													const absNum = absoluteStart + idx + 1;
													const isCited = chunk.id === chunkId;
													const isActive = activeChunkIndex === idx;
													return (
														<motion.button
															key={chunk.id}
															type="button"
															onClick={() => scrollToChunk(idx)}
															initial={{ opacity: 0, scale: 0.8 }}
															animate={{ opacity: 1, scale: 1 }}
															transition={{ delay: Math.min(idx * 0.02, 0.2) }}
															className={cn(
																"relative w-11 h-9 mx-auto rounded-lg text-xs font-semibold transition-all duration-200 flex items-center justify-center",
																isCited
																	? "bg-primary text-primary-foreground shadow-md"
																	: isActive
																		? "bg-muted text-foreground"
																		: "bg-muted/50 text-muted-foreground hover:bg-muted hover:text-foreground"
															)}
															title={isCited ? `Chunk ${absNum} (Cited)` : `Chunk ${absNum}`}
														>
															{absNum}
															{isCited && (
																<span className="absolute -top-1.5 -right-1.5 flex items-center justify-center w-4 h-4 bg-primary rounded-full border-2 border-background shadow-sm">
																	<Sparkles className="h-2.5 w-2.5 text-primary-foreground" />
																</span>
															)}
														</motion.button>
													);
												})}
											</div>
										</ScrollArea>
									</motion.div>
								)}
								{/* Main Content */}
								<ScrollArea className="flex-1" ref={scrollAreaRef}>
									<div className="p-6 lg:p-8 max-w-4xl mx-auto space-y-6">
										{/* Document Metadata */}
										{"document_metadata" in documentData &&
											documentData.document_metadata &&
											Object.keys(documentData.document_metadata).length > 0 && (
												<motion.div
													initial={{ opacity: 0, y: 10 }}
													animate={{ opacity: 1, y: 0 }}
													transition={{ delay: 0.1 }}
													className="p-5 bg-muted/30 rounded-2xl border"
												>
													<h3 className="text-sm font-semibold mb-4 text-muted-foreground uppercase tracking-wider flex items-center gap-2">
														<FileText className="h-4 w-4" />
														Document Information
													</h3>
													<dl className="grid grid-cols-1 sm:grid-cols-2 gap-4 text-sm">
														{Object.entries(documentData.document_metadata).map(([key, value]) => (
															<div key={key} className="space-y-1">
																<dt className="font-medium text-muted-foreground capitalize text-xs">
																	{key.replace(/_/g, " ")}
																</dt>
																<dd className="text-foreground wrap-break-word">{String(value)}</dd>
															</div>
														))}
													</dl>
												</motion.div>
											)}
										{/* Chunks Header */}
										<div className="flex items-center justify-between pt-2">
											<h3 className="text-sm font-semibold text-muted-foreground uppercase tracking-wider flex items-center gap-2">
												<Hash className="h-4 w-4" />
												Chunks {absoluteStart + 1}{absoluteEnd} of {totalChunks}
											</h3>
											{citedChunkIndex !== -1 && (
												<Button
													variant="ghost"
													size="sm"
													onClick={() => scrollToChunk(citedChunkIndex)}
													className="gap-2 text-primary hover:text-primary"
												>
													<Sparkles className="h-3.5 w-3.5" />
													Jump to cited
												</Button>
											)}
										</div>
										{/* Load Earlier */}
										{canLoadBefore && (
											<div className="flex items-center justify-center">
												<Button
													variant="outline"
													size="sm"
													onClick={loadBefore}
													disabled={loadingBefore}
													className="gap-2"
												>
													{loadingBefore ? (
														<Loader2 className="h-3.5 w-3.5 animate-spin" />
													) : (
														<ChevronUp className="h-3.5 w-3.5" />
													)}
													{loadingBefore
														? "Loading..."
														: `Load ${Math.min(EXPAND_SIZE, absoluteStart)} earlier chunks`}
												</Button>
											</div>
										)}
										{/* Chunks */}
										<div className="space-y-4">
											{allChunks.map((chunk, idx) => {
												const isCited = chunk.id === chunkId;
												const chunkNumber = absoluteStart + idx + 1;
												return (
													<ChunkCard
														key={chunk.id}
														ref={isCited ? citedChunkRefCallback : undefined}
														chunk={chunk}
														localIndex={idx}
														chunkNumber={chunkNumber}
														totalChunks={totalChunks}
														isCited={isCited}
														isActive={activeChunkIndex === idx}
														disableLayoutAnimation={allChunks.length > 30}
													/>
												);
											})}
										</div>
										{/* Load Later */}
										{canLoadAfter && (
											<div className="flex items-center justify-center py-3">
												<Button
													variant="outline"
													size="sm"
													onClick={loadAfter}
													disabled={loadingAfter}
													className="gap-2"
												>
													{loadingAfter ? (
														<Loader2 className="h-3.5 w-3.5 animate-spin" />
													) : (
														<ChevronDown className="h-3.5 w-3.5" />
													)}
													{loadingAfter
														? "Loading..."
														: `Load ${Math.min(EXPAND_SIZE, totalChunks - absoluteEnd)} later chunks`}
												</Button>
											</div>
										)}
									</div>
								</ScrollArea>
							</div>
						)}
					</motion.div>
				</>
			)}
		</AnimatePresence>
	);
	// Before hydration there is no document.body to portal into.
	if (!mounted) return <>{children}</>;
	return (
		<>
			{children}
			{createPortal(panelContent, globalThis.document.body)}
		</>
	);
}

View file

@ -67,9 +67,6 @@ const DesktopShortcutsContent = dynamic(
import(
"@/app/dashboard/[search_space_id]/user-settings/components/DesktopShortcutsContent"
).then((m) => ({ default: m.DesktopShortcutsContent })),
import(
"@/app/dashboard/[search_space_id]/user-settings/components/DesktopShortcutsContent"
).then((m) => ({ default: m.DesktopShortcutsContent })),
{ ssr: false }
);
const MemoryContent = dynamic(

View file

@ -0,0 +1,45 @@
"use client";
import type { PlateLeafProps } from "platejs/react";
import { PlateLeaf } from "platejs/react";
/**
* Stable class name used to identify Plate-rendered citation highlight
* leaves in the DOM. We can't use a `data-*` attribute here — Plate's
* `PlateLeaf` runs its props through `useNodeAttributes`, which only
* forwards `attributes`, `className`, `ref`, and `style` to the rendered
* element; arbitrary `data-*` props are silently dropped (verified
* against `@platejs/core/dist/react/index.js` v52). So `className` is
* the only escape hatch that's guaranteed to survive into the DOM.
*/
export const CITATION_HIGHLIGHT_CLASS = "citation-highlight-leaf";
/**
* Leaf rendered for ranges decorated by `@platejs/find-replace`'s
* `FindReplacePlugin`. We re-purpose that plugin to drive the citation-jump
* highlight: when a citation is staged, the parent sets the plugin's `search`
* option to a snippet of the chunk text and Plate decorates every match with
* `searchHighlight: true`. This component renders those decorations as a
* `<mark>` tagged with `CITATION_HIGHLIGHT_CLASS` so the parent can:
* 1. Query the first match in DOM order to scroll it into view.
* 2. Detect the active-highlight state without a separate React ref.
*
* The highlight is **persistent** it does not auto-fade. The parent in
* `EditorPanelContent` clears it by setting the plugin's `search` option
* back to "" when one of: (a) the user clicks anywhere inside the editor,
* (b) the panel switches to a different document, (c) the user toggles
* into edit mode, (d) another citation jump is staged, (e) the panel
* unmounts. We use a brief entrance pulse (`citation-flash-in`, see
* `globals.css`) purely to draw the eye after `scrollIntoView` lands.
*/
export function SearchHighlightLeaf(props: PlateLeafProps) {
return (
<PlateLeaf
{...props}
as="mark"
className={`${CITATION_HIGHLIGHT_CLASS} bg-primary/15 ring-1 ring-primary/40 rounded-sm px-0.5 text-inherit animate-[citation-flash-in_400ms_ease-out]`}
>
{props.children}
</PlateLeaf>
);
}

View file

@ -0,0 +1,125 @@
/**
* Snippet generation for the citation-jump highlight, driven by Plate's
* `FindReplacePlugin`. The plugin runs `decorate` per-block and only matches
* within blocks whose children are all `Text` nodes (so it crosses inline
* marks like bold/italic but **not** block boundaries, and a block that
* contains even one inline element such as a link is silently skipped).
* That means a full chunk that spans heading + paragraph won't match as a
* single string we have to pick a shorter snippet that fits inside one
* rendered block.
*
* `buildCitationSearchCandidates` returns search strings ordered from
* "most-specific anchor" to "broadest fallback":
* 1. First sentence of the chunk (capped at `FIRST_SENTENCE_MAX`).
* 2. First `FIRST_PHRASE_WORDS` words.
 * 3. Each non-trivial line of the chunk, in source order — gives us a
* separate attempt for each rendered block, so a heading line with
* an inline link doesn't doom the whole jump.
* 4. Full chunk (only if it's already short enough to plausibly fit
* inside one block).
*
 * The caller tries each candidate in turn — set the plugin's `search`
* option, `editor.api.redecorate()`, then check the editor DOM for a
* `.citation-highlight-leaf` element. First candidate that produces one
* wins; subsequent candidates are skipped.
*/
const FIRST_SENTENCE_MAX = 120;
const FIRST_PHRASE_WORDS = 8;
const MIN_SNIPPET_LENGTH = 6;
const FULL_CHUNK_MAX = FIRST_SENTENCE_MAX * 2;
const MAX_LINE_CANDIDATES = 6;
const LINE_CANDIDATE_MAX = FIRST_SENTENCE_MAX;
/** Collapse every whitespace run to a single space and trim both ends. */
function normalizeWhitespace(input: string): string {
	const collapsed = input.replace(/\s+/g, " ");
	return collapsed.trim();
}
/**
 * Strip the markdown syntax that won't survive into the rendered editor's
 * plain text, so chunk text (raw source markdown from the indexer) can be
 * matched against the literal text values stored in Plate's Slate tree.
 *
 * Order matters: multi-char and "container" syntax is handled before
 * single-char emphasis, otherwise `**text**` collapses to `*text*` first.
 *
 * Heuristic only — this is not a full markdown parser; it just removes the
 * common markers (`**bold**`, `[text](url)`, `# headings`, `- list`, etc.)
 * that show up in connector-doc chunks and would break literal substring
 * search.
 */
export function stripMarkdownForMatch(input: string): string {
	// Fenced code blocks first: the function replacement re-inserts the
	// captured body verbatim.
	let text = input.replace(/```[a-z0-9_+-]*\n?([\s\S]*?)```/gi, (_, body: string) => body);
	// Remaining passes, applied strictly in order (see doc comment above).
	const passes: ReadonlyArray<readonly [RegExp, string]> = [
		[/<!--[\s\S]*?-->/g, " "],
		[/!\[([^\]]*)\]\([^)]*\)/g, "$1"],
		[/!\[([^\]]*)\]\[[^\]]*\]/g, "$1"],
		[/\[([^\]]+)\]\([^)]*\)/g, "$1"],
		[/\[([^\]]+)\]\[[^\]]*\]/g, "$1"],
		[/<((?:https?|mailto):[^>\s]+)>/g, "$1"],
		[/`+([^`\n]+?)`+/g, "$1"],
		[/(\*\*|__)([\s\S]+?)\1/g, "$2"],
		[/(?<!\w)([*_])([^*_\n]+?)\1(?!\w)/g, "$2"],
		[/~~([^~]+)~~/g, "$1"],
		[/^[ \t]{0,3}#{1,6}[ \t]+/gm, ""],
		[/^[ \t]{0,3}(?:=+|-+)[ \t]*$/gm, ""],
		[/^[ \t]{0,3}>+[ \t]?/gm, ""],
		[/^[ \t]*[-*+][ \t]+/gm, ""],
		[/^[ \t]*\d+\.[ \t]+/gm, ""],
		[/^[ \t]{0,3}(?:[-*_])(?:[ \t]*[-*_]){2,}[ \t]*$/gm, ""],
		[/^[ \t]*\|?(?:[ \t]*:?-+:?[ \t]*\|)+[ \t]*:?-+:?[ \t]*\|?[ \t]*$/gm, ""],
		[/\\([\\`*_{}[\]()#+\-.!~>])/g, "$1"],
	];
	for (const [pattern, replacement] of passes) {
		text = text.replace(pattern, replacement);
	}
	return text;
}
/**
 * Build an ordered, de-duplicated list of short search strings for locating a
 * citation's chunk text inside the rendered editor. Candidates are tried in
 * order: first sentence (or capped prefix), first-words phrase, up to
 * MAX_LINE_CANDIDATES per-line snippets, and finally the whole chunk when it
 * is short enough.
 */
export function buildCitationSearchCandidates(rawText: string): string[] {
  if (!rawText) return [];

  const stripped = stripMarkdownForMatch(rawText);
  const normalized = normalizeWhitespace(stripped);
  if (normalized.length < MIN_SNIPPET_LENGTH) return [];

  const candidates: string[] = [];
  const seen = new Set<string>();
  const addCandidate = (raw: string): void => {
    const candidate = normalizeWhitespace(raw);
    if (candidate.length < MIN_SNIPPET_LENGTH || seen.has(candidate)) return;
    seen.add(candidate);
    candidates.push(candidate);
  };

  // Leading sentence, capped; when no sentence terminator exists, fall back
  // to a flat character prefix (only worthwhile if the text overflows the cap).
  const sentenceHit = normalized.match(/^[^.!?]+[.!?]/);
  if (sentenceHit) {
    addCandidate(sentenceHit[0].slice(0, FIRST_SENTENCE_MAX));
  } else if (normalized.length > FIRST_SENTENCE_MAX) {
    addCandidate(normalized.slice(0, FIRST_SENTENCE_MAX));
  }

  // Short opening phrase — only meaningful when it truncates the text.
  const words = normalized.split(" ").filter(Boolean);
  if (words.length > FIRST_PHRASE_WORDS) {
    addCandidate(words.slice(0, FIRST_PHRASE_WORDS).join(" "));
  }

  // Per-line candidates: each chunk line roughly maps to one rendered block,
  // so each gets its own decorate attempt. That matters when the first line
  // is e.g. a heading containing a link — Plate's `FindReplacePlugin` skips
  // blocks whose children are not all text nodes.
  let linesTaken = 0;
  for (const rawLine of stripped.split(/\r?\n/)) {
    if (linesTaken >= MAX_LINE_CANDIDATES) break;
    const line = normalizeWhitespace(rawLine);
    if (line.length < MIN_SNIPPET_LENGTH) continue;
    addCandidate(line.slice(0, LINE_CANDIDATE_MAX));
    linesTaken++;
  }

  // Small chunks are also tried verbatim as a last resort.
  if (normalized.length <= FULL_CHUNK_MAX) {
    addCandidate(normalized);
  }

  return candidates;
}

View file

@ -36,6 +36,7 @@
"@platejs/code-block": "^52.0.11",
"@platejs/combobox": "^52.0.15",
"@platejs/dnd": "^52.0.11",
"@platejs/find-replace": "^52.3.10",
"@platejs/floating": "^52.0.11",
"@platejs/indent": "^52.0.11",
"@platejs/link": "^52.0.11",

View file

@ -53,6 +53,9 @@ importers:
'@platejs/dnd':
specifier: ^52.0.11
version: 52.0.11(platejs@52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4)))(react-dnd-html5-backend@16.0.1)(react-dnd@16.0.1(@types/node@20.19.33)(@types/react@19.2.14)(react@19.2.4))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)
'@platejs/find-replace':
specifier: ^52.3.10
version: 52.3.10(platejs@52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4)))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)
'@platejs/floating':
specifier: ^52.0.11
version: 52.0.11(platejs@52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4)))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)
@ -2827,6 +2830,13 @@ packages:
react-dnd-html5-backend: '>=14.0.0'
react-dom: '>=18.0.0'
'@platejs/find-replace@52.3.10':
resolution: {integrity: sha512-V/MOMMUYxHfEn/skd2+YO213xSATFDVsl8FzVzVRV/XaxwwVefH2EPD1lAVIvmYjennTVTTsHHtEI9K9iOsEaA==}
peerDependencies:
platejs: '>=52.0.11'
react: '>=18.0.0'
react-dom: '>=18.0.0'
'@platejs/floating@52.0.11':
resolution: {integrity: sha512-ApNpw4KWml+kuK+XTTpji+f/7GxTR4nRzlnfJMvGBrJpLPQ4elS5MABm3oUi81DZn+aub5HvsyH7UqCw7F76IA==}
peerDependencies:
@ -11105,6 +11115,13 @@ snapshots:
react-dnd-html5-backend: 16.0.1
react-dom: 19.2.4(react@19.2.4)
'@platejs/find-replace@52.3.10(platejs@52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4)))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)':
dependencies:
platejs: 52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4))
react: 19.2.4
react-compiler-runtime: 1.0.0(react@19.2.4)
react-dom: 19.2.4(react@19.2.4)
'@platejs/floating@52.0.11(platejs@52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4)))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)':
dependencies:
'@floating-ui/core': 1.7.4