feat: various UI fixes, prompt optimizations, and allowing duplicate docs

- Updated `content_hash` in the `Document` model to remove global uniqueness, allowing identical content across different paths.
- Enhanced `_create_document` function to handle path uniqueness and prevent session-poisoning from `IntegrityError`.
- Added detailed comments for clarity on the changes and their implications.
- Introduced new citation handling in the editor for improved user experience with citation jumps.
- Updated package dependencies in the frontend for better functionality.
This commit is contained in:
DESKTOP-RTLN3BA\$punk 2026-04-28 21:30:53 -07:00
parent e6433f78c4
commit b9a66cb417
26 changed files with 1540 additions and 852 deletions

View file

@ -0,0 +1,107 @@
"""133_drop_documents_content_hash_unique
Revision ID: 133
Revises: 132
Create Date: 2026-04-29
Drop the global UNIQUE constraint on ``documents.content_hash`` so the
new-chat agent's ``write_file`` flow can persist legitimate file copies
(two paths, identical content) without hitting a constraint that mirrors
no real filesystem semantic.
Path uniqueness still lives on ``documents.unique_identifier_hash`` (per
search space), which is the right invariant — exactly like an inode at a
given path on a POSIX filesystem.
The non-unique INDEX on ``content_hash`` is preserved so connector
indexers' "have we seen this content before?" lookup
(:func:`app.tasks.document_processors.base.check_duplicate_document`,
which already uses ``.scalars().first()`` and is therefore tolerant of
duplicates) stays cheap.
"""
from __future__ import annotations
from collections.abc import Sequence
from sqlalchemy import inspect
from alembic import op
revision: str = "133"
down_revision: str | None = "132"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def _existing_constraint_names(bind, table: str) -> set[str]:
    """Collect the names of the UNIQUE constraints reflected on ``table``.

    ``bind`` is whatever connectable the migration obtained from
    ``op.get_bind()``; a fresh inspector is built on every call.
    """
    names: set[str] = set()
    for constraint in inspect(bind).get_unique_constraints(table):
        names.add(constraint["name"])
    return names
def _existing_index_names(bind, table: str) -> set[str]:
    """Collect the names of the indexes reflected on ``table``.

    ``bind`` is whatever connectable the migration obtained from
    ``op.get_bind()``; a fresh inspector is built on every call.
    """
    names: set[str] = set()
    for index in inspect(bind).get_indexes(table):
        names.add(index["name"])
    return names
def upgrade() -> None:
    """Remove every form of UNIQUE enforcement on ``documents.content_hash``.

    Uniqueness may be enforced in up to three ways, all of which are handled:

    * a named UNIQUE constraint ``uq_documents_content_hash``;
    * a unique index that carries that same name;
    * a UNIQUE ``ix_documents_content_hash`` index. A column declared with
      both ``index=True`` and ``unique=True`` makes SQLAlchemy emit a unique
      index under the ``ix_`` name, so checking by name alone would leave
      that restriction in place.

    A plain (non-unique) ``ix_documents_content_hash`` index is guaranteed to
    exist afterwards so content-hash lookups stay cheap. Every step is
    guarded by reflection, which makes the migration safe to re-run.
    """
    bind = op.get_bind()

    # Named UniqueConstraint variant.
    constraints = _existing_constraint_names(bind, "documents")
    if "uq_documents_content_hash" in constraints:
        op.drop_constraint(
            "uq_documents_content_hash", "documents", type_="unique"
        )

    # Reflect indexes only after the constraint drop so any index that
    # merely backed the constraint has already disappeared. Keep the
    # ``unique`` flag, not just the name.
    index_is_unique = {
        idx["name"]: bool(idx.get("unique"))
        for idx in inspect(bind).get_indexes("documents")
    }

    # The constraint can surface as a unique index of the same name.
    if "uq_documents_content_hash" in index_is_unique:
        op.drop_index("uq_documents_content_hash", table_name="documents")

    # A UNIQUE ``ix_documents_content_hash`` must be rebuilt as non-unique;
    # leaving it untouched would keep rejecting duplicate content.
    if index_is_unique.get("ix_documents_content_hash"):
        op.drop_index("ix_documents_content_hash", table_name="documents")
        del index_is_unique["ix_documents_content_hash"]

    # Ensure the non-unique index is present for fast lookups.
    if "ix_documents_content_hash" not in index_is_unique:
        op.create_index(
            "ix_documents_content_hash",
            "documents",
            ["content_hash"],
            unique=False,
        )
def downgrade() -> None:
    """Restore the global UNIQUE constraint on ``documents.content_hash``.

    WARNING: this downgrade is destructive. After the upgrade there may be
    legitimate duplicates (e.g. two NOTE documents that share content because
    the user explicitly copied one to a new path). UNIQUE cannot be re-applied
    while they exist, so every row except the lowest-id one per
    ``content_hash`` is DELETED first — the same strategy revision 8 used
    when it first introduced the constraint. Back up the table beforehand if
    those copies matter.
    """
    bind = op.get_bind()

    # NOTE(review): assumes rows that reference the deleted documents are
    # removed or detached by their foreign keys — confirm before running
    # against production data.
    op.execute(
        """
        DELETE FROM documents
        WHERE id NOT IN (
            SELECT MIN(id)
            FROM documents
            GROUP BY content_hash
        )
        """
    )

    index_is_unique = {
        idx["name"]: bool(idx.get("unique"))
        for idx in inspect(bind).get_indexes("documents")
    }

    # End state: a non-unique lookup index plus a separate named UNIQUE
    # constraint. Only rebuild the index when it is missing or has the wrong
    # uniqueness; dropping and recreating an identical index is wasted work.
    if index_is_unique.get("ix_documents_content_hash"):
        op.drop_index("ix_documents_content_hash", table_name="documents")
        del index_is_unique["ix_documents_content_hash"]
    if "ix_documents_content_hash" not in index_is_unique:
        op.create_index(
            "ix_documents_content_hash",
            "documents",
            ["content_hash"],
            unique=False,
        )

    # Guarded like ``upgrade`` so a partially applied downgrade can be re-run.
    if "uq_documents_content_hash" not in _existing_constraint_names(
        bind, "documents"
    ):
        op.create_unique_constraint(
            "uq_documents_content_hash", "documents", ["content_hash"]
        )

View file

@ -28,6 +28,7 @@ from langchain.agents.middleware import AgentMiddleware, AgentState
from langchain_core.callbacks import dispatch_custom_event from langchain_core.callbacks import dispatch_custom_event
from langgraph.runtime import Runtime from langgraph.runtime import Runtime
from sqlalchemy import delete, select from sqlalchemy import delete, select
from sqlalchemy.exc import IntegrityError
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from app.agents.new_chat.filesystem_selection import FilesystemMode from app.agents.new_chat.filesystem_selection import FilesystemMode
@ -150,10 +151,11 @@ async def _create_document(
virtual_path, virtual_path,
search_space_id, search_space_id,
) )
# Guard against the unique_identifier_hash constraint: another row at the # Filesystem-parity invariant: the only thing that *must* be unique is
# same virtual_path (this search space) already owns the hash. Callers are # the path. Two notes can legitimately share content (e.g. ``cp a b``).
# expected to upsert via the wrapper, but this defends against bypasses # Guard against the path-derived ``unique_identifier_hash`` constraint
# and gives a clean ValueError instead of a session-poisoning IntegrityError. # so we surface a clean ValueError instead of letting the INSERT poison
# the session with an IntegrityError.
path_collision = await session.execute( path_collision = await session.execute(
select(Document.id).where( select(Document.id).where(
Document.search_space_id == search_space_id, Document.search_space_id == search_space_id,
@ -165,17 +167,14 @@ async def _create_document(
f"a document already exists at path '{virtual_path}' " f"a document already exists at path '{virtual_path}' "
"(unique_identifier_hash collision)" "(unique_identifier_hash collision)"
) )
# ``content_hash`` is intentionally NOT checked for uniqueness here.
# In a real filesystem two files at different paths can hold identical
# bytes, and the agent's ``write_file`` path needs that semantic to
# support copy/duplicate operations. The hash remains useful as a
# change-detection hint for connector indexers, which still consult it
# via :func:`check_duplicate_document` but do so with a non-unique
# lookup (``.first()``).
content_hash = generate_content_hash(content, search_space_id) content_hash = generate_content_hash(content, search_space_id)
content_collision = await session.execute(
select(Document.id).where(
Document.search_space_id == search_space_id,
Document.content_hash == content_hash,
)
)
if content_collision.scalar_one_or_none() is not None:
raise ValueError(
f"a document with identical content already exists for path '{virtual_path}'"
)
doc = Document( doc = Document(
title=title, title=title,
document_type=DocumentType.NOTE, document_type=DocumentType.NOTE,
@ -493,7 +492,14 @@ async def commit_staged_filesystem_state(
} }
) )
else: else:
# Wrap each create in a SAVEPOINT so a residual
# ``IntegrityError`` (e.g. a deployment that hasn't run
# migration 133 yet, where ``documents.content_hash``
# still carries its legacy global UNIQUE constraint)
# rolls back only this one create instead of poisoning
# the whole turn's transaction.
try: try:
async with session.begin_nested():
new_doc = await _create_document( new_doc = await _create_document(
session, session,
virtual_path=path, virtual_path=path,
@ -506,6 +512,23 @@ async def commit_staged_filesystem_state(
"kb_persistence: skipping %s create: %s", path, exc "kb_persistence: skipping %s create: %s", path, exc
) )
continue continue
except IntegrityError as exc:
# The path-uniqueness check above already protected
# against ``unique_identifier_hash`` collisions, so
# the most likely culprit is the legacy
# ``uq_documents_content_hash`` UNIQUE constraint
# that migration 133 drops. Log loudly so operators
# know to run the migration; do NOT silently swallow.
msg = str(exc.orig) if exc.orig is not None else str(exc)
logger.error(
"kb_persistence: IntegrityError creating %s: %s. "
"If this mentions content_hash, run alembic "
"upgrade to apply migration 133 which drops the "
"global UNIQUE constraint on documents.content_hash.",
path,
msg,
)
continue
doc_id_by_path[path] = new_doc.id doc_id_by_path[path] = new_doc.id
committed_creates.append( committed_creates.append(
{ {

View file

@ -38,12 +38,38 @@ from app.db import ChatVisibility
# Provider variant detection # Provider variant detection
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
ProviderVariant = str # "anthropic" | "openai_reasoning" | "openai_classic" | "google" | "default" # String literal alias for the supported provider-specific prompt variants.
# When adding a new variant, also drop a matching ``providers/<variant>.md``
# file in this package and (if appropriate) extend the regex matchers below.
#
# Stylistic clusters mirror OpenCode's prompt-per-family layout but adapted
# to SurfSense's "supplemental hints" architecture (each fragment is a
# focused style nudge, NOT a full system prompt — the main prompt is
# already assembled from base/ + tools/ + routing/).
ProviderVariant = str
# Known values:
# "anthropic" — Claude family (XML-friendly, narrative todos)
# "openai_reasoning" — GPT-5 / o-series (channel-aware pragmatic)
# "openai_classic" — GPT-4 family (autonomous persistence)
# "openai_codex" — gpt-*-codex (code-purist, terse, file:line refs)
# "google" — Gemini (formal, <3-line, numbered workflow)
# "kimi" — Moonshot Kimi-K* (action-bias, parallel tools)
# "grok" — xAI Grok (extreme-terse, one-word ok)
# "deepseek" — DeepSeek V3 / R1 (terse, R1-aware reasoning)
# "default" — fallback, no provider-specific block emitted
# IMPORTANT: order of evaluation matters in :func:`detect_provider_variant`.
# More specific patterns must come first (e.g. ``codex`` before
# ``openai_reasoning`` because codex model ids contain ``gpt``).
_OPENAI_CODEX_RE = re.compile(r"\b(gpt-codex|codex-mini|gpt-[\d.]+-codex)\b", re.IGNORECASE)
_OPENAI_REASONING_RE = re.compile(r"\b(gpt-5|o\d|o-)", re.IGNORECASE) _OPENAI_REASONING_RE = re.compile(r"\b(gpt-5|o\d|o-)", re.IGNORECASE)
_OPENAI_CLASSIC_RE = re.compile(r"\bgpt-4", re.IGNORECASE) _OPENAI_CLASSIC_RE = re.compile(r"\bgpt-4", re.IGNORECASE)
_ANTHROPIC_RE = re.compile(r"\bclaude\b", re.IGNORECASE) _ANTHROPIC_RE = re.compile(r"\bclaude\b", re.IGNORECASE)
_GOOGLE_RE = re.compile(r"\bgemini\b", re.IGNORECASE) _GOOGLE_RE = re.compile(r"\bgemini\b", re.IGNORECASE)
_KIMI_RE = re.compile(r"\b(kimi[-\d.]*|moonshot)\b", re.IGNORECASE)
_GROK_RE = re.compile(r"\bgrok\b", re.IGNORECASE)
_DEEPSEEK_RE = re.compile(r"\bdeepseek\b", re.IGNORECASE)
def detect_provider_variant(model_name: str | None) -> ProviderVariant: def detect_provider_variant(model_name: str | None) -> ProviderVariant:
@ -51,10 +77,17 @@ def detect_provider_variant(model_name: str | None) -> ProviderVariant:
Heuristic match on the model id; returns ``"default"`` when nothing Heuristic match on the model id; returns ``"default"`` when nothing
matches so the composer can fall back to the empty placeholder file. matches so the composer can fall back to the empty placeholder file.
Order is significant: more-specific patterns are tried first so
``gpt-5-codex`` routes to ``"openai_codex"`` rather than
``"openai_reasoning"`` (mirrors OpenCode's
``packages/opencode/src/session/system.ts`` dispatch).
""" """
if not model_name: if not model_name:
return "default" return "default"
name = model_name.strip() name = model_name.strip()
if _OPENAI_CODEX_RE.search(name):
return "openai_codex"
if _OPENAI_REASONING_RE.search(name): if _OPENAI_REASONING_RE.search(name):
return "openai_reasoning" return "openai_reasoning"
if _OPENAI_CLASSIC_RE.search(name): if _OPENAI_CLASSIC_RE.search(name):
@ -63,6 +96,12 @@ def detect_provider_variant(model_name: str | None) -> ProviderVariant:
return "anthropic" return "anthropic"
if _GOOGLE_RE.search(name): if _GOOGLE_RE.search(name):
return "google" return "google"
if _KIMI_RE.search(name):
return "kimi"
if _GROK_RE.search(name):
return "grok"
if _DEEPSEEK_RE.search(name):
return "deepseek"
return "default" return "default"

View file

@ -1,5 +1,20 @@
<provider_hints> <provider_hints>
You are running on an Anthropic Claude model. Use XML tags liberally to structure You are running on an Anthropic Claude model.
intermediate reasoning when the task is complex. Prefer step-by-step plans inside
`<thinking>` blocks before producing the final answer. Structured reasoning:
- Use XML tags liberally to organise intermediate reasoning when a task is non-trivial. `<thinking>...</thinking>` blocks are encouraged before tool calls or before producing a complex final answer.
- For multi-step requests, briefly outline a plan inside a `<plan>` block before issuing the first tool call.
Professional objectivity:
- Prioritise technical accuracy over validating the user's beliefs. Provide direct, factual guidance without unnecessary superlatives, praise, or emotional validation.
- When uncertain, investigate (search the KB, fetch the page) rather than confirming the user's assumption.
- Disagree with the user when the evidence warrants it; respectful correction beats false agreement.
Task management:
- For tasks with 3+ distinct steps use the todo / planning tool aggressively. Mark items in_progress before starting, completed immediately when finished — do not batch completions.
- Narrate progress through the todo list itself, not through chatty status lines.
Tool calls:
- Run independent tool calls in parallel within one response. Sequence them only when a later call genuinely needs an earlier one's output.
- Never chain bash-like commands with `;` or `&&` to "narrate" — use prose between tool calls instead.
</provider_hints> </provider_hints>

View file

@ -0,0 +1,18 @@
<provider_hints>
You are running on a DeepSeek model (DeepSeek-V3 chat / DeepSeek-R1 reasoning).
Reasoning hygiene (R1-aware):
- If the model surfaces explicit `<think>` blocks, keep that internal scratch focused — do NOT restate the user's question inside it; jump straight to the analysis.
- Never paste the contents of `<think>` into your final answer. Final answer should reflect only the conclusion, citations, and any user-facing rationale.
- Do not let chain-of-thought leak into tool-call arguments — keep tool inputs minimal and structural.
Output style:
- Be concise. Default to a one-paragraph answer; expand only when the user asks for detail.
- Don't open with sycophantic phrasing ("Great question", "Sure, here you go"). Lead with the answer or the next action.
- For factual answers, cite once with `[citation:chunk_id]` and stop.
Tool calls:
- Issue independent tool calls in parallel within a single turn.
- Prefer the knowledge-base search tools before any web-search; this model has strong recall but stale training data.
- Don't fabricate file paths, chunk ids, or URLs — only use values returned by tools or provided by the user.
</provider_hints>

View file

@ -1,4 +1,20 @@
<provider_hints> <provider_hints>
You are running on a Google Gemini model. Prefer concise, structured responses. You are running on a Google Gemini model.
When using tools, follow the function-calling protocol and avoid verbose preludes.
Output style:
- Concise & direct. Aim for fewer than 3 lines of prose (excluding tool output, citations, and code/snippets) when the task allows.
- No conversational filler — skip openers like "Okay, I will now…" and closers like "I have finished the changes…". Get straight to the action or answer.
- Format with GitHub-flavoured Markdown; assume monospace rendering.
- For one-line factual answers, just answer. No headers, no bullets.
Workflow for non-trivial tasks (Understand → Plan → Act → Verify):
1. **Understand:** read the user's request and the relevant KB / connector context. Use search and read tools (in parallel when independent) before assuming anything.
2. **Plan:** when the task touches multiple steps, share an extremely concise plan first.
3. **Act:** call the appropriate tools, strictly adhering to the prompts/routing already established for this agent.
4. **Verify:** confirm with a follow-up read or search where it materially de-risks the answer.
Discipline:
- Do not take significant actions beyond the clear scope of the user's request without confirming first.
- Do not assume a connector / tool / file exists — check (e.g. via `get_connected_accounts`) before referencing it.
- Path arguments must be the exact strings returned by tools; do not synthesise file paths.
</provider_hints> </provider_hints>

View file

@ -0,0 +1,17 @@
<provider_hints>
You are running on an xAI Grok model.
Maximum terseness:
- Answer in fewer than 4 lines unless the user asks for detail. One-word answers are best when they suffice.
- No preamble ("The answer is", "Here's what I'll do"), no postamble ("Hope that helps", "Let me know"). Get straight to the answer.
- Avoid restating the user's question.
- For factual lookups inside the knowledge base, give the answer with a single `[citation:chunk_id]` and stop.
Tool discipline:
- Use exactly ONE tool per assistant turn when investigating; wait for the result before deciding the next call. Do not loop on the same tool with the same arguments — pick a result and act.
- For obviously parallelizable read-only batches (multiple independent searches), one turn with several tool calls is fine — but never chain into a fishing expedition.
Style:
- No emojis unless the user asked. No nested bullets, no headers for short answers.
- If you can't help, say so in 1-2 sentences without explaining "why this could lead to…".
</provider_hints>

View file

@ -0,0 +1,21 @@
<provider_hints>
You are running on a Moonshot Kimi model (Kimi-K1.5 / Kimi-K2 / Kimi-K2.5+).
Action bias:
- Default to taking action with tools rather than describing solutions in prose. If a tool can answer the question, call the tool.
- Don't narrate routine reads, searches, or obvious next steps. Combine related progress into one short status line.
- Be thorough in actions (test what you build, verify what you change). Be brief in explanations.
Tool calls:
- Output multiple non-interfering tool calls in a SINGLE response — parallelism is a major efficiency win on this model.
- When the `task` tool is available, delegate focused subtasks to a subagent with full context (subagents don't inherit yours).
- Don't apologise or pre-announce tool calls. The tool call itself is self-explanatory.
Language:
- Respond in the SAME language as the user's most recent turn unless explicitly instructed otherwise.
Discipline:
- Stay on track. Never give the user more than what they asked for.
- Fact-check before stating anything as factual; don't fabricate citations.
- Keep it stupidly simple. Don't overcomplicate.
</provider_hints>

View file

@ -1,5 +1,21 @@
<provider_hints> <provider_hints>
You are running on a classic OpenAI chat model (GPT-4 family). Use direct You are running on a classic OpenAI chat model (GPT-4 family).
function-calling for tools. When editing files, use the standard `edit_file`
or `write_file` tools rather than diff-based patches. Persistence:
- Keep going until the user's query is completely resolved before yielding back. Don't end the turn at "I would do X" — actually do X.
- When you say "Next I will…" or "Now I will…", you MUST actually take that action in the same turn.
- If a tool call fails, diagnose and try again with corrected arguments; do not surface the raw error and stop.
Planning:
- Plan extensively before each tool call and reflect briefly on the result of the previous call. For tasks with 3+ steps, use the todo / planning tool and mark items as `in_progress` / `completed` as you go.
- Always announce the next action in ONE concise sentence before making a non-trivial tool call ("I'll search the KB for the migration spec.").
Output style:
- Conversational but professional. Plain prose for explanations, bullet points for findings, fenced code blocks (with language tags) for code.
- Don't dump tool output verbatim — summarise the relevant lines.
- Don't add a closing recap unless the user asked for one. After completing the work, just stop.
Tool calls:
- Issue independent tool calls in parallel within one response.
- Use specialised tools over generic ones (e.g. KB search before web search; named connectors over MCP fallback).
</provider_hints> </provider_hints>

View file

@ -0,0 +1,19 @@
<provider_hints>
You are running on an OpenAI Codex-class model (gpt-codex / codex-mini / gpt-*-codex).
Output style:
- Be concise. Don't dump fetched/searched content back at the user — reference paths or chunk ids instead.
- Reference sources as `path:line` (or `chunk:<id>`) so they're clickable. Stand-alone paths per reference, even when repeated.
- Prefer numbered lists (`1.`, `2.`, `3.`) when offering options the user can pick by replying with a single number.
- Skip headers and heavy formatting for simple confirmations.
- No emojis, no em-dashes, no nested bullets. Single-level lists only.
Code & structured-output tasks:
- Lead with a one-sentence explanation of the change before context. Don't open with "Summary:" — jump in.
- Suggest natural next steps (run tests, diff review, commit) only when they're genuinely the next move.
- For multi-line snippets use fenced code blocks with a language tag.
Tool calls:
- Run independent tool calls in parallel; chain only when later calls need earlier results.
- Don't ask permission ("Should I proceed?") — proceed with the most reasonable default and state what you did.
</provider_hints>

View file

@ -1,5 +1,21 @@
<provider_hints> <provider_hints>
You are running on an OpenAI reasoning model (o-series / GPT-5+). Be terse and You are running on an OpenAI reasoning model (GPT-5+ / o-series).
direct in your responses. When editing files, prefer the `apply_patch` tool format
where available. Avoid restating the user request before answering. Output style:
- Be terse and direct. Don't restate the user's request before answering.
- Don't begin with conversational openers ("Done!", "Got it", "Great question", "Sure thing"). Get to the answer or the action.
- Match response complexity to the task: simple questions → one-line answer; substantial work → lead with the outcome, then context, then any next steps.
- No nested bullets — keep lists flat (single level). For options the user can pick by replying with a number, use `1.` `2.` `3.`.
- Use inline backticks for paths/commands/identifiers; fenced code blocks (with language tags) for multi-line snippets.
Channels (for clients that support them):
- `commentary` — short progress updates only when they add genuinely new information (a discovery, a tradeoff, a blocker, the start of a non-trivial step). Don't narrate routine reads or obvious next steps.
- `final` — the completed response. Keep it self-contained; no "see above" / "see below" cross-references.
Tool calls:
- Parallelise independent tool calls in a single response (`multi_tool_use.parallel` where supported). Only sequence when a later call needs an earlier one's output.
- Don't ask permission ("Should I proceed?", "Do you want me to…?"). Pick the most reasonable default, do it, and state what you did.
Autonomy:
- Persist until the task is fully resolved within the current turn whenever feasible. Don't stop at analysis when the user clearly wants the change applied.
</provider_hints> </provider_hints>

View file

@ -976,7 +976,15 @@ class Document(BaseModel, TimestampMixin):
document_metadata = Column(JSON, nullable=True) document_metadata = Column(JSON, nullable=True)
content = Column(Text, nullable=False) content = Column(Text, nullable=False)
content_hash = Column(String, nullable=False, index=True, unique=True) # ``content_hash`` is intentionally NOT globally unique. In a real
# filesystem two files at different paths can hold identical bytes,
# and the agent's ``write_file`` flow needs that semantic to support
# copy / duplicate operations. Path uniqueness lives on
# ``unique_identifier_hash`` (per search space). The hash remains
# indexed because connector indexers consult it as a change-detection
# / cross-source dedup hint via :func:`check_duplicate_document`.
# See migration 133.
content_hash = Column(String, nullable=False, index=True)
unique_identifier_hash = Column(String, nullable=True, index=True, unique=True) unique_identifier_hash = Column(String, nullable=True, index=True, unique=True)
embedding = Column(Vector(config.embedding_model_instance.dimension)) embedding = Column(Vector(config.embedding_model_instance.dimension))

View file

@ -25,17 +25,33 @@ class TestProviderVariantDetection:
@pytest.mark.parametrize( @pytest.mark.parametrize(
"model_name,expected", "model_name,expected",
[ [
# GPT-4 family routes to "classic" (autonomous-persistence style)
("openai:gpt-4o-mini", "openai_classic"), ("openai:gpt-4o-mini", "openai_classic"),
("openai:gpt-4-turbo", "openai_classic"), ("openai:gpt-4-turbo", "openai_classic"),
# GPT-5 / o-series route to "reasoning" (channel-aware pragmatic)
("openai:gpt-5", "openai_reasoning"), ("openai:gpt-5", "openai_reasoning"),
("openai:gpt-5-codex", "openai_reasoning"),
("openai:o1-preview", "openai_reasoning"), ("openai:o1-preview", "openai_reasoning"),
("openai:o3-mini", "openai_reasoning"), ("openai:o3-mini", "openai_reasoning"),
# Codex family beats reasoning (more specific). Mirrors OpenCode
# ``system.ts`` — ``gpt-*-codex`` gets the code-purist prompt.
("openai:gpt-5-codex", "openai_codex"),
("openai:gpt-codex", "openai_codex"),
("openai:codex-mini", "openai_codex"),
# Anthropic + Google
("anthropic:claude-3-5-sonnet", "anthropic"), ("anthropic:claude-3-5-sonnet", "anthropic"),
("anthropic/claude-opus-4", "anthropic"), ("anthropic/claude-opus-4", "anthropic"),
("google:gemini-2.0-flash", "google"), ("google:gemini-2.0-flash", "google"),
("vertex:gemini-1.5-pro", "google"), ("vertex:gemini-1.5-pro", "google"),
# Newly-covered families
("moonshot:kimi-k2", "kimi"),
("openrouter:moonshot/kimi-k2.5", "kimi"),
("xai:grok-2", "grok"),
("openrouter:x-ai/grok-3", "grok"),
("openai:deepseek-v3", "deepseek"),
("deepseek:deepseek-r1", "deepseek"),
# Unknown families fall back to default (no provider block emitted)
("groq:mixtral-8x7b", "default"), ("groq:mixtral-8x7b", "default"),
("together:llama-3.1-70b", "default"),
(None, "default"), (None, "default"),
("", "default"), ("", "default"),
], ],
@ -43,6 +59,16 @@ class TestProviderVariantDetection:
def test_detection(self, model_name: str | None, expected: str) -> None: def test_detection(self, model_name: str | None, expected: str) -> None:
assert detect_provider_variant(model_name) == expected assert detect_provider_variant(model_name) == expected
def test_codex_takes_precedence_over_reasoning(self) -> None:
    """Regression guard: ``gpt-5-codex`` must NOT match the generic
    ``gpt-5`` reasoning regex first. Codex is the more specialised
    prompt and mirrors OpenCode's dispatch order.
    """
    # ``detect_provider_variant`` is already available at module level (the
    # parametrized ``test_detection`` in this class uses it), so no
    # function-scope import is needed here.
    assert detect_provider_variant("openai:gpt-5-codex") == "openai_codex"
    assert detect_provider_variant("openai:gpt-5") == "openai_reasoning"
class TestCompose: class TestCompose:
def test_default_prompt_has_required_blocks(self, fixed_today: datetime) -> None: def test_default_prompt_has_required_blocks(self, fixed_today: datetime) -> None:
@ -149,6 +175,52 @@ class TestCompose:
prompt = compose_system_prompt(today=fixed_today, model_name="custom:foo") prompt = compose_system_prompt(today=fixed_today, model_name="custom:foo")
assert "<provider_hints>" not in prompt assert "<provider_hints>" not in prompt
@pytest.mark.parametrize(
    "model_name,expected_marker",
    [
        # Each marker is a unique-ish phrase from the corresponding fragment.
        # If a fragment is renamed/rewritten such that the marker is gone,
        # update both the fragment and this test deliberately.
        ("openai:gpt-5-codex", "Codex-class"),
        ("openai:gpt-5", "OpenAI reasoning model"),
        ("openai:gpt-4o", "classic OpenAI chat model"),
        ("anthropic:claude-3-5-sonnet", "Anthropic Claude"),
        ("google:gemini-2.0-flash", "Google Gemini"),
        ("moonshot:kimi-k2", "Moonshot Kimi"),
        ("xai:grok-2", "xAI Grok"),
        ("deepseek:deepseek-r1", "DeepSeek"),
    ],
)
def test_each_known_variant_renders_with_its_marker(
    self,
    fixed_today: datetime,
    model_name: str,
    expected_marker: str,
) -> None:
    """Pin the dispatch table and the on-disk fragments together.

    Each supported model id must yield a ``<provider_hints>`` block that
    contains the identifying phrase of its fragment, so a missing or
    renamed fragment file fails here straight away.
    """
    rendered = compose_system_prompt(today=fixed_today, model_name=model_name)

    no_block_msg = (
        f"variant for {model_name!r} did not emit a provider_hints block; "
        "the corresponding providers/<variant>.md may be missing"
    )
    assert "<provider_hints>" in rendered, no_block_msg

    no_marker_msg = (
        f"variant for {model_name!r} emitted hints but lacked the "
        f"expected marker {expected_marker!r} — the fragment may have "
        "drifted from the dispatch table"
    )
    assert expected_marker in rendered, no_marker_msg
def test_provider_blocks_are_byte_stable_across_calls(
    self, fixed_today: datetime
) -> None:
    """Cache-stability guard: composing twice for one model id must give
    byte-identical prompts.
    """
    model_id = "moonshot:kimi-k2"
    first_pass = compose_system_prompt(today=fixed_today, model_name=model_id)
    second_pass = compose_system_prompt(today=fixed_today, model_name=model_id)
    assert first_pass == second_pass
def test_custom_system_instructions_override_default( def test_custom_system_instructions_override_default(
self, fixed_today: datetime self, fixed_today: datetime
) -> None: ) -> None:

View file

@ -0,0 +1,168 @@
"""Unit tests for kb_persistence filesystem-parity invariants.
Specifically, these tests pin down that the agent-driven write_file flow
treats path uniqueness — not content uniqueness — as the only hard
invariant. This mirrors a real filesystem: ``cp a b`` produces two files
with identical bytes living at different paths, and that should round-trip
through :class:`KnowledgeBasePersistenceMiddleware` without losing the copy.
"""
from __future__ import annotations
from typing import Any
from unittest.mock import AsyncMock, MagicMock
import numpy as np
import pytest
from app.agents.new_chat.middleware import kb_persistence
from app.db import Document
class _FakeResult:
    """Tiny substitute for ``sqlalchemy.engine.Result``.

    Wraps one canned value and hands it back from both scalar accessors.
    """

    def __init__(self, value: Any = None) -> None:
        self._value = value

    def scalar_one_or_none(self) -> Any:
        """Return the canned value (``None`` models "no row")."""
        return self._value

    def scalar(self) -> Any:
        """Return the same canned value as :meth:`scalar_one_or_none`."""
        return self.scalar_one_or_none()
class _FakeSession:
    """Bare-bones AsyncSession double covering what ``_create_document`` uses.

    Everything passed to ``add`` / ``add_all`` is recorded in ``added`` so
    tests can inspect the objects that would have been persisted.
    ``execute`` answers "no row" by default, i.e. no folder hierarchy
    pre-exists and no path collision exists; a test that needs a collision
    replaces ``execute`` with its own mock.
    """

    def __init__(self) -> None:
        self.added: list[Any] = []
        # Counter backing the fake primary keys handed out on flush.
        self._next_id = 1
        self.execute = AsyncMock(return_value=_FakeResult(None))
        # ``await session.flush()`` behaves as if the database assigned ids.
        self.flush = AsyncMock(side_effect=self._assign_missing_ids)

    async def _assign_missing_ids(self) -> None:
        """Give every recorded object that still lacks an ``id`` a unique one."""
        for obj in self.added:
            if getattr(obj, "id", None) is not None:
                continue
            obj.id = self._next_id
            self._next_id += 1

    def add(self, obj: Any) -> None:
        """Record a single object."""
        self.added.append(obj)

    def add_all(self, objs: list[Any]) -> None:
        """Record several objects, preserving their order."""
        self.added.extend(objs)
@pytest.fixture(autouse=True)
def _stub_embeddings_and_chunks(monkeypatch: pytest.MonkeyPatch) -> None:
    """Swap the embedding and chunking helpers for trivial stand-ins.

    Keeps unit tests from loading the embedding model: every text maps to
    an 8-dimensional zero vector and every document becomes one chunk.
    """

    def _zero_vectors(texts):
        return [np.zeros(8, dtype=np.float32) for _ in texts]

    def _single_chunk(content):
        return [content]

    monkeypatch.setattr(kb_persistence, "embed_texts", _zero_vectors)
    monkeypatch.setattr(kb_persistence, "chunk_text", _single_chunk)
@pytest.mark.asyncio
async def test_create_document_allows_identical_content_at_different_paths() -> None:
    """Core regression: ``cp /a/notes.md /b/notes-copy.md``.

    Two creates with byte-for-byte identical content must both succeed,
    because the path is the only filesystem-style unique key.
    """
    session = _FakeSession()
    content = "# Same body\n\nIdentical content used by two different paths.\n"

    async def _create_at(path: str) -> Document:
        # Both calls share everything except the virtual path.
        return await kb_persistence._create_document(
            session,  # type: ignore[arg-type]
            virtual_path=path,
            content=content,
            search_space_id=42,
            created_by_id="user-1",
        )

    first = await _create_at("/documents/a/notes.md")
    assert isinstance(first, Document)
    assert first.title == "notes.md"

    # The second create reuses the exact same bytes at a different path and
    # must not raise; that is the whole point of the filesystem-parity fix.
    second = await _create_at("/documents/b/notes-copy.md")
    assert isinstance(second, Document)
    assert second.title == "notes-copy.md"

    # Desired contract: a shared ``content_hash`` but distinct
    # ``unique_identifier_hash`` values, since the paths differ.
    assert first.content_hash == second.content_hash
    assert first.unique_identifier_hash != second.unique_identifier_hash
@pytest.mark.asyncio
async def test_create_document_still_rejects_path_collision() -> None:
    """Path uniqueness remains the hard invariant.

    When the path lookup reports an existing row, the create call must
    raise ``ValueError`` with a clear message. Per the original author's
    note, the commit loop relies on that to upsert via the existing-row
    code path.
    """
    session = _FakeSession()
    # The path has no folder parts, so ``_ensure_folder_hierarchy`` is a
    # no-op and the only SELECT executed is the path-collision check. That
    # SELECT is made to return an existing doc id, which triggers the guard.
    collision_lookup = AsyncMock(return_value=_FakeResult(value=99))
    session.execute = collision_lookup

    create_kwargs = {
        "virtual_path": "/documents/notes.md",
        "content": "anything",
        "search_space_id": 42,
        "created_by_id": "user-1",
    }
    with pytest.raises(ValueError, match="already exists at path"):
        await kb_persistence._create_document(
            session,  # type: ignore[arg-type]
            **create_kwargs,
        )
@pytest.mark.asyncio
async def test_create_document_does_not_query_for_content_hash_collision() -> None:
    """Regression guard: the legacy content-hash pre-check must stay gone.

    Counting ``execute`` calls is a brittle but effective way to lock that
    in. The path used here has no folder parts, so
    ``_ensure_folder_hierarchy`` short-circuits and the flow runs exactly
    one ``execute``: the path-collision SELECT. If a future refactor
    reintroduces a content-hash SELECT, this test fails loudly.

    The test requests no fixtures of its own; the module's autouse fixture
    already stubs embeddings and chunking.
    """
    session = _FakeSession()
    await kb_persistence._create_document(
        session,  # type: ignore[arg-type]
        virtual_path="/documents/notes.md",
        content="hello",
        search_space_id=42,
        created_by_id="user-1",
    )
    # Path-collision SELECT only. No content_hash SELECT.
    assert session.execute.await_count == 1, (
        f"Unexpected execute count {session.execute.await_count}; "
        "did the legacy content_hash collision pre-check get re-added?"
    )

View file

@ -210,6 +210,27 @@ button {
} }
} }
/* Citation-jump highlight: entrance pulse only.
   `SearchHighlightLeaf` (components/ui/search-highlight-node.tsx) is
   otherwise statically tinted. This animation plays once on mount to draw
   the eye to the cited text after `scrollIntoView` lands. The highlight
   itself stays until the user clicks inside the editor or another
   dismissal trigger fires in `EditorPanelContent`. */
@keyframes citation-flash-in {
	from {
		background-color: transparent;
		box-shadow: 0 0 0 0 transparent;
	}
	/* Peak of the pulse: strongest tint and widest ring. */
	40% {
		background-color: color-mix(in oklab, var(--primary) 30%, transparent);
		box-shadow: 0 0 0 3px color-mix(in oklab, var(--primary) 25%, transparent);
	}
	/* Resting look held once the pulse settles. */
	to {
		background-color: color-mix(in oklab, var(--primary) 15%, transparent);
		box-shadow: 0 0 0 1px color-mix(in oklab, var(--primary) 40%, transparent);
	}
}
/* Human-in-the-loop approval card animations */ /* Human-in-the-loop approval card animations */
@keyframes pulse-subtle { @keyframes pulse-subtle {
0%, 0%,

View file

@ -0,0 +1,19 @@
import { atom } from "jotai";
/**
 * Cross-component handoff for citation jumps. Set by `InlineCitation` when a
 * numeric chunk badge is clicked (after the document has been resolved); read
 * by `EditorPanelContent` once the matching document is open in the editor
 * panel, so it can scroll to and softly highlight the cited chunk inside the
 * rendered markdown.
 *
 * Cleared by the consumer only after a terminal state (a match was found, or
 * a miss) has been reached, so that an escalation refetch (2MB preview →
 * 16MB) keeps the pending intent alive across the re-render.
 */
export interface PendingChunkHighlight {
/** Id of the document that contains the cited chunk. */
documentId: number;
/** Id of the chunk whose citation badge was clicked. */
chunkId: number;
/** Text of the cited chunk; an empty string when no content was available. */
chunkText: string;
}
/** Pending citation-jump intent; `null` when no jump is waiting to be applied. */
export const pendingChunkHighlightAtom = atom<PendingChunkHighlight | null>(null);

View file

@ -1,26 +1,45 @@
"use client"; "use client";
import { FileText } from "lucide-react"; import { useQuery, useQueryClient } from "@tanstack/react-query";
import { useSetAtom } from "jotai";
import { ExternalLink, FileText } from "lucide-react";
import type { FC } from "react"; import type { FC } from "react";
import { useState } from "react"; import { useCallback, useEffect, useRef, useState } from "react";
import { toast } from "sonner";
import { pendingChunkHighlightAtom } from "@/atoms/document-viewer/pending-chunk-highlight.atom";
import { openEditorPanelAtom } from "@/atoms/editor/editor-panel.atom";
import { useCitationMetadata } from "@/components/assistant-ui/citation-metadata-context"; import { useCitationMetadata } from "@/components/assistant-ui/citation-metadata-context";
import { SourceDetailPanel } from "@/components/new-chat/source-detail-panel"; import { MarkdownViewer } from "@/components/markdown-viewer";
import { Citation } from "@/components/tool-ui/citation"; import { Citation } from "@/components/tool-ui/citation";
import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover";
import { Spinner } from "@/components/ui/spinner";
import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"; import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
import { documentsApiService } from "@/lib/apis/documents-api.service";
import { cacheKeys } from "@/lib/query-client/cache-keys";
interface InlineCitationProps { interface InlineCitationProps {
chunkId: number; chunkId: number;
isDocsChunk?: boolean; isDocsChunk?: boolean;
} }
const POPOVER_HOVER_CLOSE_DELAY_MS = 150;
/** /**
* Inline citation for knowledge-base chunks (numeric chunk IDs). * Inline citation badge for knowledge-base chunks (numeric chunk IDs) and
* Renders a clickable badge showing the actual chunk ID that opens the SourceDetailPanel. * Surfsense documentation chunks (`isDocsChunk`). Negative chunk IDs render as
* Negative chunk IDs indicate anonymous/synthetic uploads and render as a static badge. * a static "doc" pill (anonymous/synthetic uploads).
*
* Numeric KB chunks: clicking resolves the parent document via
* `getDocumentByChunk`, opens the document in the right side panel (alongside
* the chat does not replace it), and stages the cited chunk text in
* `pendingChunkHighlightAtom` so `EditorPanelContent` can scroll to and softly
* highlight it inside the rendered markdown.
*
* Surfsense docs chunks: rendered as a hover-controlled shadcn Popover that
* lazily fetches and previews the cited chunk inline, since those docs aren't
* indexed into the user's search space and have no tab to open.
*/ */
export const InlineCitation: FC<InlineCitationProps> = ({ chunkId, isDocsChunk = false }) => { export const InlineCitation: FC<InlineCitationProps> = ({ chunkId, isDocsChunk = false }) => {
const [isOpen, setIsOpen] = useState(false);
if (chunkId < 0) { if (chunkId < 0) {
return ( return (
<Tooltip> <Tooltip>
@ -38,26 +57,185 @@ export const InlineCitation: FC<InlineCitationProps> = ({ chunkId, isDocsChunk =
); );
} }
if (isDocsChunk) {
return <SurfsenseDocCitation chunkId={chunkId} />;
}
return <NumericChunkCitation chunkId={chunkId} />;
};
/**
 * Clickable badge for a numeric knowledge-base chunk citation.
 *
 * On click it resolves the parent document via `getDocumentByChunk`, stages
 * the cited chunk text in `pendingChunkHighlightAtom`, and opens the document
 * in the editor panel. While the lookup is in flight the button is disabled
 * and shows a spinner. Failures are reported with a toast.
 */
const NumericChunkCitation: FC<{ chunkId: number }> = ({ chunkId }) => {
	const queryClient = useQueryClient();
	const setPendingHighlight = useSetAtom(pendingChunkHighlightAtom);
	const openEditorPanel = useSetAtom(openEditorPanelAtom);
	const [resolving, setResolving] = useState(false);

	const handleClick = useCallback(async () => {
		// Ignore re-entrant clicks while a lookup is already running.
		if (resolving) return;
		setResolving(true);
		try {
			const data = await queryClient.fetchQuery({
				// Local key with explicit window. The shared `cacheKeys.documents.byChunk`
				// is window-agnostic (latent footgun); namespace the call to avoid
				// reusing a different-window cached result.
				queryKey: ["documents", "by-chunk", chunkId, "w0"] as const,
				queryFn: () =>
					documentsApiService.getDocumentByChunk({ chunk_id: chunkId, chunk_window: 0 }),
				staleTime: 5 * 60 * 1000,
			});
			// Prefer the exact chunk; fall back to the first returned chunk.
			const cited = data.chunks.find((c) => c.id === chunkId) ?? data.chunks[0];
			// Stage the highlight BEFORE opening the panel so `EditorPanelContent`
			// already sees the pending intent on its very first render. This avoids
			// a "fetch → render → no-pending → next-tick render with pending" race.
			setPendingHighlight({
				documentId: data.id,
				chunkId,
				chunkText: cited?.content ?? "",
			});
			openEditorPanel({
				documentId: data.id,
				searchSpaceId: data.search_space_id,
				title: data.title,
			});
		} catch (err) {
			// Keep the failure diagnostic; routine success-path logging is
			// deliberately absent so document content never reaches the console.
			console.warn("[citation:click] failed", err);
			toast.error(err instanceof Error ? err.message : "Couldn't open cited document");
		} finally {
			setResolving(false);
		}
	}, [chunkId, openEditorPanel, queryClient, resolving, setPendingHighlight]);

	return (
		<button
			type="button"
			onClick={handleClick}
			disabled={resolving}
			className="ml-0.5 inline-flex h-5 min-w-5 cursor-pointer items-center justify-center rounded-md bg-muted/60 px-1.5 text-[11px] font-medium text-muted-foreground align-baseline shadow-sm transition-colors hover:bg-muted hover:text-foreground focus-visible:ring-ring focus-visible:ring-2 focus-visible:outline-none disabled:cursor-progress disabled:opacity-70"
			title={`View source chunk #${chunkId}`}
			aria-label={`Jump to cited chunk ${chunkId}`}
		>
			{resolving ? <Spinner size="xs" /> : chunkId}
		</button>
	);
};
/**
 * "doc" pill for a Surfsense documentation chunk.
 *
 * Hovering or focusing the pill opens a popover that lazily fetches the
 * cited chunk and previews it inline. Leaving the pill or the popover closes
 * it after `POPOVER_HOVER_CLOSE_DELAY_MS`, so the pointer can travel between
 * the two without the popover flickering shut.
 */
const SurfsenseDocCitation: FC<{ chunkId: number }> = ({ chunkId }) => {
	const [open, setOpen] = useState(false);
	// Handle of the pending delayed-close timer, if any.
	const hideTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);

	const clearHideTimer = useCallback(() => {
		const pendingTimer = hideTimerRef.current;
		if (pendingTimer) {
			clearTimeout(pendingTimer);
			hideTimerRef.current = null;
		}
	}, []);

	const hideSoon = useCallback(() => {
		// Restart the countdown so only one close timer is ever pending.
		clearHideTimer();
		hideTimerRef.current = setTimeout(() => {
			setOpen(false);
			hideTimerRef.current = null;
		}, POPOVER_HOVER_CLOSE_DELAY_MS);
	}, [clearHideTimer]);

	// Shared by hover and focus: abort any pending close, then open.
	const showNow = useCallback(() => {
		clearHideTimer();
		setOpen(true);
	}, [clearHideTimer]);

	// Drop any pending timer when the component unmounts.
	useEffect(() => clearHideTimer, [clearHideTimer]);

	// The fetch is deferred until the popover is first opened.
	const { data, isLoading, error } = useQuery({
		queryKey: cacheKeys.documents.byChunk(`doc-${chunkId}`),
		queryFn: () => documentsApiService.getSurfsenseDocByChunk(chunkId),
		enabled: open,
		staleTime: 5 * 60 * 1000,
	});

	// Prefer the exact chunk; fall back to the first returned chunk.
	const citedChunk = data?.chunks.find((c) => c.id === chunkId) ?? data?.chunks[0];
	// True once the query is neither loading nor failed.
	const settled = !isLoading && !error;
	const headerTitle = data?.title ?? "Surfsense documentation";

	return (
		<Popover open={open} onOpenChange={setOpen}>
			<PopoverTrigger asChild>
				<button
					type="button"
					onClick={() => setOpen((prev) => !prev)}
					onMouseEnter={showNow}
					onMouseLeave={hideSoon}
					onFocus={showNow}
					onBlur={hideSoon}
					className="ml-0.5 inline-flex h-5 min-w-5 cursor-pointer items-center justify-center gap-0.5 rounded-md bg-primary/10 px-1.5 text-[11px] font-medium text-primary align-baseline shadow-sm transition-colors hover:bg-primary/15 focus-visible:ring-ring focus-visible:ring-2 focus-visible:outline-none"
					aria-label={`Show Surfsense documentation chunk ${chunkId}`}
					title="Surfsense documentation"
				>
					<FileText className="size-3" />
					doc
				</button>
			</PopoverTrigger>
			<PopoverContent
				className="w-96 max-w-[calc(100vw-2rem)] p-0"
				align="start"
				sideOffset={6}
				onMouseEnter={clearHideTimer}
				onMouseLeave={hideSoon}
				onOpenAutoFocus={(e) => e.preventDefault()}
			>
				<div className="flex items-center justify-between gap-2 border-b px-3 py-2">
					<div className="min-w-0">
						<p className="truncate text-sm font-medium">{headerTitle}</p>
						<p className="text-[11px] text-muted-foreground">Chunk #{chunkId}</p>
					</div>
					{data?.source && (
						<a
							href={data.source}
							target="_blank"
							rel="noopener noreferrer"
							className="inline-flex shrink-0 items-center gap-1 rounded-md px-2 py-1 text-[11px] font-medium text-primary hover:bg-primary/10"
						>
							<ExternalLink className="size-3" />
							Open
						</a>
					)}
				</div>
				<div className="max-h-72 overflow-auto px-3 py-2 text-sm">
					{isLoading && (
						<div className="flex items-center gap-2 py-4 text-muted-foreground">
							<Spinner size="xs" />
							<span className="text-xs">Loading</span>
						</div>
					)}
					{error && (
						<p className="py-4 text-xs text-destructive">
							{error instanceof Error ? error.message : "Failed to load chunk"}
						</p>
					)}
					{settled && citedChunk?.content && (
						<MarkdownViewer content={citedChunk.content} maxLength={1500} />
					)}
					{settled && !citedChunk?.content && (
						<p className="py-4 text-xs text-muted-foreground">No content available.</p>
					)}
				</div>
			</PopoverContent>
		</Popover>
	);
};

View file

@ -1,5 +1,6 @@
"use client"; "use client";
import { FindReplacePlugin } from "@platejs/find-replace";
import { useAtomValue, useSetAtom } from "jotai"; import { useAtomValue, useSetAtom } from "jotai";
import { import {
Check, Check,
@ -14,17 +15,21 @@ import {
import dynamic from "next/dynamic"; import dynamic from "next/dynamic";
import { useCallback, useEffect, useRef, useState } from "react"; import { useCallback, useEffect, useRef, useState } from "react";
import { toast } from "sonner"; import { toast } from "sonner";
import { pendingChunkHighlightAtom } from "@/atoms/document-viewer/pending-chunk-highlight.atom";
import { closeEditorPanelAtom, editorPanelAtom } from "@/atoms/editor/editor-panel.atom"; import { closeEditorPanelAtom, editorPanelAtom } from "@/atoms/editor/editor-panel.atom";
import { VersionHistoryButton } from "@/components/documents/version-history"; import { VersionHistoryButton } from "@/components/documents/version-history";
import type { PlateEditorInstance } from "@/components/editor/plate-editor";
import { SourceCodeEditor } from "@/components/editor/source-code-editor"; import { SourceCodeEditor } from "@/components/editor/source-code-editor";
import { MarkdownViewer } from "@/components/markdown-viewer"; import { MarkdownViewer } from "@/components/markdown-viewer";
import { Alert, AlertDescription } from "@/components/ui/alert"; import { Alert, AlertDescription } from "@/components/ui/alert";
import { Button } from "@/components/ui/button"; import { Button } from "@/components/ui/button";
import { Drawer, DrawerContent, DrawerHandle, DrawerTitle } from "@/components/ui/drawer"; import { Drawer, DrawerContent, DrawerHandle, DrawerTitle } from "@/components/ui/drawer";
import { CITATION_HIGHLIGHT_CLASS } from "@/components/ui/search-highlight-node";
import { Spinner } from "@/components/ui/spinner"; import { Spinner } from "@/components/ui/spinner";
import { useMediaQuery } from "@/hooks/use-media-query"; import { useMediaQuery } from "@/hooks/use-media-query";
import { useElectronAPI } from "@/hooks/use-platform"; import { useElectronAPI } from "@/hooks/use-platform";
import { authenticatedFetch, getBearerToken, redirectToLogin } from "@/lib/auth-utils"; import { authenticatedFetch, getBearerToken, redirectToLogin } from "@/lib/auth-utils";
import { buildCitationSearchCandidates } from "@/lib/citation-search";
import { inferMonacoLanguageFromPath } from "@/lib/editor-language"; import { inferMonacoLanguageFromPath } from "@/lib/editor-language";
const PlateEditor = dynamic( const PlateEditor = dynamic(
@ -32,7 +37,10 @@ const PlateEditor = dynamic(
{ ssr: false, loading: () => <EditorPanelSkeleton /> } { ssr: false, loading: () => <EditorPanelSkeleton /> }
); );
type CitationHighlightStatus = "exact" | "miss";
const LARGE_DOCUMENT_THRESHOLD = 2 * 1024 * 1024; // 2MB const LARGE_DOCUMENT_THRESHOLD = 2 * 1024 * 1024; // 2MB
const CITATION_MAX_LENGTH = 16 * 1024 * 1024; // 16MB on-demand cap for citation jumps
interface EditorContent { interface EditorContent {
document_id: number; document_id: number;
@ -136,6 +144,61 @@ export function EditorPanelContent({
const [displayTitle, setDisplayTitle] = useState(title || "Untitled"); const [displayTitle, setDisplayTitle] = useState(title || "Untitled");
const isLocalFileMode = kind === "local_file"; const isLocalFileMode = kind === "local_file";
const editorRenderMode: EditorRenderMode = isLocalFileMode ? "source_code" : "rich_markdown"; const editorRenderMode: EditorRenderMode = isLocalFileMode ? "source_code" : "rich_markdown";
// --- Citation-jump highlight wiring ----------------------------------
// `EditorPanelContent` is the consumer of `pendingChunkHighlightAtom`: when
// a citation badge is clicked, the badge stages `{documentId, chunkId,
// chunkText}` and opens this panel. We drive Plate's `FindReplacePlugin`
// (registered in every preset) to highlight the cited text natively via
// Slate decorations — no DOM walking, no Range gymnastics. The state
// machine below escalates the document fetch from 2MB → 16MB once if no
// candidate snippet matched in the preview, and surfaces miss outcomes
// via an inline alert.
const pending = useAtomValue(pendingChunkHighlightAtom);
const setPendingHighlight = useSetAtom(pendingChunkHighlightAtom);
const [fetchKey, setFetchKey] = useState(0);
const [maxLengthOverride, setMaxLengthOverride] = useState<number | null>(null);
const [highlightResult, setHighlightResult] = useState<CitationHighlightStatus | null>(null);
const editorRef = useRef<PlateEditorInstance | null>(null);
const escalatedForRef = useRef<number | null>(null);
const lastAppliedChunkIdRef = useRef<number | null>(null);
// Tracks whether a citation highlight is currently decorated in the
// editor. We use a ref (not state) because the click-to-dismiss handler
// runs in a stable callback that would otherwise close over stale state.
const isHighlightActiveRef = useRef(false);
// Once a citation jump targets this doc we have to keep `PlateEditor`
// mounted for the *rest of the doc session* — even after the highlight
// effect clears `pendingChunkHighlightAtom` (which it does as soon as
// the decoration is applied, so a follow-up citation on the same chunk
// can re-trigger). Without this latch, non-editable docs would re-render
// back into `MarkdownViewer` the instant `pending` is released, tearing
// down the Plate decorations and dropping the highlight after a frame.
const [stickyPlateMode, setStickyPlateMode] = useState(false);
const clearCitationSearch = useCallback(() => {
isHighlightActiveRef.current = false;
const editor = editorRef.current;
if (!editor) return;
try {
editor.setOption(FindReplacePlugin, "search", "");
editor.api.redecorate();
} catch (err) {
console.warn("[EditorPanelContent] clearCitationSearch failed:", err);
}
}, []);
// Dismiss the highlight when the user interacts with the editor surface.
// `onPointerDown` fires before focus / selection changes so the click
// itself feels responsive — the highlight clears in the same event tick
// that places the cursor. No-op when nothing is highlighted, so we don't
// thrash `redecorate` on every click in normal editing.
const handleEditorPointerDown = useCallback(() => {
if (!isHighlightActiveRef.current) return;
clearCitationSearch();
setHighlightResult(null);
}, [clearCitationSearch]);
const isCitationTarget = !!pending && !isLocalFileMode && pending.documentId === documentId;
const resolveLocalVirtualPath = useCallback( const resolveLocalVirtualPath = useCallback(
async (candidatePath: string): Promise<string> => { async (candidatePath: string): Promise<string> => {
if (!electronAPI?.getAgentFilesystemMounts) { if (!electronAPI?.getAgentFilesystemMounts) {
@ -155,6 +218,8 @@ export function EditorPanelContent({
const isLargeDocument = (editorDoc?.content_size_bytes ?? 0) > LARGE_DOCUMENT_THRESHOLD; const isLargeDocument = (editorDoc?.content_size_bytes ?? 0) > LARGE_DOCUMENT_THRESHOLD;
// `fetchKey` is an explicit re-fetch trigger (escalation bumps it to force
// a new request even when documentId/searchSpaceId haven't changed).
useEffect(() => { useEffect(() => {
const controller = new AbortController(); const controller = new AbortController();
setIsLoading(true); setIsLoading(true);
@ -166,6 +231,12 @@ export function EditorPanelContent({
setIsEditing(false); setIsEditing(false);
initialLoadDone.current = false; initialLoadDone.current = false;
changeCountRef.current = 0; changeCountRef.current = 0;
// Clear any in-flight FindReplacePlugin search before the editor
// re-mounts on new content (a fresh editor key is generated below
// from documentId + isEditing, so the previous editor + its
// decorations are about to be discarded anyway, but we belt-and-
// brace here for the case where only `fetchKey` changed).
clearCitationSearch();
const doFetch = async () => { const doFetch = async () => {
try { try {
@ -210,7 +281,11 @@ export function EditorPanelContent({
const url = new URL( const url = new URL(
`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/search-spaces/${searchSpaceId}/documents/${documentId}/editor-content` `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/search-spaces/${searchSpaceId}/documents/${documentId}/editor-content`
); );
url.searchParams.set("max_length", String(LARGE_DOCUMENT_THRESHOLD)); url.searchParams.set("max_length", String(maxLengthOverride ?? LARGE_DOCUMENT_THRESHOLD));
// `fetchKey` participates here so biome's noUnusedVariables sees it
// as consumed; bumping it forces a fresh request even when the URL
// is otherwise identical.
if (fetchKey > 0) url.searchParams.set("_n", String(fetchKey));
const response = await authenticatedFetch(url.toString(), { method: "GET" }); const response = await authenticatedFetch(url.toString(), { method: "GET" });
@ -256,8 +331,259 @@ export function EditorPanelContent({
resolveLocalVirtualPath, resolveLocalVirtualPath,
searchSpaceId, searchSpaceId,
title, title,
fetchKey,
maxLengthOverride,
clearCitationSearch,
]); ]);
// Reset citation-jump bookkeeping whenever the panel switches to a different
// document (or local file). Body only writes setters — the deps are the
// real triggers we want to react to.
// biome-ignore lint/correctness/useExhaustiveDependencies: documentId/localFilePath are intentional triggers.
useEffect(() => {
clearCitationSearch();
escalatedForRef.current = null;
lastAppliedChunkIdRef.current = null;
setHighlightResult(null);
setMaxLengthOverride(null);
setFetchKey(0);
// Drop sticky Plate mode when the panel moves to a different doc
// — the next doc starts in its preferred render mode (Plate for
// editable, MarkdownViewer for everything else) until/unless a
// citation jump targets it.
setStickyPlateMode(false);
}, [documentId, localFilePath, clearCitationSearch]);
// Latch sticky Plate mode the first time a citation jump targets this
// doc. We keep it sticky for the remainder of this doc session so the
// highlight effect's `setPendingHighlight(null)` doesn't unmount the
// editor mid-flight (see comment on `stickyPlateMode` declaration).
useEffect(() => {
if (isCitationTarget) setStickyPlateMode(true);
}, [isCitationTarget]);
// `isEditorReady` is what `useEffect` actually depends on — `editorRef`
// is a ref so changes don't trigger re-runs. We flip this to `true` once
// `PlateEditor` calls back with its live editor instance (its
// `usePlateEditor` value-init runs synchronously, so by the time this
// flips true the markdown is already deserialized into the Slate tree).
const [isEditorReady, setIsEditorReady] = useState(false);
const handleEditorReady = useCallback((editor: PlateEditorInstance | null) => {
console.log("[citation:editor] handleEditorReady", { ready: !!editor });
editorRef.current = editor;
setIsEditorReady(!!editor);
}, []);
// --- Citation jump highlight effect -----------------------------------
// Drives Plate's FindReplacePlugin to highlight the cited chunk:
// 1. Build candidate snippets from the chunk text (first sentence,
// first 8 words, full chunk if short). Plate's decorate runs per-
// block and won't cross block boundaries, so the shorter
// candidates exist to give us something that fits in one
// paragraph / heading.
// 2. For each candidate: setOption('search', ...) → redecorate →
// wait two animation frames for React to flush → query the editor
// DOM for `.${CITATION_HIGHLIGHT_CLASS}`. First hit wins.
//
// Why a className and not a `data-*` attribute? Plate's
// `PlateLeaf` runs its props through `useNodeAttributes`, which
// only forwards `attributes`, `className`, `ref`, and `style` —
// arbitrary `data-*` attributes are silently dropped. `className`
// is the only escape hatch guaranteed to survive into the DOM.
// 3. On hit: smooth-scroll the first match into view, mark the
// highlight active (so a click inside the editor can dismiss it),
// release the pending atom.
// 4. On terminal miss: if the doc was truncated and we haven't
// escalated yet, bump the fetch's `max_length` to the citation
// cap and re-fetch — the post-refetch render will re-run this
// effect against the larger preview. Otherwise, release the
// atom and show the miss alert.
useEffect(() => {
console.log("[citation:effect] fired", {
isCitationTarget,
pendingDocId: pending?.documentId,
pendingChunkId: pending?.chunkId,
pendingChunkTextLen: pending?.chunkText?.length,
documentId,
isLocalFileMode,
isEditing,
hasMarkdown: !!editorDoc?.source_markdown,
markdownLen: editorDoc?.source_markdown?.length,
truncated: editorDoc?.truncated,
isEditorReady,
editorRefSet: !!editorRef.current,
maxLengthOverride,
});
if (!isCitationTarget || !pending) {
console.log("[citation:effect] guard ✗ no citation target / no pending");
return;
}
if (isLocalFileMode || isEditing) {
console.log("[citation:effect] guard ✗ localFileMode/editing");
return;
}
if (!editorDoc?.source_markdown) {
console.log("[citation:effect] guard ✗ source_markdown not ready");
return;
}
if (!isEditorReady) {
console.log("[citation:effect] guard ✗ editor not ready yet");
return;
}
const editor = editorRef.current;
if (!editor) {
console.log("[citation:effect] guard ✗ editorRef.current is null");
return;
}
if (lastAppliedChunkIdRef.current !== pending.chunkId) {
lastAppliedChunkIdRef.current = pending.chunkId;
}
let cancelled = false;
const finishMiss = () => {
console.log("[citation:effect] terminal miss — no candidate matched");
try {
editor.setOption(FindReplacePlugin, "search", "");
editor.api.redecorate();
} catch (err) {
console.warn("[EditorPanelContent] reset search after miss failed:", err);
}
const canEscalate =
editorDoc.truncated === true &&
(maxLengthOverride ?? LARGE_DOCUMENT_THRESHOLD) < CITATION_MAX_LENGTH &&
escalatedForRef.current !== pending.chunkId;
console.log("[citation:effect] miss decision", {
truncated: editorDoc.truncated,
currentMaxLength: maxLengthOverride ?? LARGE_DOCUMENT_THRESHOLD,
canEscalate,
});
if (canEscalate) {
escalatedForRef.current = pending.chunkId;
setMaxLengthOverride(CITATION_MAX_LENGTH);
setFetchKey((k) => k + 1);
// Keep the atom set so the post-refetch render re-runs.
return;
}
setHighlightResult("miss");
setPendingHighlight(null);
};
const tryCandidates = async () => {
const candidates = buildCitationSearchCandidates(pending.chunkText);
console.log("[citation:effect] candidates built", {
count: candidates.length,
previews: candidates.map((c) => c.slice(0, 60)),
});
if (candidates.length === 0) {
if (!cancelled) finishMiss();
return;
}
// Resolve the editor's rendered DOM root via Slate's stable
// `[data-slate-editor="true"]` attribute (set by slate-react's
// `<Editable>`). Scoping queries to this root prevents
// `<mark>` elements rendered elsewhere on the page (e.g. chat
// search-highlight leaves in another mounted PlateEditor) from
// being mistaken for citation hits.
const editorRoot = document.querySelector<HTMLElement>('[data-slate-editor="true"]');
console.log("[citation:effect] editor root", {
hasRoot: !!editorRoot,
});
const root: ParentNode = editorRoot ?? document;
for (let i = 0; i < candidates.length; i++) {
const candidate = candidates[i];
if (cancelled) return;
try {
editor.setOption(FindReplacePlugin, "search", candidate);
editor.api.redecorate();
console.log(`[citation:effect] try #${i} setOption + redecorate`, {
len: candidate.length,
preview: candidate.slice(0, 80),
});
} catch (err) {
console.warn("[EditorPanelContent] setOption/redecorate failed:", err);
continue;
}
// Two rAFs: first lets Slate flush its onChange, second lets
// React commit the decoration leaves into the DOM.
await new Promise<void>((resolve) =>
requestAnimationFrame(() => requestAnimationFrame(() => resolve()))
);
if (cancelled) return;
// Primary probe: by our stable class on the rendered <mark>.
let el = root.querySelector<HTMLElement>(`.${CITATION_HIGHLIGHT_CLASS}`);
const classMarkCount = root.querySelectorAll(`.${CITATION_HIGHLIGHT_CLASS}`).length;
// Diagnostic fallback: any <mark> inside the editor root.
// If we ever see allMarks > 0 but classMarkCount === 0,
// the className was stripped again and we need to revisit
// `useNodeAttributes` filtering.
const allMarkCount = root.querySelectorAll("mark").length;
if (!el && allMarkCount > 0) {
el = root.querySelector<HTMLElement>("mark");
}
console.log(`[citation:effect] try #${i} DOM probe`, {
foundEl: !!el,
classMarkCount,
allMarkCount,
usedFallback: !!el && classMarkCount === 0,
});
if (el) {
try {
el.scrollIntoView({ block: "center", behavior: "smooth" });
} catch {
el.scrollIntoView();
}
isHighlightActiveRef.current = true;
setHighlightResult("exact");
console.log(`[citation:effect] ✓ exact via candidate #${i} — atom released`);
// No auto-clear timer — the highlight is intentionally
// permanent until the user clicks inside the editor (see
// `handleEditorPointerDown`) or another dismissal trigger
// fires (doc switch, edit-mode toggle, panel unmount,
// next citation jump). Sticky Plate mode keeps the
// editor mounted after the atom clears.
setPendingHighlight(null);
return;
}
}
if (!cancelled) finishMiss();
};
void tryCandidates();
return () => {
cancelled = true;
};
}, [
isCitationTarget,
pending,
documentId,
editorDoc?.source_markdown,
editorDoc?.truncated,
isLocalFileMode,
isEditing,
isEditorReady,
maxLengthOverride,
clearCitationSearch,
setPendingHighlight,
]);
// When the panel unmounts, drop whatever citation highlight is still active.
useEffect(() => {
	const dropHighlight = () => {
		clearCitationSearch();
	};
	return dropHighlight;
}, [clearCitationSearch]);
// Entering edit mode takes Plate out of readOnly, so wipe the citation
// search (and the recorded highlight result) to keep stale highlight leaves
// out of the editing surface.
useEffect(() => {
	if (!isEditing) return;
	clearCitationSearch();
	setHighlightResult(null);
}, [isEditing, clearCitationSearch]);
useEffect(() => { useEffect(() => {
return () => { return () => {
if (copyResetTimeoutRef.current) { if (copyResetTimeoutRef.current) {
@ -367,6 +693,15 @@ export function EditorPanelContent({
EDITABLE_DOCUMENT_TYPES.has(editorDoc.document_type ?? "")) && EDITABLE_DOCUMENT_TYPES.has(editorDoc.document_type ?? "")) &&
!isLargeDocument !isLargeDocument
: false; : false;
// Use PlateEditor for any of:
// - Editable doc types (FILE/NOTE) — existing editing UX.
// - Active citation jump in flight (`isCitationTarget`) — covers the
// mount in the very first render where the atom is set but the
// sticky effect hasn't fired yet.
// - Sticky Plate mode latched on a previous citation jump — keeps
// the editor mounted (with its decorations) after the highlight
// effect clears the atom. Resets when the doc changes.
const renderInPlateEditor = isEditableType || isCitationTarget || stickyPlateMode;
const hasUnsavedChanges = editedMarkdown !== null; const hasUnsavedChanges = editedMarkdown !== null;
const showDesktopHeader = !!onClose; const showDesktopHeader = !!onClose;
const showEditingActions = isEditableType && isEditing; const showEditingActions = isEditableType && isEditing;
@ -381,6 +716,90 @@ export function EditorPanelContent({
setIsEditing(false); setIsEditing(false);
}, [editorDoc?.source_markdown]); }, [editorDoc?.source_markdown]);
// Fetch the document's markdown export from the backend and hand it to the
// browser as a file download. Does nothing when the search space or document
// id is missing. The file name comes from the response's
// `content-disposition` header when it carries a quoted `filename`,
// otherwise from the document title (falling back to "document").
const handleDownloadMarkdown = useCallback(async () => {
	if (!(searchSpaceId && documentId)) return;
	setDownloading(true);
	try {
		const endpoint = `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/search-spaces/${searchSpaceId}/documents/${documentId}/download-markdown`;
		const response = await authenticatedFetch(endpoint, { method: "GET" });
		if (!response.ok) {
			throw new Error("Download failed");
		}
		const objectUrl = URL.createObjectURL(await response.blob());
		const headerFilename = response.headers
			.get("content-disposition")
			?.match(/filename="(.+)"/)?.[1];
		// A temporary anchor is the portable way to trigger a named download.
		const anchor = document.createElement("a");
		anchor.href = objectUrl;
		anchor.download = headerFilename ?? `${editorDoc?.title || "document"}.md`;
		document.body.appendChild(anchor);
		anchor.click();
		anchor.remove();
		URL.revokeObjectURL(objectUrl);
		toast.success("Download started");
	} catch {
		toast.error("Failed to download document");
	} finally {
		setDownloading(false);
	}
}, [documentId, editorDoc?.title, searchSpaceId]);
// We no longer surface an "approximate" status — Plate's FindReplacePlugin
// either decorates an exact match or it doesn't, and the candidate snippet
// strategy (first sentence → first 8 words → full chunk) means we either
// land on the citation start or fall through to the miss alert.
const showMissAlert = isCitationTarget && highlightResult === "miss";
// Shared "Download .md" call-to-action used by both alerts below, so the
// two buttons cannot drift apart. While a download is in flight the label
// is hidden (kept in layout via opacity) and a spinner is overlaid.
const downloadMarkdownButton = (
	<Button
		variant="outline"
		size="sm"
		className="relative shrink-0"
		disabled={downloading}
		onClick={handleDownloadMarkdown}
	>
		<span className={`flex items-center gap-1.5 ${downloading ? "opacity-0" : ""}`}>
			<Download className="size-3.5" />
			Download .md
		</span>
		{downloading && <Spinner size="sm" className="absolute" />}
	</Button>
);
// Destructive alert shown when a citation jump could not locate the cited
// text. The download button is offered only when the loaded document is
// truncated.
const citationAlerts = showMissAlert && (
	<Alert variant="destructive" className="mb-4">
		<FileQuestionMark className="size-4" />
		<AlertDescription className="flex items-center justify-between gap-4">
			<span>Cited section couldn&apos;t be located in this view.</span>
			{editorDoc?.truncated && downloadMarkdownButton}
		</AlertDescription>
	</Alert>
);
// Informational alert for documents too large for the editor (never shown in
// local-file mode): reports size in whole MB and the chunk count, and always
// offers the markdown download.
const largeDocAlert = isLargeDocument && !isLocalFileMode && editorDoc && (
	<Alert className="mb-4">
		<FileText className="size-4" />
		<AlertDescription className="flex items-center justify-between gap-4">
			<span>
				This document is too large for the editor (
				{Math.round((editorDoc.content_size_bytes ?? 0) / 1024 / 1024)}MB,{" "}
				{editorDoc.chunk_count ?? 0} chunks). Showing a preview below.
			</span>
			{downloadMarkdownButton}
		</AlertDescription>
	</Alert>
);
return ( return (
<> <>
{showDesktopHeader ? ( {showDesktopHeader ? (
@ -565,61 +984,6 @@ export function EditorPanelContent({
</p> </p>
</div> </div>
</div> </div>
) : isLargeDocument && !isLocalFileMode ? (
<div className="h-full overflow-y-auto px-5 py-4">
<Alert className="mb-4">
<FileText className="size-4" />
<AlertDescription className="flex items-center justify-between gap-4">
<span>
This document is too large for the editor (
{Math.round((editorDoc.content_size_bytes ?? 0) / 1024 / 1024)}MB,{" "}
{editorDoc.chunk_count ?? 0} chunks). Showing a preview below.
</span>
<Button
variant="outline"
size="sm"
className="relative shrink-0"
disabled={downloading}
onClick={async () => {
setDownloading(true);
try {
if (!searchSpaceId || !documentId) {
throw new Error("Missing document context");
}
const response = await authenticatedFetch(
`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/search-spaces/${searchSpaceId}/documents/${documentId}/download-markdown`,
{ method: "GET" }
);
if (!response.ok) throw new Error("Download failed");
const blob = await response.blob();
const url = URL.createObjectURL(blob);
const a = document.createElement("a");
a.href = url;
const disposition = response.headers.get("content-disposition");
const match = disposition?.match(/filename="(.+)"/);
a.download = match?.[1] ?? `${editorDoc.title || "document"}.md`;
document.body.appendChild(a);
a.click();
a.remove();
URL.revokeObjectURL(url);
toast.success("Download started");
} catch {
toast.error("Failed to download document");
} finally {
setDownloading(false);
}
}}
>
<span className={`flex items-center gap-1.5 ${downloading ? "opacity-0" : ""}`}>
<Download className="size-3.5" />
Download .md
</span>
{downloading && <Spinner size="sm" className="absolute" />}
</Button>
</AlertDescription>
</Alert>
<MarkdownViewer content={editorDoc.source_markdown} />
</div>
) : editorRenderMode === "source_code" ? ( ) : editorRenderMode === "source_code" ? (
<div className="h-full overflow-hidden"> <div className="h-full overflow-hidden">
<SourceCodeEditor <SourceCodeEditor
@ -638,7 +1002,30 @@ export function EditorPanelContent({
}} }}
/> />
</div> </div>
) : isEditableType ? ( ) : isLargeDocument && !isLocalFileMode && !isCitationTarget ? (
// Large doc, no active citation — fast Streamdown preview
// + download CTA. We only fall back to MarkdownViewer here
// because Plate is heavy on multi-MB docs and the user
// isn't waiting on a specific citation to render.
<div className="h-full overflow-y-auto px-5 py-4">
{largeDocAlert}
<MarkdownViewer content={editorDoc.source_markdown} />
</div>
) : renderInPlateEditor ? (
// Editable doc (FILE/NOTE) OR active citation jump (any
// doc type). The citation path uses Plate's
// FindReplacePlugin for native, decoration-based
// highlighting — see the citation-jump highlight effect
// above for how `editorRef` and `handleEditorReady` are
// wired.
<div className="flex h-full min-h-0 flex-col">
{(citationAlerts || (isLargeDocument && isCitationTarget && !isLocalFileMode)) && (
<div className="shrink-0 px-5 pt-4">
{isLargeDocument && isCitationTarget && largeDocAlert}
{citationAlerts}
</div>
)}
<div className="flex-1 min-h-0 overflow-hidden" onPointerDown={handleEditorPointerDown}>
<PlateEditor <PlateEditor
key={`${isLocalFileMode ? (localFilePath ?? "local-file") : documentId}-${isEditing ? "editing" : "viewing"}`} key={`${isLocalFileMode ? (localFilePath ?? "local-file") : documentId}-${isEditing ? "editing" : "viewing"}`}
preset="full" preset="full"
@ -651,7 +1038,10 @@ export function EditorPanelContent({
reserveToolbarSpace reserveToolbarSpace
defaultEditing={isEditing} defaultEditing={isEditing}
className="[&_[role=toolbar]]:!bg-sidebar" className="[&_[role=toolbar]]:!bg-sidebar"
onEditorReady={handleEditorReady}
/> />
</div>
</div>
) : ( ) : (
<div className="h-full overflow-y-auto px-5 py-4"> <div className="h-full overflow-y-auto px-5 py-4">
<MarkdownViewer content={editorDoc.source_markdown} /> <MarkdownViewer content={editorDoc.source_markdown} />

View file

@ -12,6 +12,12 @@ import { type EditorPreset, presetMap } from "@/components/editor/presets";
import { escapeMdxExpressions } from "@/components/editor/utils/escape-mdx"; import { escapeMdxExpressions } from "@/components/editor/utils/escape-mdx";
import { Editor, EditorContainer } from "@/components/ui/editor"; import { Editor, EditorContainer } from "@/components/ui/editor";
/** Live editor instance returned by `usePlateEditor`. Exposed via the
* `onEditorReady` prop so callers (e.g. `EditorPanelContent`) can drive
* plugin options imperatively — most notably setting
* `FindReplacePlugin`'s `search` option for citation-jump highlights. */
export type PlateEditorInstance = ReturnType<typeof usePlateEditor>;
export interface PlateEditorProps { export interface PlateEditorProps {
/** Markdown string to load as initial content */ /** Markdown string to load as initial content */
markdown?: string; markdown?: string;
@ -62,6 +68,15 @@ export interface PlateEditorProps {
* without modifying the core editor component. * without modifying the core editor component.
*/ */
extraPlugins?: AnyPluginConfig[]; extraPlugins?: AnyPluginConfig[];
/**
* Called whenever the live editor instance (re)mounts, with `null` on
* unmount. Used by callers that need to drive plugin options imperatively
* — e.g. `EditorPanelContent` setting `FindReplacePlugin`'s `search`
* option for citation-jump highlights. The callback receives the editor
* exactly once per editor lifetime, followed by a single `null` call when
* that editor goes away (the parent's `key` prop forces a fresh editor
* when needed, e.g. on edit-mode toggle).
*/
onEditorReady?: (editor: PlateEditorInstance | null) => void;
} }
function PlateEditorContent({ function PlateEditorContent({
@ -100,6 +115,7 @@ export function PlateEditor({
defaultEditing = false, defaultEditing = false,
preset = "full", preset = "full",
extraPlugins = [], extraPlugins = [],
onEditorReady,
}: PlateEditorProps) { }: PlateEditorProps) {
const lastMarkdownRef = useRef(markdown); const lastMarkdownRef = useRef(markdown);
const lastHtmlRef = useRef(html); const lastHtmlRef = useRef(html);
@ -156,6 +172,21 @@ export function PlateEditor({
: undefined, : undefined,
}); });
// Expose the live editor instance to imperative callers (e.g. citation
// jump highlights). The latest `onEditorReady` prop is mirrored into a ref
// so the notify effect below depends only on `editor`: a caller that passes
// a freshly-created callback on every render does NOT cause the callback to
// be invoked again, and the cleanup still reaches the most recent callback
// when it reports `null`.
const onEditorReadyRef = useRef(onEditorReady);
// Keep the ref pointing at the most recent callback prop.
useEffect(() => {
onEditorReadyRef.current = onEditorReady;
}, [onEditorReady]);
// Hand the editor to the caller; the cleanup (run when `editor` changes or
// the component unmounts) reports `null`.
useEffect(() => {
onEditorReadyRef.current?.(editor);
return () => onEditorReadyRef.current?.(null);
}, [editor]);
// Update editor content when html prop changes externally // Update editor content when html prop changes externally
useEffect(() => { useEffect(() => {
if (html !== undefined && html !== lastHtmlRef.current) { if (html !== undefined && html !== lastHtmlRef.current) {

View file

@ -1,5 +1,6 @@
"use client"; "use client";
import { FindReplacePlugin } from "@platejs/find-replace";
import type { AnyPluginConfig } from "platejs"; import type { AnyPluginConfig } from "platejs";
import { TrailingBlockPlugin } from "platejs"; import { TrailingBlockPlugin } from "platejs";
@ -17,6 +18,30 @@ import { SelectionKit } from "@/components/editor/plugins/selection-kit";
import { SlashCommandKit } from "@/components/editor/plugins/slash-command-kit"; import { SlashCommandKit } from "@/components/editor/plugins/slash-command-kit";
import { TableKit } from "@/components/editor/plugins/table-kit"; import { TableKit } from "@/components/editor/plugins/table-kit";
import { ToggleKit } from "@/components/editor/plugins/toggle-kit"; import { ToggleKit } from "@/components/editor/plugins/toggle-kit";
import { SearchHighlightLeaf } from "@/components/ui/search-highlight-node";
/**
* Citation-jump highlighter. Re-uses Plate's built-in `FindReplacePlugin`
* (decorate-only, no editing surface) to drive the "scroll-to-cited-text"
* UX in `EditorPanelContent`. We register it in every preset because:
* - Decorate is a no-op when `search` is empty (single getOptions() check
* per block), so cost is effectively zero for non-citation viewers.
* - Keeping it preset-agnostic means citations work whether the doc is
* opened in editable (`full`) or pure-viewer (`readonly`) modes.
*
* The parent component drives `setOption(FindReplacePlugin, 'search', ...)`
* + `editor.api.redecorate()` to trigger highlights, then queries the
* editor DOM for `.citation-highlight-leaf` to scroll the first match
* into view. (We can't use a `data-*` attribute here — Plate's
* `PlateLeaf` runs props through `useNodeAttributes`, which only forwards
* `attributes`, `className`, `ref`, `style`; arbitrary `data-*` props are
* silently dropped.) See `components/ui/search-highlight-node.tsx` for
* the leaf component and `CITATION_HIGHLIGHT_CLASS` constant.
*/
// `FindReplacePlugin` preconfigured for citation jumps; added to the
// `fullPreset`, `minimalPreset` and `readonlyPreset` arrays in this file.
const CitationFindReplacePlugin = FindReplacePlugin.configure({
// Start with an empty search string; callers set `search` later to
// trigger highlighting.
options: { search: "" },
// Render decorated matches with the citation highlight leaf component.
render: { node: SearchHighlightLeaf },
});
/** /**
* Full preset every plugin kit enabled. * Full preset every plugin kit enabled.
@ -38,6 +63,7 @@ export const fullPreset: AnyPluginConfig[] = [
...AutoformatKit, ...AutoformatKit,
...DndKit, ...DndKit,
TrailingBlockPlugin, TrailingBlockPlugin,
CitationFindReplacePlugin,
]; ];
/** /**
@ -52,6 +78,7 @@ export const minimalPreset: AnyPluginConfig[] = [
...LinkKit, ...LinkKit,
...AutoformatKit, ...AutoformatKit,
TrailingBlockPlugin, TrailingBlockPlugin,
CitationFindReplacePlugin,
]; ];
/** /**
@ -68,6 +95,7 @@ export const readonlyPreset: AnyPluginConfig[] = [
...CalloutKit, ...CalloutKit,
...ToggleKit, ...ToggleKit,
...MathKit, ...MathKit,
CitationFindReplacePlugin,
]; ];
/** All available preset names */ /** All available preset names */

View file

@ -1,719 +0,0 @@
"use client";
import { useQuery } from "@tanstack/react-query";
import {
BookOpen,
ChevronDown,
ChevronUp,
ExternalLink,
FileQuestionMark,
FileText,
Hash,
Loader2,
Sparkles,
X,
} from "lucide-react";
import { AnimatePresence, motion, useReducedMotion } from "motion/react";
import { useTranslations } from "next-intl";
import type React from "react";
import { forwardRef, memo, type ReactNode, useCallback, useEffect, useRef, useState } from "react";
import { createPortal } from "react-dom";
import { MarkdownViewer } from "@/components/markdown-viewer";
import { Badge } from "@/components/ui/badge";
import { Button } from "@/components/ui/button";
import { ScrollArea } from "@/components/ui/scroll-area";
import { Spinner } from "@/components/ui/spinner";
import type {
GetDocumentByChunkResponse,
GetSurfsenseDocsByChunkResponse,
} from "@/contracts/types/document.types";
import { documentsApiService } from "@/lib/apis/documents-api.service";
import { cacheKeys } from "@/lib/query-client/cache-keys";
import { cn } from "@/lib/utils";
type DocumentData = GetDocumentByChunkResponse | GetSurfsenseDocsByChunkResponse;
interface SourceDetailPanelProps {
open: boolean;
onOpenChange: (open: boolean) => void;
chunkId: number;
sourceType: string;
title: string;
description?: string;
url?: string;
children?: ReactNode;
isDocsChunk?: boolean;
}
/**
 * Turn an underscore-separated type identifier (e.g. `"TAVILY_API"`) into a
 * space-separated, title-cased label (e.g. `"Tavily Api"`).
 *
 * Each word's first character is upper-cased and the remainder lower-cased,
 * so the result is the same whatever the casing of the input (`"file"`,
 * `"FILE"` and `"File"` all yield `"File"`).
 *
 * @param type - Identifier to format; an empty/falsy value yields `""`.
 * @returns The human-readable label.
 */
const formatDocumentType = (type: string) => {
	if (!type) return "";
	return type
		.split("_")
		.map((word) => word.charAt(0).toUpperCase() + word.slice(1).toLowerCase())
		.join(" ");
};
// Card rendering a single document chunk: a numbered header ("Chunk X of Y"),
// a "Cited Source" badge when the chunk is the cited one, and the chunk body
// rendered as markdown.
// The props include `isActive` and `disableLayoutAnimation` (intended for
// turning animation off on large documents, >30 chunks, to avoid layout
// shifts that break auto-scroll), but the component below does not read them.
interface ChunkCardProps {
	chunk: { id: number; content: string };
	localIndex: number;
	chunkNumber: number;
	totalChunks: number;
	isCited: boolean;
	isActive: boolean;
	disableLayoutAnimation?: boolean;
}

const ChunkCard = memo(
	forwardRef<HTMLDivElement, ChunkCardProps>((props, ref) => {
		const { chunk, localIndex, chunkNumber, totalChunks, isCited } = props;

		// Outer card: highlighted gradient for the cited chunk, plain card otherwise.
		const cardClassName = cn(
			"group relative rounded-2xl border-2 transition-all duration-300",
			isCited
				? "bg-linear-to-br from-primary/5 via-primary/10 to-primary/5 border-primary shadow-lg shadow-primary/10"
				: "bg-card border-border/50 hover:border-border hover:shadow-md"
		);
		// Round index bubble in the header.
		const bubbleClassName = cn(
			"flex items-center justify-center w-8 h-8 rounded-full text-sm font-semibold transition-colors",
			isCited
				? "bg-primary text-primary-foreground"
				: "bg-muted text-muted-foreground group-hover:bg-muted/80"
		);

		return (
			<div ref={ref} data-chunk-index={localIndex} className={cardClassName}>
				{isCited ? (
					<div className="absolute inset-0 rounded-2xl bg-primary/5 blur-xl -z-10" />
				) : null}
				<div className="flex items-center justify-between px-5 py-4 border-b border-border/50">
					<div className="flex items-center gap-3">
						<div className={bubbleClassName}>{chunkNumber}</div>
						<span className="text-sm text-muted-foreground">
							Chunk {chunkNumber} of {totalChunks}
						</span>
					</div>
					{isCited ? (
						<Badge variant="default" className="gap-1.5 px-3 py-1">
							<Sparkles className="h-3 w-3" />
							Cited Source
						</Badge>
					) : null}
				</div>
				<div className="p-5 overflow-hidden">
					<MarkdownViewer content={chunk.content} maxLength={100_000} />
				</div>
			</div>
		);
	})
);
ChunkCard.displayName = "ChunkCard";
export function SourceDetailPanel({
open,
onOpenChange,
chunkId,
sourceType,
title,
description,
url,
children,
isDocsChunk = false,
}: SourceDetailPanelProps) {
const t = useTranslations("dashboard");
const scrollAreaRef = useRef<HTMLDivElement>(null);
const hasScrolledRef = useRef(false); // Use ref to avoid stale closures
const scrollTimersRef = useRef<ReturnType<typeof setTimeout>[]>([]);
const [activeChunkIndex, setActiveChunkIndex] = useState<number | null>(null);
const [mounted, setMounted] = useState(false);
const shouldReduceMotion = useReducedMotion();
useEffect(() => {
setMounted(true);
}, []);
const {
data: documentData,
isLoading: isDocumentByChunkFetching,
error: documentByChunkFetchingError,
} = useQuery<DocumentData>({
queryKey: isDocsChunk
? cacheKeys.documents.byChunk(`doc-${chunkId}`)
: cacheKeys.documents.byChunk(chunkId.toString()),
queryFn: async () => {
if (isDocsChunk) {
return documentsApiService.getSurfsenseDocByChunk(chunkId);
}
return documentsApiService.getDocumentByChunk({ chunk_id: chunkId, chunk_window: 5 });
},
enabled: !!chunkId && open,
staleTime: 5 * 60 * 1000,
});
const totalChunks =
documentData && "total_chunks" in documentData
? (documentData.total_chunks ?? documentData.chunks.length)
: (documentData?.chunks?.length ?? 0);
const [beforeChunks, setBeforeChunks] = useState<
Array<{ id: number; content: string; created_at: string }>
>([]);
const [afterChunks, setAfterChunks] = useState<
Array<{ id: number; content: string; created_at: string }>
>([]);
const [loadingBefore, setLoadingBefore] = useState(false);
const [loadingAfter, setLoadingAfter] = useState(false);
useEffect(() => {
setBeforeChunks([]);
setAfterChunks([]);
}, [chunkId, open]);
const chunkStartIndex =
documentData && "chunk_start_index" in documentData ? (documentData.chunk_start_index ?? 0) : 0;
const initialChunks = documentData?.chunks ?? [];
const allChunks = [...beforeChunks, ...initialChunks, ...afterChunks];
const absoluteStart = chunkStartIndex - beforeChunks.length;
const absoluteEnd = chunkStartIndex + initialChunks.length + afterChunks.length;
const canLoadBefore = absoluteStart > 0;
const canLoadAfter = absoluteEnd < totalChunks;
const EXPAND_SIZE = 10;
// Fetch up to EXPAND_SIZE chunks that immediately precede the currently
// loaded window and prepend them. Chunks whose id is already present in
// `allChunks` are dropped. Failures are logged to the console only.
const loadBefore = useCallback(async () => {
	if (!(documentData && "search_space_id" in documentData && canLoadBefore)) return;
	setLoadingBefore(true);
	try {
		// Never ask for more chunks than exist before the current window.
		const batchSize = Math.min(EXPAND_SIZE, absoluteStart);
		const response = await documentsApiService.getDocumentChunks({
			document_id: documentData.id,
			page: 0,
			page_size: batchSize,
			start_offset: absoluteStart - batchSize,
		});
		const knownIds = new Set(allChunks.map((loaded) => loaded.id));
		const earlier = response.items.flatMap((item) =>
			knownIds.has(item.id)
				? []
				: [{ id: item.id, content: item.content, created_at: item.created_at }]
		);
		setBeforeChunks((current) => [...earlier, ...current]);
	} catch (err) {
		console.error("Failed to load earlier chunks:", err);
	} finally {
		setLoadingBefore(false);
	}
}, [documentData, absoluteStart, canLoadBefore, allChunks]);
// Fetch the next EXPAND_SIZE chunks that follow the currently loaded window
// and append them. Chunks whose id is already present in `allChunks` are
// dropped. Failures are logged to the console only.
const loadAfter = useCallback(async () => {
	if (!(documentData && "search_space_id" in documentData && canLoadAfter)) return;
	setLoadingAfter(true);
	try {
		const response = await documentsApiService.getDocumentChunks({
			document_id: documentData.id,
			page: 0,
			page_size: EXPAND_SIZE,
			start_offset: absoluteEnd,
		});
		const knownIds = new Set(allChunks.map((loaded) => loaded.id));
		const later = response.items.flatMap((item) =>
			knownIds.has(item.id)
				? []
				: [{ id: item.id, content: item.content, created_at: item.created_at }]
		);
		setAfterChunks((current) => [...current, ...later]);
	} catch (err) {
		console.error("Failed to load later chunks:", err);
	} finally {
		setLoadingAfter(false);
	}
}, [documentData, absoluteEnd, canLoadAfter, allChunks]);
const isDirectRenderSource =
sourceType === "TAVILY_API" ||
sourceType === "LINKUP_API" ||
sourceType === "SEARXNG_API" ||
sourceType === "BAIDU_SEARCH_API";
const citedChunkIndex = allChunks.findIndex((chunk) => chunk.id === chunkId);
// Scroll the chunk with the given local index to the vertical centre of the
// scroll-area viewport and mark it as the active chunk. Silently does nothing
// when the scroll area, its viewport, or the chunk element is not in the DOM.
const scrollToChunkByIndex = useCallback(
	(chunkIndex: number, smooth = true) => {
		const container = scrollAreaRef.current;
		if (!container) return;

		const viewport = container.querySelector<HTMLElement>("[data-radix-scroll-area-viewport]");
		if (!viewport) return;

		const target = container.querySelector<HTMLElement>(`[data-chunk-index="${chunkIndex}"]`);
		if (!target) return;

		// Bounding rects are viewport-relative, so the current scroll offset is
		// added back to obtain the chunk's position inside the scrolled content.
		const viewportBox = viewport.getBoundingClientRect();
		const targetBox = target.getBoundingClientRect();
		const offsetInContent = targetBox.top - viewportBox.top + viewport.scrollTop;
		const centeredTop = offsetInContent - viewportBox.height / 2 + targetBox.height / 2;

		const animate = smooth && !shouldReduceMotion;
		viewport.scrollTo({
			top: Math.max(0, centeredTop),
			behavior: animate ? "smooth" : "auto",
		});
		setActiveChunkIndex(chunkIndex);
	},
	[shouldReduceMotion]
);
// Callback ref for the cited chunk - scrolls when the element mounts
const citedChunkRefCallback = useCallback(
(node: HTMLDivElement | null) => {
if (node && !hasScrolledRef.current && open) {
hasScrolledRef.current = true; // Mark immediately to prevent duplicate scrolls
// Store the node reference for the delayed scroll
const scrollToCitedChunk = () => {
const scrollContainer = scrollAreaRef.current;
if (!scrollContainer || !node.isConnected) return false;
const viewport = scrollContainer.querySelector(
"[data-radix-scroll-area-viewport]"
) as HTMLElement | null;
if (!viewport) return false;
// Get positions
const viewportRect = viewport.getBoundingClientRect();
const chunkRect = node.getBoundingClientRect();
// Calculate scroll position to center the chunk
const currentScrollTop = viewport.scrollTop;
const chunkTopRelativeToViewport = chunkRect.top - viewportRect.top + currentScrollTop;
const scrollTarget =
chunkTopRelativeToViewport - viewportRect.height / 2 + chunkRect.height / 2;
viewport.scrollTo({
top: Math.max(0, scrollTarget),
behavior: "auto", // Instant scroll for initial positioning
});
return true;
};
// Scroll multiple times with delays to handle progressive content rendering
// Each subsequent scroll will correct for any layout shifts
const scrollAttempts = [50, 150, 300, 600, 1000];
scrollAttempts.forEach((delay) => {
scrollTimersRef.current.push(
setTimeout(() => {
scrollToCitedChunk();
}, delay)
);
});
// After final attempt, mark the cited chunk as active
scrollTimersRef.current.push(
setTimeout(
() => {
setActiveChunkIndex(citedChunkIndex);
},
scrollAttempts[scrollAttempts.length - 1] + 50
)
);
}
},
[open, citedChunkIndex]
);
// Reset scroll state when panel closes
useEffect(() => {
if (!open) {
scrollTimersRef.current.forEach(clearTimeout);
scrollTimersRef.current = [];
hasScrolledRef.current = false;
setActiveChunkIndex(null);
}
return () => {
scrollTimersRef.current.forEach(clearTimeout);
scrollTimersRef.current = [];
};
}, [open]);
// Handle escape key
useEffect(() => {
const handleEscape = (e: KeyboardEvent) => {
if (e.key === "Escape" && open) {
onOpenChange(false);
}
};
window.addEventListener("keydown", handleEscape);
return () => window.removeEventListener("keydown", handleEscape);
}, [open, onOpenChange]);
// Prevent body scroll while the panel is open.
// The previous inline `overflow` value is captured and restored on close or
// unmount, rather than being reset to "" unconditionally, so a value set by
// other code (e.g. another overlay's scroll lock) is not clobbered. While the
// panel is closed the body style is left untouched.
useEffect(() => {
	if (!open) return undefined;
	const bodyStyle = document.body.style;
	const previousOverflow = bodyStyle.overflow;
	bodyStyle.overflow = "hidden";
	return () => {
		bodyStyle.overflow = previousOverflow;
	};
}, [open]);
const handleUrlClick = (e: React.MouseEvent, clickUrl: string) => {
e.preventDefault();
e.stopPropagation();
window.open(clickUrl, "_blank", "noopener,noreferrer");
};
const scrollToChunk = useCallback(
(index: number) => {
scrollToChunkByIndex(index, true);
},
[scrollToChunkByIndex]
);
const panelContent = (
<AnimatePresence mode="wait">
{open && (
<>
{/* Backdrop */}
<motion.div
key="backdrop"
initial={{ opacity: 0 }}
animate={{ opacity: 1 }}
exit={{ opacity: 0 }}
transition={{ duration: 0.2 }}
className="fixed inset-0 z-50 bg-black/60 backdrop-blur-sm"
onClick={() => onOpenChange(false)}
/>
{/* Panel */}
<motion.div
key="panel"
initial={shouldReduceMotion ? { opacity: 0 } : { opacity: 0, scale: 0.95, y: 20 }}
animate={{ opacity: 1, scale: 1, y: 0 }}
exit={shouldReduceMotion ? { opacity: 0 } : { opacity: 0, scale: 0.95, y: 20 }}
transition={{
type: "spring",
damping: 30,
stiffness: 300,
}}
className="fixed inset-3 sm:inset-6 md:inset-10 lg:inset-16 z-50 flex flex-col bg-background rounded-3xl shadow-2xl border overflow-hidden"
>
{/* Header */}
<motion.div
initial={{ opacity: 0, y: -10 }}
animate={{ opacity: 1, y: 0 }}
transition={{ delay: 0.1 }}
className="flex items-center justify-between px-6 py-5 border-b bg-linear-to-r from-muted/50 to-muted/30"
>
<div className="min-w-0 flex-1">
<h2 className="text-xl font-semibold truncate">
{documentData?.title || title || "Source Document"}
</h2>
<p className="text-sm text-muted-foreground mt-0.5">
{documentData && "document_type" in documentData
? formatDocumentType(documentData.document_type)
: sourceType && formatDocumentType(sourceType)}
{totalChunks > 0 && (
<span className="ml-2">
{totalChunks} chunk{totalChunks !== 1 ? "s" : ""}
{allChunks.length < totalChunks && ` (showing ${allChunks.length})`}
</span>
)}
</p>
</div>
<div className="flex items-center gap-3 shrink-0">
{url && (
<Button
size="sm"
variant="outline"
onClick={(e) => handleUrlClick(e, url)}
className="hidden sm:flex gap-2 rounded-xl"
>
<ExternalLink className="h-4 w-4" />
Open Source
</Button>
)}
<Button
size="icon"
variant="ghost"
onClick={() => onOpenChange(false)}
className="h-8 w-8 rounded-full"
>
<X className="h-4 w-4" />
<span className="sr-only">Close</span>
</Button>
</div>
</motion.div>
{/* Loading State */}
{!isDirectRenderSource && isDocumentByChunkFetching && (
<div className="flex-1 flex items-center justify-center">
<motion.div
initial={{ opacity: 0, scale: 0.9 }}
animate={{ opacity: 1, scale: 1 }}
className="flex flex-col items-center gap-4"
>
<Spinner size="lg" />
<p className="text-sm text-muted-foreground font-medium">
{t("loading_document")}
</p>
</motion.div>
</div>
)}
{/* Error State */}
{!isDirectRenderSource && documentByChunkFetchingError && (
<div className="flex-1 flex items-center justify-center">
<motion.div
initial={{ opacity: 0, scale: 0.9 }}
animate={{ opacity: 1, scale: 1 }}
className="flex flex-col items-center gap-4 text-center px-6"
>
<div className="w-20 h-20 rounded-full bg-muted/50 flex items-center justify-center">
<FileQuestionMark className="h-10 w-10 text-muted-foreground" />
</div>
<div>
<p className="font-semibold text-foreground text-lg">Document unavailable</p>
<p className="text-sm text-muted-foreground mt-2 max-w-md">
{documentByChunkFetchingError.message ||
"An unexpected error occurred. Please try again."}
</p>
</div>
<Button variant="outline" onClick={() => onOpenChange(false)} className="mt-2">
Close Panel
</Button>
</motion.div>
</div>
)}
{/* Direct render for web search providers */}
{isDirectRenderSource && (
<ScrollArea className="flex-1">
<div className="p-6 max-w-3xl mx-auto">
{url && (
<Button
size="default"
variant="outline"
onClick={(e) => handleUrlClick(e, url)}
className="w-full mb-6 sm:hidden rounded-xl"
>
<ExternalLink className="mr-2 h-4 w-4" />
Open in Browser
</Button>
)}
<motion.div
initial={{ opacity: 0, y: 10 }}
animate={{ opacity: 1, y: 0 }}
className="p-6 bg-muted/50 rounded-2xl border"
>
<h3 className="text-base font-semibold mb-4 flex items-center gap-2">
<BookOpen className="h-4 w-4" />
Source Information
</h3>
<div className="text-sm text-muted-foreground mb-3 font-medium">
{title || "Untitled"}
</div>
<div className="text-sm text-foreground leading-relaxed">
{description || "No content available"}
</div>
</motion.div>
</div>
</ScrollArea>
)}
{/* API-fetched document content */}
{!isDirectRenderSource && documentData && (
<div className="flex-1 flex overflow-hidden">
{/* Chunk Navigation Sidebar */}
{allChunks.length > 1 && (
<motion.div
initial={{ opacity: 0, x: -20 }}
animate={{ opacity: 1, x: 0 }}
transition={{ delay: 0.2 }}
className="hidden lg:flex flex-col w-16 border-r bg-muted/10 overflow-hidden"
>
<ScrollArea className="flex-1 h-full">
<div className="p-2 pt-3 flex flex-col gap-1.5">
{allChunks.map((chunk, idx) => {
const absNum = absoluteStart + idx + 1;
const isCited = chunk.id === chunkId;
const isActive = activeChunkIndex === idx;
return (
<motion.button
key={chunk.id}
type="button"
onClick={() => scrollToChunk(idx)}
initial={{ opacity: 0, scale: 0.8 }}
animate={{ opacity: 1, scale: 1 }}
transition={{ delay: Math.min(idx * 0.02, 0.2) }}
className={cn(
"relative w-11 h-9 mx-auto rounded-lg text-xs font-semibold transition-all duration-200 flex items-center justify-center",
isCited
? "bg-primary text-primary-foreground shadow-md"
: isActive
? "bg-muted text-foreground"
: "bg-muted/50 text-muted-foreground hover:bg-muted hover:text-foreground"
)}
title={isCited ? `Chunk ${absNum} (Cited)` : `Chunk ${absNum}`}
>
{absNum}
{isCited && (
<span className="absolute -top-1.5 -right-1.5 flex items-center justify-center w-4 h-4 bg-primary rounded-full border-2 border-background shadow-sm">
<Sparkles className="h-2.5 w-2.5 text-primary-foreground" />
</span>
)}
</motion.button>
);
})}
</div>
</ScrollArea>
</motion.div>
)}
{/* Main Content */}
<ScrollArea className="flex-1" ref={scrollAreaRef}>
<div className="p-6 lg:p-8 max-w-4xl mx-auto space-y-6">
{/* Document Metadata */}
{"document_metadata" in documentData &&
documentData.document_metadata &&
Object.keys(documentData.document_metadata).length > 0 && (
<motion.div
initial={{ opacity: 0, y: 10 }}
animate={{ opacity: 1, y: 0 }}
transition={{ delay: 0.1 }}
className="p-5 bg-muted/30 rounded-2xl border"
>
<h3 className="text-sm font-semibold mb-4 text-muted-foreground uppercase tracking-wider flex items-center gap-2">
<FileText className="h-4 w-4" />
Document Information
</h3>
<dl className="grid grid-cols-1 sm:grid-cols-2 gap-4 text-sm">
{Object.entries(documentData.document_metadata).map(([key, value]) => (
<div key={key} className="space-y-1">
<dt className="font-medium text-muted-foreground capitalize text-xs">
{key.replace(/_/g, " ")}
</dt>
<dd className="text-foreground wrap-break-word">{String(value)}</dd>
</div>
))}
</dl>
</motion.div>
)}
{/* Chunks Header */}
<div className="flex items-center justify-between pt-2">
<h3 className="text-sm font-semibold text-muted-foreground uppercase tracking-wider flex items-center gap-2">
<Hash className="h-4 w-4" />
Chunks {absoluteStart + 1}{absoluteEnd} of {totalChunks}
</h3>
{citedChunkIndex !== -1 && (
<Button
variant="ghost"
size="sm"
onClick={() => scrollToChunk(citedChunkIndex)}
className="gap-2 text-primary hover:text-primary"
>
<Sparkles className="h-3.5 w-3.5" />
Jump to cited
</Button>
)}
</div>
{/* Load Earlier */}
{canLoadBefore && (
<div className="flex items-center justify-center">
<Button
variant="outline"
size="sm"
onClick={loadBefore}
disabled={loadingBefore}
className="gap-2"
>
{loadingBefore ? (
<Loader2 className="h-3.5 w-3.5 animate-spin" />
) : (
<ChevronUp className="h-3.5 w-3.5" />
)}
{loadingBefore
? "Loading..."
: `Load ${Math.min(EXPAND_SIZE, absoluteStart)} earlier chunks`}
</Button>
</div>
)}
{/* Chunks */}
<div className="space-y-4">
{allChunks.map((chunk, idx) => {
const isCited = chunk.id === chunkId;
const chunkNumber = absoluteStart + idx + 1;
return (
<ChunkCard
key={chunk.id}
ref={isCited ? citedChunkRefCallback : undefined}
chunk={chunk}
localIndex={idx}
chunkNumber={chunkNumber}
totalChunks={totalChunks}
isCited={isCited}
isActive={activeChunkIndex === idx}
disableLayoutAnimation={allChunks.length > 30}
/>
);
})}
</div>
{/* Load Later */}
{canLoadAfter && (
<div className="flex items-center justify-center py-3">
<Button
variant="outline"
size="sm"
onClick={loadAfter}
disabled={loadingAfter}
className="gap-2"
>
{loadingAfter ? (
<Loader2 className="h-3.5 w-3.5 animate-spin" />
) : (
<ChevronDown className="h-3.5 w-3.5" />
)}
{loadingAfter
? "Loading..."
: `Load ${Math.min(EXPAND_SIZE, totalChunks - absoluteEnd)} later chunks`}
</Button>
</div>
)}
</div>
</ScrollArea>
</div>
)}
</motion.div>
</>
)}
</AnimatePresence>
);
if (!mounted) return <>{children}</>;
return (
<>
{children}
{createPortal(panelContent, globalThis.document.body)}
</>
);
}

View file

@ -67,9 +67,6 @@ const DesktopShortcutsContent = dynamic(
import( import(
"@/app/dashboard/[search_space_id]/user-settings/components/DesktopShortcutsContent" "@/app/dashboard/[search_space_id]/user-settings/components/DesktopShortcutsContent"
).then((m) => ({ default: m.DesktopShortcutsContent })), ).then((m) => ({ default: m.DesktopShortcutsContent })),
import(
"@/app/dashboard/[search_space_id]/user-settings/components/DesktopShortcutsContent"
).then((m) => ({ default: m.DesktopShortcutsContent })),
{ ssr: false } { ssr: false }
); );
const MemoryContent = dynamic( const MemoryContent = dynamic(

View file

@ -0,0 +1,45 @@
"use client";
import type { PlateLeafProps } from "platejs/react";
import { PlateLeaf } from "platejs/react";
/**
* Stable class name used to identify Plate-rendered citation highlight
* leaves in the DOM. We can't use a `data-*` attribute here — Plate's
* `PlateLeaf` runs its props through `useNodeAttributes`, which only
* forwards `attributes`, `className`, `ref`, and `style` to the rendered
* element; arbitrary `data-*` props are silently dropped (verified
* against `@platejs/core/dist/react/index.js` v52). So `className` is
* the only escape hatch that's guaranteed to survive into the DOM.
*/
export const CITATION_HIGHLIGHT_CLASS = "citation-highlight-leaf";
/**
* Leaf rendered for ranges decorated by `@platejs/find-replace`'s
* `FindReplacePlugin`. We re-purpose that plugin to drive the citation-jump
* highlight: when a citation is staged, the parent sets the plugin's `search`
* option to a snippet of the chunk text and Plate decorates every match with
* `searchHighlight: true`. This component renders those decorations as a
* `<mark>` tagged with `CITATION_HIGHLIGHT_CLASS` so the parent can:
* 1. Query the first match in DOM order to scroll it into view.
* 2. Detect the active-highlight state without a separate React ref.
*
* The highlight is **persistent** — it does not auto-fade. The parent in
* `EditorPanelContent` clears it by setting the plugin's `search` option
* back to "" when one of: (a) the user clicks anywhere inside the editor,
* (b) the panel switches to a different document, (c) the user toggles
* into edit mode, (d) another citation jump is staged, (e) the panel
* unmounts. We use a brief entrance pulse (`citation-flash-in`, see
* `globals.css`) purely to draw the eye after `scrollIntoView` lands.
*/
export function SearchHighlightLeaf(props: PlateLeafProps) {
	const { children } = props;

	// The leading class is the stable DOM hook for locating the highlight;
	// the remaining classes are purely visual (tint, ring, entrance pulse).
	const markClassName = `${CITATION_HIGHLIGHT_CLASS} bg-primary/15 ring-1 ring-primary/40 rounded-sm px-0.5 text-inherit animate-[citation-flash-in_400ms_ease-out]`;

	// Spread the incoming leaf props first so `as` and `className` below
	// always take precedence over anything carried in `props`.
	return (
		<PlateLeaf {...props} as="mark" className={markClassName}>
			{children}
		</PlateLeaf>
	);
}

View file

@ -0,0 +1,125 @@
/**
* Snippet generation for the citation-jump highlight, driven by Plate's
* `FindReplacePlugin`. The plugin runs `decorate` per-block and only matches
* within blocks whose children are all `Text` nodes (so it crosses inline
* marks like bold/italic but **not** block boundaries, and a block that
* contains even one inline element such as a link is silently skipped).
* That means a full chunk that spans heading + paragraph won't match as a
* single string — we have to pick a shorter snippet that fits inside one
* rendered block.
*
* `buildCitationSearchCandidates` returns search strings ordered from
* "most-specific anchor" to "broadest fallback":
* 1. First sentence of the chunk (capped at `FIRST_SENTENCE_MAX`).
* 2. First `FIRST_PHRASE_WORDS` words.
* 3. Each non-trivial line of the chunk, in source order — gives us a
* separate attempt for each rendered block, so a heading line with
* an inline link doesn't doom the whole jump.
* 4. Full chunk (only if it's already short enough to plausibly fit
* inside one block).
*
* The caller tries each candidate in turn — set the plugin's `search`
* option, `editor.api.redecorate()`, then check the editor DOM for a
* `.citation-highlight-leaf` element. First candidate that produces one
* wins; subsequent candidates are skipped.
*/
/**
 * Maximum length of the first-sentence candidate; a longer sentence (or, when
 * no sentence terminator is found, a longer chunk) is truncated to this prefix.
 */
const FIRST_SENTENCE_MAX = 120;
/** Number of leading words used for the "first phrase" candidate. */
const FIRST_PHRASE_WORDS = 8;
/** Candidates shorter than this after whitespace normalization are discarded. */
const MIN_SNIPPET_LENGTH = 6;
/** The whole chunk is offered as a candidate only if its normalized length does not exceed this. */
const FULL_CHUNK_MAX = FIRST_SENTENCE_MAX * 2;
/** Upper bound on how many chunk lines are turned into per-line candidates. */
const MAX_LINE_CANDIDATES = 6;
/** Maximum length of a per-line candidate; longer lines are truncated to this prefix. */
const LINE_CANDIDATE_MAX = FIRST_SENTENCE_MAX;
/**
 * Collapse every run of whitespace (spaces, tabs, newlines) into a single
 * space and drop leading/trailing whitespace.
 */
function normalizeWhitespace(input: string): string {
	const collapsed = input.replace(/\s+/g, " ");
	return collapsed.trim();
}
/**
* Strip the markdown syntax that won't survive into the rendered editor's
* plain text, so the chunk text (which comes back from the indexer as raw
* source markdown) can be matched against the literal text values stored
* in Plate's Slate tree.
*
* Order matters: handle multi-char and "container" syntax before single-
* char emphasis, otherwise `**text**` collapses to `*text*` first.
*
* Heuristic only — we don't aim to be a full markdown parser, just to
* remove the common markers (`**bold**`, `[text](url)`, `# headings`,
* `- list`, etc.) that show up in connector-doc chunks and would break
* literal substring search.
*/
export function stripMarkdownForMatch(input: string): string {
	// Fenced code blocks go first: keep the body, drop the fences and the
	// optional language tag.
	const withoutFences = input.replace(
		/```[a-z0-9_+-]*\n?([\s\S]*?)```/gi,
		(_, body: string) => body
	);

	// Remaining rewrites, applied strictly in this order. Each entry is
	// [pattern, replacement]; "$n" refers to the pattern's capture groups.
	const rules: ReadonlyArray<readonly [RegExp, string]> = [
		// HTML comments become a single space.
		[/<!--[\s\S]*?-->/g, " "],
		// Inline and reference-style images: keep the alt text.
		[/!\[([^\]]*)\]\([^)]*\)/g, "$1"],
		[/!\[([^\]]*)\]\[[^\]]*\]/g, "$1"],
		// Inline and reference-style links: keep the link text.
		[/\[([^\]]+)\]\([^)]*\)/g, "$1"],
		[/\[([^\]]+)\]\[[^\]]*\]/g, "$1"],
		// Autolinks (<https://...>, <mailto:...>): drop the angle brackets.
		[/<((?:https?|mailto):[^>\s]+)>/g, "$1"],
		// Inline code spans: drop the backticks.
		[/`+([^`\n]+?)`+/g, "$1"],
		// Strong emphasis (** or __) must be unwrapped before single-char emphasis.
		[/(\*\*|__)([\s\S]+?)\1/g, "$2"],
		// Single * or _ emphasis, only when not glued to word characters.
		[/(?<!\w)([*_])([^*_\n]+?)\1(?!\w)/g, "$2"],
		// Strikethrough.
		[/~~([^~]+)~~/g, "$1"],
		// ATX heading markers at the start of a line.
		[/^[ \t]{0,3}#{1,6}[ \t]+/gm, ""],
		// Lines made only of "=" or "-" (setext heading underlines).
		[/^[ \t]{0,3}(?:=+|-+)[ \t]*$/gm, ""],
		// Blockquote markers.
		[/^[ \t]{0,3}>+[ \t]?/gm, ""],
		// Bullet list markers.
		[/^[ \t]*[-*+][ \t]+/gm, ""],
		// Ordered list markers ("1. ").
		[/^[ \t]*\d+\.[ \t]+/gm, ""],
		// Thematic breaks: three or more of - * _ with optional spacing.
		[/^[ \t]{0,3}(?:[-*_])(?:[ \t]*[-*_]){2,}[ \t]*$/gm, ""],
		// Table delimiter rows such as "| --- | :-: |".
		[/^[ \t]*\|?(?:[ \t]*:?-+:?[ \t]*\|)+[ \t]*:?-+:?[ \t]*\|?[ \t]*$/gm, ""],
		// Backslash escapes: keep the escaped punctuation, drop the backslash.
		[/\\([\\`*_{}[\]()#+\-.!~>])/g, "$1"],
	];

	let text = withoutFences;
	for (const [pattern, replacement] of rules) {
		text = text.replace(pattern, replacement);
	}
	return text;
}
/**
 * Build the ordered, de-duplicated list of search strings to try for a
 * citation jump.
 *
 * Order of candidates: first sentence (or a capped prefix when the chunk has
 * no sentence terminator and is long), first few words, individual lines in
 * source order, and finally the whole chunk when it is short enough.
 *
 * @param rawText Chunk text, possibly containing markdown syntax.
 * @returns Whitespace-normalized candidates; empty when `rawText` is empty or
 *   its normalized form is shorter than `MIN_SNIPPET_LENGTH`.
 */
export function buildCitationSearchCandidates(rawText: string): string[] {
	if (!rawText) return [];

	const markdownFree = stripMarkdownForMatch(rawText);
	const flattened = normalizeWhitespace(markdownFree);
	if (flattened.length < MIN_SNIPPET_LENGTH) return [];

	// A Set iterates in insertion order and ignores repeats, which gives us
	// "first occurrence wins" de-duplication without a separate array.
	const candidates = new Set<string>();
	const offer = (snippet: string): void => {
		const cleaned = normalizeWhitespace(snippet);
		if (cleaned.length < MIN_SNIPPET_LENGTH) return;
		candidates.add(cleaned);
	};

	// 1. First sentence, or a capped prefix of a long sentence-less chunk.
	//    `slice` is a no-op when the text is already within the cap.
	const firstSentence = /^[^.!?]+[.!?]/.exec(flattened)?.[0];
	if (firstSentence !== undefined) {
		offer(firstSentence.slice(0, FIRST_SENTENCE_MAX));
	} else if (flattened.length > FIRST_SENTENCE_MAX) {
		offer(flattened.slice(0, FIRST_SENTENCE_MAX));
	}

	// 2. Leading phrase, only when the chunk has more words than the phrase.
	const tokens = flattened.split(" ").filter(Boolean);
	if (tokens.length > FIRST_PHRASE_WORDS) {
		offer(tokens.slice(0, FIRST_PHRASE_WORDS).join(" "));
	}

	// 3. Per-line candidates: each chunk line is roughly one block in the
	//    rendered editor, so every usable line gets its own attempt. Lines
	//    are taken from the un-flattened text so line breaks still exist.
	//    The cap counts usable lines, whether or not they turn out to be
	//    duplicates of an earlier candidate.
	markdownFree
		.split(/\r?\n/)
		.map((rawLine) => normalizeWhitespace(rawLine))
		.filter((line) => line.length >= MIN_SNIPPET_LENGTH)
		.slice(0, MAX_LINE_CANDIDATES)
		.forEach((line) => {
			offer(line.slice(0, LINE_CANDIDATE_MAX));
		});

	// 4. Whole chunk as the broadest fallback, only if it is short.
	if (flattened.length <= FULL_CHUNK_MAX) {
		offer(flattened);
	}

	return Array.from(candidates);
}

View file

@ -36,6 +36,7 @@
"@platejs/code-block": "^52.0.11", "@platejs/code-block": "^52.0.11",
"@platejs/combobox": "^52.0.15", "@platejs/combobox": "^52.0.15",
"@platejs/dnd": "^52.0.11", "@platejs/dnd": "^52.0.11",
"@platejs/find-replace": "^52.3.10",
"@platejs/floating": "^52.0.11", "@platejs/floating": "^52.0.11",
"@platejs/indent": "^52.0.11", "@platejs/indent": "^52.0.11",
"@platejs/link": "^52.0.11", "@platejs/link": "^52.0.11",

View file

@ -53,6 +53,9 @@ importers:
'@platejs/dnd': '@platejs/dnd':
specifier: ^52.0.11 specifier: ^52.0.11
version: 52.0.11(platejs@52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4)))(react-dnd-html5-backend@16.0.1)(react-dnd@16.0.1(@types/node@20.19.33)(@types/react@19.2.14)(react@19.2.4))(react-dom@19.2.4(react@19.2.4))(react@19.2.4) version: 52.0.11(platejs@52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4)))(react-dnd-html5-backend@16.0.1)(react-dnd@16.0.1(@types/node@20.19.33)(@types/react@19.2.14)(react@19.2.4))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)
'@platejs/find-replace':
specifier: ^52.3.10
version: 52.3.10(platejs@52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4)))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)
'@platejs/floating': '@platejs/floating':
specifier: ^52.0.11 specifier: ^52.0.11
version: 52.0.11(platejs@52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4)))(react-dom@19.2.4(react@19.2.4))(react@19.2.4) version: 52.0.11(platejs@52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4)))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)
@ -2827,6 +2830,13 @@ packages:
react-dnd-html5-backend: '>=14.0.0' react-dnd-html5-backend: '>=14.0.0'
react-dom: '>=18.0.0' react-dom: '>=18.0.0'
'@platejs/find-replace@52.3.10':
resolution: {integrity: sha512-V/MOMMUYxHfEn/skd2+YO213xSATFDVsl8FzVzVRV/XaxwwVefH2EPD1lAVIvmYjennTVTTsHHtEI9K9iOsEaA==}
peerDependencies:
platejs: '>=52.0.11'
react: '>=18.0.0'
react-dom: '>=18.0.0'
'@platejs/floating@52.0.11': '@platejs/floating@52.0.11':
resolution: {integrity: sha512-ApNpw4KWml+kuK+XTTpji+f/7GxTR4nRzlnfJMvGBrJpLPQ4elS5MABm3oUi81DZn+aub5HvsyH7UqCw7F76IA==} resolution: {integrity: sha512-ApNpw4KWml+kuK+XTTpji+f/7GxTR4nRzlnfJMvGBrJpLPQ4elS5MABm3oUi81DZn+aub5HvsyH7UqCw7F76IA==}
peerDependencies: peerDependencies:
@ -11105,6 +11115,13 @@ snapshots:
react-dnd-html5-backend: 16.0.1 react-dnd-html5-backend: 16.0.1
react-dom: 19.2.4(react@19.2.4) react-dom: 19.2.4(react@19.2.4)
'@platejs/find-replace@52.3.10(platejs@52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4)))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)':
dependencies:
platejs: 52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4))
react: 19.2.4
react-compiler-runtime: 1.0.0(react@19.2.4)
react-dom: 19.2.4(react@19.2.4)
'@platejs/floating@52.0.11(platejs@52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4)))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)': '@platejs/floating@52.0.11(platejs@52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4)))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)':
dependencies: dependencies:
'@floating-ui/core': 1.7.4 '@floating-ui/core': 1.7.4