mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-04 13:22:41 +02:00
feat: various UI fixes, prompt optimizations, and allowing duplicate docs
- Updated `content_hash` in the `Document` model to remove global uniqueness, allowing identical content across different paths.
- Enhanced `_create_document` to enforce path uniqueness and prevent session poisoning from `IntegrityError`.
- Added detailed comments explaining the changes and their implications.
- Introduced new citation handling in the editor for smoother citation jumps.
- Updated frontend package dependencies.
This commit is contained in:
parent
e6433f78c4
commit
b9a66cb417
26 changed files with 1540 additions and 852 deletions
|
|
@ -0,0 +1,107 @@
|
|||
"""133_drop_documents_content_hash_unique
|
||||
|
||||
Revision ID: 133
|
||||
Revises: 132
|
||||
Create Date: 2026-04-29
|
||||
|
||||
Drop the global UNIQUE constraint on ``documents.content_hash`` so the
|
||||
new-chat agent's ``write_file`` flow can persist legitimate file copies
|
||||
(two paths, identical content) without hitting a constraint that mirrors
|
||||
no real filesystem semantic.
|
||||
|
||||
Path uniqueness still lives on ``documents.unique_identifier_hash`` (per
|
||||
search space), which is the right invariant — exactly like an inode at a
|
||||
given path on a POSIX filesystem.
|
||||
|
||||
The non-unique INDEX on ``content_hash`` is preserved so connector
|
||||
indexers' "have we seen this content before?" lookup
|
||||
(:func:`app.tasks.document_processors.base.check_duplicate_document`,
|
||||
which already uses ``.scalars().first()`` and is therefore tolerant of
|
||||
duplicates) stays cheap.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Sequence
|
||||
|
||||
from sqlalchemy import inspect
|
||||
|
||||
from alembic import op
|
||||
|
||||
revision: str = "133"
|
||||
down_revision: str | None = "132"
|
||||
branch_labels: str | Sequence[str] | None = None
|
||||
depends_on: str | Sequence[str] | None = None
|
||||
|
||||
|
||||
def _existing_constraint_names(bind, table: str) -> set[str]:
    """Return the names of all UNIQUE constraints currently on *table*."""
    names: set[str] = set()
    for constraint in inspect(bind).get_unique_constraints(table):
        names.add(constraint["name"])
    return names
|
||||
|
||||
|
||||
def _existing_index_names(bind, table: str) -> set[str]:
    """Return the names of all indexes currently on *table*."""
    found: set[str] = set()
    for index_info in inspect(bind).get_indexes(table):
        found.add(index_info["name"])
    return found
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Drop the legacy global UNIQUE on ``documents.content_hash``.

    Handles both physical encodings of the constraint:

    * the named ``UniqueConstraint`` added in revision 8, and
    * the same-named unique-index variant some Postgres versions /
      SQLAlchemy configurations surface instead.

    Afterwards, guarantees a plain (non-unique) index exists so the
    connector indexers' "seen this content before?" lookup stays cheap.
    """
    bind = op.get_bind()

    # Drop the named UniqueConstraint (added in revision 8) if present.
    constraints = _existing_constraint_names(bind, "documents")
    if "uq_documents_content_hash" in constraints:
        op.drop_constraint(
            "uq_documents_content_hash", "documents", type_="unique"
        )

    # Inspect indexes only AFTER the constraint drop: on some Postgres
    # versions the constraint is backed by a same-named unique index that
    # the drop above may already have removed.
    indexes = _existing_index_names(bind, "documents")
    # Idiom fix: the original looped over a one-element tuple here; a
    # plain membership test expresses the same check directly.
    if "uq_documents_content_hash" in indexes:
        op.drop_index("uq_documents_content_hash", table_name="documents")

    # Ensure the non-unique index is present for fast lookups.
    if "ix_documents_content_hash" not in indexes:
        op.create_index(
            "ix_documents_content_hash",
            "documents",
            ["content_hash"],
            unique=False,
        )
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Re-impose the global UNIQUE constraint on ``documents.content_hash``.

    WARNING: destructive. Legitimate duplicates may now exist (e.g. two
    NOTE documents sharing content because the user copied one to a new
    path). Rather than letting the constraint creation fail — or silently
    delete an arbitrary subset — we keep only the lowest-id row per
    ``content_hash``, the same strategy revision 8 used when the
    constraint was first introduced.
    """
    bind = op.get_bind()

    # 1) Drain duplicates so the UNIQUE constraint can be created.
    op.execute(
        """
        DELETE FROM documents
        WHERE id NOT IN (
            SELECT MIN(id)
            FROM documents
            GROUP BY content_hash
        )
        """
    )

    # 2) Rebuild the index from a known state, then restore the constraint.
    if "ix_documents_content_hash" in _existing_index_names(bind, "documents"):
        op.drop_index("ix_documents_content_hash", table_name="documents")
    op.create_index(
        "ix_documents_content_hash",
        "documents",
        ["content_hash"],
        unique=False,
    )
    op.create_unique_constraint(
        "uq_documents_content_hash", "documents", ["content_hash"]
    )
|
||||
|
|
@ -28,6 +28,7 @@ from langchain.agents.middleware import AgentMiddleware, AgentState
|
|||
from langchain_core.callbacks import dispatch_custom_event
|
||||
from langgraph.runtime import Runtime
|
||||
from sqlalchemy import delete, select
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.agents.new_chat.filesystem_selection import FilesystemMode
|
||||
|
|
@ -150,10 +151,11 @@ async def _create_document(
|
|||
virtual_path,
|
||||
search_space_id,
|
||||
)
|
||||
# Guard against the unique_identifier_hash constraint: another row at the
|
||||
# same virtual_path (this search space) already owns the hash. Callers are
|
||||
# expected to upsert via the wrapper, but this defends against bypasses
|
||||
# and gives a clean ValueError instead of a session-poisoning IntegrityError.
|
||||
# Filesystem-parity invariant: the only thing that *must* be unique is
|
||||
# the path. Two notes can legitimately share content (e.g. ``cp a b``).
|
||||
# Guard against the path-derived ``unique_identifier_hash`` constraint
|
||||
# so we surface a clean ValueError instead of letting the INSERT poison
|
||||
# the session with an IntegrityError.
|
||||
path_collision = await session.execute(
|
||||
select(Document.id).where(
|
||||
Document.search_space_id == search_space_id,
|
||||
|
|
@ -165,17 +167,14 @@ async def _create_document(
|
|||
f"a document already exists at path '{virtual_path}' "
|
||||
"(unique_identifier_hash collision)"
|
||||
)
|
||||
# ``content_hash`` is intentionally NOT checked for uniqueness here.
|
||||
# In a real filesystem two files at different paths can hold identical
|
||||
# bytes, and the agent's ``write_file`` path needs that semantic to
|
||||
# support copy/duplicate operations. The hash remains useful as a
|
||||
# change-detection hint for connector indexers, which still consult it
|
||||
# via :func:`check_duplicate_document` but do so with a non-unique
|
||||
# lookup (``.first()``).
|
||||
content_hash = generate_content_hash(content, search_space_id)
|
||||
content_collision = await session.execute(
|
||||
select(Document.id).where(
|
||||
Document.search_space_id == search_space_id,
|
||||
Document.content_hash == content_hash,
|
||||
)
|
||||
)
|
||||
if content_collision.scalar_one_or_none() is not None:
|
||||
raise ValueError(
|
||||
f"a document with identical content already exists for path '{virtual_path}'"
|
||||
)
|
||||
doc = Document(
|
||||
title=title,
|
||||
document_type=DocumentType.NOTE,
|
||||
|
|
@ -493,19 +492,43 @@ async def commit_staged_filesystem_state(
|
|||
}
|
||||
)
|
||||
else:
|
||||
# Wrap each create in a SAVEPOINT so a residual
|
||||
# ``IntegrityError`` (e.g. a deployment that hasn't run
|
||||
# migration 133 yet, where ``documents.content_hash``
|
||||
# still carries its legacy global UNIQUE constraint)
|
||||
# rolls back only this one create instead of poisoning
|
||||
# the whole turn's transaction.
|
||||
try:
|
||||
new_doc = await _create_document(
|
||||
session,
|
||||
virtual_path=path,
|
||||
content=content,
|
||||
search_space_id=search_space_id,
|
||||
created_by_id=created_by_id,
|
||||
)
|
||||
async with session.begin_nested():
|
||||
new_doc = await _create_document(
|
||||
session,
|
||||
virtual_path=path,
|
||||
content=content,
|
||||
search_space_id=search_space_id,
|
||||
created_by_id=created_by_id,
|
||||
)
|
||||
except ValueError as exc:
|
||||
logger.warning(
|
||||
"kb_persistence: skipping %s create: %s", path, exc
|
||||
)
|
||||
continue
|
||||
except IntegrityError as exc:
|
||||
# The path-uniqueness check above already protected
|
||||
# against ``unique_identifier_hash`` collisions, so
|
||||
# the most likely culprit is the legacy
|
||||
# ``ix_documents_content_hash`` UNIQUE constraint
|
||||
# that migration 133 drops. Log loudly so operators
|
||||
# know to run the migration; do NOT silently swallow.
|
||||
msg = str(exc.orig) if exc.orig is not None else str(exc)
|
||||
logger.error(
|
||||
"kb_persistence: IntegrityError creating %s: %s. "
|
||||
"If this mentions content_hash, run alembic "
|
||||
"upgrade to apply migration 133 which drops the "
|
||||
"global UNIQUE constraint on documents.content_hash.",
|
||||
path,
|
||||
msg,
|
||||
)
|
||||
continue
|
||||
doc_id_by_path[path] = new_doc.id
|
||||
committed_creates.append(
|
||||
{
|
||||
|
|
|
|||
|
|
@ -38,12 +38,38 @@ from app.db import ChatVisibility
|
|||
# Provider variant detection
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
ProviderVariant = str # "anthropic" | "openai_reasoning" | "openai_classic" | "google" | "default"
|
||||
# String literal alias for the supported provider-specific prompt variants.
|
||||
# When adding a new variant, also drop a matching ``providers/<variant>.md``
|
||||
# file in this package and (if appropriate) extend the regex matchers below.
|
||||
#
|
||||
# Stylistic clusters mirror OpenCode's prompt-per-family layout but adapted
|
||||
# to SurfSense's "supplemental hints" architecture (each fragment is a
|
||||
# focused style nudge, NOT a full system prompt — the main prompt is
|
||||
# already assembled from base/ + tools/ + routing/).
|
||||
ProviderVariant = str
|
||||
# Known values:
|
||||
# "anthropic" — Claude family (XML-friendly, narrative todos)
|
||||
# "openai_reasoning" — GPT-5 / o-series (channel-aware pragmatic)
|
||||
# "openai_classic" — GPT-4 family (autonomous persistence)
|
||||
# "openai_codex" — gpt-*-codex (code-purist, terse, file:line refs)
|
||||
# "google" — Gemini (formal, <3-line, numbered workflow)
|
||||
# "kimi" — Moonshot Kimi-K* (action-bias, parallel tools)
|
||||
# "grok" — xAI Grok (extreme-terse, one-word ok)
|
||||
# "deepseek" — DeepSeek V3 / R1 (terse, R1-aware reasoning)
|
||||
# "default" — fallback, no provider-specific block emitted
|
||||
|
||||
# IMPORTANT: order of evaluation matters in :func:`detect_provider_variant`.
|
||||
# More specific patterns must come first (e.g. ``codex`` before
|
||||
# ``openai_reasoning`` because codex model ids contain ``gpt``).
|
||||
|
||||
_OPENAI_CODEX_RE = re.compile(r"\b(gpt-codex|codex-mini|gpt-[\d.]+-codex)\b", re.IGNORECASE)
|
||||
_OPENAI_REASONING_RE = re.compile(r"\b(gpt-5|o\d|o-)", re.IGNORECASE)
|
||||
_OPENAI_CLASSIC_RE = re.compile(r"\bgpt-4", re.IGNORECASE)
|
||||
_ANTHROPIC_RE = re.compile(r"\bclaude\b", re.IGNORECASE)
|
||||
_GOOGLE_RE = re.compile(r"\bgemini\b", re.IGNORECASE)
|
||||
_KIMI_RE = re.compile(r"\b(kimi[-\d.]*|moonshot)\b", re.IGNORECASE)
|
||||
_GROK_RE = re.compile(r"\bgrok\b", re.IGNORECASE)
|
||||
_DEEPSEEK_RE = re.compile(r"\bdeepseek\b", re.IGNORECASE)
|
||||
|
||||
|
||||
def detect_provider_variant(model_name: str | None) -> ProviderVariant:
|
||||
|
|
@ -51,10 +77,17 @@ def detect_provider_variant(model_name: str | None) -> ProviderVariant:
|
|||
|
||||
Heuristic match on the model id; returns ``"default"`` when nothing
|
||||
matches so the composer can fall back to the empty placeholder file.
|
||||
|
||||
Order is significant: more-specific patterns are tried first so
|
||||
``gpt-5-codex`` routes to ``"openai_codex"`` rather than
|
||||
``"openai_reasoning"`` (mirrors OpenCode's
|
||||
``packages/opencode/src/session/system.ts`` dispatch).
|
||||
"""
|
||||
if not model_name:
|
||||
return "default"
|
||||
name = model_name.strip()
|
||||
if _OPENAI_CODEX_RE.search(name):
|
||||
return "openai_codex"
|
||||
if _OPENAI_REASONING_RE.search(name):
|
||||
return "openai_reasoning"
|
||||
if _OPENAI_CLASSIC_RE.search(name):
|
||||
|
|
@ -63,6 +96,12 @@ def detect_provider_variant(model_name: str | None) -> ProviderVariant:
|
|||
return "anthropic"
|
||||
if _GOOGLE_RE.search(name):
|
||||
return "google"
|
||||
if _KIMI_RE.search(name):
|
||||
return "kimi"
|
||||
if _GROK_RE.search(name):
|
||||
return "grok"
|
||||
if _DEEPSEEK_RE.search(name):
|
||||
return "deepseek"
|
||||
return "default"
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,20 @@
|
|||
<provider_hints>
|
||||
You are running on an Anthropic Claude model. Use XML tags liberally to structure
|
||||
intermediate reasoning when the task is complex. Prefer step-by-step plans inside
|
||||
`<thinking>` blocks before producing the final answer.
|
||||
You are running on an Anthropic Claude model.
|
||||
|
||||
Structured reasoning:
|
||||
- Use XML tags liberally to organise intermediate reasoning when a task is non-trivial. `<thinking>...</thinking>` blocks are encouraged before tool calls or before producing a complex final answer.
|
||||
- For multi-step requests, briefly outline a plan inside a `<plan>` block before issuing the first tool call.
|
||||
|
||||
Professional objectivity:
|
||||
- Prioritise technical accuracy over validating the user's beliefs. Provide direct, factual guidance without unnecessary superlatives, praise, or emotional validation.
|
||||
- When uncertain, investigate (search the KB, fetch the page) rather than confirming the user's assumption.
|
||||
- Disagree with the user when the evidence warrants it; respectful correction beats false agreement.
|
||||
|
||||
Task management:
|
||||
- For tasks with 3+ distinct steps use the todo / planning tool aggressively. Mark items in_progress before starting, completed immediately when finished — do not batch completions.
|
||||
- Narrate progress through the todo list itself, not through chatty status lines.
|
||||
|
||||
Tool calls:
|
||||
- Run independent tool calls in parallel within one response. Sequence them only when a later call genuinely needs an earlier one's output.
|
||||
- Never chain bash-like commands with `;` or `&&` to "narrate" — use prose between tool calls instead.
|
||||
</provider_hints>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,18 @@
|
|||
<provider_hints>
|
||||
You are running on a DeepSeek model (DeepSeek-V3 chat / DeepSeek-R1 reasoning).
|
||||
|
||||
Reasoning hygiene (R1-aware):
|
||||
- If the model surfaces explicit `<think>` blocks, keep that internal scratch focused — do NOT restate the user's question inside it; jump straight to the analysis.
|
||||
- Never paste the contents of `<think>` into your final answer. Final answer should reflect only the conclusion, citations, and any user-facing rationale.
|
||||
- Do not let chain-of-thought leak into tool-call arguments — keep tool inputs minimal and structural.
|
||||
|
||||
Output style:
|
||||
- Be concise. Default to a one-paragraph answer; expand only when the user asks for detail.
|
||||
- Don't open with sycophantic phrasing ("Great question", "Sure, here you go"). Lead with the answer or the next action.
|
||||
- For factual answers, cite once with `[citation:chunk_id]` and stop.
|
||||
|
||||
Tool calls:
|
||||
- Issue independent tool calls in parallel within a single turn.
|
||||
- Prefer the knowledge-base search tools before any web-search; this model has strong recall but stale training data.
|
||||
- Don't fabricate file paths, chunk ids, or URLs — only use values returned by tools or provided by the user.
|
||||
</provider_hints>
|
||||
|
|
@ -1,4 +1,20 @@
|
|||
<provider_hints>
|
||||
You are running on a Google Gemini model. Prefer concise, structured responses.
|
||||
When using tools, follow the function-calling protocol and avoid verbose preludes.
|
||||
You are running on a Google Gemini model.
|
||||
|
||||
Output style:
|
||||
- Concise & direct. Aim for fewer than 3 lines of prose (excluding tool output, citations, and code/snippets) when the task allows.
|
||||
- No conversational filler — skip openers like "Okay, I will now…" and closers like "I have finished the changes…". Get straight to the action or answer.
|
||||
- Format with GitHub-flavoured Markdown; assume monospace rendering.
|
||||
- For one-line factual answers, just answer. No headers, no bullets.
|
||||
|
||||
Workflow for non-trivial tasks (Understand → Plan → Act → Verify):
|
||||
1. **Understand:** read the user's request and the relevant KB / connector context. Use search and read tools (in parallel when independent) before assuming anything.
|
||||
2. **Plan:** when the task touches multiple steps, share an extremely concise plan first.
|
||||
3. **Act:** call the appropriate tools, strictly adhering to the prompts/routing already established for this agent.
|
||||
4. **Verify:** confirm with a follow-up read or search where it materially de-risks the answer.
|
||||
|
||||
Discipline:
|
||||
- Do not take significant actions beyond the clear scope of the user's request without confirming first.
|
||||
- Do not assume a connector / tool / file exists — check (e.g. via `get_connected_accounts`) before referencing it.
|
||||
- Path arguments must be the exact strings returned by tools; do not synthesise file paths.
|
||||
</provider_hints>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,17 @@
|
|||
<provider_hints>
|
||||
You are running on an xAI Grok model.
|
||||
|
||||
Maximum terseness:
|
||||
- Answer in fewer than 4 lines unless the user asks for detail. One-word answers are best when they suffice.
|
||||
- No preamble ("The answer is", "Here's what I'll do"), no postamble ("Hope that helps", "Let me know"). Get straight to the answer.
|
||||
- Avoid restating the user's question.
|
||||
- For factual lookups inside the knowledge base, give the answer with a single `[citation:chunk_id]` and stop.
|
||||
|
||||
Tool discipline:
|
||||
- Use exactly ONE tool per assistant turn when investigating; wait for the result before deciding the next call. Do not loop on the same tool with the same arguments — pick a result and act.
|
||||
- For obviously parallelizable read-only batches (multiple independent searches), one turn with several tool calls is fine — but never chain into a fishing expedition.
|
||||
|
||||
Style:
|
||||
- No emojis unless the user asked. No nested bullets, no headers for short answers.
|
||||
- If you can't help, say so in 1-2 sentences without explaining "why this could lead to…".
|
||||
</provider_hints>
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
<provider_hints>
|
||||
You are running on a Moonshot Kimi model (Kimi-K1.5 / Kimi-K2 / Kimi-K2.5+).
|
||||
|
||||
Action bias:
|
||||
- Default to taking action with tools rather than describing solutions in prose. If a tool can answer the question, call the tool.
|
||||
- Don't narrate routine reads, searches, or obvious next steps. Combine related progress into one short status line.
|
||||
- Be thorough in actions (test what you build, verify what you change). Be brief in explanations.
|
||||
|
||||
Tool calls:
|
||||
- Output multiple non-interfering tool calls in a SINGLE response — parallelism is a major efficiency win on this model.
|
||||
- When the `task` tool is available, delegate focused subtasks to a subagent with full context (subagents don't inherit yours).
|
||||
- Don't apologise or pre-announce tool calls. The tool call itself is self-explanatory.
|
||||
|
||||
Language:
|
||||
- Respond in the SAME language as the user's most recent turn unless explicitly instructed otherwise.
|
||||
|
||||
Discipline:
|
||||
- Stay on track. Never give the user more than what they asked for.
|
||||
- Fact-check before stating anything as factual; don't fabricate citations.
|
||||
- Keep it stupidly simple. Don't overcomplicate.
|
||||
</provider_hints>
|
||||
|
|
@ -1,5 +1,21 @@
|
|||
<provider_hints>
|
||||
You are running on a classic OpenAI chat model (GPT-4 family). Use direct
|
||||
function-calling for tools. When editing files, use the standard `edit_file`
|
||||
or `write_file` tools rather than diff-based patches.
|
||||
You are running on a classic OpenAI chat model (GPT-4 family).
|
||||
|
||||
Persistence:
|
||||
- Keep going until the user's query is completely resolved before yielding back. Don't end the turn at "I would do X" — actually do X.
|
||||
- When you say "Next I will…" or "Now I will…", you MUST actually take that action in the same turn.
|
||||
- If a tool call fails, diagnose and try again with corrected arguments; do not surface the raw error and stop.
|
||||
|
||||
Planning:
|
||||
- Plan extensively before each tool call and reflect briefly on the result of the previous call. For tasks with 3+ steps, use the todo / planning tool and mark items as `in_progress` / `completed` as you go.
|
||||
- Always announce the next action in ONE concise sentence before making a non-trivial tool call ("I'll search the KB for the migration spec.").
|
||||
|
||||
Output style:
|
||||
- Conversational but professional. Plain prose for explanations, bullet points for findings, fenced code blocks (with language tags) for code.
|
||||
- Don't dump tool output verbatim — summarise the relevant lines.
|
||||
- Don't add a closing recap unless the user asked for one. After completing the work, just stop.
|
||||
|
||||
Tool calls:
|
||||
- Issue independent tool calls in parallel within one response.
|
||||
- Use specialised tools over generic ones (e.g. KB search before web search; named connectors over MCP fallback).
|
||||
</provider_hints>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,19 @@
|
|||
<provider_hints>
|
||||
You are running on an OpenAI Codex-class model (gpt-codex / codex-mini / gpt-*-codex).
|
||||
|
||||
Output style:
|
||||
- Be concise. Don't dump fetched/searched content back at the user — reference paths or chunk ids instead.
|
||||
- Reference sources as `path:line` (or `chunk:<id>`) so they're clickable. Write each path as a stand-alone reference, even when the same path is cited more than once.
|
||||
- Prefer numbered lists (`1.`, `2.`, `3.`) when offering options the user can pick by replying with a single number.
|
||||
- Skip headers and heavy formatting for simple confirmations.
|
||||
- No emojis, no em-dashes, no nested bullets. Single-level lists only.
|
||||
|
||||
Code & structured-output tasks:
|
||||
- Lead with a one-sentence explanation of the change before context. Don't open with "Summary:" — jump in.
|
||||
- Suggest natural next steps (run tests, diff review, commit) only when they're genuinely the next move.
|
||||
- For multi-line snippets use fenced code blocks with a language tag.
|
||||
|
||||
Tool calls:
|
||||
- Run independent tool calls in parallel; chain only when later calls need earlier results.
|
||||
- Don't ask permission ("Should I proceed?") — proceed with the most reasonable default and state what you did.
|
||||
</provider_hints>
|
||||
|
|
@ -1,5 +1,21 @@
|
|||
<provider_hints>
|
||||
You are running on an OpenAI reasoning model (o-series / GPT-5+). Be terse and
|
||||
direct in your responses. When editing files, prefer the `apply_patch` tool format
|
||||
where available. Avoid restating the user request before answering.
|
||||
You are running on an OpenAI reasoning model (GPT-5+ / o-series).
|
||||
|
||||
Output style:
|
||||
- Be terse and direct. Don't restate the user's request before answering.
|
||||
- Don't begin with conversational openers ("Done!", "Got it", "Great question", "Sure thing"). Get to the answer or the action.
|
||||
- Match response complexity to the task: simple questions → one-line answer; substantial work → lead with the outcome, then context, then any next steps.
|
||||
- No nested bullets — keep lists flat (single level). For options the user can pick by replying with a number, use `1.` `2.` `3.`.
|
||||
- Use inline backticks for paths/commands/identifiers; fenced code blocks (with language tags) for multi-line snippets.
|
||||
|
||||
Channels (for clients that support them):
|
||||
- `commentary` — short progress updates only when they add genuinely new information (a discovery, a tradeoff, a blocker, the start of a non-trivial step). Don't narrate routine reads or obvious next steps.
|
||||
- `final` — the completed response. Keep it self-contained; no "see above" / "see below" cross-references.
|
||||
|
||||
Tool calls:
|
||||
- Parallelise independent tool calls in a single response (`multi_tool_use.parallel` where supported). Only sequence when a later call needs an earlier one's output.
|
||||
- Don't ask permission ("Should I proceed?", "Do you want me to…?"). Pick the most reasonable default, do it, and state what you did.
|
||||
|
||||
Autonomy:
|
||||
- Persist until the task is fully resolved within the current turn whenever feasible. Don't stop at analysis when the user clearly wants the change applied.
|
||||
</provider_hints>
|
||||
|
|
|
|||
|
|
@ -976,7 +976,15 @@ class Document(BaseModel, TimestampMixin):
|
|||
document_metadata = Column(JSON, nullable=True)
|
||||
|
||||
content = Column(Text, nullable=False)
|
||||
content_hash = Column(String, nullable=False, index=True, unique=True)
|
||||
# ``content_hash`` is intentionally NOT globally unique. In a real
|
||||
# filesystem two files at different paths can hold identical bytes,
|
||||
# and the agent's ``write_file`` flow needs that semantic to support
|
||||
# copy / duplicate operations. Path uniqueness lives on
|
||||
# ``unique_identifier_hash`` (per search space). The hash remains
|
||||
# indexed because connector indexers consult it as a change-detection
|
||||
# / cross-source dedup hint via :func:`check_duplicate_document`.
|
||||
# See migration 133.
|
||||
content_hash = Column(String, nullable=False, index=True)
|
||||
unique_identifier_hash = Column(String, nullable=True, index=True, unique=True)
|
||||
embedding = Column(Vector(config.embedding_model_instance.dimension))
|
||||
|
||||
|
|
|
|||
|
|
@ -25,17 +25,33 @@ class TestProviderVariantDetection:
|
|||
@pytest.mark.parametrize(
|
||||
"model_name,expected",
|
||||
[
|
||||
# GPT-4 family routes to "classic" (autonomous-persistence style)
|
||||
("openai:gpt-4o-mini", "openai_classic"),
|
||||
("openai:gpt-4-turbo", "openai_classic"),
|
||||
# GPT-5 / o-series route to "reasoning" (channel-aware pragmatic)
|
||||
("openai:gpt-5", "openai_reasoning"),
|
||||
("openai:gpt-5-codex", "openai_reasoning"),
|
||||
("openai:o1-preview", "openai_reasoning"),
|
||||
("openai:o3-mini", "openai_reasoning"),
|
||||
# Codex family beats reasoning (more specific). Mirrors OpenCode
|
||||
# ``system.ts`` — ``gpt-*-codex`` gets the code-purist prompt.
|
||||
("openai:gpt-5-codex", "openai_codex"),
|
||||
("openai:gpt-codex", "openai_codex"),
|
||||
("openai:codex-mini", "openai_codex"),
|
||||
# Anthropic + Google
|
||||
("anthropic:claude-3-5-sonnet", "anthropic"),
|
||||
("anthropic/claude-opus-4", "anthropic"),
|
||||
("google:gemini-2.0-flash", "google"),
|
||||
("vertex:gemini-1.5-pro", "google"),
|
||||
# Newly-covered families
|
||||
("moonshot:kimi-k2", "kimi"),
|
||||
("openrouter:moonshot/kimi-k2.5", "kimi"),
|
||||
("xai:grok-2", "grok"),
|
||||
("openrouter:x-ai/grok-3", "grok"),
|
||||
("openai:deepseek-v3", "deepseek"),
|
||||
("deepseek:deepseek-r1", "deepseek"),
|
||||
# Unknown families fall back to default (no provider block emitted)
|
||||
("groq:mixtral-8x7b", "default"),
|
||||
("together:llama-3.1-70b", "default"),
|
||||
(None, "default"),
|
||||
("", "default"),
|
||||
],
|
||||
|
|
@ -43,6 +59,16 @@ class TestProviderVariantDetection:
|
|||
def test_detection(self, model_name: str | None, expected: str) -> None:
|
||||
assert detect_provider_variant(model_name) == expected
|
||||
|
||||
def test_codex_takes_precedence_over_reasoning(self) -> None:
|
||||
"""Regression guard: ``gpt-5-codex`` must NOT match the generic
|
||||
``gpt-5`` reasoning regex first. Codex is the more specialised
|
||||
prompt and mirrors OpenCode's dispatch order.
|
||||
"""
|
||||
from app.agents.new_chat.prompts.composer import detect_provider_variant
|
||||
|
||||
assert detect_provider_variant("openai:gpt-5-codex") == "openai_codex"
|
||||
assert detect_provider_variant("openai:gpt-5") == "openai_reasoning"
|
||||
|
||||
|
||||
class TestCompose:
|
||||
def test_default_prompt_has_required_blocks(self, fixed_today: datetime) -> None:
|
||||
|
|
@ -149,6 +175,52 @@ class TestCompose:
|
|||
prompt = compose_system_prompt(today=fixed_today, model_name="custom:foo")
|
||||
assert "<provider_hints>" not in prompt
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"model_name,expected_marker",
|
||||
[
|
||||
# Each marker is a unique-ish phrase from the corresponding fragment.
|
||||
# If a fragment is renamed/rewritten such that the marker is gone,
|
||||
# update both the fragment and this test deliberately.
|
||||
("openai:gpt-5-codex", "Codex-class"),
|
||||
("openai:gpt-5", "OpenAI reasoning model"),
|
||||
("openai:gpt-4o", "classic OpenAI chat model"),
|
||||
("anthropic:claude-3-5-sonnet", "Anthropic Claude"),
|
||||
("google:gemini-2.0-flash", "Google Gemini"),
|
||||
("moonshot:kimi-k2", "Moonshot Kimi"),
|
||||
("xai:grok-2", "xAI Grok"),
|
||||
("deepseek:deepseek-r1", "DeepSeek"),
|
||||
],
|
||||
)
|
||||
def test_each_known_variant_renders_with_its_marker(
|
||||
self,
|
||||
fixed_today: datetime,
|
||||
model_name: str,
|
||||
expected_marker: str,
|
||||
) -> None:
|
||||
"""Every supported variant must produce a ``<provider_hints>`` block
|
||||
containing its identifying marker. This pins the dispatch + the
|
||||
on-disk fragments together so a missing/renamed file is caught
|
||||
immediately.
|
||||
"""
|
||||
prompt = compose_system_prompt(today=fixed_today, model_name=model_name)
|
||||
assert "<provider_hints>" in prompt, (
|
||||
f"variant for {model_name!r} did not emit a provider_hints block; "
|
||||
"the corresponding providers/<variant>.md may be missing"
|
||||
)
|
||||
assert expected_marker in prompt, (
|
||||
f"variant for {model_name!r} emitted hints but lacked the "
|
||||
f"expected marker {expected_marker!r} — the fragment may have "
|
||||
"drifted from the dispatch table"
|
||||
)
|
||||
|
||||
def test_provider_blocks_are_byte_stable_across_calls(
|
||||
self, fixed_today: datetime
|
||||
) -> None:
|
||||
"""Cache-stability guard: same model id → byte-identical prompt."""
|
||||
a = compose_system_prompt(today=fixed_today, model_name="moonshot:kimi-k2")
|
||||
b = compose_system_prompt(today=fixed_today, model_name="moonshot:kimi-k2")
|
||||
assert a == b
|
||||
|
||||
def test_custom_system_instructions_override_default(
|
||||
self, fixed_today: datetime
|
||||
) -> None:
|
||||
|
|
|
|||
|
|
@ -0,0 +1,168 @@
|
|||
"""Unit tests for kb_persistence filesystem-parity invariants.
|
||||
|
||||
Specifically, these tests pin down that the agent-driven write_file flow
|
||||
treats path uniqueness — not content uniqueness — as the only hard
|
||||
invariant. This mirrors a real filesystem: ``cp a b`` produces two files
|
||||
with identical bytes living at different paths, and that should round-trip
|
||||
through :class:`KnowledgeBasePersistenceMiddleware` without losing the copy.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from app.agents.new_chat.middleware import kb_persistence
|
||||
from app.db import Document
|
||||
|
||||
|
||||
class _FakeResult:
|
||||
"""Minimal stand-in for ``sqlalchemy.engine.Result``."""
|
||||
|
||||
def __init__(self, value: Any = None) -> None:
|
||||
self._value = value
|
||||
|
||||
def scalar_one_or_none(self) -> Any:
|
||||
return self._value
|
||||
|
||||
def scalar(self) -> Any:
|
||||
return self._value
|
||||
|
||||
|
||||
class _FakeSession:
|
||||
"""Minimal AsyncSession stand-in scoped to ``_create_document`` needs.
|
||||
|
||||
Records every ``add`` so we can assert against the resulting Documents
|
||||
and Chunks. ``execute`` always returns "no row" by default — i.e. no
|
||||
folder hierarchy preexists and no path collision exists. Tests that
|
||||
want a path collision can override that on a per-call basis.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.added: list[Any] = []
|
||||
self.execute = AsyncMock(return_value=_FakeResult(None))
|
||||
self.flush = AsyncMock()
|
||||
|
||||
# Simulate ``await session.flush()`` assigning an id to the doc;
|
||||
# we increment a counter so each Document gets a unique id.
|
||||
self._next_id = 1
|
||||
|
||||
async def _flush_assigning_ids() -> None:
|
||||
for obj in self.added:
|
||||
if getattr(obj, "id", None) is None:
|
||||
obj.id = self._next_id
|
||||
self._next_id += 1
|
||||
|
||||
self.flush.side_effect = _flush_assigning_ids
|
||||
|
||||
def add(self, obj: Any) -> None:
|
||||
self.added.append(obj)
|
||||
|
||||
def add_all(self, objs: list[Any]) -> None:
|
||||
self.added.extend(objs)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _stub_embeddings_and_chunks(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
"""Avoid loading the embedding model in unit tests."""
|
||||
monkeypatch.setattr(
|
||||
kb_persistence,
|
||||
"embed_texts",
|
||||
lambda texts: [np.zeros(8, dtype=np.float32) for _ in texts],
|
||||
)
|
||||
monkeypatch.setattr(kb_persistence, "chunk_text", lambda content: [content])
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_document_allows_identical_content_at_different_paths() -> None:
|
||||
"""The core regression: ``cp /a/notes.md /b/notes-copy.md``.
|
||||
|
||||
Both create calls must succeed even though the bytes are byte-for-byte
|
||||
identical, because path is the only filesystem-style unique key.
|
||||
"""
|
||||
session = _FakeSession()
|
||||
content = "# Same body\n\nIdentical content used by two different paths.\n"
|
||||
|
||||
first = await kb_persistence._create_document(
|
||||
session, # type: ignore[arg-type]
|
||||
virtual_path="/documents/a/notes.md",
|
||||
content=content,
|
||||
search_space_id=42,
|
||||
created_by_id="user-1",
|
||||
)
|
||||
assert isinstance(first, Document)
|
||||
assert first.title == "notes.md"
|
||||
|
||||
# Second create with byte-identical content at a different path should
|
||||
# not raise — that's the whole point of the filesystem-parity fix.
|
||||
second = await kb_persistence._create_document(
|
||||
session, # type: ignore[arg-type]
|
||||
virtual_path="/documents/b/notes-copy.md",
|
||||
content=content,
|
||||
search_space_id=42,
|
||||
created_by_id="user-1",
|
||||
)
|
||||
assert isinstance(second, Document)
|
||||
assert second.title == "notes-copy.md"
|
||||
|
||||
# Both rows share the same content_hash but live at distinct paths
|
||||
# (distinct ``unique_identifier_hash``). That's the desired contract.
|
||||
assert first.content_hash == second.content_hash
|
||||
assert first.unique_identifier_hash != second.unique_identifier_hash
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_document_still_rejects_path_collision() -> None:
|
||||
"""Path uniqueness remains the hard invariant.
|
||||
|
||||
If ``unique_identifier_hash`` already points at an existing row in
|
||||
the same search space, the create call must raise ``ValueError``
|
||||
with a clear message — matching the behavior the commit loop relies
|
||||
on to upsert via the existing-row code path.
|
||||
"""
|
||||
session = _FakeSession()
|
||||
|
||||
# Path with no folder parts so ``_ensure_folder_hierarchy`` is a
|
||||
# no-op and the only SELECT executed is the path-collision check.
|
||||
# That SELECT returns an existing doc id, triggering the guard.
|
||||
session.execute = AsyncMock(return_value=_FakeResult(value=99))
|
||||
|
||||
with pytest.raises(ValueError, match="already exists at path"):
|
||||
await kb_persistence._create_document(
|
||||
session, # type: ignore[arg-type]
|
||||
virtual_path="/documents/notes.md",
|
||||
content="anything",
|
||||
search_space_id=42,
|
||||
created_by_id="user-1",
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_document_does_not_query_for_content_hash_collision(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
) -> None:
|
||||
"""Regression guard: the legacy second SELECT (content_hash collision
|
||||
pre-check) must be gone. Counting ``execute`` calls is a brittle but
|
||||
effective way to lock that in.
|
||||
|
||||
The current flow runs exactly one ``execute`` for the path-collision
|
||||
SELECT (no folder parts in this path → ``_ensure_folder_hierarchy``
|
||||
short-circuits). If a future refactor reintroduces a content-hash
|
||||
SELECT, this test will fail loud.
|
||||
"""
|
||||
session = _FakeSession()
|
||||
await kb_persistence._create_document(
|
||||
session, # type: ignore[arg-type]
|
||||
virtual_path="/documents/notes.md",
|
||||
content="hello",
|
||||
search_space_id=42,
|
||||
created_by_id="user-1",
|
||||
)
|
||||
# Path-collision SELECT only. No content_hash SELECT.
|
||||
assert session.execute.await_count == 1, (
|
||||
f"Unexpected execute count {session.execute.await_count}; "
|
||||
"did the legacy content_hash collision pre-check get re-added?"
|
||||
)
|
||||
|
|
@ -210,6 +210,27 @@ button {
|
|||
}
|
||||
}
|
||||
|
||||
/* Citation-jump highlight — entrance pulse only. The `SearchHighlightLeaf`
|
||||
(see components/ui/search-highlight-node.tsx) is otherwise statically
|
||||
tinted; this animation runs once on mount to draw the eye to the cited
|
||||
text after `scrollIntoView` lands. The highlight itself is permanent
|
||||
until the user clicks inside the editor (or another dismissal trigger
|
||||
fires in `EditorPanelContent`). */
|
||||
@keyframes citation-flash-in {
|
||||
0% {
|
||||
background-color: transparent;
|
||||
box-shadow: 0 0 0 0 transparent;
|
||||
}
|
||||
40% {
|
||||
background-color: color-mix(in oklab, var(--primary) 30%, transparent);
|
||||
box-shadow: 0 0 0 3px color-mix(in oklab, var(--primary) 25%, transparent);
|
||||
}
|
||||
100% {
|
||||
background-color: color-mix(in oklab, var(--primary) 15%, transparent);
|
||||
box-shadow: 0 0 0 1px color-mix(in oklab, var(--primary) 40%, transparent);
|
||||
}
|
||||
}
|
||||
|
||||
/* Human-in-the-loop approval card animations */
|
||||
@keyframes pulse-subtle {
|
||||
0%,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,19 @@
|
|||
import { atom } from "jotai";
|
||||
|
||||
/**
|
||||
* Cross-component handoff for citation jumps. Set by `InlineCitation` when a
|
||||
* numeric chunk badge is clicked (after the document has been resolved); read
|
||||
* by `DocumentTabContent` once the matching document tab mounts so it can
|
||||
* scroll to and softly highlight the cited chunk inside the rendered markdown.
|
||||
*
|
||||
* Cleared by `DocumentTabContent` only after a terminal state — exact /
|
||||
* approximate / miss — has been reached, so that an escalation refetch (2MB
|
||||
* preview → 16MB) keeps the pending intent alive across the re-render.
|
||||
*/
|
||||
export interface PendingChunkHighlight {
|
||||
documentId: number;
|
||||
chunkId: number;
|
||||
chunkText: string;
|
||||
}
|
||||
|
||||
export const pendingChunkHighlightAtom = atom<PendingChunkHighlight | null>(null);
|
||||
|
|
@ -1,26 +1,45 @@
|
|||
"use client";
|
||||
|
||||
import { FileText } from "lucide-react";
|
||||
import { useQuery, useQueryClient } from "@tanstack/react-query";
|
||||
import { useSetAtom } from "jotai";
|
||||
import { ExternalLink, FileText } from "lucide-react";
|
||||
import type { FC } from "react";
|
||||
import { useState } from "react";
|
||||
import { useCallback, useEffect, useRef, useState } from "react";
|
||||
import { toast } from "sonner";
|
||||
import { pendingChunkHighlightAtom } from "@/atoms/document-viewer/pending-chunk-highlight.atom";
|
||||
import { openEditorPanelAtom } from "@/atoms/editor/editor-panel.atom";
|
||||
import { useCitationMetadata } from "@/components/assistant-ui/citation-metadata-context";
|
||||
import { SourceDetailPanel } from "@/components/new-chat/source-detail-panel";
|
||||
import { MarkdownViewer } from "@/components/markdown-viewer";
|
||||
import { Citation } from "@/components/tool-ui/citation";
|
||||
import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover";
|
||||
import { Spinner } from "@/components/ui/spinner";
|
||||
import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
|
||||
import { documentsApiService } from "@/lib/apis/documents-api.service";
|
||||
import { cacheKeys } from "@/lib/query-client/cache-keys";
|
||||
|
||||
interface InlineCitationProps {
|
||||
chunkId: number;
|
||||
isDocsChunk?: boolean;
|
||||
}
|
||||
|
||||
const POPOVER_HOVER_CLOSE_DELAY_MS = 150;
|
||||
|
||||
/**
|
||||
* Inline citation for knowledge-base chunks (numeric chunk IDs).
|
||||
* Renders a clickable badge showing the actual chunk ID that opens the SourceDetailPanel.
|
||||
* Negative chunk IDs indicate anonymous/synthetic uploads and render as a static badge.
|
||||
* Inline citation badge for knowledge-base chunks (numeric chunk IDs) and
|
||||
* Surfsense documentation chunks (`isDocsChunk`). Negative chunk IDs render as
|
||||
* a static "doc" pill (anonymous/synthetic uploads).
|
||||
*
|
||||
* Numeric KB chunks: clicking resolves the parent document via
|
||||
* `getDocumentByChunk`, opens the document in the right side panel (alongside
|
||||
* the chat — does not replace it), and stages the cited chunk text in
|
||||
* `pendingChunkHighlightAtom` so `EditorPanelContent` can scroll to and softly
|
||||
* highlight it inside the rendered markdown.
|
||||
*
|
||||
* Surfsense docs chunks: rendered as a hover-controlled shadcn Popover that
|
||||
* lazily fetches and previews the cited chunk inline, since those docs aren't
|
||||
* indexed into the user's search space and have no tab to open.
|
||||
*/
|
||||
export const InlineCitation: FC<InlineCitationProps> = ({ chunkId, isDocsChunk = false }) => {
|
||||
const [isOpen, setIsOpen] = useState(false);
|
||||
|
||||
if (chunkId < 0) {
|
||||
return (
|
||||
<Tooltip>
|
||||
|
|
@ -38,26 +57,185 @@ export const InlineCitation: FC<InlineCitationProps> = ({ chunkId, isDocsChunk =
|
|||
);
|
||||
}
|
||||
|
||||
if (isDocsChunk) {
|
||||
return <SurfsenseDocCitation chunkId={chunkId} />;
|
||||
}
|
||||
|
||||
return <NumericChunkCitation chunkId={chunkId} />;
|
||||
};
|
||||
|
||||
const NumericChunkCitation: FC<{ chunkId: number }> = ({ chunkId }) => {
|
||||
const queryClient = useQueryClient();
|
||||
const setPendingHighlight = useSetAtom(pendingChunkHighlightAtom);
|
||||
const openEditorPanel = useSetAtom(openEditorPanelAtom);
|
||||
const [resolving, setResolving] = useState(false);
|
||||
|
||||
const handleClick = useCallback(async () => {
|
||||
if (resolving) return;
|
||||
setResolving(true);
|
||||
console.log("[citation:click] start", { chunkId });
|
||||
try {
|
||||
const data = await queryClient.fetchQuery({
|
||||
// Local key with explicit window. The shared `cacheKeys.documents.byChunk`
|
||||
// is window-agnostic (latent footgun); namespace the call to avoid
|
||||
// reusing a different-window cached result.
|
||||
queryKey: ["documents", "by-chunk", chunkId, "w0"] as const,
|
||||
queryFn: () =>
|
||||
documentsApiService.getDocumentByChunk({ chunk_id: chunkId, chunk_window: 0 }),
|
||||
staleTime: 5 * 60 * 1000,
|
||||
});
|
||||
const cited = data.chunks.find((c) => c.id === chunkId) ?? data.chunks[0];
|
||||
console.log("[citation:click] fetched doc-by-chunk", {
|
||||
docId: data.id,
|
||||
docTitle: data.title,
|
||||
chunksReturned: data.chunks.length,
|
||||
citedChunkId: cited?.id,
|
||||
citedChunkContentLen: cited?.content?.length ?? 0,
|
||||
citedChunkPreview:
|
||||
cited?.content && cited.content.length > 120
|
||||
? `${cited.content.slice(0, 120)}…(+${cited.content.length - 120})`
|
||||
: (cited?.content ?? ""),
|
||||
});
|
||||
// Stage the highlight BEFORE opening the panel so `EditorPanelContent`
|
||||
// already sees the pending intent on its very first render — avoids a
|
||||
// "fetch → render → no-pending → next-tick render with pending" race.
|
||||
setPendingHighlight({
|
||||
documentId: data.id,
|
||||
chunkId,
|
||||
chunkText: cited?.content ?? "",
|
||||
});
|
||||
openEditorPanel({
|
||||
documentId: data.id,
|
||||
searchSpaceId: data.search_space_id,
|
||||
title: data.title,
|
||||
});
|
||||
console.log("[citation:click] staged highlight + opened editor panel", {
|
||||
documentId: data.id,
|
||||
});
|
||||
} catch (err) {
|
||||
console.warn("[citation:click] failed", err);
|
||||
toast.error(err instanceof Error ? err.message : "Couldn't open cited document");
|
||||
} finally {
|
||||
setResolving(false);
|
||||
}
|
||||
}, [chunkId, openEditorPanel, queryClient, resolving, setPendingHighlight]);
|
||||
|
||||
return (
|
||||
<SourceDetailPanel
|
||||
open={isOpen}
|
||||
onOpenChange={setIsOpen}
|
||||
chunkId={chunkId}
|
||||
sourceType={isDocsChunk ? "SURFSENSE_DOCS" : ""}
|
||||
title={isDocsChunk ? "Surfsense Documentation" : "Source"}
|
||||
description=""
|
||||
url=""
|
||||
isDocsChunk={isDocsChunk}
|
||||
<button
|
||||
type="button"
|
||||
onClick={handleClick}
|
||||
disabled={resolving}
|
||||
className="ml-0.5 inline-flex h-5 min-w-5 cursor-pointer items-center justify-center rounded-md bg-muted/60 px-1.5 text-[11px] font-medium text-muted-foreground align-baseline shadow-sm transition-colors hover:bg-muted hover:text-foreground focus-visible:ring-ring focus-visible:ring-2 focus-visible:outline-none disabled:cursor-progress disabled:opacity-70"
|
||||
title={`View source chunk #${chunkId}`}
|
||||
aria-label={`Jump to cited chunk ${chunkId}`}
|
||||
>
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => setIsOpen(true)}
|
||||
className="ml-0.5 inline-flex h-5 min-w-5 cursor-pointer items-center justify-center rounded-md bg-muted/60 px-1.5 text-[11px] font-medium text-muted-foreground align-baseline shadow-sm transition-colors hover:bg-muted hover:text-foreground focus-visible:ring-ring focus-visible:ring-2 focus-visible:outline-none"
|
||||
title={`View source chunk #${chunkId}`}
|
||||
{resolving ? <Spinner size="xs" /> : chunkId}
|
||||
</button>
|
||||
);
|
||||
};
|
||||
|
||||
const SurfsenseDocCitation: FC<{ chunkId: number }> = ({ chunkId }) => {
|
||||
const [open, setOpen] = useState(false);
|
||||
const closeTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
|
||||
|
||||
const cancelClose = useCallback(() => {
|
||||
if (closeTimerRef.current) {
|
||||
clearTimeout(closeTimerRef.current);
|
||||
closeTimerRef.current = null;
|
||||
}
|
||||
}, []);
|
||||
|
||||
const scheduleClose = useCallback(() => {
|
||||
cancelClose();
|
||||
closeTimerRef.current = setTimeout(() => {
|
||||
setOpen(false);
|
||||
closeTimerRef.current = null;
|
||||
}, POPOVER_HOVER_CLOSE_DELAY_MS);
|
||||
}, [cancelClose]);
|
||||
|
||||
useEffect(() => () => cancelClose(), [cancelClose]);
|
||||
|
||||
const { data, isLoading, error } = useQuery({
|
||||
queryKey: cacheKeys.documents.byChunk(`doc-${chunkId}`),
|
||||
queryFn: () => documentsApiService.getSurfsenseDocByChunk(chunkId),
|
||||
enabled: open,
|
||||
staleTime: 5 * 60 * 1000,
|
||||
});
|
||||
|
||||
const citedChunk = data?.chunks.find((c) => c.id === chunkId) ?? data?.chunks[0];
|
||||
|
||||
return (
|
||||
<Popover open={open} onOpenChange={setOpen}>
|
||||
<PopoverTrigger asChild>
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => setOpen((prev) => !prev)}
|
||||
onMouseEnter={() => {
|
||||
cancelClose();
|
||||
setOpen(true);
|
||||
}}
|
||||
onMouseLeave={scheduleClose}
|
||||
onFocus={() => {
|
||||
cancelClose();
|
||||
setOpen(true);
|
||||
}}
|
||||
onBlur={scheduleClose}
|
||||
className="ml-0.5 inline-flex h-5 min-w-5 cursor-pointer items-center justify-center gap-0.5 rounded-md bg-primary/10 px-1.5 text-[11px] font-medium text-primary align-baseline shadow-sm transition-colors hover:bg-primary/15 focus-visible:ring-ring focus-visible:ring-2 focus-visible:outline-none"
|
||||
aria-label={`Show Surfsense documentation chunk ${chunkId}`}
|
||||
title="Surfsense documentation"
|
||||
>
|
||||
<FileText className="size-3" />
|
||||
doc
|
||||
</button>
|
||||
</PopoverTrigger>
|
||||
<PopoverContent
|
||||
className="w-96 max-w-[calc(100vw-2rem)] p-0"
|
||||
align="start"
|
||||
sideOffset={6}
|
||||
onMouseEnter={cancelClose}
|
||||
onMouseLeave={scheduleClose}
|
||||
onOpenAutoFocus={(e) => e.preventDefault()}
|
||||
>
|
||||
{chunkId}
|
||||
</button>
|
||||
</SourceDetailPanel>
|
||||
<div className="flex items-center justify-between gap-2 border-b px-3 py-2">
|
||||
<div className="min-w-0">
|
||||
<p className="truncate text-sm font-medium">
|
||||
{data?.title ?? "Surfsense documentation"}
|
||||
</p>
|
||||
<p className="text-[11px] text-muted-foreground">Chunk #{chunkId}</p>
|
||||
</div>
|
||||
{data?.source && (
|
||||
<a
|
||||
href={data.source}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="inline-flex shrink-0 items-center gap-1 rounded-md px-2 py-1 text-[11px] font-medium text-primary hover:bg-primary/10"
|
||||
>
|
||||
<ExternalLink className="size-3" />
|
||||
Open
|
||||
</a>
|
||||
)}
|
||||
</div>
|
||||
<div className="max-h-72 overflow-auto px-3 py-2 text-sm">
|
||||
{isLoading && (
|
||||
<div className="flex items-center gap-2 py-4 text-muted-foreground">
|
||||
<Spinner size="xs" />
|
||||
<span className="text-xs">Loading…</span>
|
||||
</div>
|
||||
)}
|
||||
{error && (
|
||||
<p className="py-4 text-xs text-destructive">
|
||||
{error instanceof Error ? error.message : "Failed to load chunk"}
|
||||
</p>
|
||||
)}
|
||||
{!isLoading && !error && citedChunk?.content && (
|
||||
<MarkdownViewer content={citedChunk.content} maxLength={1500} />
|
||||
)}
|
||||
{!isLoading && !error && !citedChunk?.content && (
|
||||
<p className="py-4 text-xs text-muted-foreground">No content available.</p>
|
||||
)}
|
||||
</div>
|
||||
</PopoverContent>
|
||||
</Popover>
|
||||
);
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
"use client";
|
||||
|
||||
import { FindReplacePlugin } from "@platejs/find-replace";
|
||||
import { useAtomValue, useSetAtom } from "jotai";
|
||||
import {
|
||||
Check,
|
||||
|
|
@ -14,17 +15,21 @@ import {
|
|||
import dynamic from "next/dynamic";
|
||||
import { useCallback, useEffect, useRef, useState } from "react";
|
||||
import { toast } from "sonner";
|
||||
import { pendingChunkHighlightAtom } from "@/atoms/document-viewer/pending-chunk-highlight.atom";
|
||||
import { closeEditorPanelAtom, editorPanelAtom } from "@/atoms/editor/editor-panel.atom";
|
||||
import { VersionHistoryButton } from "@/components/documents/version-history";
|
||||
import type { PlateEditorInstance } from "@/components/editor/plate-editor";
|
||||
import { SourceCodeEditor } from "@/components/editor/source-code-editor";
|
||||
import { MarkdownViewer } from "@/components/markdown-viewer";
|
||||
import { Alert, AlertDescription } from "@/components/ui/alert";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import { Drawer, DrawerContent, DrawerHandle, DrawerTitle } from "@/components/ui/drawer";
|
||||
import { CITATION_HIGHLIGHT_CLASS } from "@/components/ui/search-highlight-node";
|
||||
import { Spinner } from "@/components/ui/spinner";
|
||||
import { useMediaQuery } from "@/hooks/use-media-query";
|
||||
import { useElectronAPI } from "@/hooks/use-platform";
|
||||
import { authenticatedFetch, getBearerToken, redirectToLogin } from "@/lib/auth-utils";
|
||||
import { buildCitationSearchCandidates } from "@/lib/citation-search";
|
||||
import { inferMonacoLanguageFromPath } from "@/lib/editor-language";
|
||||
|
||||
const PlateEditor = dynamic(
|
||||
|
|
@ -32,7 +37,10 @@ const PlateEditor = dynamic(
|
|||
{ ssr: false, loading: () => <EditorPanelSkeleton /> }
|
||||
);
|
||||
|
||||
type CitationHighlightStatus = "exact" | "miss";
|
||||
|
||||
const LARGE_DOCUMENT_THRESHOLD = 2 * 1024 * 1024; // 2MB
|
||||
const CITATION_MAX_LENGTH = 16 * 1024 * 1024; // 16MB on-demand cap for citation jumps
|
||||
|
||||
interface EditorContent {
|
||||
document_id: number;
|
||||
|
|
@ -136,6 +144,61 @@ export function EditorPanelContent({
|
|||
const [displayTitle, setDisplayTitle] = useState(title || "Untitled");
|
||||
const isLocalFileMode = kind === "local_file";
|
||||
const editorRenderMode: EditorRenderMode = isLocalFileMode ? "source_code" : "rich_markdown";
|
||||
|
||||
// --- Citation-jump highlight wiring ----------------------------------
|
||||
// `EditorPanelContent` is the consumer of `pendingChunkHighlightAtom`: when
|
||||
// a citation badge is clicked, the badge stages `{documentId, chunkId,
|
||||
// chunkText}` and opens this panel. We drive Plate's `FindReplacePlugin`
|
||||
// (registered in every preset) to highlight the cited text natively via
|
||||
// Slate decorations — no DOM walking, no Range gymnastics. The state
|
||||
// machine below escalates the document fetch from 2MB → 16MB once if no
|
||||
// candidate snippet matched in the preview, and surfaces miss outcomes
|
||||
// via an inline alert.
|
||||
const pending = useAtomValue(pendingChunkHighlightAtom);
|
||||
const setPendingHighlight = useSetAtom(pendingChunkHighlightAtom);
|
||||
const [fetchKey, setFetchKey] = useState(0);
|
||||
const [maxLengthOverride, setMaxLengthOverride] = useState<number | null>(null);
|
||||
const [highlightResult, setHighlightResult] = useState<CitationHighlightStatus | null>(null);
|
||||
const editorRef = useRef<PlateEditorInstance | null>(null);
|
||||
const escalatedForRef = useRef<number | null>(null);
|
||||
const lastAppliedChunkIdRef = useRef<number | null>(null);
|
||||
// Tracks whether a citation highlight is currently decorated in the
|
||||
// editor. We use a ref (not state) because the click-to-dismiss handler
|
||||
// runs in a stable callback that would otherwise close over stale state.
|
||||
const isHighlightActiveRef = useRef(false);
|
||||
// Once a citation jump targets this doc we have to keep `PlateEditor`
|
||||
// mounted for the *rest of the doc session* — even after the highlight
|
||||
// effect clears `pendingChunkHighlightAtom` (which it does as soon as
|
||||
// the decoration is applied, so a follow-up citation on the same chunk
|
||||
// can re-trigger). Without this latch, non-editable docs would re-render
|
||||
// back into `MarkdownViewer` the instant `pending` is released, tearing
|
||||
// down the Plate decorations and dropping the highlight after a frame.
|
||||
const [stickyPlateMode, setStickyPlateMode] = useState(false);
|
||||
|
||||
const clearCitationSearch = useCallback(() => {
|
||||
isHighlightActiveRef.current = false;
|
||||
const editor = editorRef.current;
|
||||
if (!editor) return;
|
||||
try {
|
||||
editor.setOption(FindReplacePlugin, "search", "");
|
||||
editor.api.redecorate();
|
||||
} catch (err) {
|
||||
console.warn("[EditorPanelContent] clearCitationSearch failed:", err);
|
||||
}
|
||||
}, []);
|
||||
|
||||
// Dismiss the highlight when the user interacts with the editor surface.
|
||||
// `onPointerDown` fires before focus / selection changes so the click
|
||||
// itself feels responsive — the highlight clears in the same event tick
|
||||
// that places the cursor. No-op when nothing is highlighted, so we don't
|
||||
// thrash `redecorate` on every click in normal editing.
|
||||
const handleEditorPointerDown = useCallback(() => {
|
||||
if (!isHighlightActiveRef.current) return;
|
||||
clearCitationSearch();
|
||||
setHighlightResult(null);
|
||||
}, [clearCitationSearch]);
|
||||
|
||||
const isCitationTarget = !!pending && !isLocalFileMode && pending.documentId === documentId;
|
||||
const resolveLocalVirtualPath = useCallback(
|
||||
async (candidatePath: string): Promise<string> => {
|
||||
if (!electronAPI?.getAgentFilesystemMounts) {
|
||||
|
|
@ -155,6 +218,8 @@ export function EditorPanelContent({
|
|||
|
||||
const isLargeDocument = (editorDoc?.content_size_bytes ?? 0) > LARGE_DOCUMENT_THRESHOLD;
|
||||
|
||||
// `fetchKey` is an explicit re-fetch trigger (escalation bumps it to force
|
||||
// a new request even when documentId/searchSpaceId haven't changed).
|
||||
useEffect(() => {
|
||||
const controller = new AbortController();
|
||||
setIsLoading(true);
|
||||
|
|
@ -166,6 +231,12 @@ export function EditorPanelContent({
|
|||
setIsEditing(false);
|
||||
initialLoadDone.current = false;
|
||||
changeCountRef.current = 0;
|
||||
// Clear any in-flight FindReplacePlugin search before the editor
|
||||
// re-mounts on new content (a fresh editor key is generated below
|
||||
// from documentId + isEditing, so the previous editor + its
|
||||
// decorations are about to be discarded anyway, but we belt-and-
|
||||
// brace here for the case where only `fetchKey` changed).
|
||||
clearCitationSearch();
|
||||
|
||||
const doFetch = async () => {
|
||||
try {
|
||||
|
|
@ -210,7 +281,11 @@ export function EditorPanelContent({
|
|||
const url = new URL(
|
||||
`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/search-spaces/${searchSpaceId}/documents/${documentId}/editor-content`
|
||||
);
|
||||
url.searchParams.set("max_length", String(LARGE_DOCUMENT_THRESHOLD));
|
||||
url.searchParams.set("max_length", String(maxLengthOverride ?? LARGE_DOCUMENT_THRESHOLD));
|
||||
// `fetchKey` participates here so biome's noUnusedVariables sees it
|
||||
// as consumed; bumping it forces a fresh request even when the URL
|
||||
// is otherwise identical.
|
||||
if (fetchKey > 0) url.searchParams.set("_n", String(fetchKey));
|
||||
|
||||
const response = await authenticatedFetch(url.toString(), { method: "GET" });
|
||||
|
||||
|
|
@ -256,8 +331,259 @@ export function EditorPanelContent({
|
|||
resolveLocalVirtualPath,
|
||||
searchSpaceId,
|
||||
title,
|
||||
fetchKey,
|
||||
maxLengthOverride,
|
||||
clearCitationSearch,
|
||||
]);
|
||||
|
||||
// Reset citation-jump bookkeeping whenever the panel switches to a different
|
||||
// document (or local file). Body only writes setters — the deps are the
|
||||
// real triggers we want to react to.
|
||||
// biome-ignore lint/correctness/useExhaustiveDependencies: documentId/localFilePath are intentional triggers.
|
||||
useEffect(() => {
|
||||
clearCitationSearch();
|
||||
escalatedForRef.current = null;
|
||||
lastAppliedChunkIdRef.current = null;
|
||||
setHighlightResult(null);
|
||||
setMaxLengthOverride(null);
|
||||
setFetchKey(0);
|
||||
// Drop sticky Plate mode when the panel moves to a different doc
|
||||
// — the next doc starts in its preferred render mode (Plate for
|
||||
// editable, MarkdownViewer for everything else) until/unless a
|
||||
// citation jump targets it.
|
||||
setStickyPlateMode(false);
|
||||
}, [documentId, localFilePath, clearCitationSearch]);
|
||||
|
||||
// Latch sticky Plate mode the first time a citation jump targets this
|
||||
// doc. We keep it sticky for the remainder of this doc session so the
|
||||
// highlight effect's `setPendingHighlight(null)` doesn't unmount the
|
||||
// editor mid-flight (see comment on `stickyPlateMode` declaration).
|
||||
useEffect(() => {
|
||||
if (isCitationTarget) setStickyPlateMode(true);
|
||||
}, [isCitationTarget]);
|
||||
|
||||
// `isEditorReady` is what `useEffect` actually depends on — `editorRef`
|
||||
// is a ref so changes don't trigger re-runs. We flip this to `true` once
|
||||
// `PlateEditor` calls back with its live editor instance (its
|
||||
// `usePlateEditor` value-init runs synchronously, so by the time this
|
||||
// flips true the markdown is already deserialized into the Slate tree).
|
||||
const [isEditorReady, setIsEditorReady] = useState(false);
|
||||
const handleEditorReady = useCallback((editor: PlateEditorInstance | null) => {
|
||||
console.log("[citation:editor] handleEditorReady", { ready: !!editor });
|
||||
editorRef.current = editor;
|
||||
setIsEditorReady(!!editor);
|
||||
}, []);
|
||||
|
||||
// --- Citation jump highlight effect -----------------------------------
|
||||
// Drives Plate's FindReplacePlugin to highlight the cited chunk:
|
||||
// 1. Build candidate snippets from the chunk text (first sentence,
|
||||
// first 8 words, full chunk if short). Plate's decorate runs per-
|
||||
// block and won't cross block boundaries, so the shorter
|
||||
// candidates exist to give us something that fits in one
|
||||
// paragraph / heading.
|
||||
// 2. For each candidate: setOption('search', ...) → redecorate →
|
||||
// wait two animation frames for React to flush → query the editor
|
||||
// DOM for `.${CITATION_HIGHLIGHT_CLASS}`. First hit wins.
|
||||
//
|
||||
// Why a className and not a `data-*` attribute? Plate's
|
||||
// `PlateLeaf` runs its props through `useNodeAttributes`, which
|
||||
// only forwards `attributes`, `className`, `ref`, and `style` —
|
||||
// arbitrary `data-*` attributes are silently dropped. `className`
|
||||
// is the only escape hatch guaranteed to survive into the DOM.
|
||||
// 3. On hit: smooth-scroll the first match into view, mark the
|
||||
// highlight active (so a click inside the editor can dismiss it),
|
||||
// release the pending atom.
|
||||
// 4. On terminal miss: if the doc was truncated and we haven't
|
||||
// escalated yet, bump the fetch's `max_length` to the citation
|
||||
// cap and re-fetch — the post-refetch render will re-run this
|
||||
// effect against the larger preview. Otherwise, release the
|
||||
// atom and show the miss alert.
|
||||
useEffect(() => {
|
||||
console.log("[citation:effect] fired", {
|
||||
isCitationTarget,
|
||||
pendingDocId: pending?.documentId,
|
||||
pendingChunkId: pending?.chunkId,
|
||||
pendingChunkTextLen: pending?.chunkText?.length,
|
||||
documentId,
|
||||
isLocalFileMode,
|
||||
isEditing,
|
||||
hasMarkdown: !!editorDoc?.source_markdown,
|
||||
markdownLen: editorDoc?.source_markdown?.length,
|
||||
truncated: editorDoc?.truncated,
|
||||
isEditorReady,
|
||||
editorRefSet: !!editorRef.current,
|
||||
maxLengthOverride,
|
||||
});
|
||||
if (!isCitationTarget || !pending) {
|
||||
console.log("[citation:effect] guard ✗ no citation target / no pending");
|
||||
return;
|
||||
}
|
||||
if (isLocalFileMode || isEditing) {
|
||||
console.log("[citation:effect] guard ✗ localFileMode/editing");
|
||||
return;
|
||||
}
|
||||
if (!editorDoc?.source_markdown) {
|
||||
console.log("[citation:effect] guard ✗ source_markdown not ready");
|
||||
return;
|
||||
}
|
||||
if (!isEditorReady) {
|
||||
console.log("[citation:effect] guard ✗ editor not ready yet");
|
||||
return;
|
||||
}
|
||||
const editor = editorRef.current;
|
||||
if (!editor) {
|
||||
console.log("[citation:effect] guard ✗ editorRef.current is null");
|
||||
return;
|
||||
}
|
||||
|
||||
if (lastAppliedChunkIdRef.current !== pending.chunkId) {
|
||||
lastAppliedChunkIdRef.current = pending.chunkId;
|
||||
}
|
||||
|
||||
let cancelled = false;
|
||||
|
||||
const finishMiss = () => {
|
||||
console.log("[citation:effect] terminal miss — no candidate matched");
|
||||
try {
|
||||
editor.setOption(FindReplacePlugin, "search", "");
|
||||
editor.api.redecorate();
|
||||
} catch (err) {
|
||||
console.warn("[EditorPanelContent] reset search after miss failed:", err);
|
||||
}
|
||||
const canEscalate =
|
||||
editorDoc.truncated === true &&
|
||||
(maxLengthOverride ?? LARGE_DOCUMENT_THRESHOLD) < CITATION_MAX_LENGTH &&
|
||||
escalatedForRef.current !== pending.chunkId;
|
||||
console.log("[citation:effect] miss decision", {
|
||||
truncated: editorDoc.truncated,
|
||||
currentMaxLength: maxLengthOverride ?? LARGE_DOCUMENT_THRESHOLD,
|
||||
canEscalate,
|
||||
});
|
||||
if (canEscalate) {
|
||||
escalatedForRef.current = pending.chunkId;
|
||||
setMaxLengthOverride(CITATION_MAX_LENGTH);
|
||||
setFetchKey((k) => k + 1);
|
||||
// Keep the atom set so the post-refetch render re-runs.
|
||||
return;
|
||||
}
|
||||
setHighlightResult("miss");
|
||||
setPendingHighlight(null);
|
||||
};
|
||||
|
||||
const tryCandidates = async () => {
|
||||
const candidates = buildCitationSearchCandidates(pending.chunkText);
|
||||
console.log("[citation:effect] candidates built", {
|
||||
count: candidates.length,
|
||||
previews: candidates.map((c) => c.slice(0, 60)),
|
||||
});
|
||||
if (candidates.length === 0) {
|
||||
if (!cancelled) finishMiss();
|
||||
return;
|
||||
}
|
||||
// Resolve the editor's rendered DOM root via Slate's stable
|
||||
// `[data-slate-editor="true"]` attribute (set by slate-react's
|
||||
// `<Editable>`). Scoping queries to this root prevents
|
||||
// `<mark>` elements rendered elsewhere on the page (e.g. chat
|
||||
// search-highlight leaves in another mounted PlateEditor) from
|
||||
// being mistaken for citation hits.
|
||||
const editorRoot = document.querySelector<HTMLElement>('[data-slate-editor="true"]');
|
||||
console.log("[citation:effect] editor root", {
|
||||
hasRoot: !!editorRoot,
|
||||
});
|
||||
const root: ParentNode = editorRoot ?? document;
|
||||
|
||||
for (let i = 0; i < candidates.length; i++) {
|
||||
const candidate = candidates[i];
|
||||
if (cancelled) return;
|
||||
try {
|
||||
editor.setOption(FindReplacePlugin, "search", candidate);
|
||||
editor.api.redecorate();
|
||||
console.log(`[citation:effect] try #${i} setOption + redecorate`, {
|
||||
len: candidate.length,
|
||||
preview: candidate.slice(0, 80),
|
||||
});
|
||||
} catch (err) {
|
||||
console.warn("[EditorPanelContent] setOption/redecorate failed:", err);
|
||||
continue;
|
||||
}
|
||||
// Two rAFs: first lets Slate flush its onChange, second lets
|
||||
// React commit the decoration leaves into the DOM.
|
||||
await new Promise<void>((resolve) =>
|
||||
requestAnimationFrame(() => requestAnimationFrame(() => resolve()))
|
||||
);
|
||||
if (cancelled) return;
|
||||
// Primary probe: by our stable class on the rendered <mark>.
|
||||
let el = root.querySelector<HTMLElement>(`.${CITATION_HIGHLIGHT_CLASS}`);
|
||||
const classMarkCount = root.querySelectorAll(`.${CITATION_HIGHLIGHT_CLASS}`).length;
|
||||
// Diagnostic fallback: any <mark> inside the editor root.
|
||||
// If we ever see allMarks > 0 but classMarkCount === 0,
|
||||
// the className was stripped again and we need to revisit
|
||||
// `useNodeAttributes` filtering.
|
||||
const allMarkCount = root.querySelectorAll("mark").length;
|
||||
if (!el && allMarkCount > 0) {
|
||||
el = root.querySelector<HTMLElement>("mark");
|
||||
}
|
||||
console.log(`[citation:effect] try #${i} DOM probe`, {
|
||||
foundEl: !!el,
|
||||
classMarkCount,
|
||||
allMarkCount,
|
||||
usedFallback: !!el && classMarkCount === 0,
|
||||
});
|
||||
if (el) {
|
||||
try {
|
||||
el.scrollIntoView({ block: "center", behavior: "smooth" });
|
||||
} catch {
|
||||
el.scrollIntoView();
|
||||
}
|
||||
isHighlightActiveRef.current = true;
|
||||
setHighlightResult("exact");
|
||||
console.log(`[citation:effect] ✓ exact via candidate #${i} — atom released`);
|
||||
// No auto-clear timer — the highlight is intentionally
|
||||
// permanent until the user clicks inside the editor (see
|
||||
// `handleEditorPointerDown`) or another dismissal trigger
|
||||
// fires (doc switch, edit-mode toggle, panel unmount,
|
||||
// next citation jump). Sticky Plate mode keeps the
|
||||
// editor mounted after the atom clears.
|
||||
setPendingHighlight(null);
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (!cancelled) finishMiss();
|
||||
};
|
||||
|
||||
void tryCandidates();
|
||||
|
||||
return () => {
|
||||
cancelled = true;
|
||||
};
|
||||
}, [
|
||||
isCitationTarget,
|
||||
pending,
|
||||
documentId,
|
||||
editorDoc?.source_markdown,
|
||||
editorDoc?.truncated,
|
||||
isLocalFileMode,
|
||||
isEditing,
|
||||
isEditorReady,
|
||||
maxLengthOverride,
|
||||
clearCitationSearch,
|
||||
setPendingHighlight,
|
||||
]);
|
||||
|
||||
// Cleanup any active highlight on unmount.
|
||||
useEffect(() => {
|
||||
return () => clearCitationSearch();
|
||||
}, [clearCitationSearch]);
|
||||
|
||||
// Toggling into edit mode swaps Plate out of readOnly. Clear the citation
|
||||
// search so stale leaves don't linger in the editing surface.
|
||||
useEffect(() => {
|
||||
if (isEditing) {
|
||||
clearCitationSearch();
|
||||
setHighlightResult(null);
|
||||
}
|
||||
}, [isEditing, clearCitationSearch]);
|
||||
|
||||
useEffect(() => {
|
||||
return () => {
|
||||
if (copyResetTimeoutRef.current) {
|
||||
|
|
@ -367,6 +693,15 @@ export function EditorPanelContent({
|
|||
EDITABLE_DOCUMENT_TYPES.has(editorDoc.document_type ?? "")) &&
|
||||
!isLargeDocument
|
||||
: false;
|
||||
// Use PlateEditor for any of:
|
||||
// - Editable doc types (FILE/NOTE) — existing editing UX.
|
||||
// - Active citation jump in flight (`isCitationTarget`) — covers the
|
||||
// mount in the very first render where the atom is set but the
|
||||
// sticky effect hasn't fired yet.
|
||||
// - Sticky Plate mode latched on a previous citation jump — keeps
|
||||
// the editor mounted (with its decorations) after the highlight
|
||||
// effect clears the atom. Resets when the doc changes.
|
||||
const renderInPlateEditor = isEditableType || isCitationTarget || stickyPlateMode;
|
||||
const hasUnsavedChanges = editedMarkdown !== null;
|
||||
const showDesktopHeader = !!onClose;
|
||||
const showEditingActions = isEditableType && isEditing;
|
||||
|
|
@ -381,6 +716,90 @@ export function EditorPanelContent({
|
|||
setIsEditing(false);
|
||||
}, [editorDoc?.source_markdown]);
|
||||
|
||||
const handleDownloadMarkdown = useCallback(async () => {
|
||||
if (!searchSpaceId || !documentId) return;
|
||||
setDownloading(true);
|
||||
try {
|
||||
const response = await authenticatedFetch(
|
||||
`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/search-spaces/${searchSpaceId}/documents/${documentId}/download-markdown`,
|
||||
{ method: "GET" }
|
||||
);
|
||||
if (!response.ok) throw new Error("Download failed");
|
||||
const blob = await response.blob();
|
||||
const url = URL.createObjectURL(blob);
|
||||
const a = document.createElement("a");
|
||||
a.href = url;
|
||||
const disposition = response.headers.get("content-disposition");
|
||||
const match = disposition?.match(/filename="(.+)"/);
|
||||
a.download = match?.[1] ?? `${editorDoc?.title || "document"}.md`;
|
||||
document.body.appendChild(a);
|
||||
a.click();
|
||||
a.remove();
|
||||
URL.revokeObjectURL(url);
|
||||
toast.success("Download started");
|
||||
} catch {
|
||||
toast.error("Failed to download document");
|
||||
} finally {
|
||||
setDownloading(false);
|
||||
}
|
||||
}, [documentId, editorDoc?.title, searchSpaceId]);
|
||||
|
||||
// We no longer surface an "approximate" status — Plate's FindReplacePlugin
|
||||
// either decorates an exact match or it doesn't, and the candidate snippet
|
||||
// strategy (first sentence → first 8 words → full chunk) means we either
|
||||
// land on the citation start or fall through to the miss alert.
|
||||
const showMissAlert = isCitationTarget && highlightResult === "miss";
|
||||
|
||||
const citationAlerts = showMissAlert && (
|
||||
<Alert variant="destructive" className="mb-4">
|
||||
<FileQuestionMark className="size-4" />
|
||||
<AlertDescription className="flex items-center justify-between gap-4">
|
||||
<span>Cited section couldn't be located in this view.</span>
|
||||
{editorDoc?.truncated && (
|
||||
<Button
|
||||
variant="outline"
|
||||
size="sm"
|
||||
className="relative shrink-0"
|
||||
disabled={downloading}
|
||||
onClick={handleDownloadMarkdown}
|
||||
>
|
||||
<span className={`flex items-center gap-1.5 ${downloading ? "opacity-0" : ""}`}>
|
||||
<Download className="size-3.5" />
|
||||
Download .md
|
||||
</span>
|
||||
{downloading && <Spinner size="sm" className="absolute" />}
|
||||
</Button>
|
||||
)}
|
||||
</AlertDescription>
|
||||
</Alert>
|
||||
);
|
||||
|
||||
const largeDocAlert = isLargeDocument && !isLocalFileMode && editorDoc && (
|
||||
<Alert className="mb-4">
|
||||
<FileText className="size-4" />
|
||||
<AlertDescription className="flex items-center justify-between gap-4">
|
||||
<span>
|
||||
This document is too large for the editor (
|
||||
{Math.round((editorDoc.content_size_bytes ?? 0) / 1024 / 1024)}MB,{" "}
|
||||
{editorDoc.chunk_count ?? 0} chunks). Showing a preview below.
|
||||
</span>
|
||||
<Button
|
||||
variant="outline"
|
||||
size="sm"
|
||||
className="relative shrink-0"
|
||||
disabled={downloading}
|
||||
onClick={handleDownloadMarkdown}
|
||||
>
|
||||
<span className={`flex items-center gap-1.5 ${downloading ? "opacity-0" : ""}`}>
|
||||
<Download className="size-3.5" />
|
||||
Download .md
|
||||
</span>
|
||||
{downloading && <Spinner size="sm" className="absolute" />}
|
||||
</Button>
|
||||
</AlertDescription>
|
||||
</Alert>
|
||||
);
|
||||
|
||||
return (
|
||||
<>
|
||||
{showDesktopHeader ? (
|
||||
|
|
@ -565,61 +984,6 @@ export function EditorPanelContent({
|
|||
</p>
|
||||
</div>
|
||||
</div>
|
||||
) : isLargeDocument && !isLocalFileMode ? (
|
||||
<div className="h-full overflow-y-auto px-5 py-4">
|
||||
<Alert className="mb-4">
|
||||
<FileText className="size-4" />
|
||||
<AlertDescription className="flex items-center justify-between gap-4">
|
||||
<span>
|
||||
This document is too large for the editor (
|
||||
{Math.round((editorDoc.content_size_bytes ?? 0) / 1024 / 1024)}MB,{" "}
|
||||
{editorDoc.chunk_count ?? 0} chunks). Showing a preview below.
|
||||
</span>
|
||||
<Button
|
||||
variant="outline"
|
||||
size="sm"
|
||||
className="relative shrink-0"
|
||||
disabled={downloading}
|
||||
onClick={async () => {
|
||||
setDownloading(true);
|
||||
try {
|
||||
if (!searchSpaceId || !documentId) {
|
||||
throw new Error("Missing document context");
|
||||
}
|
||||
const response = await authenticatedFetch(
|
||||
`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/search-spaces/${searchSpaceId}/documents/${documentId}/download-markdown`,
|
||||
{ method: "GET" }
|
||||
);
|
||||
if (!response.ok) throw new Error("Download failed");
|
||||
const blob = await response.blob();
|
||||
const url = URL.createObjectURL(blob);
|
||||
const a = document.createElement("a");
|
||||
a.href = url;
|
||||
const disposition = response.headers.get("content-disposition");
|
||||
const match = disposition?.match(/filename="(.+)"/);
|
||||
a.download = match?.[1] ?? `${editorDoc.title || "document"}.md`;
|
||||
document.body.appendChild(a);
|
||||
a.click();
|
||||
a.remove();
|
||||
URL.revokeObjectURL(url);
|
||||
toast.success("Download started");
|
||||
} catch {
|
||||
toast.error("Failed to download document");
|
||||
} finally {
|
||||
setDownloading(false);
|
||||
}
|
||||
}}
|
||||
>
|
||||
<span className={`flex items-center gap-1.5 ${downloading ? "opacity-0" : ""}`}>
|
||||
<Download className="size-3.5" />
|
||||
Download .md
|
||||
</span>
|
||||
{downloading && <Spinner size="sm" className="absolute" />}
|
||||
</Button>
|
||||
</AlertDescription>
|
||||
</Alert>
|
||||
<MarkdownViewer content={editorDoc.source_markdown} />
|
||||
</div>
|
||||
) : editorRenderMode === "source_code" ? (
|
||||
<div className="h-full overflow-hidden">
|
||||
<SourceCodeEditor
|
||||
|
|
@ -638,20 +1002,46 @@ export function EditorPanelContent({
|
|||
}}
|
||||
/>
|
||||
</div>
|
||||
) : isEditableType ? (
|
||||
<PlateEditor
|
||||
key={`${isLocalFileMode ? (localFilePath ?? "local-file") : documentId}-${isEditing ? "editing" : "viewing"}`}
|
||||
preset="full"
|
||||
markdown={editorDoc.source_markdown}
|
||||
onMarkdownChange={handleMarkdownChange}
|
||||
readOnly={!isEditing}
|
||||
placeholder="Start writing..."
|
||||
editorVariant="default"
|
||||
allowModeToggle={false}
|
||||
reserveToolbarSpace
|
||||
defaultEditing={isEditing}
|
||||
className="[&_[role=toolbar]]:!bg-sidebar"
|
||||
/>
|
||||
) : isLargeDocument && !isLocalFileMode && !isCitationTarget ? (
|
||||
// Large doc, no active citation — fast Streamdown preview
|
||||
// + download CTA. We only fall back to MarkdownViewer here
|
||||
// because Plate is heavy on multi-MB docs and the user
|
||||
// isn't waiting on a specific citation to render.
|
||||
<div className="h-full overflow-y-auto px-5 py-4">
|
||||
{largeDocAlert}
|
||||
<MarkdownViewer content={editorDoc.source_markdown} />
|
||||
</div>
|
||||
) : renderInPlateEditor ? (
|
||||
// Editable doc (FILE/NOTE) OR active citation jump (any
|
||||
// doc type). The citation path uses Plate's
|
||||
// FindReplacePlugin for native, decoration-based
|
||||
// highlighting — see the citation-jump highlight effect
|
||||
// above for how `editorRef` and `handleEditorReady` are
|
||||
// wired.
|
||||
<div className="flex h-full min-h-0 flex-col">
|
||||
{(citationAlerts || (isLargeDocument && isCitationTarget && !isLocalFileMode)) && (
|
||||
<div className="shrink-0 px-5 pt-4">
|
||||
{isLargeDocument && isCitationTarget && largeDocAlert}
|
||||
{citationAlerts}
|
||||
</div>
|
||||
)}
|
||||
<div className="flex-1 min-h-0 overflow-hidden" onPointerDown={handleEditorPointerDown}>
|
||||
<PlateEditor
|
||||
key={`${isLocalFileMode ? (localFilePath ?? "local-file") : documentId}-${isEditing ? "editing" : "viewing"}`}
|
||||
preset="full"
|
||||
markdown={editorDoc.source_markdown}
|
||||
onMarkdownChange={handleMarkdownChange}
|
||||
readOnly={!isEditing}
|
||||
placeholder="Start writing..."
|
||||
editorVariant="default"
|
||||
allowModeToggle={false}
|
||||
reserveToolbarSpace
|
||||
defaultEditing={isEditing}
|
||||
className="[&_[role=toolbar]]:!bg-sidebar"
|
||||
onEditorReady={handleEditorReady}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
) : (
|
||||
<div className="h-full overflow-y-auto px-5 py-4">
|
||||
<MarkdownViewer content={editorDoc.source_markdown} />
|
||||
|
|
|
|||
|
|
@ -12,6 +12,12 @@ import { type EditorPreset, presetMap } from "@/components/editor/presets";
|
|||
import { escapeMdxExpressions } from "@/components/editor/utils/escape-mdx";
|
||||
import { Editor, EditorContainer } from "@/components/ui/editor";
|
||||
|
||||
/** Live editor instance returned by `usePlateEditor`. Exposed via the
|
||||
* `onEditorReady` prop so callers (e.g. `EditorPanelContent`) can drive
|
||||
* plugin options imperatively — most notably setting
|
||||
* `FindReplacePlugin`'s `search` option for citation-jump highlights. */
|
||||
export type PlateEditorInstance = ReturnType<typeof usePlateEditor>;
|
||||
|
||||
export interface PlateEditorProps {
|
||||
/** Markdown string to load as initial content */
|
||||
markdown?: string;
|
||||
|
|
@ -62,6 +68,15 @@ export interface PlateEditorProps {
|
|||
* without modifying the core editor component.
|
||||
*/
|
||||
extraPlugins?: AnyPluginConfig[];
|
||||
/**
|
||||
* Called whenever the live editor instance (re)mounts, with `null` on
|
||||
* unmount. Used by callers that need to drive plugin options imperatively
|
||||
* — e.g. `EditorPanelContent` setting `FindReplacePlugin`'s `search`
|
||||
* option for citation-jump highlights. The callback is invoked exactly
|
||||
* once per editor lifetime (the parent's `key` prop forces a fresh
|
||||
* editor when needed, e.g. on edit-mode toggle).
|
||||
*/
|
||||
onEditorReady?: (editor: PlateEditorInstance | null) => void;
|
||||
}
|
||||
|
||||
function PlateEditorContent({
|
||||
|
|
@ -100,6 +115,7 @@ export function PlateEditor({
|
|||
defaultEditing = false,
|
||||
preset = "full",
|
||||
extraPlugins = [],
|
||||
onEditorReady,
|
||||
}: PlateEditorProps) {
|
||||
const lastMarkdownRef = useRef(markdown);
|
||||
const lastHtmlRef = useRef(html);
|
||||
|
|
@ -156,6 +172,21 @@ export function PlateEditor({
|
|||
: undefined,
|
||||
});
|
||||
|
||||
// Expose the live editor instance to imperative callers (e.g. citation
|
||||
// jump highlights). We deliberately don't depend on `onEditorReady`
|
||||
// itself in the cleanup closure — callers commonly pass an arrow that
|
||||
// closes over a stable ref setter, but if they pass a freshly-bound
|
||||
// callback per render, the `onEditorReady?.(editor)` re-fires which is
|
||||
// idempotent for ref-style setters.
|
||||
const onEditorReadyRef = useRef(onEditorReady);
|
||||
useEffect(() => {
|
||||
onEditorReadyRef.current = onEditorReady;
|
||||
}, [onEditorReady]);
|
||||
useEffect(() => {
|
||||
onEditorReadyRef.current?.(editor);
|
||||
return () => onEditorReadyRef.current?.(null);
|
||||
}, [editor]);
|
||||
|
||||
// Update editor content when html prop changes externally
|
||||
useEffect(() => {
|
||||
if (html !== undefined && html !== lastHtmlRef.current) {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
"use client";
|
||||
|
||||
import { FindReplacePlugin } from "@platejs/find-replace";
|
||||
import type { AnyPluginConfig } from "platejs";
|
||||
import { TrailingBlockPlugin } from "platejs";
|
||||
|
||||
|
|
@ -17,6 +18,30 @@ import { SelectionKit } from "@/components/editor/plugins/selection-kit";
|
|||
import { SlashCommandKit } from "@/components/editor/plugins/slash-command-kit";
|
||||
import { TableKit } from "@/components/editor/plugins/table-kit";
|
||||
import { ToggleKit } from "@/components/editor/plugins/toggle-kit";
|
||||
import { SearchHighlightLeaf } from "@/components/ui/search-highlight-node";
|
||||
|
||||
/**
|
||||
* Citation-jump highlighter. Re-uses Plate's built-in `FindReplacePlugin`
|
||||
* (decorate-only, no editing surface) to drive the "scroll-to-cited-text"
|
||||
* UX in `EditorPanelContent`. We register it in every preset because:
|
||||
* - Decorate is a no-op when `search` is empty (single getOptions() check
|
||||
* per block), so cost is effectively zero for non-citation viewers.
|
||||
* - Keeping it preset-agnostic means citations work whether the doc is
|
||||
* opened in editable (`full`) or pure-viewer (`readonly`) modes.
|
||||
*
|
||||
* The parent component drives `setOption(FindReplacePlugin, 'search', ...)`
|
||||
* + `editor.api.redecorate()` to trigger highlights, then queries the
|
||||
* editor DOM for `.citation-highlight-leaf` to scroll the first match
|
||||
* into view. (We can't use a `data-*` attribute here — Plate's
|
||||
* `PlateLeaf` runs props through `useNodeAttributes`, which only forwards
|
||||
* `attributes`, `className`, `ref`, `style`; arbitrary `data-*` props are
|
||||
* silently dropped.) See `components/ui/search-highlight-node.tsx` for
|
||||
* the leaf component and `CITATION_HIGHLIGHT_CLASS` constant.
|
||||
*/
|
||||
const CitationFindReplacePlugin = FindReplacePlugin.configure({
|
||||
options: { search: "" },
|
||||
render: { node: SearchHighlightLeaf },
|
||||
});
|
||||
|
||||
/**
|
||||
* Full preset – every plugin kit enabled.
|
||||
|
|
@ -38,6 +63,7 @@ export const fullPreset: AnyPluginConfig[] = [
|
|||
...AutoformatKit,
|
||||
...DndKit,
|
||||
TrailingBlockPlugin,
|
||||
CitationFindReplacePlugin,
|
||||
];
|
||||
|
||||
/**
|
||||
|
|
@ -52,6 +78,7 @@ export const minimalPreset: AnyPluginConfig[] = [
|
|||
...LinkKit,
|
||||
...AutoformatKit,
|
||||
TrailingBlockPlugin,
|
||||
CitationFindReplacePlugin,
|
||||
];
|
||||
|
||||
/**
|
||||
|
|
@ -68,6 +95,7 @@ export const readonlyPreset: AnyPluginConfig[] = [
|
|||
...CalloutKit,
|
||||
...ToggleKit,
|
||||
...MathKit,
|
||||
CitationFindReplacePlugin,
|
||||
];
|
||||
|
||||
/** All available preset names */
|
||||
|
|
|
|||
|
|
@ -1,719 +0,0 @@
|
|||
"use client";
|
||||
|
||||
import { useQuery } from "@tanstack/react-query";
|
||||
import {
|
||||
BookOpen,
|
||||
ChevronDown,
|
||||
ChevronUp,
|
||||
ExternalLink,
|
||||
FileQuestionMark,
|
||||
FileText,
|
||||
Hash,
|
||||
Loader2,
|
||||
Sparkles,
|
||||
X,
|
||||
} from "lucide-react";
|
||||
import { AnimatePresence, motion, useReducedMotion } from "motion/react";
|
||||
import { useTranslations } from "next-intl";
|
||||
import type React from "react";
|
||||
import { forwardRef, memo, type ReactNode, useCallback, useEffect, useRef, useState } from "react";
|
||||
import { createPortal } from "react-dom";
|
||||
import { MarkdownViewer } from "@/components/markdown-viewer";
|
||||
import { Badge } from "@/components/ui/badge";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import { ScrollArea } from "@/components/ui/scroll-area";
|
||||
import { Spinner } from "@/components/ui/spinner";
|
||||
import type {
|
||||
GetDocumentByChunkResponse,
|
||||
GetSurfsenseDocsByChunkResponse,
|
||||
} from "@/contracts/types/document.types";
|
||||
import { documentsApiService } from "@/lib/apis/documents-api.service";
|
||||
import { cacheKeys } from "@/lib/query-client/cache-keys";
|
||||
import { cn } from "@/lib/utils";
|
||||
|
||||
type DocumentData = GetDocumentByChunkResponse | GetSurfsenseDocsByChunkResponse;
|
||||
|
||||
interface SourceDetailPanelProps {
|
||||
open: boolean;
|
||||
onOpenChange: (open: boolean) => void;
|
||||
chunkId: number;
|
||||
sourceType: string;
|
||||
title: string;
|
||||
description?: string;
|
||||
url?: string;
|
||||
children?: ReactNode;
|
||||
isDocsChunk?: boolean;
|
||||
}
|
||||
|
||||
const formatDocumentType = (type: string) => {
|
||||
if (!type) return "";
|
||||
return type
|
||||
.split("_")
|
||||
.map((word) => word.charAt(0) + word.slice(1).toLowerCase())
|
||||
.join(" ");
|
||||
};
|
||||
|
||||
// Chunk card component
|
||||
// For large documents (>30 chunks), we disable animation to prevent layout shifts
|
||||
// which break auto-scroll functionality
|
||||
interface ChunkCardProps {
|
||||
chunk: { id: number; content: string };
|
||||
localIndex: number;
|
||||
chunkNumber: number;
|
||||
totalChunks: number;
|
||||
isCited: boolean;
|
||||
isActive: boolean;
|
||||
disableLayoutAnimation?: boolean;
|
||||
}
|
||||
|
||||
const ChunkCard = memo(
|
||||
forwardRef<HTMLDivElement, ChunkCardProps>(
|
||||
({ chunk, localIndex, chunkNumber, totalChunks, isCited }, ref) => {
|
||||
return (
|
||||
<div
|
||||
ref={ref}
|
||||
data-chunk-index={localIndex}
|
||||
className={cn(
|
||||
"group relative rounded-2xl border-2 transition-all duration-300",
|
||||
isCited
|
||||
? "bg-linear-to-br from-primary/5 via-primary/10 to-primary/5 border-primary shadow-lg shadow-primary/10"
|
||||
: "bg-card border-border/50 hover:border-border hover:shadow-md"
|
||||
)}
|
||||
>
|
||||
{isCited && <div className="absolute inset-0 rounded-2xl bg-primary/5 blur-xl -z-10" />}
|
||||
|
||||
<div className="flex items-center justify-between px-5 py-4 border-b border-border/50">
|
||||
<div className="flex items-center gap-3">
|
||||
<div
|
||||
className={cn(
|
||||
"flex items-center justify-center w-8 h-8 rounded-full text-sm font-semibold transition-colors",
|
||||
isCited
|
||||
? "bg-primary text-primary-foreground"
|
||||
: "bg-muted text-muted-foreground group-hover:bg-muted/80"
|
||||
)}
|
||||
>
|
||||
{chunkNumber}
|
||||
</div>
|
||||
<span className="text-sm text-muted-foreground">
|
||||
Chunk {chunkNumber} of {totalChunks}
|
||||
</span>
|
||||
</div>
|
||||
{isCited && (
|
||||
<Badge variant="default" className="gap-1.5 px-3 py-1">
|
||||
<Sparkles className="h-3 w-3" />
|
||||
Cited Source
|
||||
</Badge>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<div className="p-5 overflow-hidden">
|
||||
<MarkdownViewer content={chunk.content} maxLength={100_000} />
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
)
|
||||
);
|
||||
ChunkCard.displayName = "ChunkCard";
|
||||
|
||||
export function SourceDetailPanel({
|
||||
open,
|
||||
onOpenChange,
|
||||
chunkId,
|
||||
sourceType,
|
||||
title,
|
||||
description,
|
||||
url,
|
||||
children,
|
||||
isDocsChunk = false,
|
||||
}: SourceDetailPanelProps) {
|
||||
const t = useTranslations("dashboard");
|
||||
const scrollAreaRef = useRef<HTMLDivElement>(null);
|
||||
const hasScrolledRef = useRef(false); // Use ref to avoid stale closures
|
||||
const scrollTimersRef = useRef<ReturnType<typeof setTimeout>[]>([]);
|
||||
const [activeChunkIndex, setActiveChunkIndex] = useState<number | null>(null);
|
||||
const [mounted, setMounted] = useState(false);
|
||||
const shouldReduceMotion = useReducedMotion();
|
||||
|
||||
useEffect(() => {
|
||||
setMounted(true);
|
||||
}, []);
|
||||
|
||||
const {
|
||||
data: documentData,
|
||||
isLoading: isDocumentByChunkFetching,
|
||||
error: documentByChunkFetchingError,
|
||||
} = useQuery<DocumentData>({
|
||||
queryKey: isDocsChunk
|
||||
? cacheKeys.documents.byChunk(`doc-${chunkId}`)
|
||||
: cacheKeys.documents.byChunk(chunkId.toString()),
|
||||
queryFn: async () => {
|
||||
if (isDocsChunk) {
|
||||
return documentsApiService.getSurfsenseDocByChunk(chunkId);
|
||||
}
|
||||
return documentsApiService.getDocumentByChunk({ chunk_id: chunkId, chunk_window: 5 });
|
||||
},
|
||||
enabled: !!chunkId && open,
|
||||
staleTime: 5 * 60 * 1000,
|
||||
});
|
||||
|
||||
const totalChunks =
|
||||
documentData && "total_chunks" in documentData
|
||||
? (documentData.total_chunks ?? documentData.chunks.length)
|
||||
: (documentData?.chunks?.length ?? 0);
|
||||
const [beforeChunks, setBeforeChunks] = useState<
|
||||
Array<{ id: number; content: string; created_at: string }>
|
||||
>([]);
|
||||
const [afterChunks, setAfterChunks] = useState<
|
||||
Array<{ id: number; content: string; created_at: string }>
|
||||
>([]);
|
||||
const [loadingBefore, setLoadingBefore] = useState(false);
|
||||
const [loadingAfter, setLoadingAfter] = useState(false);
|
||||
|
||||
useEffect(() => {
|
||||
setBeforeChunks([]);
|
||||
setAfterChunks([]);
|
||||
}, [chunkId, open]);
|
||||
|
||||
const chunkStartIndex =
|
||||
documentData && "chunk_start_index" in documentData ? (documentData.chunk_start_index ?? 0) : 0;
|
||||
const initialChunks = documentData?.chunks ?? [];
|
||||
const allChunks = [...beforeChunks, ...initialChunks, ...afterChunks];
|
||||
const absoluteStart = chunkStartIndex - beforeChunks.length;
|
||||
const absoluteEnd = chunkStartIndex + initialChunks.length + afterChunks.length;
|
||||
const canLoadBefore = absoluteStart > 0;
|
||||
const canLoadAfter = absoluteEnd < totalChunks;
|
||||
|
||||
const EXPAND_SIZE = 10;
|
||||
|
||||
const loadBefore = useCallback(async () => {
|
||||
if (!documentData || !("search_space_id" in documentData) || !canLoadBefore) return;
|
||||
setLoadingBefore(true);
|
||||
try {
|
||||
const count = Math.min(EXPAND_SIZE, absoluteStart);
|
||||
const result = await documentsApiService.getDocumentChunks({
|
||||
document_id: documentData.id,
|
||||
page: 0,
|
||||
page_size: count,
|
||||
start_offset: absoluteStart - count,
|
||||
});
|
||||
const existingIds = new Set(allChunks.map((c) => c.id));
|
||||
const newChunks = result.items
|
||||
.filter((c) => !existingIds.has(c.id))
|
||||
.map((c) => ({ id: c.id, content: c.content, created_at: c.created_at }));
|
||||
setBeforeChunks((prev) => [...newChunks, ...prev]);
|
||||
} catch (err) {
|
||||
console.error("Failed to load earlier chunks:", err);
|
||||
} finally {
|
||||
setLoadingBefore(false);
|
||||
}
|
||||
}, [documentData, absoluteStart, canLoadBefore, allChunks]);
|
||||
|
||||
const loadAfter = useCallback(async () => {
|
||||
if (!documentData || !("search_space_id" in documentData) || !canLoadAfter) return;
|
||||
setLoadingAfter(true);
|
||||
try {
|
||||
const result = await documentsApiService.getDocumentChunks({
|
||||
document_id: documentData.id,
|
||||
page: 0,
|
||||
page_size: EXPAND_SIZE,
|
||||
start_offset: absoluteEnd,
|
||||
});
|
||||
const existingIds = new Set(allChunks.map((c) => c.id));
|
||||
const newChunks = result.items
|
||||
.filter((c) => !existingIds.has(c.id))
|
||||
.map((c) => ({ id: c.id, content: c.content, created_at: c.created_at }));
|
||||
setAfterChunks((prev) => [...prev, ...newChunks]);
|
||||
} catch (err) {
|
||||
console.error("Failed to load later chunks:", err);
|
||||
} finally {
|
||||
setLoadingAfter(false);
|
||||
}
|
||||
}, [documentData, absoluteEnd, canLoadAfter, allChunks]);
|
||||
|
||||
const isDirectRenderSource =
|
||||
sourceType === "TAVILY_API" ||
|
||||
sourceType === "LINKUP_API" ||
|
||||
sourceType === "SEARXNG_API" ||
|
||||
sourceType === "BAIDU_SEARCH_API";
|
||||
|
||||
const citedChunkIndex = allChunks.findIndex((chunk) => chunk.id === chunkId);
|
||||
|
||||
// Simple scroll function that scrolls to a chunk by index
|
||||
const scrollToChunkByIndex = useCallback(
|
||||
(chunkIndex: number, smooth = true) => {
|
||||
const scrollContainer = scrollAreaRef.current;
|
||||
if (!scrollContainer) return;
|
||||
|
||||
const viewport = scrollContainer.querySelector(
|
||||
"[data-radix-scroll-area-viewport]"
|
||||
) as HTMLElement | null;
|
||||
if (!viewport) return;
|
||||
|
||||
const chunkElement = scrollContainer.querySelector(
|
||||
`[data-chunk-index="${chunkIndex}"]`
|
||||
) as HTMLElement | null;
|
||||
if (!chunkElement) return;
|
||||
|
||||
// Get positions using getBoundingClientRect for accuracy
|
||||
const viewportRect = viewport.getBoundingClientRect();
|
||||
const chunkRect = chunkElement.getBoundingClientRect();
|
||||
|
||||
// Calculate where to scroll to center the chunk
|
||||
const currentScrollTop = viewport.scrollTop;
|
||||
const chunkTopRelativeToViewport = chunkRect.top - viewportRect.top + currentScrollTop;
|
||||
const scrollTarget =
|
||||
chunkTopRelativeToViewport - viewportRect.height / 2 + chunkRect.height / 2;
|
||||
|
||||
viewport.scrollTo({
|
||||
top: Math.max(0, scrollTarget),
|
||||
behavior: smooth && !shouldReduceMotion ? "smooth" : "auto",
|
||||
});
|
||||
|
||||
setActiveChunkIndex(chunkIndex);
|
||||
},
|
||||
[shouldReduceMotion]
|
||||
);
|
||||
|
||||
// Callback ref for the cited chunk - scrolls when the element mounts
|
||||
const citedChunkRefCallback = useCallback(
|
||||
(node: HTMLDivElement | null) => {
|
||||
if (node && !hasScrolledRef.current && open) {
|
||||
hasScrolledRef.current = true; // Mark immediately to prevent duplicate scrolls
|
||||
|
||||
// Store the node reference for the delayed scroll
|
||||
const scrollToCitedChunk = () => {
|
||||
const scrollContainer = scrollAreaRef.current;
|
||||
if (!scrollContainer || !node.isConnected) return false;
|
||||
|
||||
const viewport = scrollContainer.querySelector(
|
||||
"[data-radix-scroll-area-viewport]"
|
||||
) as HTMLElement | null;
|
||||
if (!viewport) return false;
|
||||
|
||||
// Get positions
|
||||
const viewportRect = viewport.getBoundingClientRect();
|
||||
const chunkRect = node.getBoundingClientRect();
|
||||
|
||||
// Calculate scroll position to center the chunk
|
||||
const currentScrollTop = viewport.scrollTop;
|
||||
const chunkTopRelativeToViewport = chunkRect.top - viewportRect.top + currentScrollTop;
|
||||
const scrollTarget =
|
||||
chunkTopRelativeToViewport - viewportRect.height / 2 + chunkRect.height / 2;
|
||||
|
||||
viewport.scrollTo({
|
||||
top: Math.max(0, scrollTarget),
|
||||
behavior: "auto", // Instant scroll for initial positioning
|
||||
});
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
// Scroll multiple times with delays to handle progressive content rendering
|
||||
// Each subsequent scroll will correct for any layout shifts
|
||||
const scrollAttempts = [50, 150, 300, 600, 1000];
|
||||
|
||||
scrollAttempts.forEach((delay) => {
|
||||
scrollTimersRef.current.push(
|
||||
setTimeout(() => {
|
||||
scrollToCitedChunk();
|
||||
}, delay)
|
||||
);
|
||||
});
|
||||
|
||||
// After final attempt, mark the cited chunk as active
|
||||
scrollTimersRef.current.push(
|
||||
setTimeout(
|
||||
() => {
|
||||
setActiveChunkIndex(citedChunkIndex);
|
||||
},
|
||||
scrollAttempts[scrollAttempts.length - 1] + 50
|
||||
)
|
||||
);
|
||||
}
|
||||
},
|
||||
[open, citedChunkIndex]
|
||||
);
|
||||
|
||||
// Reset scroll state when panel closes
|
||||
useEffect(() => {
|
||||
if (!open) {
|
||||
scrollTimersRef.current.forEach(clearTimeout);
|
||||
scrollTimersRef.current = [];
|
||||
hasScrolledRef.current = false;
|
||||
setActiveChunkIndex(null);
|
||||
}
|
||||
return () => {
|
||||
scrollTimersRef.current.forEach(clearTimeout);
|
||||
scrollTimersRef.current = [];
|
||||
};
|
||||
}, [open]);
|
||||
|
||||
// Handle escape key
|
||||
useEffect(() => {
|
||||
const handleEscape = (e: KeyboardEvent) => {
|
||||
if (e.key === "Escape" && open) {
|
||||
onOpenChange(false);
|
||||
}
|
||||
};
|
||||
window.addEventListener("keydown", handleEscape);
|
||||
return () => window.removeEventListener("keydown", handleEscape);
|
||||
}, [open, onOpenChange]);
|
||||
|
||||
// Prevent body scroll when open
|
||||
useEffect(() => {
|
||||
if (open) {
|
||||
document.body.style.overflow = "hidden";
|
||||
} else {
|
||||
document.body.style.overflow = "";
|
||||
}
|
||||
return () => {
|
||||
document.body.style.overflow = "";
|
||||
};
|
||||
}, [open]);
|
||||
|
||||
const handleUrlClick = (e: React.MouseEvent, clickUrl: string) => {
|
||||
e.preventDefault();
|
||||
e.stopPropagation();
|
||||
window.open(clickUrl, "_blank", "noopener,noreferrer");
|
||||
};
|
||||
|
||||
const scrollToChunk = useCallback(
|
||||
(index: number) => {
|
||||
scrollToChunkByIndex(index, true);
|
||||
},
|
||||
[scrollToChunkByIndex]
|
||||
);
|
||||
|
||||
const panelContent = (
|
||||
<AnimatePresence mode="wait">
|
||||
{open && (
|
||||
<>
|
||||
{/* Backdrop */}
|
||||
<motion.div
|
||||
key="backdrop"
|
||||
initial={{ opacity: 0 }}
|
||||
animate={{ opacity: 1 }}
|
||||
exit={{ opacity: 0 }}
|
||||
transition={{ duration: 0.2 }}
|
||||
className="fixed inset-0 z-50 bg-black/60 backdrop-blur-sm"
|
||||
onClick={() => onOpenChange(false)}
|
||||
/>
|
||||
|
||||
{/* Panel */}
|
||||
<motion.div
|
||||
key="panel"
|
||||
initial={shouldReduceMotion ? { opacity: 0 } : { opacity: 0, scale: 0.95, y: 20 }}
|
||||
animate={{ opacity: 1, scale: 1, y: 0 }}
|
||||
exit={shouldReduceMotion ? { opacity: 0 } : { opacity: 0, scale: 0.95, y: 20 }}
|
||||
transition={{
|
||||
type: "spring",
|
||||
damping: 30,
|
||||
stiffness: 300,
|
||||
}}
|
||||
className="fixed inset-3 sm:inset-6 md:inset-10 lg:inset-16 z-50 flex flex-col bg-background rounded-3xl shadow-2xl border overflow-hidden"
|
||||
>
|
||||
{/* Header */}
|
||||
<motion.div
|
||||
initial={{ opacity: 0, y: -10 }}
|
||||
animate={{ opacity: 1, y: 0 }}
|
||||
transition={{ delay: 0.1 }}
|
||||
className="flex items-center justify-between px-6 py-5 border-b bg-linear-to-r from-muted/50 to-muted/30"
|
||||
>
|
||||
<div className="min-w-0 flex-1">
|
||||
<h2 className="text-xl font-semibold truncate">
|
||||
{documentData?.title || title || "Source Document"}
|
||||
</h2>
|
||||
<p className="text-sm text-muted-foreground mt-0.5">
|
||||
{documentData && "document_type" in documentData
|
||||
? formatDocumentType(documentData.document_type)
|
||||
: sourceType && formatDocumentType(sourceType)}
|
||||
{totalChunks > 0 && (
|
||||
<span className="ml-2">
|
||||
• {totalChunks} chunk{totalChunks !== 1 ? "s" : ""}
|
||||
{allChunks.length < totalChunks && ` (showing ${allChunks.length})`}
|
||||
</span>
|
||||
)}
|
||||
</p>
|
||||
</div>
|
||||
<div className="flex items-center gap-3 shrink-0">
|
||||
{url && (
|
||||
<Button
|
||||
size="sm"
|
||||
variant="outline"
|
||||
onClick={(e) => handleUrlClick(e, url)}
|
||||
className="hidden sm:flex gap-2 rounded-xl"
|
||||
>
|
||||
<ExternalLink className="h-4 w-4" />
|
||||
Open Source
|
||||
</Button>
|
||||
)}
|
||||
<Button
|
||||
size="icon"
|
||||
variant="ghost"
|
||||
onClick={() => onOpenChange(false)}
|
||||
className="h-8 w-8 rounded-full"
|
||||
>
|
||||
<X className="h-4 w-4" />
|
||||
<span className="sr-only">Close</span>
|
||||
</Button>
|
||||
</div>
|
||||
</motion.div>
|
||||
|
||||
{/* Loading State */}
|
||||
{!isDirectRenderSource && isDocumentByChunkFetching && (
|
||||
<div className="flex-1 flex items-center justify-center">
|
||||
<motion.div
|
||||
initial={{ opacity: 0, scale: 0.9 }}
|
||||
animate={{ opacity: 1, scale: 1 }}
|
||||
className="flex flex-col items-center gap-4"
|
||||
>
|
||||
<Spinner size="lg" />
|
||||
<p className="text-sm text-muted-foreground font-medium">
|
||||
{t("loading_document")}
|
||||
</p>
|
||||
</motion.div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Error State */}
|
||||
{!isDirectRenderSource && documentByChunkFetchingError && (
|
||||
<div className="flex-1 flex items-center justify-center">
|
||||
<motion.div
|
||||
initial={{ opacity: 0, scale: 0.9 }}
|
||||
animate={{ opacity: 1, scale: 1 }}
|
||||
className="flex flex-col items-center gap-4 text-center px-6"
|
||||
>
|
||||
<div className="w-20 h-20 rounded-full bg-muted/50 flex items-center justify-center">
|
||||
<FileQuestionMark className="h-10 w-10 text-muted-foreground" />
|
||||
</div>
|
||||
<div>
|
||||
<p className="font-semibold text-foreground text-lg">Document unavailable</p>
|
||||
<p className="text-sm text-muted-foreground mt-2 max-w-md">
|
||||
{documentByChunkFetchingError.message ||
|
||||
"An unexpected error occurred. Please try again."}
|
||||
</p>
|
||||
</div>
|
||||
<Button variant="outline" onClick={() => onOpenChange(false)} className="mt-2">
|
||||
Close Panel
|
||||
</Button>
|
||||
</motion.div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Direct render for web search providers */}
|
||||
{isDirectRenderSource && (
|
||||
<ScrollArea className="flex-1">
|
||||
<div className="p-6 max-w-3xl mx-auto">
|
||||
{url && (
|
||||
<Button
|
||||
size="default"
|
||||
variant="outline"
|
||||
onClick={(e) => handleUrlClick(e, url)}
|
||||
className="w-full mb-6 sm:hidden rounded-xl"
|
||||
>
|
||||
<ExternalLink className="mr-2 h-4 w-4" />
|
||||
Open in Browser
|
||||
</Button>
|
||||
)}
|
||||
<motion.div
|
||||
initial={{ opacity: 0, y: 10 }}
|
||||
animate={{ opacity: 1, y: 0 }}
|
||||
className="p-6 bg-muted/50 rounded-2xl border"
|
||||
>
|
||||
<h3 className="text-base font-semibold mb-4 flex items-center gap-2">
|
||||
<BookOpen className="h-4 w-4" />
|
||||
Source Information
|
||||
</h3>
|
||||
<div className="text-sm text-muted-foreground mb-3 font-medium">
|
||||
{title || "Untitled"}
|
||||
</div>
|
||||
<div className="text-sm text-foreground leading-relaxed">
|
||||
{description || "No content available"}
|
||||
</div>
|
||||
</motion.div>
|
||||
</div>
|
||||
</ScrollArea>
|
||||
)}
|
||||
|
||||
{/* API-fetched document content */}
|
||||
{!isDirectRenderSource && documentData && (
|
||||
<div className="flex-1 flex overflow-hidden">
|
||||
{/* Chunk Navigation Sidebar */}
|
||||
{allChunks.length > 1 && (
|
||||
<motion.div
|
||||
initial={{ opacity: 0, x: -20 }}
|
||||
animate={{ opacity: 1, x: 0 }}
|
||||
transition={{ delay: 0.2 }}
|
||||
className="hidden lg:flex flex-col w-16 border-r bg-muted/10 overflow-hidden"
|
||||
>
|
||||
<ScrollArea className="flex-1 h-full">
|
||||
<div className="p-2 pt-3 flex flex-col gap-1.5">
|
||||
{allChunks.map((chunk, idx) => {
|
||||
const absNum = absoluteStart + idx + 1;
|
||||
const isCited = chunk.id === chunkId;
|
||||
const isActive = activeChunkIndex === idx;
|
||||
return (
|
||||
<motion.button
|
||||
key={chunk.id}
|
||||
type="button"
|
||||
onClick={() => scrollToChunk(idx)}
|
||||
initial={{ opacity: 0, scale: 0.8 }}
|
||||
animate={{ opacity: 1, scale: 1 }}
|
||||
transition={{ delay: Math.min(idx * 0.02, 0.2) }}
|
||||
className={cn(
|
||||
"relative w-11 h-9 mx-auto rounded-lg text-xs font-semibold transition-all duration-200 flex items-center justify-center",
|
||||
isCited
|
||||
? "bg-primary text-primary-foreground shadow-md"
|
||||
: isActive
|
||||
? "bg-muted text-foreground"
|
||||
: "bg-muted/50 text-muted-foreground hover:bg-muted hover:text-foreground"
|
||||
)}
|
||||
title={isCited ? `Chunk ${absNum} (Cited)` : `Chunk ${absNum}`}
|
||||
>
|
||||
{absNum}
|
||||
{isCited && (
|
||||
<span className="absolute -top-1.5 -right-1.5 flex items-center justify-center w-4 h-4 bg-primary rounded-full border-2 border-background shadow-sm">
|
||||
<Sparkles className="h-2.5 w-2.5 text-primary-foreground" />
|
||||
</span>
|
||||
)}
|
||||
</motion.button>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
</ScrollArea>
|
||||
</motion.div>
|
||||
)}
|
||||
|
||||
{/* Main Content */}
|
||||
<ScrollArea className="flex-1" ref={scrollAreaRef}>
|
||||
<div className="p-6 lg:p-8 max-w-4xl mx-auto space-y-6">
|
||||
{/* Document Metadata */}
|
||||
{"document_metadata" in documentData &&
|
||||
documentData.document_metadata &&
|
||||
Object.keys(documentData.document_metadata).length > 0 && (
|
||||
<motion.div
|
||||
initial={{ opacity: 0, y: 10 }}
|
||||
animate={{ opacity: 1, y: 0 }}
|
||||
transition={{ delay: 0.1 }}
|
||||
className="p-5 bg-muted/30 rounded-2xl border"
|
||||
>
|
||||
<h3 className="text-sm font-semibold mb-4 text-muted-foreground uppercase tracking-wider flex items-center gap-2">
|
||||
<FileText className="h-4 w-4" />
|
||||
Document Information
|
||||
</h3>
|
||||
<dl className="grid grid-cols-1 sm:grid-cols-2 gap-4 text-sm">
|
||||
{Object.entries(documentData.document_metadata).map(([key, value]) => (
|
||||
<div key={key} className="space-y-1">
|
||||
<dt className="font-medium text-muted-foreground capitalize text-xs">
|
||||
{key.replace(/_/g, " ")}
|
||||
</dt>
|
||||
<dd className="text-foreground wrap-break-word">{String(value)}</dd>
|
||||
</div>
|
||||
))}
|
||||
</dl>
|
||||
</motion.div>
|
||||
)}
|
||||
|
||||
{/* Chunks Header */}
|
||||
<div className="flex items-center justify-between pt-2">
|
||||
<h3 className="text-sm font-semibold text-muted-foreground uppercase tracking-wider flex items-center gap-2">
|
||||
<Hash className="h-4 w-4" />
|
||||
Chunks {absoluteStart + 1}–{absoluteEnd} of {totalChunks}
|
||||
</h3>
|
||||
{citedChunkIndex !== -1 && (
|
||||
<Button
|
||||
variant="ghost"
|
||||
size="sm"
|
||||
onClick={() => scrollToChunk(citedChunkIndex)}
|
||||
className="gap-2 text-primary hover:text-primary"
|
||||
>
|
||||
<Sparkles className="h-3.5 w-3.5" />
|
||||
Jump to cited
|
||||
</Button>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Load Earlier */}
|
||||
{canLoadBefore && (
|
||||
<div className="flex items-center justify-center">
|
||||
<Button
|
||||
variant="outline"
|
||||
size="sm"
|
||||
onClick={loadBefore}
|
||||
disabled={loadingBefore}
|
||||
className="gap-2"
|
||||
>
|
||||
{loadingBefore ? (
|
||||
<Loader2 className="h-3.5 w-3.5 animate-spin" />
|
||||
) : (
|
||||
<ChevronUp className="h-3.5 w-3.5" />
|
||||
)}
|
||||
{loadingBefore
|
||||
? "Loading..."
|
||||
: `Load ${Math.min(EXPAND_SIZE, absoluteStart)} earlier chunks`}
|
||||
</Button>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Chunks */}
|
||||
<div className="space-y-4">
|
||||
{allChunks.map((chunk, idx) => {
|
||||
const isCited = chunk.id === chunkId;
|
||||
const chunkNumber = absoluteStart + idx + 1;
|
||||
return (
|
||||
<ChunkCard
|
||||
key={chunk.id}
|
||||
ref={isCited ? citedChunkRefCallback : undefined}
|
||||
chunk={chunk}
|
||||
localIndex={idx}
|
||||
chunkNumber={chunkNumber}
|
||||
totalChunks={totalChunks}
|
||||
isCited={isCited}
|
||||
isActive={activeChunkIndex === idx}
|
||||
disableLayoutAnimation={allChunks.length > 30}
|
||||
/>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
|
||||
{/* Load Later */}
|
||||
{canLoadAfter && (
|
||||
<div className="flex items-center justify-center py-3">
|
||||
<Button
|
||||
variant="outline"
|
||||
size="sm"
|
||||
onClick={loadAfter}
|
||||
disabled={loadingAfter}
|
||||
className="gap-2"
|
||||
>
|
||||
{loadingAfter ? (
|
||||
<Loader2 className="h-3.5 w-3.5 animate-spin" />
|
||||
) : (
|
||||
<ChevronDown className="h-3.5 w-3.5" />
|
||||
)}
|
||||
{loadingAfter
|
||||
? "Loading..."
|
||||
: `Load ${Math.min(EXPAND_SIZE, totalChunks - absoluteEnd)} later chunks`}
|
||||
</Button>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</ScrollArea>
|
||||
</div>
|
||||
)}
|
||||
</motion.div>
|
||||
</>
|
||||
)}
|
||||
</AnimatePresence>
|
||||
);
|
||||
|
||||
if (!mounted) return <>{children}</>;
|
||||
|
||||
return (
|
||||
<>
|
||||
{children}
|
||||
{createPortal(panelContent, globalThis.document.body)}
|
||||
</>
|
||||
);
|
||||
}
|
||||
|
|
@ -67,9 +67,6 @@ const DesktopShortcutsContent = dynamic(
|
|||
import(
|
||||
"@/app/dashboard/[search_space_id]/user-settings/components/DesktopShortcutsContent"
|
||||
).then((m) => ({ default: m.DesktopShortcutsContent })),
|
||||
import(
|
||||
"@/app/dashboard/[search_space_id]/user-settings/components/DesktopShortcutsContent"
|
||||
).then((m) => ({ default: m.DesktopShortcutsContent })),
|
||||
{ ssr: false }
|
||||
);
|
||||
const MemoryContent = dynamic(
|
||||
|
|
|
|||
45
surfsense_web/components/ui/search-highlight-node.tsx
Normal file
45
surfsense_web/components/ui/search-highlight-node.tsx
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
"use client";
|
||||
|
||||
import type { PlateLeafProps } from "platejs/react";
|
||||
import { PlateLeaf } from "platejs/react";
|
||||
|
||||
/**
 * Stable class name used to identify Plate-rendered citation highlight
 * leaves in the DOM. We can't use a `data-*` attribute here — Plate's
 * `PlateLeaf` runs its props through `useNodeAttributes`, which only
 * forwards `attributes`, `className`, `ref`, and `style` to the rendered
 * element; arbitrary `data-*` props are silently dropped (verified
 * against `@platejs/core/dist/react/index.js` v52). So `className` is
 * the only escape hatch that's guaranteed to survive into the DOM.
 *
 * Consumers query `.${CITATION_HIGHLIGHT_CLASS}` to find the first
 * highlighted match in DOM order (e.g. to scroll it into view).
 */
export const CITATION_HIGHLIGHT_CLASS = "citation-highlight-leaf";
||||
|
||||
/**
|
||||
* Leaf rendered for ranges decorated by `@platejs/find-replace`'s
|
||||
* `FindReplacePlugin`. We re-purpose that plugin to drive the citation-jump
|
||||
* highlight: when a citation is staged, the parent sets the plugin's `search`
|
||||
* option to a snippet of the chunk text and Plate decorates every match with
|
||||
* `searchHighlight: true`. This component renders those decorations as a
|
||||
* `<mark>` tagged with `CITATION_HIGHLIGHT_CLASS` so the parent can:
|
||||
* 1. Query the first match in DOM order to scroll it into view.
|
||||
* 2. Detect the active-highlight state without a separate React ref.
|
||||
*
|
||||
* The highlight is **persistent** — it does not auto-fade. The parent in
|
||||
* `EditorPanelContent` clears it by setting the plugin's `search` option
|
||||
* back to "" when one of: (a) the user clicks anywhere inside the editor,
|
||||
* (b) the panel switches to a different document, (c) the user toggles
|
||||
* into edit mode, (d) another citation jump is staged, (e) the panel
|
||||
* unmounts. We use a brief entrance pulse (`citation-flash-in`, see
|
||||
* `globals.css`) purely to draw the eye after `scrollIntoView` lands.
|
||||
*/
|
||||
export function SearchHighlightLeaf(props: PlateLeafProps) {
|
||||
return (
|
||||
<PlateLeaf
|
||||
{...props}
|
||||
as="mark"
|
||||
className={`${CITATION_HIGHLIGHT_CLASS} bg-primary/15 ring-1 ring-primary/40 rounded-sm px-0.5 text-inherit animate-[citation-flash-in_400ms_ease-out]`}
|
||||
>
|
||||
{props.children}
|
||||
</PlateLeaf>
|
||||
);
|
||||
}
|
||||
125
surfsense_web/lib/citation-search.ts
Normal file
125
surfsense_web/lib/citation-search.ts
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
/**
|
||||
* Snippet generation for the citation-jump highlight, driven by Plate's
|
||||
* `FindReplacePlugin`. The plugin runs `decorate` per-block and only matches
|
||||
* within blocks whose children are all `Text` nodes (so it crosses inline
|
||||
* marks like bold/italic but **not** block boundaries, and a block that
|
||||
* contains even one inline element such as a link is silently skipped).
|
||||
* That means a full chunk that spans heading + paragraph won't match as a
|
||||
* single string — we have to pick a shorter snippet that fits inside one
|
||||
* rendered block.
|
||||
*
|
||||
* `buildCitationSearchCandidates` returns search strings ordered from
|
||||
* "most-specific anchor" to "broadest fallback":
|
||||
* 1. First sentence of the chunk (capped at `FIRST_SENTENCE_MAX`).
|
||||
* 2. First `FIRST_PHRASE_WORDS` words.
|
||||
* 3. Each non-trivial line of the chunk, in source order — gives us a
|
||||
* separate attempt for each rendered block, so a heading line with
|
||||
* an inline link doesn't doom the whole jump.
|
||||
* 4. Full chunk (only if it's already short enough to plausibly fit
|
||||
* inside one block).
|
||||
*
|
||||
* The caller tries each candidate in turn — set the plugin's `search`
|
||||
* option, `editor.api.redecorate()`, then check the editor DOM for a
|
||||
* `.citation-highlight-leaf` element. First candidate that produces one
|
||||
* wins; subsequent candidates are skipped.
|
||||
*/
|
||||
|
||||
// Character cap for the first-sentence candidate.
const FIRST_SENTENCE_MAX = 120;
// Number of leading words used for the short-phrase candidate.
const FIRST_PHRASE_WORDS = 8;
// Candidates shorter than this are too ambiguous to search for.
const MIN_SNIPPET_LENGTH = 6;
// Only chunks at or under this length are tried as a single full-text candidate.
const FULL_CHUNK_MAX = FIRST_SENTENCE_MAX * 2;
// Upper bound on the number of per-line candidates generated.
const MAX_LINE_CANDIDATES = 6;
// Character cap applied to each per-line candidate.
const LINE_CANDIDATE_MAX = FIRST_SENTENCE_MAX;
|
||||
|
||||
function normalizeWhitespace(input: string): string {
|
||||
return input.replace(/\s+/g, " ").trim();
|
||||
}
|
||||
|
||||
/**
 * Strip the markdown syntax that won't survive into the rendered editor's
 * plain text, so the chunk text (which comes back from the indexer as raw
 * source markdown) can be matched against the literal text values stored
 * in Plate's Slate tree.
 *
 * Order matters: handle multi-char and "container" syntax before single-
 * char emphasis, otherwise `**text**` collapses to `*text*` first.
 *
 * Heuristic only — we don't aim to be a full markdown parser, just to
 * remove the common markers (`**bold**`, `[text](url)`, `# headings`,
 * `- list`, etc.) that show up in connector-doc chunks and would break
 * literal substring search.
 */
export function stripMarkdownForMatch(input: string): string {
	let s = input;
	// Fenced code blocks: drop the ``` fences and language tag, keep the body.
	s = s.replace(/```[a-z0-9_+-]*\n?([\s\S]*?)```/gi, (_, body: string) => body);
	// HTML comments are removed entirely (replaced by a space).
	s = s.replace(/<!--[\s\S]*?-->/g, " ");
	// Images, inline `![alt](url)` then reference `![alt][ref]`: keep the alt text.
	s = s.replace(/!\[([^\]]*)\]\([^)]*\)/g, "$1");
	s = s.replace(/!\[([^\]]*)\]\[[^\]]*\]/g, "$1");
	// Links, inline `[text](url)` then reference `[text][ref]`: keep the link text.
	// Must run after images so the leading `!` form is consumed first.
	s = s.replace(/\[([^\]]+)\]\([^)]*\)/g, "$1");
	s = s.replace(/\[([^\]]+)\]\[[^\]]*\]/g, "$1");
	// Autolinks `<https://…>` / `<mailto:…>`: keep the bare URL/address.
	s = s.replace(/<((?:https?|mailto):[^>\s]+)>/g, "$1");
	// Inline code spans: keep the content, drop the backticks.
	s = s.replace(/`+([^`\n]+?)`+/g, "$1");
	// Bold (`**` / `__`) BEFORE single-char emphasis — see doc comment above.
	s = s.replace(/(\*\*|__)([\s\S]+?)\1/g, "$2");
	// Single-char emphasis `*text*` / `_text_`; the lookarounds avoid
	// stripping intra-word underscores like `snake_case`.
	s = s.replace(/(?<!\w)([*_])([^*_\n]+?)\1(?!\w)/g, "$2");
	// Strikethrough `~~text~~`.
	s = s.replace(/~~([^~]+)~~/g, "$1");
	// ATX heading markers (`# `…`###### `).
	s = s.replace(/^[ \t]{0,3}#{1,6}[ \t]+/gm, "");
	// Setext heading underlines (lines of `=` or `-`).
	s = s.replace(/^[ \t]{0,3}(?:=+|-+)[ \t]*$/gm, "");
	// Blockquote markers (`>`, possibly nested).
	s = s.replace(/^[ \t]{0,3}>+[ \t]?/gm, "");
	// Unordered (`-`/`*`/`+`) and ordered (`1.`) list markers.
	s = s.replace(/^[ \t]*[-*+][ \t]+/gm, "");
	s = s.replace(/^[ \t]*\d+\.[ \t]+/gm, "");
	// Horizontal rules (three or more `-`/`*`/`_`, optionally spaced).
	s = s.replace(/^[ \t]{0,3}(?:[-*_])(?:[ \t]*[-*_]){2,}[ \t]*$/gm, "");
	// Table separator rows (`| --- | :---: | …`).
	s = s.replace(/^[ \t]*\|?(?:[ \t]*:?-+:?[ \t]*\|)+[ \t]*:?-+:?[ \t]*\|?[ \t]*$/gm, "");
	// Backslash escapes: `\*` -> `*`, etc.
	s = s.replace(/\\([\\`*_{}[\]()#+\-.!~>])/g, "$1");
	return s;
}
|
||||
|
||||
export function buildCitationSearchCandidates(rawText: string): string[] {
|
||||
if (!rawText) return [];
|
||||
const stripped = stripMarkdownForMatch(rawText);
|
||||
const normalized = normalizeWhitespace(stripped);
|
||||
if (normalized.length < MIN_SNIPPET_LENGTH) return [];
|
||||
|
||||
const out: string[] = [];
|
||||
const seen = new Set<string>();
|
||||
const push = (s: string) => {
|
||||
const t = normalizeWhitespace(s);
|
||||
if (t.length >= MIN_SNIPPET_LENGTH && !seen.has(t)) {
|
||||
out.push(t);
|
||||
seen.add(t);
|
||||
}
|
||||
};
|
||||
|
||||
const sentenceMatch = normalized.match(/^[^.!?]+[.!?]/);
|
||||
if (sentenceMatch) {
|
||||
const sentence = sentenceMatch[0];
|
||||
push(sentence.length > FIRST_SENTENCE_MAX ? sentence.slice(0, FIRST_SENTENCE_MAX) : sentence);
|
||||
} else if (normalized.length > FIRST_SENTENCE_MAX) {
|
||||
push(normalized.slice(0, FIRST_SENTENCE_MAX));
|
||||
}
|
||||
|
||||
const words = normalized.split(" ").filter(Boolean);
|
||||
if (words.length > FIRST_PHRASE_WORDS) {
|
||||
push(words.slice(0, FIRST_PHRASE_WORDS).join(" "));
|
||||
}
|
||||
|
||||
// Per-line candidates: each chunk line is roughly one block in the
|
||||
// rendered editor. Trying them in order gives us a separate decorate
|
||||
// attempt for each block, which matters when the first line is a
|
||||
// heading containing a link (Plate's `FindReplacePlugin` will skip
|
||||
// any block whose children aren't all text nodes).
|
||||
const rawLines = stripped.split(/\r?\n/);
|
||||
let lineCount = 0;
|
||||
for (const line of rawLines) {
|
||||
if (lineCount >= MAX_LINE_CANDIDATES) break;
|
||||
const trimmed = normalizeWhitespace(line);
|
||||
if (trimmed.length < MIN_SNIPPET_LENGTH) continue;
|
||||
push(trimmed.length > LINE_CANDIDATE_MAX ? trimmed.slice(0, LINE_CANDIDATE_MAX) : trimmed);
|
||||
lineCount++;
|
||||
}
|
||||
|
||||
if (normalized.length <= FULL_CHUNK_MAX) {
|
||||
push(normalized);
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
|
@ -36,6 +36,7 @@
|
|||
"@platejs/code-block": "^52.0.11",
|
||||
"@platejs/combobox": "^52.0.15",
|
||||
"@platejs/dnd": "^52.0.11",
|
||||
"@platejs/find-replace": "^52.3.10",
|
||||
"@platejs/floating": "^52.0.11",
|
||||
"@platejs/indent": "^52.0.11",
|
||||
"@platejs/link": "^52.0.11",
|
||||
|
|
|
|||
17
surfsense_web/pnpm-lock.yaml
generated
17
surfsense_web/pnpm-lock.yaml
generated
|
|
@ -53,6 +53,9 @@ importers:
|
|||
'@platejs/dnd':
|
||||
specifier: ^52.0.11
|
||||
version: 52.0.11(platejs@52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4)))(react-dnd-html5-backend@16.0.1)(react-dnd@16.0.1(@types/node@20.19.33)(@types/react@19.2.14)(react@19.2.4))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)
|
||||
'@platejs/find-replace':
|
||||
specifier: ^52.3.10
|
||||
version: 52.3.10(platejs@52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4)))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)
|
||||
'@platejs/floating':
|
||||
specifier: ^52.0.11
|
||||
version: 52.0.11(platejs@52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4)))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)
|
||||
|
|
@ -2827,6 +2830,13 @@ packages:
|
|||
react-dnd-html5-backend: '>=14.0.0'
|
||||
react-dom: '>=18.0.0'
|
||||
|
||||
'@platejs/find-replace@52.3.10':
|
||||
resolution: {integrity: sha512-V/MOMMUYxHfEn/skd2+YO213xSATFDVsl8FzVzVRV/XaxwwVefH2EPD1lAVIvmYjennTVTTsHHtEI9K9iOsEaA==}
|
||||
peerDependencies:
|
||||
platejs: '>=52.0.11'
|
||||
react: '>=18.0.0'
|
||||
react-dom: '>=18.0.0'
|
||||
|
||||
'@platejs/floating@52.0.11':
|
||||
resolution: {integrity: sha512-ApNpw4KWml+kuK+XTTpji+f/7GxTR4nRzlnfJMvGBrJpLPQ4elS5MABm3oUi81DZn+aub5HvsyH7UqCw7F76IA==}
|
||||
peerDependencies:
|
||||
|
|
@ -11105,6 +11115,13 @@ snapshots:
|
|||
react-dnd-html5-backend: 16.0.1
|
||||
react-dom: 19.2.4(react@19.2.4)
|
||||
|
||||
'@platejs/find-replace@52.3.10(platejs@52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4)))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)':
|
||||
dependencies:
|
||||
platejs: 52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4))
|
||||
react: 19.2.4
|
||||
react-compiler-runtime: 1.0.0(react@19.2.4)
|
||||
react-dom: 19.2.4(react@19.2.4)
|
||||
|
||||
'@platejs/floating@52.0.11(platejs@52.0.17(@types/react@19.2.14)(immer@10.2.0)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(scheduler@0.27.0)(use-sync-external-store@1.6.0(react@19.2.4)))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)':
|
||||
dependencies:
|
||||
'@floating-ui/core': 1.7.4
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue