feat: improved document, folder mentions rendering
Some checks are pending
Build and Push Docker Images / tag_release (push) Waiting to run
Build and Push Docker Images / build (./surfsense_backend, ./surfsense_backend/Dockerfile, backend, surfsense-backend, ubuntu-24.04-arm, linux/arm64, arm64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_backend, ./surfsense_backend/Dockerfile, backend, surfsense-backend, ubuntu-latest, linux/amd64, amd64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_web, ./surfsense_web/Dockerfile, web, surfsense-web, ubuntu-24.04-arm, linux/arm64, arm64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_web, ./surfsense_web/Dockerfile, web, surfsense-web, ubuntu-latest, linux/amd64, amd64) (push) Blocked by required conditions
Build and Push Docker Images / create_manifest (backend, surfsense-backend) (push) Blocked by required conditions
Build and Push Docker Images / create_manifest (web, surfsense-web) (push) Blocked by required conditions

This commit is contained in:
DESKTOP-RTLN3BA\$punk 2026-05-09 22:15:51 -07:00
parent 28a02a9143
commit c8374e6c5b
59 changed files with 1725 additions and 361 deletions

View file

@ -46,6 +46,10 @@ class SurfSenseContextSchema:
Read by ``KnowledgePriorityMiddleware`` to seed its priority
list. Stays out of the compiled-agent cache key that's the
whole point of putting it here.
mentioned_folder_ids: KB folders the user @-mentioned this turn
(cloud filesystem mode). Surfaced as ``[USER-MENTIONED]``
entries in ``<priority_documents>`` so the agent prioritises
walking those folders with ``ls`` / ``find_documents``.
file_operation_contract: One-shot file operation contract emitted
by ``FileIntentMiddleware`` for the upcoming turn.
turn_id / request_id: Correlation IDs surfaced by the streaming
@ -59,6 +63,7 @@ class SurfSenseContextSchema:
search_space_id: int | None = None
mentioned_document_ids: list[int] = field(default_factory=list)
mentioned_folder_ids: list[int] = field(default_factory=list)
file_operation_contract: FileOperationContractState | None = None
turn_id: str | None = None
request_id: str | None = None

View file

@ -0,0 +1,281 @@
"""Resolve @-mention chips to canonical virtual paths and substitute the
user-visible ``@title`` tokens with backtick-wrapped paths in the prompt
the agent sees.
The frontend's mention seam is a single discriminated-union list of
``{kind: "doc" | "folder", id, title, document_type?}`` chips (see
``surfsense_web/atoms/chat/mentioned-documents.atom.ts``). When a turn
reaches the backend stream task we have three needs that this module
centralises:
1. Map each chip to its canonical virtual path
(``/documents/.../file.xml`` for docs, ``/documents/MyFolder/`` for
folders) so the agent sees concrete filesystem locations instead of
ambiguous ``@``-titles.
2. Substitute ``@title`` tokens in the user-typed text with backtick-
wrapped paths so the path becomes part of the ``HumanMessage`` body
the LLM consumes without rewriting the persisted user message
text (which keeps ``@title`` so chip rendering on reload is
unchanged).
3. Surface the resolved id sets (docs + folders) to the priority
middleware so it can render ``[USER-MENTIONED]`` priority entries
without re-doing path resolution.
This is intentionally one module — see the architectural note in
``mention-paths-and-folders`` plan: previously the doc-resolution lived
inline in ``stream_new_chat`` and the folder mention had no resolution
at all. Centralising both behind a single ``resolve_mentions`` call
turns a leaky multi-field seam into a single deeper interface.
"""
from __future__ import annotations
import logging
from dataclasses import dataclass, field
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.agents.new_chat.path_resolver import (
DOCUMENTS_ROOT,
build_path_index,
doc_to_virtual_path,
)
from app.db import Document, Folder
from app.schemas.new_chat import MentionedDocumentInfo
logger = logging.getLogger(__name__)
@dataclass(frozen=True)
class ResolvedMention:
    """Canonical view of a single @-mention chip.

    ``virtual_path`` is the path the agent will see (no trailing slash
    for documents, trailing ``/`` for folders to match the convention
    used by ``KnowledgeTreeMiddleware``).
    """

    # Chip discriminator mirroring the frontend union: "doc" | "folder".
    kind: str
    # Primary key of the referenced Document or Folder row (per ``kind``).
    id: int
    # Display title the user saw in the mention chip.
    title: str
    # Agent-facing path, e.g. "/documents/Report.pdf.xml" or "/documents/Foo/".
    virtual_path: str
@dataclass
class ResolvedMentionSet:
    """Aggregate result of resolving a turn's mention chips.

    ``token_to_path`` maps ``@title`` (the literal token the user typed
    and the editor emitted) to the canonical virtual path for that
    chip. It is produced longest-token-first so substitution mirrors
    ``parseMentionSegments`` on the frontend (a longer title like
    ``@Project Roadmap`` is never shadowed by a shorter prefix
    ``@Project``).

    ``mentioned_document_ids`` collapses doc + surfsense_doc chips into
    a single ordered, deduped list because the priority middleware
    treats them uniformly downstream — see
    ``KnowledgePriorityMiddleware._compute_priority_paths``.
    """

    # Every chip that survived resolution: docs first, then folders,
    # each group in input order.
    mentions: list[ResolvedMention] = field(default_factory=list)
    # (token, path) pairs sorted longest-token-first, ready for
    # ``substitute_in_text``.
    token_to_path: list[tuple[str, str]] = field(default_factory=list)
    # Doc ids confirmed to exist in the search space, deduped, input order.
    mentioned_document_ids: list[int] = field(default_factory=list)
    # Folder ids confirmed to exist in the search space, deduped, input order.
    mentioned_folder_ids: list[int] = field(default_factory=list)
def _folder_virtual_path(folder_id: int, folder_paths: dict[int, str]) -> str:
"""Return ``/documents/Folder/Sub/`` for a folder id.
Falls back to the documents root when the folder is missing from
the index (deleted or in a different search space). Trailing slash
matches ``KnowledgeTreeMiddleware`` (``/documents/MyFolder/``) so
the agent's ``ls`` can dispatch on it as a directory.
"""
base = folder_paths.get(folder_id, DOCUMENTS_ROOT)
return f"{base}/" if not base.endswith("/") else base
async def resolve_mentions(
    session: AsyncSession,
    *,
    search_space_id: int,
    mentioned_documents: list[MentionedDocumentInfo] | None,
    mentioned_document_ids: list[int] | None = None,
    mentioned_surfsense_doc_ids: list[int] | None = None,
    mentioned_folder_ids: list[int] | None = None,
) -> ResolvedMentionSet:
    """Resolve every @-mention chip on a turn into virtual paths.

    The function takes both the ``mentioned_documents`` discriminated
    list (chip metadata used for substitution + persistence) and the
    parallel id arrays (``mentioned_document_ids``,
    ``mentioned_surfsense_doc_ids``, ``mentioned_folder_ids``) for two
    reasons:

    * Legacy clients that haven't migrated to the unified chip list
      still send the id arrays — we treat the union as authoritative.
    * The id arrays are the canonical input to
      ``KnowledgePriorityMiddleware`` (via ``SurfSenseContextSchema``);
      returning the deduped, validated lists lets the route forward
      them unchanged.

    Resolution is best-effort: a chip whose id no longer exists (e.g.
    document was deleted between mention and submit) is silently
    dropped. The agent still sees the user's original text, just
    without a backtick-path substitution for that chip.
    """
    # Split the discriminated chip list into per-kind id pools and
    # remember the user-visible titles keyed by (kind, id) so token
    # substitution can prefer what the user actually typed over the DB
    # title (which may have changed since the chip was created).
    chip_doc_ids: list[int] = []
    chip_folder_ids: list[int] = []
    chip_titles_by_id: dict[tuple[str, int], str] = {}
    if mentioned_documents:
        for chip in mentioned_documents:
            kind = chip.kind
            if kind == "folder":
                chip_folder_ids.append(chip.id)
            else:
                # Any non-folder kind (including legacy default "doc")
                # is treated as a document chip.
                chip_doc_ids.append(chip.id)
            chip_titles_by_id[(kind, chip.id)] = chip.title
    # Union of legacy id arrays and chip-derived ids. ``dict.fromkeys``
    # dedupes while preserving first-occurrence order, which the
    # returned lists (and the priority middleware) rely on.
    doc_id_pool: list[int] = list(
        dict.fromkeys(
            [
                *(mentioned_document_ids or []),
                *(mentioned_surfsense_doc_ids or []),
                *chip_doc_ids,
            ]
        )
    )
    folder_id_pool: list[int] = list(
        dict.fromkeys([*(mentioned_folder_ids or []), *chip_folder_ids])
    )
    # Fast path: no mentions at all — skip the path-index build and the
    # two DB queries entirely.
    if not doc_id_pool and not folder_id_pool:
        return ResolvedMentionSet()
    index = await build_path_index(session, search_space_id)
    # Bulk-fetch the mentioned documents scoped to the search space;
    # ids outside the space simply won't appear in ``doc_rows`` and are
    # dropped in the resolution loop below.
    doc_rows: dict[int, Document] = {}
    if doc_id_pool:
        result = await session.execute(
            select(Document).where(
                Document.search_space_id == search_space_id,
                Document.id.in_(doc_id_pool),
            )
        )
        for row in result.scalars().all():
            doc_rows[row.id] = row
    # Same bulk fetch for folders.
    folder_rows: dict[int, Folder] = {}
    if folder_id_pool:
        result = await session.execute(
            select(Folder).where(
                Folder.search_space_id == search_space_id,
                Folder.id.in_(folder_id_pool),
            )
        )
        for row in result.scalars().all():
            folder_rows[row.id] = row
    resolved: list[ResolvedMention] = []
    accepted_doc_ids: list[int] = []
    accepted_folder_ids: list[int] = []
    # Documents first, then folders — the output ``mentions`` list keeps
    # this grouping, each group in pool (input) order.
    for doc_id in doc_id_pool:
        row = doc_rows.get(doc_id)
        if row is None:
            # Best-effort: deleted or out-of-space id — drop the chip.
            logger.debug(
                "mention_resolver: dropping doc id=%s (not found in space=%s)",
                doc_id,
                search_space_id,
            )
            continue
        # Prefer the chip title (what the user typed) for the token;
        # fall back to the DB title for ids that arrived via the legacy
        # id arrays without chip metadata.
        title = chip_titles_by_id.get(("doc", doc_id), str(row.title or ""))
        path = doc_to_virtual_path(
            doc_id=row.id,
            title=str(row.title or "untitled"),
            folder_id=row.folder_id,
            index=index,
        )
        resolved.append(
            ResolvedMention(kind="doc", id=row.id, title=title, virtual_path=path)
        )
        accepted_doc_ids.append(row.id)
    for folder_id in folder_id_pool:
        row = folder_rows.get(folder_id)
        if row is None:
            logger.debug(
                "mention_resolver: dropping folder id=%s (not found in space=%s)",
                folder_id,
                search_space_id,
            )
            continue
        title = chip_titles_by_id.get(("folder", folder_id), str(row.name or ""))
        path = _folder_virtual_path(row.id, index.folder_paths)
        resolved.append(
            ResolvedMention(kind="folder", id=row.id, title=title, virtual_path=path)
        )
        accepted_folder_ids.append(row.id)
    # Build the substitution table: one entry per distinct "@title"
    # token (first chip wins on a title collision), then sort longest
    # token first so a longer title is never shadowed by a prefix.
    token_to_path: list[tuple[str, str]] = []
    seen_tokens: set[str] = set()
    for mention in resolved:
        if not mention.title:
            continue
        token = f"@{mention.title}"
        if token in seen_tokens:
            continue
        seen_tokens.add(token)
        token_to_path.append((token, mention.virtual_path))
    token_to_path.sort(key=lambda pair: len(pair[0]), reverse=True)
    return ResolvedMentionSet(
        mentions=resolved,
        token_to_path=token_to_path,
        mentioned_document_ids=accepted_doc_ids,
        mentioned_folder_ids=accepted_folder_ids,
    )
def substitute_in_text(text: str, token_to_path: list[tuple[str, str]]) -> str:
    """Swap each ``@title`` token in ``text`` for its backtick-wrapped path.

    Mirrors ``parseMentionSegments`` on the frontend: one forward scan,
    trying the tokens in the given (longest-first) order at every
    position, with no regex so titles containing regex metacharacters
    need no escaping. Re-running on already-substituted text is a
    no-op because the replacement starts with a backtick, not ``@``.

    Empty / no-op inputs short-circuit so callers can pass text through
    unconditionally without paying for a scan.
    """
    if not text or not token_to_path:
        return text
    pieces: list[str] = []
    cursor = 0
    end = len(text)
    while cursor < end:
        hit = next(
            (pair for pair in token_to_path if text.startswith(pair[0], cursor)),
            None,
        )
        if hit is None:
            pieces.append(text[cursor])
            cursor += 1
        else:
            token, path = hit
            pieces.append(f"`{path}`")
            cursor += len(token)
    return "".join(pieces)
__all__ = [
"ResolvedMention",
"ResolvedMentionSet",
"resolve_mentions",
"substitute_in_text",
]

View file

@ -54,6 +54,7 @@ from app.db import (
NATIVE_TO_LEGACY_DOCTYPE,
Chunk,
Document,
Folder,
shielded_async_session,
)
from app.retriever.chunks_hybrid_search import ChucksHybridSearchRetriever
@ -832,6 +833,22 @@ class KnowledgePriorityMiddleware(AgentMiddleware): # type: ignore[type-arg]
mention_ids = list(self.mentioned_document_ids)
self.mentioned_document_ids = []
# Folder mentions live alongside doc mentions on the runtime
# context. They never feed hybrid search (folders aren't
# embedded) — they're surfaced purely as ``[USER-MENTIONED]``
# priority entries so the agent walks the folder with ``ls`` /
# ``find_documents`` instead of ignoring it. Cloud filesystem
# mode only.
folder_mention_ids: list[int] = []
if (
ctx is not None
and getattr(self, "filesystem_mode", FilesystemMode.CLOUD)
== FilesystemMode.CLOUD
):
ctx_folders = getattr(ctx, "mentioned_folder_ids", None)
if ctx_folders:
folder_mention_ids = list(ctx_folders)
mentioned_results: list[dict[str, Any]] = []
if mention_ids:
mentioned_results = await fetch_mentioned_documents(
@ -876,16 +893,21 @@ class KnowledgePriorityMiddleware(AgentMiddleware): # type: ignore[type-arg]
priority, matched_chunk_ids = await self._materialize_priority(merged)
if folder_mention_ids:
folder_entries = await self._materialize_folder_priority(folder_mention_ids)
priority = folder_entries + priority
new_messages = list(messages)
insert_at = max(len(new_messages) - 1, 0)
new_messages.insert(insert_at, _render_priority_message(priority))
_perf_log.info(
"[kb_priority] completed in %.3fs query=%r priority=%d mentioned=%d",
"[kb_priority] completed in %.3fs query=%r priority=%d mentioned=%d folders=%d",
asyncio.get_event_loop().time() - t0,
user_text[:80],
len(priority),
len(mentioned_results),
len(folder_mention_ids),
)
return {
@ -894,6 +916,58 @@ class KnowledgePriorityMiddleware(AgentMiddleware): # type: ignore[type-arg]
"messages": new_messages,
}
async def _materialize_folder_priority(
    self, folder_ids: list[int]
) -> list[dict[str, Any]]:
    """Resolve user-mentioned folder ids to ``<priority_documents>`` entries.

    Each entry uses the canonical ``/documents/Folder/Sub/`` virtual
    path (matching ``KnowledgeTreeMiddleware`` and the agent's ``ls``
    adapter) and is flagged ``mentioned=True`` so the rendered line
    carries ``[USER-MENTIONED]``. ``score`` stays ``None`` so the
    renderer prints ``n/a`` — folders aren't ranked; the agent decides
    which children to read.
    """
    if not folder_ids:
        return []
    # Resolve paths and display names in one short-lived session.
    async with shielded_async_session() as session:
        index: PathIndex = await build_path_index(session, self.search_space_id)
        rows = await session.execute(
            select(Folder.id, Folder.name).where(
                Folder.search_space_id == self.search_space_id,
                Folder.id.in_(folder_ids),
            )
        )
        name_by_id: dict[int, str] = {row.id: row.name for row in rows.all()}
    out: list[dict[str, Any]] = []
    # dict.fromkeys dedupes while keeping first-occurrence order.
    for fid in dict.fromkeys(folder_ids):
        base = index.folder_paths.get(fid)
        if base is None:
            # Folder deleted (or outside this space) between mention and
            # materialisation — drop it, best-effort.
            logger.debug(
                "kb_priority: dropping folder id=%s (missing from path index)",
                fid,
            )
            continue
        out.append(
            {
                "path": base if base.endswith("/") else f"{base}/",
                "score": None,
                "document_id": None,
                "folder_id": fid,
                "title": name_by_id.get(fid, ""),
                "mentioned": True,
            }
        )
    return out
async def _materialize_priority(
self, merged: list[dict[str, Any]]
) -> tuple[list[dict[str, Any]], dict[int, list[int]]]:

View file

@ -7,6 +7,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from sqlalchemy.orm import selectinload
from app.agents.new_chat.path_resolver import virtual_path_to_doc
from app.db import (
Chunk,
Document,
@ -752,7 +753,24 @@ async def get_document_by_virtual_path(
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
"""Resolve a knowledge-base document id by exact virtual path."""
"""Resolve a knowledge-base document by its agent-facing virtual path.
The agent renders every document under ``/documents/...`` with a
``.xml`` extension appended via ``safe_filename`` (so a PDF titled
``2025-W2.pdf`` becomes ``/documents/2025-W2.pdf.xml``). When the user
clicks that path in an answer, this endpoint must round-trip back to
the underlying ``Document`` row regardless of its type — agent-created
NOTE docs (which carry ``virtual_path`` in metadata), uploaded PDFs,
and connector docs all flow through here.
Resolution is delegated to :func:`virtual_path_to_doc`, the single
source of truth that handles:
* ``unique_identifier_hash`` lookup (agent NOTE fast path)
* ``" (<doc_id>).xml"`` disambiguation suffixes
* ``.xml`` extension stripping for title-based fallback
* ``safe_filename`` round-trip for connector titles with lossy chars
"""
try:
await check_permission(
session,
@ -762,24 +780,19 @@ async def get_document_by_virtual_path(
"You don't have permission to read documents in this search space",
)
result = await session.execute(
select(
Document.id,
Document.title,
Document.document_type,
).filter(
Document.search_space_id == search_space_id,
Document.document_metadata["virtual_path"].as_string() == virtual_path,
)
document = await virtual_path_to_doc(
session,
search_space_id=search_space_id,
virtual_path=virtual_path,
)
row = result.first()
if row is None:
if document is None:
raise HTTPException(status_code=404, detail="Document not found")
return DocumentTitleRead(
id=row.id,
title=row.title,
document_type=row.document_type,
id=document.id,
title=document.title,
document_type=document.document_type,
folder_id=document.folder_id,
)
except HTTPException:
raise

View file

@ -1781,6 +1781,7 @@ async def handle_new_chat(
llm_config_id=llm_config_id,
mentioned_document_ids=request.mentioned_document_ids,
mentioned_surfsense_doc_ids=request.mentioned_surfsense_doc_ids,
mentioned_folder_ids=request.mentioned_folder_ids,
mentioned_documents=mentioned_documents_payload,
needs_history_bootstrap=thread.needs_history_bootstrap,
thread_visibility=thread.visibility,
@ -2266,6 +2267,7 @@ async def regenerate_response(
llm_config_id=llm_config_id,
mentioned_document_ids=request.mentioned_document_ids,
mentioned_surfsense_doc_ids=request.mentioned_surfsense_doc_ids,
mentioned_folder_ids=request.mentioned_folder_ids,
mentioned_documents=mentioned_documents_payload,
checkpoint_id=target_checkpoint_id,
needs_history_bootstrap=thread.needs_history_bootstrap,

View file

@ -201,18 +201,34 @@ class NewChatUserImagePart(BaseModel):
class MentionedDocumentInfo(BaseModel):
"""Display metadata for a single ``@``-mentioned document.
"""Display metadata for a single ``@``-mention chip.
The full triple ``{id, title, document_type}`` is forwarded by the
frontend mention chip so the server can embed it in the persisted
user message ``ContentPart[]`` (single ``mentioned-documents`` part).
The history loader then renders the chips on reload without an extra
Carries either a knowledge-base document or a knowledge-base folder
(discriminated by ``kind``). The full triple
``{id, title, document_type}`` is forwarded by the frontend mention
chip so the server can embed it in the persisted user message
``ContentPart[]`` (single ``mentioned-documents`` part). The
history loader then renders the chips on reload without an extra
fetch — mirrors the pre-refactor frontend ``persistUserTurn`` shape.
``kind`` defaults to ``"doc"`` so legacy clients and persisted rows
that predate folder mentions deserialise unchanged.
"""
id: int
title: str = Field(..., min_length=1, max_length=500)
document_type: str = Field(..., min_length=1, max_length=100)
kind: Literal["doc", "folder"] = Field(
default="doc",
description=(
"Discriminator for the chip's referent: ``doc`` is a "
"knowledge-base ``Document`` row, ``folder`` is a "
"knowledge-base ``Folder`` row. Folders carry the sentinel "
"``document_type='FOLDER'`` to keep the frontend dedup key "
"``(kind:document_type:id)`` from colliding doc and folder "
"ids that happen to share an integer value."
),
)
class NewChatRequest(BaseModel):
@ -228,15 +244,26 @@ class NewChatRequest(BaseModel):
mentioned_surfsense_doc_ids: list[int] | None = (
None # Optional SurfSense documentation IDs mentioned with @ in the chat
)
mentioned_folder_ids: list[int] | None = Field(
default=None,
description=(
"Optional knowledge-base folder IDs the user mentioned with "
"@. Resolved to virtual paths (``/documents/.../``) by "
"``mention_resolver`` and surfaced to the agent via "
"(a) backtick-wrapped substitution in ``user_query`` and "
"(b) a ``[USER-MENTIONED]`` entry in ``<priority_documents>``. "
"The agent's ``ls`` tool can then walk the folder itself."
),
)
mentioned_documents: list[MentionedDocumentInfo] | None = Field(
default=None,
description=(
"Display metadata (id, title, document_type) for every "
"@-mentioned document. Persisted as a ``mentioned-documents`` "
"ContentPart on the user message so reload renders chips "
"without an extra fetch. Optional and additive — when None "
"the user message is persisted without a mentioned-documents "
"part."
"Display metadata (id, title, document_type, kind) for every "
"@-mention chip — both documents and folders. Persisted as a "
"``mentioned-documents`` ContentPart on the user message so "
"reload renders chips without an extra fetch. Optional and "
"additive — when None the user message is persisted without "
"a mentioned-documents part."
),
)
disabled_tools: list[str] | None = (
@ -290,14 +317,22 @@ class RegenerateRequest(BaseModel):
)
mentioned_document_ids: list[int] | None = None
mentioned_surfsense_doc_ids: list[int] | None = None
mentioned_folder_ids: list[int] | None = Field(
default=None,
description=(
"Optional knowledge-base folder IDs the user mentioned with "
"@ on the edited user turn. Only used when ``user_query`` is "
"non-None (edit). Mirrors ``NewChatRequest.mentioned_folder_ids``."
),
)
mentioned_documents: list[MentionedDocumentInfo] | None = Field(
default=None,
description=(
"Display metadata (id, title, document_type) for every "
"@-mentioned document on the edited user turn. Only used "
"when ``user_query`` is non-None (edit). Persisted as a "
"``mentioned-documents`` ContentPart on the new user "
"message. None means no chip metadata."
"Display metadata (id, title, document_type, kind) for every "
"@-mention chip on the edited user turn — both documents and "
"folders. Only used when ``user_query`` is non-None (edit). "
"Persisted as a ``mentioned-documents`` ContentPart on the "
"new user message. None means no chip metadata."
),
)
disabled_tools: list[str] | None = None
@ -373,6 +408,16 @@ class ResumeRequest(BaseModel):
filesystem_mode: Literal["cloud", "desktop_local_folder"] = "cloud"
client_platform: Literal["web", "desktop"] = "web"
local_filesystem_mounts: list[LocalFilesystemMountPayload] | None = None
mentioned_folder_ids: list[int] | None = Field(
default=None,
description=(
"Forwarded for symmetry with /new_chat and /regenerate. "
"Resume reuses the original interrupted user turn so this "
"field is informational only — the originating turn's "
"folder mentions already shaped the priority hints baked "
"into the agent's checkpoint."
),
)
mentioned_documents: list[MentionedDocumentInfo] | None = Field(
default=None,
description=(

View file

@ -43,9 +43,7 @@ class EmitterRegistry:
return main_emitter()
def has_active_subagents(self) -> bool:
return any(
emitter.level == "subagent" for emitter in self._by_run_id.values()
)
return any(emitter.level == "subagent" for emitter in self._by_run_id.values())
def clear(self) -> None:
self._by_run_id.clear()

View file

@ -6,9 +6,7 @@ from ..emitter import Emitter, attach_emitted_by
from ..envelope import format_sse
def format_reasoning_start(
reasoning_id: str, *, emitter: Emitter | None = None
) -> str:
def format_reasoning_start(reasoning_id: str, *, emitter: Emitter | None = None) -> str:
return format_sse(
attach_emitted_by({"type": "reasoning-start", "id": reasoning_id}, emitter)
)
@ -28,9 +26,7 @@ def format_reasoning_delta(
)
def format_reasoning_end(
reasoning_id: str, *, emitter: Emitter | None = None
) -> str:
def format_reasoning_end(reasoning_id: str, *, emitter: Emitter | None = None) -> str:
return format_sse(
attach_emitted_by({"type": "reasoning-end", "id": reasoning_id}, emitter)
)

View file

@ -7,9 +7,7 @@ from ..envelope import format_sse
def format_text_start(text_id: str, *, emitter: Emitter | None = None) -> str:
return format_sse(
attach_emitted_by({"type": "text-start", "id": text_id}, emitter)
)
return format_sse(attach_emitted_by({"type": "text-start", "id": text_id}, emitter))
def format_text_delta(
@ -26,6 +24,4 @@ def format_text_delta(
def format_text_end(text_id: str, *, emitter: Emitter | None = None) -> str:
return format_sse(
attach_emitted_by({"type": "text-end", "id": text_id}, emitter)
)
return format_sse(attach_emitted_by({"type": "text-end", "id": text_id}, emitter))

View file

@ -84,9 +84,7 @@ class StreamingService:
def format_step_finish(self, *, emitter: Emitter | None = None) -> str:
return lifecycle.format_step_finish(emitter=emitter)
def format_text_start(
self, text_id: str, *, emitter: Emitter | None = None
) -> str:
def format_text_start(self, text_id: str, *, emitter: Emitter | None = None) -> str:
return text.format_text_start(text_id, emitter=emitter)
def format_text_delta(
@ -94,9 +92,7 @@ class StreamingService:
) -> str:
return text.format_text_delta(text_id, delta, emitter=emitter)
def format_text_end(
self, text_id: str, *, emitter: Emitter | None = None
) -> str:
def format_text_end(self, text_id: str, *, emitter: Emitter | None = None) -> str:
return text.format_text_end(text_id, emitter=emitter)
def format_reasoning_start(

View file

@ -51,7 +51,9 @@ logger = logging.getLogger(__name__)
_MEANINGFUL_PART_TYPES: frozenset[str] = frozenset({"text", "reasoning", "tool-call"})
def _merge_tool_part_metadata(part: dict[str, Any], metadata: dict[str, Any] | None) -> None:
def _merge_tool_part_metadata(
part: dict[str, Any], metadata: dict[str, Any] | None
) -> None:
"""Shallow-merge ``metadata`` into ``part["metadata"]``; first key wins.
Used for tool-call linkage (``spanId``, ``thinkingStepId``, ): a later

View file

@ -109,17 +109,18 @@ def _build_user_content(
[{"type": "text", "text": "..."},
{"type": "image", "image": "data:..."},
{"type": "mentioned-documents", "documents": [{"id": int,
"title": str, "document_type": str}, ...]}]
"title": str, "document_type": str, "kind": "doc" | "folder"},
...]}]
The companion reader is
``app.utils.user_message_multimodal.split_persisted_user_content_parts``
which expects exactly this shape — keep them in sync.
``mentioned_documents``: optional list of ``{id, title, document_type}``
dicts. When non-empty (and a ``mentioned-documents`` part is not already
in some other input shape), a single ``{"type": "mentioned-documents",
"documents": [...]}`` part is appended. Mirrors the FE injection at
``page.tsx:281-286`` (``persistUserTurn``).
``mentioned_documents``: optional list of mention chip dicts. Each
dict may include a ``kind`` discriminator (``"doc"`` or ``"folder"``)
so the persisted ContentPart round-trips folder chips on reload.
When ``kind`` is missing we default to ``"doc"`` so legacy clients
that haven't migrated to the union schema still persist correctly.
"""
parts: list[dict[str, Any]] = [{"type": "text", "text": user_query or ""}]
for url in user_image_data_urls or ():
@ -135,11 +136,14 @@ def _build_user_content(
document_type = doc.get("document_type")
if doc_id is None or title is None or document_type is None:
continue
kind_raw = doc.get("kind", "doc")
kind = kind_raw if kind_raw in ("doc", "folder") else "doc"
normalized.append(
{
"id": doc_id,
"title": str(title),
"document_type": str(document_type),
"kind": kind,
}
)
if normalized:

View file

@ -43,6 +43,7 @@ from app.agents.new_chat.memory_extraction import (
extract_and_save_memory,
extract_and_save_team_memory,
)
from app.agents.new_chat.mention_resolver import resolve_mentions, substitute_in_text
from app.agents.new_chat.middleware.busy_mutex import (
end_turn,
get_cancel_state,
@ -929,6 +930,7 @@ async def stream_new_chat(
llm_config_id: int = -1,
mentioned_document_ids: list[int] | None = None,
mentioned_surfsense_doc_ids: list[int] | None = None,
mentioned_folder_ids: list[int] | None = None,
mentioned_documents: list[dict[str, Any]] | None = None,
checkpoint_id: str | None = None,
needs_history_bootstrap: bool = False,
@ -958,6 +960,7 @@ async def stream_new_chat(
needs_history_bootstrap: If True, load message history from DB (for cloned chats)
mentioned_document_ids: Optional list of document IDs mentioned with @ in the chat
mentioned_surfsense_doc_ids: Optional list of SurfSense doc IDs mentioned with @ in the chat
mentioned_folder_ids: Optional list of knowledge-base folder IDs mentioned with @ (cloud mode)
checkpoint_id: Optional checkpoint ID to rewind/fork from (for edit/reload operations)
Yields:
@ -1502,6 +1505,53 @@ async def stream_new_chat(
)
recent_reports = list(recent_reports_result.scalars().all())
# Resolve @-mention chips to canonical virtual paths and rewrite
# the user-typed text so the LLM sees ``\`/documents/...\``` instead
# of bare ``@title``. The persisted user-message text keeps
# ``@title`` so chip rendering on reload is unchanged — see
# ``persistence._build_user_content``.
#
# Cloud mode only: local-folder mode keeps the legacy
# ``@title`` text path; mention support there is a follow-up
# task because the path scheme (mount-rooted) and the picker
# UI both need separate work.
accepted_folder_ids: list[int] = []
if fs_mode == FilesystemMode.CLOUD.value and (
mentioned_document_ids
or mentioned_surfsense_doc_ids
or mentioned_folder_ids
or mentioned_documents
):
from app.schemas.new_chat import (
MentionedDocumentInfo as _MentionedDocumentInfo,
)
chip_objs: list[_MentionedDocumentInfo] | None = None
if mentioned_documents:
chip_objs = []
for raw in mentioned_documents:
if isinstance(raw, _MentionedDocumentInfo):
chip_objs.append(raw)
continue
try:
chip_objs.append(_MentionedDocumentInfo.model_validate(raw))
except Exception:
logger.debug(
"stream_new_chat: dropping malformed mention chip %r",
raw,
)
resolved = await resolve_mentions(
session,
search_space_id=search_space_id,
mentioned_documents=chip_objs,
mentioned_document_ids=mentioned_document_ids,
mentioned_surfsense_doc_ids=mentioned_surfsense_doc_ids,
mentioned_folder_ids=mentioned_folder_ids,
)
user_query = substitute_in_text(user_query, resolved.token_to_path)
accepted_folder_ids = resolved.mentioned_folder_ids
# Format the user query with context (SurfSense docs + reports only)
final_query = user_query
context_parts = []
@ -1901,6 +1951,9 @@ async def stream_new_chat(
runtime_context = SurfSenseContextSchema(
search_space_id=search_space_id,
mentioned_document_ids=list(mentioned_document_ids or []),
mentioned_folder_ids=list(
accepted_folder_ids or mentioned_folder_ids or []
),
request_id=request_id,
turn_id=stream_result.turn_id,
)

View file

@ -26,9 +26,7 @@ def handle_report_progress(
return None, last_active_step_items
phase = data.get("phase", "")
topic_items = [
item for item in last_active_step_items if item.startswith("Topic:")
]
topic_items = [item for item in last_active_step_items if item.startswith("Topic:")]
if phase in ("revising_section", "adding_section"):
plan_items = [
@ -56,7 +54,9 @@ def handle_report_progress(
return frame, new_items
def handle_document_created(data: dict[str, Any], *, streaming_service: Any) -> str | None:
def handle_document_created(
data: dict[str, Any], *, streaming_service: Any
) -> str | None:
if not data.get("id"):
return None
return streaming_service.format_data(

View file

@ -13,7 +13,9 @@ from app.tasks.chat.streaming.handlers.tools import (
)
from app.tasks.chat.streaming.helpers.tool_output import tool_output_has_error
from app.tasks.chat.streaming.relay.state import AgentEventRelayState
from app.tasks.chat.streaming.relay.task_span import clear_task_span_if_delegating_task_ended
from app.tasks.chat.streaming.relay.task_span import (
clear_task_span_if_delegating_task_ended,
)
from app.tasks.chat.streaming.relay.thinking_step_sse import emit_thinking_step_frame
@ -32,9 +34,7 @@ def iter_tool_end_frames(
run_id = event.get("run_id", "")
tool_name = event.get("name", "unknown_tool")
raw_output = event.get("data", {}).get("output", "")
staged_file_path = (
state.file_path_by_run.pop(run_id, None) if run_id else None
)
staged_file_path = state.file_path_by_run.pop(run_id, None) if run_id else None
if tool_name == "update_memory":
state.called_update_memory = True
@ -116,6 +116,4 @@ def iter_tool_end_frames(
)
yield from iter_tool_completion_emission_frames(emission_ctx)
clear_task_span_if_delegating_task_ended(
state, tool_name=tool_name, run_id=run_id
)
clear_task_span_if_delegating_task_ended(state, tool_name=tool_name, run_id=run_id)

View file

@ -15,7 +15,9 @@ def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking
def resolve_completed_thinking(
tool_name: str, tool_output: Any, last_items: list[str],
tool_name: str,
tool_output: Any,
last_items: list[str],
) -> tuple[str, list[str]]:
return default_thinking.resolve_completed_thinking(
tool_name, tool_output, last_items

View file

@ -23,7 +23,9 @@ def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking
def resolve_completed_thinking(
tool_name: str, tool_output: Any, last_items: list[str],
tool_name: str,
tool_output: Any,
last_items: list[str],
) -> tuple[str, list[str]]:
del tool_name
items = last_items

View file

@ -34,7 +34,9 @@ def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking
def resolve_completed_thinking(
tool_name: str, tool_output: Any, last_items: list[str],
tool_name: str,
tool_output: Any,
last_items: list[str],
) -> tuple[str, list[str]]:
del tool_name
items = last_items

View file

@ -29,7 +29,9 @@ def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking
def resolve_completed_thinking(
tool_name: str, tool_output: Any, last_items: list[str],
tool_name: str,
tool_output: Any,
last_items: list[str],
) -> tuple[str, list[str]]:
del tool_name
items = last_items
@ -44,9 +46,7 @@ def resolve_completed_thinking(
else "Report"
)
word_count = (
tool_output.get("word_count", 0)
if isinstance(tool_output, dict)
else 0
tool_output.get("word_count", 0) if isinstance(tool_output, dict) else 0
)
is_revision = (
tool_output.get("is_revision", False)

View file

@ -17,7 +17,9 @@ def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking
def resolve_completed_thinking(
tool_name: str, tool_output: Any, last_items: list[str],
tool_name: str,
tool_output: Any,
last_items: list[str],
) -> tuple[str, list[str]]:
return default_thinking.resolve_completed_thinking(
tool_name, tool_output, last_items

View file

@ -17,7 +17,9 @@ def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking
def resolve_completed_thinking(
tool_name: str, tool_output: Any, last_items: list[str],
tool_name: str,
tool_output: Any,
last_items: list[str],
) -> tuple[str, list[str]]:
del tool_name
items = last_items

View file

@ -21,7 +21,9 @@ def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking
def resolve_completed_thinking(
tool_name: str, tool_output: Any, last_items: list[str],
tool_name: str,
tool_output: Any,
last_items: list[str],
) -> tuple[str, list[str]]:
del tool_name
items = last_items

View file

@ -21,7 +21,9 @@ def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking
def resolve_completed_thinking(
tool_name: str, tool_output: Any, last_items: list[str],
tool_name: str,
tool_output: Any,
last_items: list[str],
) -> tuple[str, list[str]]:
del tool_output, tool_name
return ("Editing file", last_items)

View file

@ -24,9 +24,7 @@ def iter_completion_emission_frames(
output_text = om.group(1) if om else ""
thread_id_str = ctx.langgraph_config.get("configurable", {}).get("thread_id", "")
for sf_match in re.finditer(
r"^SANDBOX_FILE:\s*(.+)$", output_text, re.MULTILINE
):
for sf_match in re.finditer(r"^SANDBOX_FILE:\s*(.+)$", output_text, re.MULTILINE):
fpath = sf_match.group(1).strip()
if fpath and fpath not in ctx.stream_result.sandbox_files:
ctx.stream_result.sandbox_files.append(fpath)

View file

@ -22,7 +22,9 @@ def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking
def resolve_completed_thinking(
tool_name: str, tool_output: Any, last_items: list[str],
tool_name: str,
tool_output: Any,
last_items: list[str],
) -> tuple[str, list[str]]:
del tool_name
items = last_items

View file

@ -21,7 +21,9 @@ def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking
def resolve_completed_thinking(
tool_name: str, tool_output: Any, last_items: list[str],
tool_name: str,
tool_output: Any,
last_items: list[str],
) -> tuple[str, list[str]]:
del tool_output, tool_name
return ("Searching files", last_items)

View file

@ -25,7 +25,9 @@ def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking
def resolve_completed_thinking(
tool_name: str, tool_output: Any, last_items: list[str],
tool_name: str,
tool_output: Any,
last_items: list[str],
) -> tuple[str, list[str]]:
del tool_output, tool_name
return ("Searching content", last_items)

View file

@ -20,7 +20,9 @@ def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking
def resolve_completed_thinking(
tool_name: str, tool_output: Any, last_items: list[str],
tool_name: str,
tool_output: Any,
last_items: list[str],
) -> tuple[str, list[str]]:
del tool_name
if isinstance(tool_output, dict):
@ -38,9 +40,7 @@ def resolve_completed_thinking(
paths = [str(p) for p in parsed]
except (ValueError, SyntaxError):
paths = [
line.strip()
for line in ls_output.strip().split("\n")
if line.strip()
line.strip() for line in ls_output.strip().split("\n") if line.strip()
]
for p in paths:
name = p.rstrip("/").split("/")[-1]

View file

@ -17,11 +17,15 @@ def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking
d = as_tool_input_dict(tool_input)
p = d.get("path", "") if isinstance(tool_input, dict) else str(tool_input)
display = p if len(p) <= 80 else "" + p[-77:]
return ToolStartThinking(title="Creating folder", items=[display] if display else [])
return ToolStartThinking(
title="Creating folder", items=[display] if display else []
)
def resolve_completed_thinking(
tool_name: str, tool_output: Any, last_items: list[str],
tool_name: str,
tool_output: Any,
last_items: list[str],
) -> tuple[str, list[str]]:
del tool_output, tool_name
return ("Creating folder", last_items)

View file

@ -27,7 +27,9 @@ def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking
def resolve_completed_thinking(
tool_name: str, tool_output: Any, last_items: list[str],
tool_name: str,
tool_output: Any,
last_items: list[str],
) -> tuple[str, list[str]]:
del tool_output, tool_name
return ("Moving file", last_items)

View file

@ -21,7 +21,9 @@ def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking
def resolve_completed_thinking(
tool_name: str, tool_output: Any, last_items: list[str],
tool_name: str,
tool_output: Any,
last_items: list[str],
) -> tuple[str, list[str]]:
del tool_output, tool_name
return ("Reading file", last_items)

View file

@ -22,7 +22,9 @@ def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking
def resolve_completed_thinking(
tool_name: str, tool_output: Any, last_items: list[str],
tool_name: str,
tool_output: Any,
last_items: list[str],
) -> tuple[str, list[str]]:
del tool_output, tool_name
return ("Deleting file", last_items)

View file

@ -17,11 +17,15 @@ def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking
d = as_tool_input_dict(tool_input)
p = d.get("path", "") if isinstance(tool_input, dict) else str(tool_input)
display = p if len(p) <= 80 else "" + p[-77:]
return ToolStartThinking(title="Deleting folder", items=[display] if display else [])
return ToolStartThinking(
title="Deleting folder", items=[display] if display else []
)
def resolve_completed_thinking(
tool_name: str, tool_output: Any, last_items: list[str],
tool_name: str,
tool_output: Any,
last_items: list[str],
) -> tuple[str, list[str]]:
del tool_output, tool_name
return ("Deleting folder", last_items)

View file

@ -21,7 +21,9 @@ def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking
def resolve_completed_thinking(
tool_name: str, tool_output: Any, last_items: list[str],
tool_name: str,
tool_output: Any,
last_items: list[str],
) -> tuple[str, list[str]]:
del tool_output, tool_name
return ("Writing file", last_items)

View file

@ -20,15 +20,15 @@ def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking
return ToolStartThinking(
title="Planning tasks",
items=(
[f"{todo_count} task{'s' if todo_count != 1 else ''}"]
if todo_count
else []
[f"{todo_count} task{'s' if todo_count != 1 else ''}"] if todo_count else []
),
)
def resolve_completed_thinking(
tool_name: str, tool_output: Any, last_items: list[str],
tool_name: str,
tool_output: Any,
last_items: list[str],
) -> tuple[str, list[str]]:
del tool_output, tool_name
return ("Planning tasks", last_items)

View file

@ -58,14 +58,18 @@ def _emission_module(tool_name: str) -> str:
def _import_thinking(tool_name: str):
try:
return importlib.import_module(f"{_BASE}.{_thinking_module(tool_name)}.thinking")
return importlib.import_module(
f"{_BASE}.{_thinking_module(tool_name)}.thinking"
)
except ModuleNotFoundError:
return importlib.import_module(f"{_BASE}.default.thinking")
def _import_emission(tool_name: str):
try:
return importlib.import_module(f"{_BASE}.{_emission_module(tool_name)}.emission")
return importlib.import_module(
f"{_BASE}.{_emission_module(tool_name)}.emission"
)
except ModuleNotFoundError:
return importlib.import_module(f"{_BASE}.default.emission")

View file

@ -23,7 +23,9 @@ def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking
def resolve_completed_thinking(
tool_name: str, tool_output: Any, last_items: list[str],
tool_name: str,
tool_output: Any,
last_items: list[str],
) -> tuple[str, list[str]]:
del tool_name
items = last_items

View file

@ -28,11 +28,7 @@ def iter_completion_emission_frames(
xml,
):
chunk_url, content = m.group(1).strip(), m.group(2).strip()
if (
chunk_url.startswith("http")
and chunk_url in citations
and content
):
if chunk_url.startswith("http") and chunk_url in citations and content:
citations[chunk_url]["snippet"] = (
content[:200] + "" if len(content) > 200 else content
)

View file

@ -367,18 +367,26 @@ class TestPersistUserTurn:
db_thread,
patched_shielded_session,
):
"""The full ``{id, title, document_type}`` triple forwarded by
the FE must round-trip into a single ``mentioned-documents``
ContentPart on the persisted user message the history loader
renders the chips on reload from this part directly.
"""The full ``{id, title, document_type, kind}`` chip metadata
forwarded by the FE must round-trip into a single
``mentioned-documents`` ContentPart on the persisted user
message the history loader renders the chips on reload from
this part directly. Folder chips ride alongside doc chips so
the FE can render mixed mention bars without a second fetch.
"""
thread_id = db_thread.id
user_id_str = str(db_user.id)
turn_id = f"{thread_id}:8200"
mentioned = [
{"id": 11, "title": "Alpha", "document_type": "GENERAL"},
{"id": 22, "title": "Beta", "document_type": "GENERAL"},
{"id": 11, "title": "Alpha", "document_type": "GENERAL", "kind": "doc"},
{"id": 22, "title": "Beta", "document_type": "GENERAL", "kind": "doc"},
{
"id": 33,
"title": "Reports",
"document_type": "FOLDER",
"kind": "folder",
},
]
msg_id = await persist_user_turn(
chat_id=thread_id,
@ -397,8 +405,61 @@ class TestPersistUserTurn:
assert row.content[1] == {
"type": "mentioned-documents",
"documents": [
{"id": 11, "title": "Alpha", "document_type": "GENERAL"},
{"id": 22, "title": "Beta", "document_type": "GENERAL"},
{"id": 11, "title": "Alpha", "document_type": "GENERAL", "kind": "doc"},
{"id": 22, "title": "Beta", "document_type": "GENERAL", "kind": "doc"},
{
"id": 33,
"title": "Reports",
"document_type": "FOLDER",
"kind": "folder",
},
],
}
async def test_legacy_chip_without_kind_defaults_to_doc(
self,
db_session,
db_user,
db_thread,
patched_shielded_session,
):
"""Pre-folder clients send chips without ``kind``. The persistence
layer defaults them to ``"doc"`` so the round-trip stays
consistent on reload the FE schema's optional default
produces the same value, but persisting it explicitly keeps
the DB row self-describing.
"""
thread_id = db_thread.id
user_id_str = str(db_user.id)
turn_id = f"{thread_id}:8201"
mentioned = [
{"id": 77, "title": "Legacy", "document_type": "GENERAL"},
]
msg_id = await persist_user_turn(
chat_id=thread_id,
user_id=user_id_str,
turn_id=turn_id,
user_query="hi",
mentioned_documents=mentioned,
)
assert isinstance(msg_id, int)
row = await db_session.get(NewChatMessage, msg_id)
assert row is not None
assert isinstance(row.content, list)
mentioned_part = next(
p for p in row.content if p.get("type") == "mentioned-documents"
)
assert mentioned_part == {
"type": "mentioned-documents",
"documents": [
{
"id": 77,
"title": "Legacy",
"document_type": "GENERAL",
"kind": "doc",
},
],
}

View file

@ -0,0 +1,285 @@
"""Tests for the @-mention resolver.
These tests pin down the contract that ``mention_resolver`` is the
single seam between ``MentionedDocumentInfo`` chips (frontend) and the
canonical ``/documents/...`` virtual paths (agent). The streaming task,
priority middleware, and persistence layer all consume the resolver's
output keeping the tests focused on substitute-in-text + the
returned id partition keeps the seam stable across refactors.
"""
from __future__ import annotations
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock
import pytest
from app.agents.new_chat import mention_resolver
from app.agents.new_chat.mention_resolver import (
ResolvedMention,
ResolvedMentionSet,
resolve_mentions,
substitute_in_text,
)
from app.agents.new_chat.path_resolver import DOCUMENTS_ROOT, PathIndex
from app.schemas.new_chat import MentionedDocumentInfo
pytestmark = pytest.mark.unit
class TestSubstituteInText:
"""``substitute_in_text`` is a pure string transform and is exercised
on every cloud-mode turn, so it has to be both fast and behaviour-
identical to the frontend's ``parseMentionSegments`` (longest-token
first, single forward pass)."""
def test_returns_text_unchanged_when_no_tokens(self):
assert substitute_in_text("hello @foo", []) == "hello @foo"
def test_returns_text_unchanged_when_empty(self):
assert substitute_in_text("", [("@x", "/documents/x.xml")]) == ""
def test_replaces_single_token_with_backtick_path(self):
out = substitute_in_text(
"see @notes please",
[("@notes", "/documents/notes.xml")],
)
assert out == "see `/documents/notes.xml` please"
def test_longest_token_wins_over_prefix(self):
# ``@Project Roadmap`` must NOT be partially matched by ``@Project``.
# Mirrors the FE's parseMentionSegments contract.
token_to_path = [
("@Project Roadmap", "/documents/Roadmap.xml"),
("@Project", "/documents/Project.xml"),
]
out = substitute_in_text("about @Project Roadmap today", token_to_path)
assert out == "about `/documents/Roadmap.xml` today"
def test_handles_repeated_mentions(self):
out = substitute_in_text(
"@A and @A again @B",
[
("@A", "/documents/a.xml"),
("@B", "/documents/b.xml"),
],
)
assert (
out == "`/documents/a.xml` and `/documents/a.xml` again `/documents/b.xml`"
)
def test_does_not_match_inside_word(self):
# Substitution is positional — there's no word-boundary semantics.
# ``@Pro`` inside ``foo@Project`` still matches; this is the same
# behaviour as parseMentionSegments. The test pins it so a
# future "fix" doesn't accidentally diverge between FE/BE.
out = substitute_in_text("foo@Pro", [("@Pro", "/documents/p.xml")])
assert out == "foo`/documents/p.xml`"
def test_idempotent_after_substitution(self):
# The output starts with a backtick, not ``@``, so re-running
# the substitution leaves it alone.
once = substitute_in_text("@A", [("@A", "/documents/a.xml")])
twice = substitute_in_text(once, [("@A", "/documents/a.xml")])
assert once == twice
class TestResolveMentions:
"""``resolve_mentions`` resolves chip ids → virtual paths and emits
a ``ResolvedMentionSet`` whose id partitions feed
``KnowledgePriorityMiddleware``."""
@pytest.mark.asyncio
async def test_returns_empty_when_no_mentions(self):
session = MagicMock()
session.execute = AsyncMock()
result = await resolve_mentions(
session,
search_space_id=1,
mentioned_documents=None,
)
assert isinstance(result, ResolvedMentionSet)
assert result.mentions == []
assert result.token_to_path == []
assert result.mentioned_document_ids == []
assert result.mentioned_folder_ids == []
# No DB roundtrips when there's nothing to resolve.
session.execute.assert_not_awaited()
@pytest.mark.asyncio
async def test_resolves_doc_chip_to_virtual_path(self, monkeypatch):
chip = MentionedDocumentInfo(
id=42,
title="Notes",
document_type="EXTENSION",
kind="doc",
)
doc_row = SimpleNamespace(id=42, title="Notes", folder_id=None)
async def fake_build_index(_session, _ssid):
return PathIndex()
monkeypatch.setattr(mention_resolver, "build_path_index", fake_build_index)
scalars = MagicMock()
scalars.all.return_value = [doc_row]
result = MagicMock()
result.scalars.return_value = scalars
session = MagicMock()
session.execute = AsyncMock(return_value=result)
out = await resolve_mentions(
session,
search_space_id=5,
mentioned_documents=[chip],
)
assert len(out.mentions) == 1
mention = out.mentions[0]
assert mention.kind == "doc"
assert mention.id == 42
assert mention.virtual_path == f"{DOCUMENTS_ROOT}/Notes.xml"
assert out.mentioned_document_ids == [42]
assert out.mentioned_folder_ids == []
assert ("@Notes", f"{DOCUMENTS_ROOT}/Notes.xml") in out.token_to_path
@pytest.mark.asyncio
async def test_resolves_folder_chip_with_trailing_slash(self, monkeypatch):
chip = MentionedDocumentInfo(
id=9,
title="Reports",
document_type="FOLDER",
kind="folder",
)
folder_row = SimpleNamespace(id=9, name="Reports")
async def fake_build_index(_session, _ssid):
return PathIndex(folder_paths={9: f"{DOCUMENTS_ROOT}/Reports"})
monkeypatch.setattr(mention_resolver, "build_path_index", fake_build_index)
scalars = MagicMock()
scalars.all.return_value = [folder_row]
result = MagicMock()
result.scalars.return_value = scalars
session = MagicMock()
session.execute = AsyncMock(return_value=result)
out = await resolve_mentions(
session,
search_space_id=3,
mentioned_documents=[chip],
)
assert len(out.mentions) == 1
mention = out.mentions[0]
assert mention.kind == "folder"
assert mention.id == 9
assert mention.virtual_path == f"{DOCUMENTS_ROOT}/Reports/"
assert out.mentioned_document_ids == []
assert out.mentioned_folder_ids == [9]
@pytest.mark.asyncio
async def test_drops_chip_when_doc_is_missing(self, monkeypatch):
chip = MentionedDocumentInfo(
id=99, title="ghost", document_type="EXTENSION", kind="doc"
)
async def fake_build_index(_session, _ssid):
return PathIndex()
monkeypatch.setattr(mention_resolver, "build_path_index", fake_build_index)
scalars = MagicMock()
scalars.all.return_value = []
result = MagicMock()
result.scalars.return_value = scalars
session = MagicMock()
session.execute = AsyncMock(return_value=result)
out = await resolve_mentions(
session,
search_space_id=1,
mentioned_documents=[chip],
)
assert out.mentions == []
assert out.mentioned_document_ids == []
assert out.token_to_path == []
@pytest.mark.asyncio
async def test_token_to_path_is_longest_first(self, monkeypatch):
# Two chips whose titles are prefixes of each other — the
# resolver MUST sort longest-first so substitution doesn't
# break the ``@Project Roadmap`` vs ``@Project`` invariant.
chip_short = MentionedDocumentInfo(
id=1, title="A", document_type="EXTENSION", kind="doc"
)
chip_long = MentionedDocumentInfo(
id=2, title="A long one", document_type="EXTENSION", kind="doc"
)
rows = [
SimpleNamespace(id=1, title="A", folder_id=None),
SimpleNamespace(id=2, title="A long one", folder_id=None),
]
async def fake_build_index(_session, _ssid):
return PathIndex()
monkeypatch.setattr(mention_resolver, "build_path_index", fake_build_index)
scalars = MagicMock()
scalars.all.return_value = rows
result = MagicMock()
result.scalars.return_value = scalars
session = MagicMock()
session.execute = AsyncMock(return_value=result)
out = await resolve_mentions(
session,
search_space_id=1,
mentioned_documents=[chip_short, chip_long],
)
tokens = [tok for tok, _ in out.token_to_path]
assert tokens == sorted(tokens, key=len, reverse=True)
@pytest.mark.asyncio
async def test_legacy_id_arrays_resolve_without_chip_metadata(self, monkeypatch):
# ``mentioned_document_ids`` (the legacy parallel array) must
# still resolve when no chip metadata is available — covers
# callers that haven't migrated to the discriminated chip list.
doc_row = SimpleNamespace(id=7, title="Legacy", folder_id=None)
async def fake_build_index(_session, _ssid):
return PathIndex()
monkeypatch.setattr(mention_resolver, "build_path_index", fake_build_index)
scalars = MagicMock()
scalars.all.return_value = [doc_row]
result = MagicMock()
result.scalars.return_value = scalars
session = MagicMock()
session.execute = AsyncMock(return_value=result)
out = await resolve_mentions(
session,
search_space_id=2,
mentioned_documents=None,
mentioned_document_ids=[7],
)
assert out.mentioned_document_ids == [7]
assert len(out.mentions) == 1
assert out.mentions[0].title == "Legacy"
class TestResolvedMentionEquality:
"""Smoke check on the dataclass behaviour we rely on for asserting
test outputs."""
def test_equal_when_fields_equal(self):
a = ResolvedMention(
kind="doc", id=1, title="x", virtual_path="/documents/x.xml"
)
b = ResolvedMention(
kind="doc", id=1, title="x", virtual_path="/documents/x.xml"
)
assert a == b

View file

@ -196,3 +196,50 @@ class TestVirtualPathToDoc:
)
assert document is target_doc
assert session.execute.await_count == 2
@pytest.mark.asyncio
async def test_resolves_double_extension_for_uploaded_pdf(self):
# Regression: the agent renders every KB document under
# ``/documents/`` with a trailing ``.xml`` (via ``safe_filename``),
# so an uploaded PDF whose DB title is ``2025-W2.pdf`` shows up as
# ``/documents/2025-W2.pdf.xml`` in answers. Clicking that path
# must round-trip back to the row even though the title itself
# does NOT end in ``.xml``.
target_doc = SimpleNamespace(id=99, title="2025-W2.pdf", folder_id=None)
session = MagicMock()
session.execute = AsyncMock(
side_effect=[
_result_from_one(None),
_result_from_scalars([target_doc]),
]
)
document = await virtual_path_to_doc(
session,
search_space_id=5,
virtual_path=f"{DOCUMENTS_ROOT}/2025-W2.pdf.xml",
)
assert document is target_doc
@pytest.mark.asyncio
async def test_resolves_path_without_xml_suffix(self):
# The user (or a hand-edited link) may pass the title-only form
# ``/documents/2025-W2.pdf``. The resolver must still find the row
# by literal title equality.
target_doc = SimpleNamespace(id=99, title="2025-W2.pdf", folder_id=None)
session = MagicMock()
session.execute = AsyncMock(
side_effect=[
_result_from_one(None),
_result_from_scalars([target_doc]),
]
)
document = await virtual_path_to_doc(
session,
search_space_id=5,
virtual_path=f"{DOCUMENTS_ROOT}/2025-W2.pdf",
)
assert document is target_doc

View file

@ -118,12 +118,8 @@ def test_get_by_tool_call_id_matches_action_request_payload() -> None:
tasks=(
_Task(
interrupts=(
_Interrupt(
value=_hitl("a", tool_call_id="call_xxx"), id="int_a"
),
_Interrupt(
value=_hitl("b", tool_call_id="call_yyy"), id="int_b"
),
_Interrupt(value=_hitl("a", tool_call_id="call_xxx"), id="int_a"),
_Interrupt(value=_hitl("b", tool_call_id="call_yyy"), id="int_b"),
)
),
)
@ -146,9 +142,7 @@ def test_first_pending_interrupt_matches_legacy_first_wins_behaviour() -> None:
def test_interrupt_without_id_falls_back_to_none() -> None:
"""Snapshots from older LangGraph versions may omit ``id`` — preserve that."""
state = _State(
tasks=(_Task(interrupts=(_Interrupt(value=_hitl("a"), id=None),)),)
)
state = _State(tasks=(_Task(interrupts=(_Interrupt(value=_hitl("a"), id=None),)),))
pending = list_pending_interrupts(state)
assert len(pending) == 1
assert pending[0].interrupt_id is None

View file

@ -37,9 +37,7 @@ def test_custom_interrupt_primitive_is_converted_to_canonical_shape() -> None:
"context": {"reason": "destructive"},
}
out = normalize_interrupt_payload(raw)
assert out["action_requests"] == [
{"name": "send_email", "args": {"to": "a@b"}}
]
assert out["action_requests"] == [{"name": "send_email", "args": {"to": "a@b"}}]
assert out["review_configs"] == [
{
"action_name": "send_email",

View file

@ -158,9 +158,7 @@ def _classify_cases() -> list[Exception]:
"""Inputs that the FE depends on being mapped to specific error codes."""
return [
Exception("totally generic error"),
Exception(
'{"error":{"type":"rate_limit_error","message":"slow down"}}'
),
Exception('{"error":{"type":"rate_limit_error","message":"slow down"}}'),
Exception(
'OpenrouterException - {"error":{"message":"Provider returned error",'
'"code":429}}'
@ -220,7 +218,7 @@ class _FakeStreamingService:
self.calls.append(
{"message": message, "error_code": error_code, "extra": extra}
)
return f"data: {{\"type\":\"error\",\"errorText\":\"{message}\"}}\n\n"
return f'data: {{"type":"error","errorText":"{message}"}}\n\n'
def test_emit_stream_terminal_error_matches_old_output_and_logs(caplog) -> None:

View file

@ -60,8 +60,14 @@ async def test_stream_output_emits_text_lifecycle_and_updates_result() -> None:
service = _StreamingService()
agent = _Agent(
[
{"event": "on_chat_model_stream", "data": {"chunk": _Chunk(content="Hello")}},
{"event": "on_chat_model_stream", "data": {"chunk": _Chunk(content=" world")}},
{
"event": "on_chat_model_stream",
"data": {"chunk": _Chunk(content="Hello")},
},
{
"event": "on_chat_model_stream",
"data": {"chunk": _Chunk(content=" world")},
},
]
)
result = StreamingResult()

View file

@ -37,7 +37,9 @@ def test_clear_ignored_for_non_task_tool() -> None:
def test_clear_ignored_when_task_run_id_mismatches() -> None:
state = AgentEventRelayState.for_invocation()
open_task_span(state, run_id="run-open")
clear_task_span_if_delegating_task_ended(state, tool_name="task", run_id="run-other")
clear_task_span_if_delegating_task_ended(
state, tool_name="task", run_id="run-other"
)
assert state.active_span_id is not None
assert state.active_task_run_id == "run-open"

View file

@ -240,9 +240,7 @@ class TestToolHeavyTurn:
class TestToolCallSpanMetadata:
def test_input_available_merges_new_metadata_keys_after_start(self):
b = AssistantContentBuilder()
b.on_tool_input_start(
"call_t", "task", "lc_t", metadata={"spanId": "spn_1"}
)
b.on_tool_input_start("call_t", "task", "lc_t", metadata={"spanId": "spn_1"})
b.on_tool_input_available(
"call_t",
"task",
@ -257,9 +255,7 @@ class TestToolCallSpanMetadata:
def test_input_available_does_not_overwrite_existing_metadata_keys(self):
b = AssistantContentBuilder()
b.on_tool_input_start(
"call_t", "task", "lc_t", metadata={"spanId": "spn_keep"}
)
b.on_tool_input_start("call_t", "task", "lc_t", metadata={"spanId": "spn_keep"})
b.on_tool_input_available(
"call_t", "task", {}, "lc_t", metadata={"spanId": "spn_other"}
)

View file

@ -199,12 +199,16 @@ function pairBundleToolCallIds(
}
/**
* Zod schema for mentioned document info (for type-safe parsing)
* Zod schema for mentioned document info (for type-safe parsing).
*
* ``kind`` defaults to ``"doc"`` so messages persisted before folder
* mentions existed deserialise unchanged.
*/
const MentionedDocumentInfoSchema = z.object({
id: z.number(),
title: z.string(),
document_type: z.string(),
kind: z.union([z.literal("doc"), z.literal("folder")]).optional().default("doc"),
});
const MentionedDocumentsPartSchema = z.object({
@ -913,18 +917,29 @@ export default function NewChatPage() {
hasAttachments: userImages.length > 0,
hasMentionedDocuments:
mentionedDocumentIds.surfsense_doc_ids.length > 0 ||
mentionedDocumentIds.document_ids.length > 0,
mentionedDocumentIds.document_ids.length > 0 ||
mentionedDocumentIds.folder_ids.length > 0,
messageLength: userQuery.length,
});
// Collect unique mentioned docs for display & persistence
// Collect unique mention chips for display & persistence.
// Dedup key is ``kind:document_type:id`` so a folder and a
// doc with the same integer id never collapse into one
// entry. The ``kind`` field is forwarded to the backend
// so the persisted ``mentioned-documents`` content part
// can render the correct chip type on reload.
const allMentionedDocs: MentionedDocumentInfo[] = [];
const seenDocKeys = new Set<string>();
for (const doc of mentionedDocuments) {
const key = `${doc.document_type}:${doc.id}`;
const key = `${doc.kind}:${doc.document_type}:${doc.id}`;
if (seenDocKeys.has(key)) continue;
seenDocKeys.add(key);
allMentionedDocs.push({ id: doc.id, title: doc.title, document_type: doc.document_type });
allMentionedDocs.push({
id: doc.id,
title: doc.title,
document_type: doc.document_type,
kind: doc.kind,
});
}
if (allMentionedDocs.length > 0) {
@ -986,9 +1001,10 @@ export default function NewChatPage() {
// Get mentioned document IDs for context (separate fields for backend)
const hasDocumentIds = mentionedDocumentIds.document_ids.length > 0;
const hasSurfsenseDocIds = mentionedDocumentIds.surfsense_doc_ids.length > 0;
const hasFolderIds = mentionedDocumentIds.folder_ids.length > 0;
// Clear mentioned documents after capturing them
if (hasDocumentIds || hasSurfsenseDocIds) {
if (hasDocumentIds || hasSurfsenseDocIds || hasFolderIds) {
setMentionedDocuments([]);
}
@ -1013,7 +1029,11 @@ export default function NewChatPage() {
mentioned_surfsense_doc_ids: hasSurfsenseDocIds
? mentionedDocumentIds.surfsense_doc_ids
: undefined,
// Full mention metadata so the BE can embed a
mentioned_folder_ids: hasFolderIds
? mentionedDocumentIds.folder_ids
: undefined,
// Full mention metadata (docs + folders, with
// ``kind`` discriminator) so the BE can embed a
// ``mentioned-documents`` ContentPart on the
// persisted user message (replaces the old FE-side
// injection in ``persistUserTurn``).
@ -1023,6 +1043,7 @@ export default function NewChatPage() {
id: d.id,
title: d.title,
document_type: d.document_type,
kind: d.kind,
}))
: undefined,
disabled_tools: disabledTools.length > 0 ? disabledTools : undefined,
@ -1855,6 +1876,23 @@ export default function NewChatPage() {
const selection = await getAgentFilesystemSelection(searchSpaceId, {
localFilesystemEnabled,
});
// Partition the source mentions back into doc/surfsense_doc/folder
// id buckets so the regenerate route can pass them to
// ``stream_new_chat`` and the priority middleware sees the
// same ``[USER-MENTIONED]`` priority entries the original
// turn did. Without this partition the regenerate flow
// silently dropped the agent's mention awareness — same
// architectural bug we fixed on the new-chat path.
const regenerateSurfsenseDocIds = sourceMentionedDocs
.filter((d) => d.kind === "doc" && d.document_type === "SURFSENSE_DOCS")
.map((d) => d.id);
const regenerateDocIds = sourceMentionedDocs
.filter((d) => d.kind === "doc" && d.document_type !== "SURFSENSE_DOCS")
.map((d) => d.id);
const regenerateFolderIds = sourceMentionedDocs
.filter((d) => d.kind === "folder")
.map((d) => d.id);
const requestBody: Record<string, unknown> = {
search_space_id: searchSpaceId,
user_query: newUserQuery,
@ -1862,6 +1900,12 @@ export default function NewChatPage() {
filesystem_mode: selection.filesystem_mode,
client_platform: selection.client_platform,
local_filesystem_mounts: selection.local_filesystem_mounts,
mentioned_document_ids:
regenerateDocIds.length > 0 ? regenerateDocIds : undefined,
mentioned_surfsense_doc_ids:
regenerateSurfsenseDocIds.length > 0 ? regenerateSurfsenseDocIds : undefined,
mentioned_folder_ids:
regenerateFolderIds.length > 0 ? regenerateFolderIds : undefined,
// Full mention metadata for the regenerate-specific
// source list. Only meaningful for edit (the BE only
// re-persists a user row when ``user_query`` is set);
@ -1872,6 +1916,7 @@ export default function NewChatPage() {
id: d.id,
title: d.title,
document_type: d.document_type,
kind: d.kind,
}))
: undefined,
};

View file

@ -4,45 +4,108 @@ import { atom } from "jotai";
import type { Document } from "@/contracts/types/document.types";
/**
* Atom to store the full document objects mentioned via @-mention chips
* in the current chat composer. This persists across component remounts.
* Sentinel ``document_type`` used for folder mention chips so the
* dedup key (`kind:document_type:id`) never collides a document with a
* folder that happens to share an integer id.
*/
export const mentionedDocumentsAtom = atom<Pick<Document, "id" | "title" | "document_type">[]>([]);
export const FOLDER_MENTION_DOCUMENT_TYPE = "FOLDER";
/**
* Derived read-only atom that maps deduplicated mentioned docs
* into backend payload fields.
*/
export const mentionedDocumentIdsAtom = atom((get) => {
const allDocs = get(mentionedDocumentsAtom);
const seen = new Set<string>();
const deduped = allDocs.filter((d) => {
const key = `${d.document_type}:${d.id}`;
if (seen.has(key)) return false;
seen.add(key);
return true;
});
return {
surfsense_doc_ids: deduped
.filter((doc) => doc.document_type === "SURFSENSE_DOCS")
.map((doc) => doc.id),
document_ids: deduped
.filter((doc) => doc.document_type !== "SURFSENSE_DOCS")
.map((doc) => doc.id),
};
});
/**
* Simplified document info for display purposes
* Display metadata for a single ``@``-mention chip.
*
* The ``kind`` discriminator identifies whether the chip is a
* knowledge-base document or a knowledge-base folder. Folders carry
* the sentinel ``document_type === FOLDER_MENTION_DOCUMENT_TYPE`` so
* the editor, picker, and persisted ``mentioned-documents`` content
* part all stay aligned with the backend Pydantic schema.
*/
export interface MentionedDocumentInfo {
	/** Document id for ``kind: "doc"`` chips, folder id for ``kind: "folder"`` chips. */
	id: number;
	/** Chip label — the document title, or the folder name for folder chips. */
	title: string;
	/** Connector/document type; folder chips carry the FOLDER sentinel type. */
	document_type: string;
	/** Discriminator: knowledge-base document vs knowledge-base folder. */
	kind: "doc" | "folder";
}
/**
* Atom to store mentioned documents per message ID.
* Backwards-compatible doc-only chip shape for legacy callers that
* haven't migrated to the discriminated union yet. Keep narrow so
* accidental new callers fail typecheck and route through the
* discriminated type instead.
*/
type LegacyDocMention = Pick<Document, "id" | "title" | "document_type">;
/**
* Normalize an arbitrary chip-like input into the discriminated
* ``MentionedDocumentInfo`` shape. Existing call sites that only have
* ``{id, title, document_type}`` flow through here so they don't have
 * to thread ``kind`` everywhere — the helper defaults to ``"doc"`` and
* rewrites the document type for folders.
*/
/**
 * Normalize a chip-like input into the discriminated
 * ``MentionedDocumentInfo`` shape. Inputs that already carry a valid
 * ``kind`` discriminator pass through unchanged; legacy doc-only
 * inputs are rebuilt with ``kind: "doc"``.
 */
export function toMentionedDocumentInfo(
	input: LegacyDocMention | MentionedDocumentInfo
): MentionedDocumentInfo {
	// Guard clause: anything without a valid discriminator is a legacy
	// doc-only chip and gets normalized with kind "doc".
	if (!("kind" in input) || (input.kind !== "doc" && input.kind !== "folder")) {
		const { id, title, document_type } = input;
		return { id, title, document_type, kind: "doc" };
	}
	// Already discriminated — return as-is (identity preserved).
	return input;
}
/**
* Build a folder-mention chip from a folder row (id + name).
*/
export function makeFolderMention(input: { id: number; name: string }): MentionedDocumentInfo {
	const { id, name } = input;
	// Folder chips reuse the mention shape: the folder name becomes the
	// chip title, and the sentinel document_type keeps the dedup key
	// (`kind:document_type:id`) folder-scoped.
	return {
		id,
		title: name,
		document_type: FOLDER_MENTION_DOCUMENT_TYPE,
		kind: "folder",
	};
}
/**
* Atom to store the full mention objects (documents + folders) attached
* via @-mention chips in the current chat composer. Persists across
* component remounts.
*/
export const mentionedDocumentsAtom = atom<MentionedDocumentInfo[]>([]);
/**
* Derived read-only atom that maps deduplicated mention chips into
* backend payload fields. Doc chips split by ``document_type`` exactly
* like before; folder chips are projected into a separate
* ``folder_ids`` bucket so the route can forward
* ``mentioned_folder_ids`` to the agent without the priority middleware
* conflating them with hybrid-search ids.
*/
export const mentionedDocumentIdsAtom = atom((get) => {
const allMentions = get(mentionedDocumentsAtom);
const seen = new Set<string>();
const deduped = allMentions.filter((m) => {
const key = `${m.kind}:${m.document_type}:${m.id}`;
if (seen.has(key)) return false;
seen.add(key);
return true;
});
const docs = deduped.filter((m) => m.kind === "doc");
const folders = deduped.filter((m) => m.kind === "folder");
return {
surfsense_doc_ids: docs
.filter((doc) => doc.document_type === "SURFSENSE_DOCS")
.map((doc) => doc.id),
document_ids: docs
.filter((doc) => doc.document_type !== "SURFSENSE_DOCS")
.map((doc) => doc.id),
folder_ids: folders.map((f) => f.id),
};
});
/**
* Atom to store mentioned chips per message ID.
* This allows displaying which documents were mentioned with each user message.
*/
export const messageDocumentsMapAtom = atom<Record<string, MentionedDocumentInfo[]>>({});

View file

@ -1,5 +1,6 @@
"use client";
import { Folder as FolderIcon } from "lucide-react";
import type { PlateElementProps } from "platejs/react";
import {
createPlatePlugin,
@ -9,23 +10,51 @@ import {
usePlateEditor,
} from "platejs/react";
import { type FC, forwardRef, useCallback, useImperativeHandle, useMemo, useRef } from "react";
import { FOLDER_MENTION_DOCUMENT_TYPE } from "@/atoms/chat/mentioned-documents.atom";
import { getConnectorIcon } from "@/contracts/enums/connectorIcons";
import type { Document } from "@/contracts/types/document.types";
import { getMentionDocKey } from "@/lib/chat/mention-doc-key";
import { cn } from "@/lib/utils";
/** Discriminator for a mention chip: knowledge-base document vs folder. */
export type MentionKind = "doc" | "folder";

/**
 * A mention chip as reported by ``getMentionedDocuments`` — one entry
 * per chip node currently present in the composer value.
 */
export interface MentionedDocument {
	id: number;
	title: string;
	/** Connector/document type; optional on chips (folders default it at insert). */
	document_type?: string;
	/** ``"doc"`` unless the chip was inserted as a folder mention. */
	kind: MentionKind;
}
/**
* Input shape for inserting a chip. ``kind`` defaults to ``"doc"``
* when omitted so legacy callers don't have to thread the
* discriminator. Folder callers pass ``kind: "folder"`` and the
* folder ``id`` and ``title``; ``document_type`` defaults to
* ``FOLDER_MENTION_DOCUMENT_TYPE`` inside ``insertMentionChip`` so the
* dedup key (`kind:document_type:id`) never collides with a doc chip
* that happens to share an id.
*/
export type MentionChipInput = {
	id: number;
	/** Chip label — document title or folder name. */
	title: string;
	/** When omitted for ``kind: "folder"``, defaulted to the FOLDER sentinel on insert. */
	document_type?: string;
	/** Defaults to ``"doc"`` when omitted, so legacy callers need no change. */
	kind?: MentionKind;
};
export interface InlineMentionEditorRef {
focus: () => void;
clear: () => void;
setText: (text: string) => void;
getText: () => string;
getMentionedDocuments: () => MentionedDocument[];
insertMentionChip: (
mention: MentionChipInput,
options?: { removeTriggerText?: boolean }
) => void;
/**
* @deprecated Use ``insertMentionChip``. Kept for one transition
* cycle so we don't break ad-hoc callers; prefer the new name.
*/
insertDocumentChip: (
doc: Pick<Document, "id" | "title" | "document_type">,
options?: { removeTriggerText?: boolean }
@ -61,6 +90,13 @@ type MentionElementNode = {
id: number;
title: string;
document_type?: string;
/**
* Discriminator added so a folder chip and a doc chip with the
* same id round-trip cleanly through ``getMentionedDocuments``
* and the persisted ``mentioned-documents`` content part.
* Defaults to ``"doc"`` for nodes that predate this field.
*/
kind?: MentionKind;
statusLabel?: string | null;
statusKind?: MentionStatusKind;
children: [{ text: "" }];
@ -90,11 +126,17 @@ const MentionElement: FC<PlateElementProps<MentionElementNode>> = ({
? "text-emerald-700"
: "text-amber-700";
const isFolder = element.kind === "folder";
return (
<span {...attributes} className="inline-flex align-middle">
<span contentEditable={false} className={`${MENTION_CHIP_CLASSNAME} cursor-default`}>
<span className={MENTION_CHIP_ICON_CLASSNAME}>
{getConnectorIcon(element.document_type ?? "UNKNOWN", "h-3 w-3")}
{isFolder ? (
<FolderIcon className="h-3 w-3" />
) : (
getConnectorIcon(element.document_type ?? "UNKNOWN", "h-3 w-3")
)}
</span>
<span className={MENTION_CHIP_TITLE_CLASSNAME} title={element.title}>
{element.title}
@ -153,10 +195,12 @@ function getMentionedDocuments(value: ComposerValue): MentionedDocument[] {
for (const block of value) {
for (const node of block.children) {
if (!isMentionNode(node)) continue;
const kind: MentionKind = node.kind ?? "doc";
const doc: MentionedDocument = {
id: node.id,
title: node.title,
document_type: node.document_type,
kind,
};
map.set(getMentionDocKey(doc), doc);
}
@ -311,21 +355,23 @@ export const InlineMentionEditor = forwardRef<InlineMentionEditorRef, InlineMent
[editor, emitState]
);
const insertDocumentChip = useCallback(
(
doc: Pick<Document, "id" | "title" | "document_type">,
options?: { removeTriggerText?: boolean }
) => {
if (typeof doc.id !== "number" || typeof doc.title !== "string") return;
const insertMentionChip = useCallback(
(mention: MentionChipInput, options?: { removeTriggerText?: boolean }) => {
if (typeof mention.id !== "number" || typeof mention.title !== "string") return;
const removeTriggerText = options?.removeTriggerText ?? true;
const current = getCurrentValue();
const selection = editor.selection;
const kind: MentionKind = mention.kind ?? "doc";
const document_type =
mention.document_type ??
(kind === "folder" ? FOLDER_MENTION_DOCUMENT_TYPE : undefined);
const mentionNode: MentionElementNode = {
type: MENTION_TYPE,
id: doc.id,
title: doc.title,
document_type: doc.document_type,
id: mention.id,
title: mention.title,
document_type,
kind,
children: [{ text: "" }],
};
@ -385,6 +431,19 @@ export const InlineMentionEditor = forwardRef<InlineMentionEditorRef, InlineMent
[editor.selection, focusAtEnd, getCurrentValue, setValue]
);
// Backwards-compatible shim — pre-folder callers pass a doc-only
// payload; we route them through ``insertMentionChip`` with
// ``kind: "doc"``.
const insertDocumentChip = useCallback(
(
doc: Pick<Document, "id" | "title" | "document_type">,
options?: { removeTriggerText?: boolean }
) => {
insertMentionChip({ ...doc, kind: "doc" }, options);
},
[insertMentionChip]
);
const removeDocumentChip = useCallback(
(docId: number, docType?: string) => {
const current = getCurrentValue();
@ -460,6 +519,7 @@ export const InlineMentionEditor = forwardRef<InlineMentionEditorRef, InlineMent
setText,
getText,
getMentionedDocuments: getMentionedDocs,
insertMentionChip,
insertDocumentChip,
removeDocumentChip,
setDocumentChipStatus,
@ -468,6 +528,7 @@ export const InlineMentionEditor = forwardRef<InlineMentionEditorRef, InlineMent
clear,
getMentionedDocs,
getText,
insertMentionChip,
insertDocumentChip,
removeDocumentChip,
setDocumentChipStatus,

View file

@ -8,7 +8,7 @@ import {
useIsMarkdownCodeBlock,
} from "@assistant-ui/react-markdown";
import { useSetAtom } from "jotai";
import { ExternalLinkIcon } from "lucide-react";
import { ExternalLinkIcon, FileIcon, Folder as FolderIcon } from "lucide-react";
import dynamic from "next/dynamic";
import { useParams } from "next/navigation";
import { useTheme } from "next-themes";
@ -18,6 +18,7 @@ import remarkGfm from "remark-gfm";
import remarkMath from "remark-math";
import { openEditorPanelAtom } from "@/atoms/editor/editor-panel.atom";
import { ImagePreview, ImageRoot, ImageZoom } from "@/components/assistant-ui/image";
import { MentionChip } from "@/components/assistant-ui/mention-chip";
import "katex/dist/katex.min.css";
import { toast } from "sonner";
import { processChildrenWithCitations } from "@/components/citations/citation-renderer";
@ -33,6 +34,7 @@ import {
import { useElectronAPI } from "@/hooks/use-platform";
import { documentsApiService } from "@/lib/apis/documents-api.service";
import { type CitationUrlMap, preprocessCitationMarkdown } from "@/lib/citations/citation-parser";
import { getVirtualPathDisplay } from "@/lib/chat/virtual-path-display";
import { cn } from "@/lib/utils";
function MarkdownCodeBlockSkeleton() {
@ -219,59 +221,71 @@ function FilePathLink({ path, className }: { path: string; className?: string })
? parsedSearchSpaceId
: undefined;
return (
<button
type="button"
className={cn(
"cursor-pointer font-mono text-[0.9em] font-medium text-primary underline underline-offset-4 transition-colors hover:text-primary/80",
className
)}
onClick={(event) => {
event.preventDefault();
event.stopPropagation();
void (async () => {
if (electronAPI) {
let resolvedLocalPath = path;
if (electronAPI.getAgentFilesystemMounts) {
try {
const mounts = (await electronAPI.getAgentFilesystemMounts(
resolvedSearchSpaceId
)) as AgentFilesystemMount[];
resolvedLocalPath = normalizeLocalVirtualPathForEditor(path, mounts);
} catch {
// Fall back to the raw path if mount lookup fails.
}
}
openEditorPanel({
kind: "local_file",
localFilePath: resolvedLocalPath,
title: resolvedLocalPath.split("/").pop() || resolvedLocalPath,
searchSpaceId: resolvedSearchSpaceId,
});
return;
}
const { displayName, isFolder } = getVirtualPathDisplay(path);
const icon = isFolder ? (
<FolderIcon className="size-3.5" />
) : (
<FileIcon className="size-3.5" />
);
if (!resolvedSearchSpaceId || !path.startsWith("/documents/")) return;
try {
const doc = await documentsApiService.getDocumentByVirtualPath({
search_space_id: resolvedSearchSpaceId,
virtual_path: path,
});
openEditorPanel({
kind: "document",
documentId: doc.id,
searchSpaceId: resolvedSearchSpaceId,
title: doc.title,
});
} catch {
toast.error("Document not found in knowledge base.");
const handleClick = useCallback(
(event: React.MouseEvent<HTMLButtonElement>) => {
event.preventDefault();
event.stopPropagation();
void (async () => {
if (electronAPI) {
let resolvedLocalPath = path;
if (electronAPI.getAgentFilesystemMounts) {
try {
const mounts = (await electronAPI.getAgentFilesystemMounts(
resolvedSearchSpaceId
)) as AgentFilesystemMount[];
resolvedLocalPath = normalizeLocalVirtualPathForEditor(path, mounts);
} catch {
// Fall back to the raw path if mount lookup fails.
}
}
})();
}}
title="Open in editor panel"
>
{path}
</button>
openEditorPanel({
kind: "local_file",
localFilePath: resolvedLocalPath,
title: resolvedLocalPath.split("/").pop() || resolvedLocalPath,
searchSpaceId: resolvedSearchSpaceId,
});
return;
}
if (!resolvedSearchSpaceId || !path.startsWith("/documents/")) return;
try {
const doc = await documentsApiService.getDocumentByVirtualPath({
search_space_id: resolvedSearchSpaceId,
virtual_path: path,
});
openEditorPanel({
kind: "document",
documentId: doc.id,
searchSpaceId: resolvedSearchSpaceId,
title: doc.title,
});
} catch {
toast.error("Document not found in knowledge base.");
}
})();
},
[electronAPI, openEditorPanel, path, resolvedSearchSpaceId]
);
// Folders cannot open in the editor panel — keep them as visual chips.
const onClick = isFolder ? undefined : handleClick;
return (
<MentionChip
icon={icon}
label={displayName || path}
tooltip={path}
onClick={onClick}
ariaLabel={isFolder ? `Folder ${displayName}` : `Open ${displayName}`}
className={className}
/>
);
}

View file

@ -0,0 +1,92 @@
"use client";
import type { MouseEventHandler, ReactNode } from "react";
import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
import { cn } from "@/lib/utils";
/**
* A single, minimal chip-button used in two places:
*
* 1. User-message mention chips (rendered for every `@`-mention the user
* inserted in the composer).
* 2. AI-answer file/folder paths (rendered when the assistant emits
* `/documents/.../file.xml` or `/<mount>/.../file.ext`).
*
* Both contexts want the same visual language: a compact, button-styled
* chip with an icon, a truncated label, and an optional tooltip. Sharing
* one component keeps the chat surface visually coherent and means a UX
* tweak (radius, hover, icon size) lands in both places at once.
*
* Styling rules (per shadcn skill):
* - Semantic tokens only (`border`, `bg-background`, `bg-accent`,
* `text-foreground`, `text-muted-foreground`). No raw colors.
* - Layout via `gap-*`, never `space-x-*`.
* - `cn()` for conditional classes.
 * - No manual `z-index` — the tooltip handles its own stacking.
*/
export interface MentionChipProps {
	/**
	 * Visual prefix. Keep this small (e.g. `size-3.5`); the chip controls
	 * its own height and oversized icons will push the label out of place.
	 */
	icon: ReactNode;
	/** Label shown inside the chip; truncated with `…` past the max width. */
	label: string;
	/**
	 * Full title or path shown on hover. Omit to suppress the tooltip
	 * entirely (e.g. when the label already conveys the full identity).
	 */
	tooltip?: ReactNode;
	/**
	 * When provided, the chip behaves like a button (focusable, hover
	 * effect, pointer cursor). Omit for a purely decorative chip.
	 */
	onClick?: MouseEventHandler<HTMLButtonElement>;
	/** Disables the click handler and dims the chip (`opacity-60`). */
	disabled?: boolean;
	/** Extra classes merged into the chip's base styling via `cn()`. */
	className?: string;
	/** Optional override for the accessible name; defaults to `label`. */
	ariaLabel?: string;
}
/**
 * Shared chip-button for composer mentions and AI-answer file paths.
 * Renders a compact icon + truncated label; wraps itself in a tooltip
 * only when ``tooltip`` is provided, and only takes button affordances
 * (hover, pointer cursor) when ``onClick`` is set and not disabled.
 */
export function MentionChip({
	icon,
	label,
	tooltip,
	onClick,
	disabled,
	className,
	ariaLabel,
}: MentionChipProps) {
	// Hover/cursor affordances only apply when a click can actually land.
	const clickable = Boolean(onClick) && !disabled;
	const chipClassName = cn(
		"inline-flex max-w-[220px] items-center gap-1.5 rounded-md border bg-background px-2 py-0.5 align-middle text-xs font-medium text-foreground leading-5 transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring",
		clickable
			? "cursor-pointer hover:bg-accent hover:text-accent-foreground"
			: "cursor-default",
		disabled && "opacity-60",
		className
	);
	const chip = (
		<button
			type="button"
			onClick={onClick}
			disabled={disabled}
			aria-label={ariaLabel ?? label}
			className={chipClassName}
		>
			<span className="inline-flex shrink-0 text-muted-foreground">{icon}</span>
			<span className="truncate">{label}</span>
		</button>
	);
	if (tooltip) {
		return (
			<Tooltip>
				<TooltipTrigger asChild>{chip}</TooltipTrigger>
				<TooltipContent side="top" className="max-w-xs break-all">
					{tooltip}
				</TooltipContent>
			</Tooltip>
		);
	}
	return chip;
}

View file

@ -36,7 +36,10 @@ import {
} from "@/atoms/agent-tools/agent-tools.atoms";
import { chatSessionStateAtom } from "@/atoms/chat/chat-session-state.atom";
import { currentThreadAtom } from "@/atoms/chat/current-thread.atom";
import { mentionedDocumentsAtom } from "@/atoms/chat/mentioned-documents.atom";
import {
type MentionedDocumentInfo,
mentionedDocumentsAtom,
} from "@/atoms/chat/mentioned-documents.atom";
import { pendingUserImageDataUrlsAtom } from "@/atoms/chat/pending-user-images.atom";
import {
clearPremiumAlertForThreadAtom,
@ -87,7 +90,6 @@ import {
getToolDisplayName,
getToolIcon,
} from "@/contracts/enums/toolIcons";
import type { Document } from "@/contracts/types/document.types";
import { useBatchCommentsPreload } from "@/hooks/use-comments";
import { useCommentsSync } from "@/hooks/use-comments-sync";
import { useMediaQuery } from "@/hooks/use-media-query";
@ -377,9 +379,7 @@ const Composer: FC = () => {
const [mentionQuery, setMentionQuery] = useState("");
const [actionQuery, setActionQuery] = useState("");
const editorRef = useRef<InlineMentionEditorRef>(null);
const prevMentionedDocsRef = useRef<
Map<string, Pick<Document, "id" | "title" | "document_type">>
>(new Map());
const prevMentionedDocsRef = useRef<Map<string, MentionedDocumentInfo>>(new Map());
const documentPickerRef = useRef<DocumentMentionPickerRef>(null);
const promptPickerRef = useRef<PromptPickerRef>(null);
const { search_space_id, chat_id } = useParams();
@ -622,20 +622,20 @@ const Composer: FC = () => {
);
const handleDocumentsMention = useCallback(
(documents: Pick<Document, "id" | "title" | "document_type">[]) => {
(mentions: MentionedDocumentInfo[]) => {
const editorMentionedDocs = editorRef.current?.getMentionedDocuments() ?? [];
const editorDocKeys = new Set(editorMentionedDocs.map((doc) => getMentionDocKey(doc)));
for (const doc of documents) {
const key = getMentionDocKey(doc);
for (const mention of mentions) {
const key = getMentionDocKey(mention);
if (editorDocKeys.has(key)) continue;
editorRef.current?.insertDocumentChip(doc);
editorRef.current?.insertMentionChip(mention);
}
setMentionedDocuments((prev) => {
const existingKeySet = new Set(prev.map((d) => getMentionDocKey(d)));
const uniqueNewDocs = documents.filter((doc) => !existingKeySet.has(getMentionDocKey(doc)));
return [...prev, ...uniqueNewDocs];
const uniqueNew = mentions.filter((m) => !existingKeySet.has(getMentionDocKey(m)));
return [...prev, ...uniqueNew];
});
setMentionQuery("");
@ -657,7 +657,7 @@ const Composer: FC = () => {
for (const [key, doc] of nextDocsMap) {
if (prevDocsMap.has(key) || editorKeys.has(key)) continue;
editor.insertDocumentChip(doc, { removeTriggerText: false });
editor.insertMentionChip(doc, { removeTriggerText: false });
}
for (const [key, doc] of prevDocsMap) {

View file

@ -5,12 +5,16 @@ import {
useAuiState,
useMessagePartText,
} from "@assistant-ui/react";
import { useAtomValue } from "jotai";
import { CheckIcon, CopyIcon, Pencil } from "lucide-react";
import { useAtomValue, useSetAtom } from "jotai";
import { CheckIcon, CopyIcon, Folder as FolderIcon, Pencil } from "lucide-react";
import Image from "next/image";
import { type FC, useState } from "react";
import { useParams } from "next/navigation";
import { type FC, useCallback, useState } from "react";
import { toast } from "sonner";
import { currentThreadAtom } from "@/atoms/chat/current-thread.atom";
import { messageDocumentsMapAtom } from "@/atoms/chat/mentioned-documents.atom";
import { openEditorPanelAtom } from "@/atoms/editor/editor-panel.atom";
import { MentionChip } from "@/components/assistant-ui/mention-chip";
import { TooltipIconButton } from "@/components/assistant-ui/tooltip-icon-button";
import { getConnectorIcon } from "@/contracts/enums/connectorIcons";
import { getMentionDocKey } from "@/lib/chat/mention-doc-key";
@ -61,27 +65,61 @@ const UserTextPart: FC = () => {
const text = (part as { text?: string }).text ?? "";
const messageDocumentsMap = useAtomValue(messageDocumentsMapAtom);
const mentionedDocs = (messageId ? messageDocumentsMap[messageId] : undefined) ?? [];
const openEditorPanel = useSetAtom(openEditorPanelAtom);
const params = useParams();
const searchSpaceIdParam = params?.search_space_id;
const parsedSearchSpaceId = Array.isArray(searchSpaceIdParam)
? Number(searchSpaceIdParam[0])
: Number(searchSpaceIdParam);
const resolvedSearchSpaceId = Number.isFinite(parsedSearchSpaceId)
? parsedSearchSpaceId
: undefined;
const handleOpenDoc = useCallback(
(docId: number, title: string) => {
if (!resolvedSearchSpaceId) {
toast.error("Cannot open document outside a search space.");
return;
}
openEditorPanel({
kind: "document",
documentId: docId,
searchSpaceId: resolvedSearchSpaceId,
title,
});
},
[openEditorPanel, resolvedSearchSpaceId]
);
const segments = parseMentionSegments(text, mentionedDocs);
return (
<p style={{ whiteSpace: "pre-line" }} className="break-words">
{segments.map((segment) =>
segment.type === "text" ? (
<span key={`txt-${segment.start}`}>{segment.value}</span>
<p style={{ whiteSpace: "pre-line" }} className="wrap-break-word">
{segments.map((segment) => {
if (segment.type === "text") {
return <span key={`txt-${segment.start}`}>{segment.value}</span>;
}
const isFolder = segment.doc.kind === "folder";
const icon = isFolder ? (
<FolderIcon className="size-3.5" />
) : (
<span
getConnectorIcon(segment.doc.document_type ?? "UNKNOWN", "size-3.5")
);
return (
<MentionChip
key={`mention-${getMentionDocKey(segment.doc)}-${segment.start}`}
className="inline-flex items-center gap-1 mx-0.5 px-1 py-0.5 rounded bg-primary/10 text-xs font-bold text-primary/60 select-none align-middle leading-none"
title={segment.doc.title}
>
<span className="flex items-center text-muted-foreground">
{getConnectorIcon(segment.doc.document_type ?? "UNKNOWN", "h-3 w-3")}
</span>
<span className="max-w-[120px] truncate">{segment.doc.title}</span>
</span>
)
)}
icon={icon}
label={segment.doc.title}
tooltip={isFolder ? `Folder: ${segment.doc.title}` : segment.doc.title}
onClick={
isFolder
? undefined
: () => handleOpenDoc(segment.doc.id, segment.doc.title)
}
className="mx-0.5"
/>
);
})}
</p>
);
};

View file

@ -176,34 +176,25 @@ export function FolderTreeView({
}, [folders, docsByFolder, foldersByParent, effectiveActiveTypes, searchQuery]);
const folderSelectionStates = useMemo(() => {
// One folder = one chip. The checkbox now reflects whether the
// folder itself is mentioned, not whether every nested doc is —
// that reverses the old subtree-fanout semantics in
// ``DocumentsSidebar.handleToggleFolderSelect``. We keep the
// ``"all" | "some" | "none"`` tri-state on the type so the
// existing ``FolderNode`` UI (which renders an indeterminate
// glyph for ``"some"``) stays compatible, but only ``"all"``
// and ``"none"`` are used in practice.
const states: Record<number, FolderSelectionState> = {};
const isSelectable = (d: DocumentNodeDoc) =>
d.status?.state !== "pending" && d.status?.state !== "processing";
function compute(folderId: number): { selected: number; total: number } {
const directDocs = (docsByFolder[folderId] ?? []).filter(isSelectable);
let selected = directDocs.filter((d) => mentionedDocKeys.has(getMentionDocKey(d))).length;
let total = directDocs.length;
for (const child of foldersByParent[folderId] ?? []) {
const sub = compute(child.id);
selected += sub.selected;
total += sub.total;
}
if (total === 0) states[folderId] = "none";
else if (selected === total) states[folderId] = "all";
else if (selected > 0) states[folderId] = "some";
else states[folderId] = "none";
return { selected, total };
}
for (const f of folders) {
if (states[f.id] === undefined) compute(f.id);
const folderMentionKey = getMentionDocKey({
id: f.id,
document_type: "FOLDER",
kind: "folder",
});
states[f.id] = mentionedDocKeys.has(folderMentionKey) ? "all" : "none";
}
return states;
}, [folders, docsByFolder, foldersByParent, mentionedDocKeys]);
}, [folders, mentionedDocKeys]);
const folderMap = useMemo(() => {
const map: Record<number, FolderDisplay> = {};

View file

@ -24,7 +24,10 @@ import type React from "react";
import { useCallback, useEffect, useMemo, useRef, useState } from "react";
import { toast } from "sonner";
import { agentFlagsAtom } from "@/atoms/agent/agent-flags-query.atom";
import { mentionedDocumentsAtom } from "@/atoms/chat/mentioned-documents.atom";
import {
makeFolderMention,
mentionedDocumentsAtom,
} from "@/atoms/chat/mentioned-documents.atom";
import { connectorDialogOpenAtom } from "@/atoms/connector-dialog/connector-dialog.atoms";
import { connectorsAtom } from "@/atoms/connectors/connector-query.atoms";
import { deleteDocumentMutationAtom } from "@/atoms/documents/document-mutation.atoms";
@ -881,7 +884,7 @@ function AuthenticatedDocumentsSidebarBase({
const handleToggleChatMention = useCallback(
(doc: { id: number; title: string; document_type: string }, isMentioned: boolean) => {
const key = getMentionDocKey(doc);
const key = getMentionDocKey({ ...doc, kind: "doc" });
if (isMentioned) {
setSidebarDocs((prev) => prev.filter((d) => getMentionDocKey(d) !== key));
} else {
@ -889,7 +892,12 @@ function AuthenticatedDocumentsSidebarBase({
if (prev.some((d) => getMentionDocKey(d) === key)) return prev;
return [
...prev,
{ id: doc.id, title: doc.title, document_type: doc.document_type as DocumentTypeEnum },
{
id: doc.id,
title: doc.title,
document_type: doc.document_type as DocumentTypeEnum,
kind: "doc",
},
];
});
}
@ -899,40 +907,29 @@ function AuthenticatedDocumentsSidebarBase({
const handleToggleFolderSelect = useCallback(
(folderId: number, selectAll: boolean) => {
function collectSubtreeDocs(parentId: number): DocumentNodeDoc[] {
const directDocs = (treeDocuments ?? []).filter(
(d) =>
d.folderId === parentId &&
d.status?.state !== "pending" &&
d.status?.state !== "processing" &&
d.status?.state !== "failed"
);
const childFolders = foldersByParent[String(parentId)] ?? [];
const descendantDocs = childFolders.flatMap((cf) => collectSubtreeDocs(cf.id));
return [...directDocs, ...descendantDocs];
}
const subtreeDocs = collectSubtreeDocs(folderId);
if (subtreeDocs.length === 0) return;
// One folder click = one folder-mention chip. The agent
// resolves the chip to its virtual path
// (``/documents/MyFolder/``) and walks it itself with
// ``ls`` / ``find_documents``. We deliberately don't
// fan out to per-doc chips anymore — the previous
// behaviour created N chips for one click and dropped
// nested folders entirely once selected, which the
// agent had no way to recover.
const folder = treeFolders.find((f) => f.id === folderId);
if (!folder) return;
const chip = makeFolderMention({ id: folder.id, name: folder.name });
const chipKey = getMentionDocKey(chip);
if (selectAll) {
setSidebarDocs((prev) => {
const existingDocKeys = new Set(prev.map((d) => getMentionDocKey(d)));
const newDocs = subtreeDocs
.filter((d) => !existingDocKeys.has(getMentionDocKey(d)))
.map((d) => ({
id: d.id,
title: d.title,
document_type: d.document_type as DocumentTypeEnum,
}));
return newDocs.length > 0 ? [...prev, ...newDocs] : prev;
const exists = prev.some((d) => getMentionDocKey(d) === chipKey);
return exists ? prev : [...prev, chip];
});
} else {
const keysToRemove = new Set(subtreeDocs.map((d) => getMentionDocKey(d)));
setSidebarDocs((prev) => prev.filter((d) => !keysToRemove.has(getMentionDocKey(d))));
setSidebarDocs((prev) => prev.filter((d) => getMentionDocKey(d) !== chipKey));
}
},
[treeDocuments, foldersByParent, setSidebarDocs]
[treeFolders, setSidebarDocs]
);
const searchFilteredDocuments = useMemo(() => {
@ -1604,7 +1601,7 @@ function AnonymousDocumentsSidebar({
const handleToggleChatMention = useCallback(
(doc: { id: number; title: string; document_type: string }, isMentioned: boolean) => {
const key = getMentionDocKey(doc);
const key = getMentionDocKey({ ...doc, kind: "doc" });
if (isMentioned) {
setSidebarDocs((prev) => prev.filter((d) => getMentionDocKey(d) !== key));
} else {
@ -1612,7 +1609,12 @@ function AnonymousDocumentsSidebar({
if (prev.some((d) => getMentionDocKey(d) === key)) return prev;
return [
...prev,
{ id: doc.id, title: doc.title, document_type: doc.document_type as DocumentTypeEnum },
{
id: doc.id,
title: doc.title,
document_type: doc.document_type as DocumentTypeEnum,
kind: "doc",
},
];
});
}

View file

@ -1,6 +1,8 @@
"use client";
import { useQuery as useZeroQuery } from "@rocicorp/zero/react";
import { keepPreviousData, useQuery } from "@tanstack/react-query";
import { Folder as FolderIcon } from "lucide-react";
import {
forwardRef,
useCallback,
@ -11,11 +13,17 @@ import {
useRef,
useState,
} from "react";
import {
FOLDER_MENTION_DOCUMENT_TYPE,
type MentionedDocumentInfo,
} from "@/atoms/chat/mentioned-documents.atom";
import { Skeleton } from "@/components/ui/skeleton";
import { getConnectorIcon } from "@/contracts/enums/connectorIcons";
import type { Document, SearchDocumentTitlesResponse } from "@/contracts/types/document.types";
import { documentsApiService } from "@/lib/apis/documents-api.service";
import { getMentionDocKey } from "@/lib/chat/mention-doc-key";
import { cn } from "@/lib/utils";
import { queries } from "@/zero/queries";
export interface DocumentMentionPickerRef {
selectHighlighted: () => void;
@ -25,9 +33,9 @@ export interface DocumentMentionPickerRef {
interface DocumentMentionPickerProps {
searchSpaceId: number;
onSelectionChange: (documents: Pick<Document, "id" | "title" | "document_type">[]) => void;
onSelectionChange: (mentions: MentionedDocumentInfo[]) => void;
onDone: () => void;
initialSelectedDocuments?: Pick<Document, "id" | "title" | "document_type">[];
initialSelectedDocuments?: MentionedDocumentInfo[];
externalSearch?: string;
}
@ -89,6 +97,11 @@ export const DocumentMentionPicker = forwardRef<
const [hasMore, setHasMore] = useState(false);
const [isLoadingMore, setIsLoadingMore] = useState(false);
// Folders for this search space — pulled from Zero so the picker
// stays consistent with the documents sidebar (same source of
// truth, automatic updates on rename/delete).
const [zeroFolders] = useZeroQuery(queries.folders.bySpace({ searchSpaceId }));
/**
* Search Strategy:
* - Single character (length === 1): Client-side filtering for instant results
@ -267,21 +280,49 @@ export const DocumentMentionPicker = forwardRef<
[actualDocuments]
);
// Track selected documents with composite key (document_type:id) to prevent cross-type ID collisions
// Folder mention candidates filtered by the current search term.
// Single-char and server-search both use the same client filter
// — folder counts in a workspace are tiny compared to docs, so we
// don't need a paged endpoint. Empty search shows all folders.
const folderMentions: MentionedDocumentInfo[] = useMemo(() => {
const all = (zeroFolders ?? []).map((f) => ({
id: f.id,
title: f.name,
document_type: FOLDER_MENTION_DOCUMENT_TYPE,
kind: "folder" as const,
}));
if (!shouldSearch) return all;
const needle = (isSingleCharSearch ? deferredSearch : debouncedSearch).trim().toLowerCase();
if (!needle) return all;
return all.filter((f) => f.title.toLowerCase().includes(needle));
}, [zeroFolders, debouncedSearch, deferredSearch, isSingleCharSearch, shouldSearch]);
// Doc-shape entries reuse their ``document_type`` discriminator;
// folder entries lift the existing kind-aware key so the same
// matchers used by the chip atom apply unchanged.
const selectedKeys = useMemo(
() => new Set(initialSelectedDocuments.map((d) => `${d.document_type}:${d.id}`)),
() =>
new Set(initialSelectedDocuments.map((d) => getMentionDocKey(d))),
[initialSelectedDocuments]
);
// Exclude already-selected documents from keyboard navigation
const selectableDocuments = useMemo(
() => actualDocuments.filter((doc) => !selectedKeys.has(`${doc.document_type}:${doc.id}`)),
[actualDocuments, selectedKeys]
);
// Combined navigation order: SurfSense docs -> User docs -> Folders.
// Mirrors the on-screen ordering so keyboard arrows match what the
// user sees.
const selectableMentions = useMemo<MentionedDocumentInfo[]>(() => {
const docs: MentionedDocumentInfo[] = actualDocuments.map((doc) => ({
id: doc.id,
title: doc.title,
document_type: doc.document_type,
kind: "doc" as const,
}));
const ordered = [...docs, ...folderMentions];
return ordered.filter((m) => !selectedKeys.has(getMentionDocKey(m)));
}, [actualDocuments, folderMentions, selectedKeys]);
const handleSelectDocument = useCallback(
(doc: Pick<Document, "id" | "title" | "document_type">) => {
onSelectionChange([...initialSelectedDocuments, doc]);
const handleSelectMention = useCallback(
(mention: MentionedDocumentInfo) => {
onSelectionChange([...initialSelectedDocuments, mention]);
onDone();
},
[initialSelectedDocuments, onSelectionChange, onDone]
@ -338,42 +379,42 @@ export const DocumentMentionPicker = forwardRef<
ref,
() => ({
selectHighlighted: () => {
if (selectableDocuments[highlightedIndex]) {
handleSelectDocument(selectableDocuments[highlightedIndex]);
if (selectableMentions[highlightedIndex]) {
handleSelectMention(selectableMentions[highlightedIndex]);
}
},
moveUp: () => {
shouldScrollRef.current = true;
setHighlightedIndex((prev) => (prev > 0 ? prev - 1 : selectableDocuments.length - 1));
setHighlightedIndex((prev) => (prev > 0 ? prev - 1 : selectableMentions.length - 1));
},
moveDown: () => {
shouldScrollRef.current = true;
setHighlightedIndex((prev) => (prev < selectableDocuments.length - 1 ? prev + 1 : 0));
setHighlightedIndex((prev) => (prev < selectableMentions.length - 1 ? prev + 1 : 0));
},
}),
[selectableDocuments, highlightedIndex, handleSelectDocument]
[selectableMentions, highlightedIndex, handleSelectMention]
);
// Keyboard navigation handler for arrow keys, Enter, and Escape
const handleKeyDown = useCallback(
(e: React.KeyboardEvent) => {
if (selectableDocuments.length === 0) return;
if (selectableMentions.length === 0) return;
switch (e.key) {
case "ArrowDown":
e.preventDefault();
shouldScrollRef.current = true;
setHighlightedIndex((prev) => (prev < selectableDocuments.length - 1 ? prev + 1 : 0));
setHighlightedIndex((prev) => (prev < selectableMentions.length - 1 ? prev + 1 : 0));
break;
case "ArrowUp":
e.preventDefault();
shouldScrollRef.current = true;
setHighlightedIndex((prev) => (prev > 0 ? prev - 1 : selectableDocuments.length - 1));
setHighlightedIndex((prev) => (prev > 0 ? prev - 1 : selectableMentions.length - 1));
break;
case "Enter":
e.preventDefault();
if (selectableDocuments[highlightedIndex]) {
handleSelectDocument(selectableDocuments[highlightedIndex]);
if (selectableMentions[highlightedIndex]) {
handleSelectMention(selectableMentions[highlightedIndex]);
}
break;
case "Escape":
@ -382,7 +423,7 @@ export const DocumentMentionPicker = forwardRef<
break;
}
},
[selectableDocuments, highlightedIndex, handleSelectDocument, onDone]
[selectableMentions, highlightedIndex, handleSelectMention, onDone]
);
return (
@ -420,7 +461,7 @@ export const DocumentMentionPicker = forwardRef<
</div>
))}
</div>
) : actualDocuments.length > 0 ? (
) : actualDocuments.length > 0 || folderMentions.length > 0 ? (
<div className="py-1 px-2">
{/* SurfSense Documentation */}
{surfsenseDocsList.length > 0 && (
@ -429,10 +470,16 @@ export const DocumentMentionPicker = forwardRef<
SurfSense Docs
</div>
{surfsenseDocsList.map((doc) => {
const docKey = `${doc.document_type}:${doc.id}`;
const mention: MentionedDocumentInfo = {
id: doc.id,
title: doc.title,
document_type: doc.document_type,
kind: "doc",
};
const docKey = getMentionDocKey(mention);
const isAlreadySelected = selectedKeys.has(docKey);
const selectableIndex = selectableDocuments.findIndex(
(d) => d.document_type === doc.document_type && d.id === doc.id
const selectableIndex = selectableMentions.findIndex(
(m) => getMentionDocKey(m) === docKey
);
const isHighlighted = !isAlreadySelected && selectableIndex === highlightedIndex;
@ -445,7 +492,7 @@ export const DocumentMentionPicker = forwardRef<
}
}}
type="button"
onClick={() => !isAlreadySelected && handleSelectDocument(doc)}
onClick={() => !isAlreadySelected && handleSelectMention(mention)}
onMouseEnter={() => {
if (!isAlreadySelected && selectableIndex >= 0) {
setHighlightedIndex(selectableIndex);
@ -480,10 +527,16 @@ export const DocumentMentionPicker = forwardRef<
Your Documents
</div>
{userDocsList.map((doc) => {
const docKey = `${doc.document_type}:${doc.id}`;
const mention: MentionedDocumentInfo = {
id: doc.id,
title: doc.title,
document_type: doc.document_type,
kind: "doc",
};
const docKey = getMentionDocKey(mention);
const isAlreadySelected = selectedKeys.has(docKey);
const selectableIndex = selectableDocuments.findIndex(
(d) => d.document_type === doc.document_type && d.id === doc.id
const selectableIndex = selectableMentions.findIndex(
(m) => getMentionDocKey(m) === docKey
);
const isHighlighted = !isAlreadySelected && selectableIndex === highlightedIndex;
@ -496,7 +549,7 @@ export const DocumentMentionPicker = forwardRef<
}
}}
type="button"
onClick={() => !isAlreadySelected && handleSelectDocument(doc)}
onClick={() => !isAlreadySelected && handleSelectMention(mention)}
onMouseEnter={() => {
if (!isAlreadySelected && selectableIndex >= 0) {
setHighlightedIndex(selectableIndex);
@ -521,6 +574,60 @@ export const DocumentMentionPicker = forwardRef<
</>
)}
{/* Folders — single source of truth is Zero (same store
that powers the documents sidebar). Selecting a
folder inserts a folder chip whose path the agent
can walk with ``ls`` / ``find_documents``. */}
{folderMentions.length > 0 && (
<>
{(surfsenseDocsList.length > 0 || userDocsList.length > 0) && (
<div className="mx-2 my-4 border-t border-border dark:border-white/5" />
)}
<div className="px-3 py-2 text-xs font-bold text-muted-foreground/55">
Folders
</div>
{folderMentions.map((folder) => {
const folderKey = getMentionDocKey(folder);
const isAlreadySelected = selectedKeys.has(folderKey);
const selectableIndex = selectableMentions.findIndex(
(m) => getMentionDocKey(m) === folderKey
);
const isHighlighted = !isAlreadySelected && selectableIndex === highlightedIndex;
return (
<button
key={folderKey}
ref={(el) => {
if (el && selectableIndex >= 0) {
itemRefs.current.set(selectableIndex, el);
}
}}
type="button"
onClick={() => !isAlreadySelected && handleSelectMention(folder)}
onMouseEnter={() => {
if (!isAlreadySelected && selectableIndex >= 0) {
setHighlightedIndex(selectableIndex);
}
}}
disabled={isAlreadySelected}
className={cn(
"w-full flex items-center gap-2 px-3 py-2 text-left transition-colors rounded-md",
isAlreadySelected ? "opacity-50 cursor-not-allowed" : "cursor-pointer",
isHighlighted && "bg-accent"
)}
>
<span className="shrink-0 text-muted-foreground text-sm">
<FolderIcon className="h-4 w-4" />
</span>
<span className="flex-1 text-sm truncate" title={folder.title}>
{folder.title}
</span>
</button>
);
})}
</>
)}
{/* Pagination loading indicator */}
{isLoadingMore && (
<div className="flex items-center justify-center py-2">

View file

@ -1,8 +1,18 @@
/** Minimal shape needed to key a mention chip (doc or folder). */
type MentionKeyInput = {
	id: number;
	document_type?: string | null;
	kind?: "doc" | "folder";
};

/**
 * Build a stable dedup key for a mention chip.
 *
 * The ``kind:document_type:id`` shape prevents a document and a folder
 * with the same integer id from colliding in the chip array (folders
 * use the ``FOLDER`` sentinel ``document_type``; the ``kind`` prefix
 * is the belt-and-braces guard).
 *
 * @param doc - Mention-like object. A missing ``kind`` defaults to
 *   ``"doc"``; a missing/null ``document_type`` is keyed as ``"UNKNOWN"``.
 * @returns Composite ``kind:document_type:id`` key.
 */
export function getMentionDocKey(doc: MentionKeyInput): string {
	const kind = doc.kind ?? "doc";
	return `${kind}:${doc.document_type ?? "UNKNOWN"}:${doc.id}`;
}

View file

@ -0,0 +1,55 @@
/**
 * Pure helpers that translate agent-facing virtual paths into the
 * human-friendly labels shown on mention chips.
 *
 * Kept in `lib` rather than inside a UI component because a pure
 * function is trivially unit-testable (no React, no DOM) and the same
 * decoding is needed at multiple render sites today (the user-message
 * chip and the AI-answer `MentionChip`) and likely more tomorrow
 * (history search, share-card previews). Centralising the rules keeps
 * the agent's path encoding and the UI's decoding from drifting apart.
 *
 * The agent emits paths under `/documents/...` with two encoding rules
 * applied by `surfsense_backend/app/agents/new_chat/path_resolver.py`:
 *
 * 1. Every basename ends with `.xml` (so the LLM treats KB documents as
 *    XML files). The display layer strips it — users think of the
 *    underlying filename, not the LLM's wrapper.
 * 2. Title collisions get a ` (<doc_id>).xml` disambiguation suffix.
 *    The display layer strips the parenthesised id too, since it's an
 *    implementation detail, not user-facing identity.
 */

// Trailing `.xml` wrapper added by the backend path resolver (case-insensitive).
const XML_EXTENSION_RE = /\.xml$/i;
// Trailing ` (<doc_id>)` suffix used to disambiguate title collisions.
const DOC_ID_DISAMBIG_RE = /\s\(\d+\)$/;

export interface VirtualPathDisplay {
	/** Human-friendly leaf name with `.xml` and ` (<doc_id>)` suffixes stripped. */
	displayName: string;
	/** Whether the path points to a folder (trailing slash) rather than a file. */
	isFolder: boolean;
}

/**
 * Decode a virtual path into the label that should appear in chip UI.
 *
 * Folder detection relies on the trailing-slash convention the agent
 * already follows in `<priority_documents>` and `KnowledgeTreeMiddleware`.
 * Falls back to the raw path when no leaf segment can be extracted
 * (defensive — the caller will at least show *something*).
 */
export function getVirtualPathDisplay(path: string): VirtualPathDisplay {
	const cleaned = (path ?? "").trim();
	if (cleaned === "") {
		return { displayName: "", isFolder: false };
	}

	const isFolder = cleaned.endsWith("/");
	const parts = cleaned
		.replace(/\/+$/, "")
		.split("/")
		.filter((segment) => segment.length > 0);
	const leaf = parts.length > 0 ? parts[parts.length - 1] : undefined;
	if (leaf === undefined) {
		// Nothing but slashes (e.g. "/") — show the raw path rather than "".
		return { displayName: cleaned, isFolder };
	}

	const stripped = leaf.replace(XML_EXTENSION_RE, "").replace(DOC_ID_DISAMBIG_RE, "");
	// If stripping emptied the name (leaf was literally ".xml"), keep the raw leaf.
	return { displayName: stripped.length > 0 ? stripped : leaf, isFolder };
}