mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-07-04 22:02:16 +02:00
feat(markdown): enable citation rendering in MarkdownViewer and related components
- Added an `enableCitations` prop to `MarkdownViewer` to support interactive citation badges.
- Updated `MarkdownViewer` call sites across various components to use the new citation feature.
- Enhanced citation processing in `PlateEditor` for read-only views, ensuring citations render correctly without affecting markdown serialization.
- Refactored citation handling in `InlineCitation` and `MarkdownText` to improve citation context management.
This commit is contained in:
parent
d335e96ec2
commit
7aeb8bb0a8
14 changed files with 809 additions and 260 deletions
|
|
@ -3,11 +3,11 @@
|
||||||
import { useQuery } from "@tanstack/react-query";
|
import { useQuery } from "@tanstack/react-query";
|
||||||
import { useSetAtom } from "jotai";
|
import { useSetAtom } from "jotai";
|
||||||
import { ExternalLink, FileText } from "lucide-react";
|
import { ExternalLink, FileText } from "lucide-react";
|
||||||
|
import dynamic from "next/dynamic";
|
||||||
import type { FC } from "react";
|
import type { FC } from "react";
|
||||||
import { useCallback, useEffect, useRef, useState } from "react";
|
import { useCallback, useEffect, useRef, useState } from "react";
|
||||||
import { openCitationPanelAtom } from "@/atoms/citation/citation-panel.atom";
|
import { openCitationPanelAtom } from "@/atoms/citation/citation-panel.atom";
|
||||||
import { useCitationMetadata } from "@/components/assistant-ui/citation-metadata-context";
|
import { useCitationMetadata } from "@/components/assistant-ui/citation-metadata-context";
|
||||||
import { MarkdownViewer } from "@/components/markdown-viewer";
|
|
||||||
import { Citation } from "@/components/tool-ui/citation";
|
import { Citation } from "@/components/tool-ui/citation";
|
||||||
import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover";
|
import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover";
|
||||||
import { Spinner } from "@/components/ui/spinner";
|
import { Spinner } from "@/components/ui/spinner";
|
||||||
|
|
@ -15,6 +15,16 @@ import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip
|
||||||
import { documentsApiService } from "@/lib/apis/documents-api.service";
|
import { documentsApiService } from "@/lib/apis/documents-api.service";
|
||||||
import { cacheKeys } from "@/lib/query-client/cache-keys";
|
import { cacheKeys } from "@/lib/query-client/cache-keys";
|
||||||
|
|
||||||
|
// MarkdownViewer is loaded through `next/dynamic` rather than a static import.
// A static import would close a module-init cycle:
//   `markdown-viewer.tsx` → `citation-renderer.tsx` → `inline-citation.tsx`
//   → `markdown-viewer.tsx`.
// The viewer is only rendered by `SurfsenseDocCitation` (the popover body),
// so most call paths never hit the lazy boundary. A spinner is shown while
// the chunk loads, and server-side rendering is disabled for it.
const MarkdownViewer = dynamic(
	async () => {
		const viewerModule = await import("@/components/markdown-viewer");
		return viewerModule.MarkdownViewer;
	},
	{
		ssr: false,
		loading: () => <Spinner size="xs" />,
	}
);
|
||||||
|
|
||||||
interface InlineCitationProps {
|
interface InlineCitationProps {
|
||||||
chunkId: number;
|
chunkId: number;
|
||||||
isDocsChunk?: boolean;
|
isDocsChunk?: boolean;
|
||||||
|
|
@ -172,7 +182,11 @@ const SurfsenseDocCitation: FC<{ chunkId: number }> = ({ chunkId }) => {
|
||||||
</p>
|
</p>
|
||||||
)}
|
)}
|
||||||
{!isLoading && !error && citedChunk?.content && (
|
{!isLoading && !error && citedChunk?.content && (
|
||||||
<MarkdownViewer content={citedChunk.content} maxLength={1500} />
|
<MarkdownViewer
|
||||||
|
content={citedChunk.content}
|
||||||
|
maxLength={1500}
|
||||||
|
enableCitations
|
||||||
|
/>
|
||||||
)}
|
)}
|
||||||
{!isLoading && !error && !citedChunk?.content && (
|
{!isLoading && !error && !citedChunk?.content && (
|
||||||
<p className="py-4 text-xs text-muted-foreground">No content available.</p>
|
<p className="py-4 text-xs text-muted-foreground">No content available.</p>
|
||||||
|
|
|
||||||
|
|
@ -12,15 +12,26 @@ import { ExternalLinkIcon } from "lucide-react";
|
||||||
import dynamic from "next/dynamic";
|
import dynamic from "next/dynamic";
|
||||||
import { useParams } from "next/navigation";
|
import { useParams } from "next/navigation";
|
||||||
import { useTheme } from "next-themes";
|
import { useTheme } from "next-themes";
|
||||||
import { memo, type ReactNode } from "react";
|
import {
|
||||||
|
createContext,
|
||||||
|
memo,
|
||||||
|
type ReactNode,
|
||||||
|
useCallback,
|
||||||
|
useContext,
|
||||||
|
useRef,
|
||||||
|
} from "react";
|
||||||
import rehypeKatex from "rehype-katex";
|
import rehypeKatex from "rehype-katex";
|
||||||
import remarkGfm from "remark-gfm";
|
import remarkGfm from "remark-gfm";
|
||||||
import remarkMath from "remark-math";
|
import remarkMath from "remark-math";
|
||||||
import { openEditorPanelAtom } from "@/atoms/editor/editor-panel.atom";
|
import { openEditorPanelAtom } from "@/atoms/editor/editor-panel.atom";
|
||||||
import { ImagePreview, ImageRoot, ImageZoom } from "@/components/assistant-ui/image";
|
import { ImagePreview, ImageRoot, ImageZoom } from "@/components/assistant-ui/image";
|
||||||
import "katex/dist/katex.min.css";
|
import "katex/dist/katex.min.css";
|
||||||
import { InlineCitation, UrlCitation } from "@/components/assistant-ui/inline-citation";
|
import { processChildrenWithCitations } from "@/components/citations/citation-renderer";
|
||||||
import { Skeleton } from "@/components/ui/skeleton";
|
import { Skeleton } from "@/components/ui/skeleton";
|
||||||
|
import {
|
||||||
|
type CitationUrlMap,
|
||||||
|
preprocessCitationMarkdown,
|
||||||
|
} from "@/lib/citations/citation-parser";
|
||||||
import {
|
import {
|
||||||
Table,
|
Table,
|
||||||
TableBody,
|
TableBody,
|
||||||
|
|
@ -59,31 +70,30 @@ const LazyMarkdownCodeBlock = dynamic(
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
// Storage for URL citations replaced during preprocess to avoid GFM autolink interference.
|
// Per-render URL placeholder map propagated to component overrides via
|
||||||
// Populated in preprocessMarkdown, consumed in parseTextWithCitations.
|
// React Context. Replaces the previous module-level `_pendingUrlCitations`
|
||||||
let _pendingUrlCitations = new Map<string, string>();
|
// state, which was unsafe under concurrent renders / SSR.
|
||||||
let _urlCiteIdx = 0;
|
type CitationUrlMapRef = { current: CitationUrlMap };
|
||||||
|
const EMPTY_URL_MAP: CitationUrlMap = new Map();
|
||||||
|
const CitationUrlMapContext = createContext<CitationUrlMapRef>({ current: EMPTY_URL_MAP });
|
||||||
|
|
||||||
|
/**
 * Read the URL placeholder map published through `CitationUrlMapContext`.
 *
 * @returns The map currently held by the context's ref; when no provider is
 * mounted this is the context default, i.e. the shared empty map.
 */
function useCitationUrlMap(): CitationUrlMap {
	const { current: urlMap } = useContext(CitationUrlMapContext);
	return urlMap;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Preprocess raw markdown before it reaches the remark/rehype pipeline.
|
* Preprocess raw markdown before it reaches the remark/rehype pipeline.
|
||||||
* - Replaces URL-based citations with safe placeholders (prevents GFM autolinks)
|
* - Replaces URL-based citations with safe placeholders (prevents GFM autolinks)
|
||||||
* - Normalises LaTeX delimiters to dollar-sign syntax for remark-math
|
* - Normalises LaTeX delimiters to dollar-sign syntax for remark-math
|
||||||
*/
|
*/
|
||||||
function preprocessMarkdown(content: string): string {
|
function preprocessMarkdown(content: string, urlMapRef: CitationUrlMapRef): string {
|
||||||
// Replace URL-based citations with safe placeholders BEFORE markdown parsing.
|
// Replace URL-based citations with safe placeholders BEFORE markdown parsing.
|
||||||
// GFM autolinks would otherwise convert the https://... inside [citation:URL]
|
// GFM autolinks would otherwise convert the https://... inside [citation:URL]
|
||||||
// into an <a> element, splitting the text and preventing our citation regex
|
// into an <a> element, splitting the text and preventing our citation regex
|
||||||
// from matching the full pattern.
|
// from matching the full pattern.
|
||||||
_pendingUrlCitations = new Map();
|
const { content: rewritten, urlMap } = preprocessCitationMarkdown(content);
|
||||||
_urlCiteIdx = 0;
|
urlMapRef.current = urlMap;
|
||||||
content = content.replace(
|
content = rewritten;
|
||||||
/[[【]\u200B?citation:\s*(https?:\/\/[^\]】\u200B]+)\s*\u200B?[\]】]/g,
|
|
||||||
(_, url) => {
|
|
||||||
const key = `urlcite${_urlCiteIdx++}`;
|
|
||||||
_pendingUrlCitations.set(key, url.trim());
|
|
||||||
return `[citation:${key}]`;
|
|
||||||
}
|
|
||||||
);
|
|
||||||
|
|
||||||
// All math forms are normalised to $$...$$ so we can disable single-dollar
|
// All math forms are normalised to $$...$$ so we can disable single-dollar
|
||||||
// inline math in remark-math (otherwise currency like "$3,120.00 and $0.00"
|
// inline math in remark-math (otherwise currency like "$3,120.00 and $0.00"
|
||||||
|
|
@ -116,113 +126,28 @@ function preprocessMarkdown(content: string): string {
|
||||||
return content;
|
return content;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Matches [citation:...] with numeric IDs (incl. negative, doc- prefix, comma-separated),
|
|
||||||
// URL-based IDs from live web search, or urlciteN placeholders from preprocess.
|
|
||||||
// Also matches Chinese brackets 【】 and handles zero-width spaces that LLM sometimes inserts.
|
|
||||||
const CITATION_REGEX =
|
|
||||||
/[[【]\u200B?citation:\s*(https?:\/\/[^\]】\u200B]+|urlcite\d+|(?:doc-)?-?\d+(?:\s*,\s*(?:doc-)?-?\d+)*)\s*\u200B?[\]】]/g;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Parses text and replaces [citation:XXX] patterns with citation components.
|
|
||||||
* Supports:
|
|
||||||
* - Numeric chunk IDs: [citation:123]
|
|
||||||
* - Doc-prefixed IDs: [citation:doc-123]
|
|
||||||
* - Comma-separated IDs: [citation:4149, 4150, 4151]
|
|
||||||
* - URL-based citations from live search: [citation:https://example.com/page]
|
|
||||||
*/
|
|
||||||
function parseTextWithCitations(text: string): ReactNode[] {
|
|
||||||
const parts: ReactNode[] = [];
|
|
||||||
let lastIndex = 0;
|
|
||||||
let match: RegExpExecArray | null;
|
|
||||||
let instanceIndex = 0;
|
|
||||||
|
|
||||||
CITATION_REGEX.lastIndex = 0;
|
|
||||||
|
|
||||||
match = CITATION_REGEX.exec(text);
|
|
||||||
while (match !== null) {
|
|
||||||
if (match.index > lastIndex) {
|
|
||||||
parts.push(text.substring(lastIndex, match.index));
|
|
||||||
}
|
|
||||||
|
|
||||||
const captured = match[1];
|
|
||||||
|
|
||||||
if (captured.startsWith("http://") || captured.startsWith("https://")) {
|
|
||||||
parts.push(<UrlCitation key={`citation-url-${instanceIndex}`} url={captured.trim()} />);
|
|
||||||
instanceIndex++;
|
|
||||||
} else if (captured.startsWith("urlcite")) {
|
|
||||||
const url = _pendingUrlCitations.get(captured);
|
|
||||||
if (url) {
|
|
||||||
parts.push(<UrlCitation key={`citation-url-${instanceIndex}`} url={url} />);
|
|
||||||
}
|
|
||||||
instanceIndex++;
|
|
||||||
} else {
|
|
||||||
const rawIds = captured.split(",").map((s) => s.trim());
|
|
||||||
for (const rawId of rawIds) {
|
|
||||||
const isDocsChunk = rawId.startsWith("doc-");
|
|
||||||
const chunkId = Number.parseInt(isDocsChunk ? rawId.slice(4) : rawId, 10);
|
|
||||||
parts.push(
|
|
||||||
<InlineCitation
|
|
||||||
key={`citation-${isDocsChunk ? "doc-" : ""}${chunkId}-${instanceIndex}`}
|
|
||||||
chunkId={chunkId}
|
|
||||||
isDocsChunk={isDocsChunk}
|
|
||||||
/>
|
|
||||||
);
|
|
||||||
instanceIndex++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
lastIndex = match.index + match[0].length;
|
|
||||||
match = CITATION_REGEX.exec(text);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (lastIndex < text.length) {
|
|
||||||
parts.push(text.substring(lastIndex));
|
|
||||||
}
|
|
||||||
|
|
||||||
return parts.length > 0 ? parts : [text];
|
|
||||||
}
|
|
||||||
|
|
||||||
const MarkdownTextImpl = () => {
|
const MarkdownTextImpl = () => {
|
||||||
|
const urlMapRef = useRef<CitationUrlMap>(EMPTY_URL_MAP);
|
||||||
|
const preprocess = useCallback(
|
||||||
|
(content: string) => preprocessMarkdown(content, urlMapRef),
|
||||||
|
[]
|
||||||
|
);
|
||||||
return (
|
return (
|
||||||
<MarkdownTextPrimitive
|
<CitationUrlMapContext.Provider value={urlMapRef}>
|
||||||
smooth={false}
|
<MarkdownTextPrimitive
|
||||||
remarkPlugins={[remarkGfm, [remarkMath, { singleDollarTextMath: false }]]}
|
smooth={false}
|
||||||
rehypePlugins={[rehypeKatex]}
|
remarkPlugins={[remarkGfm, [remarkMath, { singleDollarTextMath: false }]]}
|
||||||
className="aui-md"
|
rehypePlugins={[rehypeKatex]}
|
||||||
components={defaultComponents}
|
className="aui-md"
|
||||||
preprocess={preprocessMarkdown}
|
components={defaultComponents}
|
||||||
/>
|
preprocess={preprocess}
|
||||||
|
/>
|
||||||
|
</CitationUrlMapContext.Provider>
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|
||||||
export const MarkdownText = memo(MarkdownTextImpl);
|
export const MarkdownText = memo(MarkdownTextImpl);
|
||||||
|
|
||||||
/**
|
|
||||||
* Helper to process children and replace citation patterns with components
|
|
||||||
*/
|
|
||||||
function processChildrenWithCitations(children: ReactNode): ReactNode {
|
|
||||||
if (typeof children === "string") {
|
|
||||||
const parsed = parseTextWithCitations(children);
|
|
||||||
return parsed.length === 1 && typeof parsed[0] === "string" ? children : parsed;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (Array.isArray(children)) {
|
|
||||||
return children.map((child) => {
|
|
||||||
if (typeof child === "string") {
|
|
||||||
const parsed = parseTextWithCitations(child);
|
|
||||||
return parsed.length === 1 && typeof parsed[0] === "string" ? (
|
|
||||||
child
|
|
||||||
) : (
|
|
||||||
<span key={child}>{parsed}</span>
|
|
||||||
);
|
|
||||||
}
|
|
||||||
return child;
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
return children;
|
|
||||||
}
|
|
||||||
|
|
||||||
function extractDomain(url: string): string {
|
function extractDomain(url: string): string {
|
||||||
try {
|
try {
|
||||||
const parsed = new URL(url);
|
const parsed = new URL(url);
|
||||||
|
|
@ -322,92 +247,125 @@ function MarkdownImage({ src, alt }: { src?: string; alt?: string }) {
|
||||||
}
|
}
|
||||||
|
|
||||||
const defaultComponents = memoizeMarkdownComponents({
|
const defaultComponents = memoizeMarkdownComponents({
|
||||||
h1: ({ className, children, ...props }) => (
|
h1: function H1({ className, children, ...props }) {
|
||||||
<h1
|
const urlMap = useCitationUrlMap();
|
||||||
className={cn(
|
return (
|
||||||
"aui-md-h1 mb-8 scroll-m-20 font-extrabold text-4xl tracking-tight last:mb-0",
|
<h1
|
||||||
className
|
className={cn(
|
||||||
)}
|
"aui-md-h1 mb-8 scroll-m-20 font-extrabold text-4xl tracking-tight last:mb-0",
|
||||||
{...props}
|
className
|
||||||
>
|
)}
|
||||||
{processChildrenWithCitations(children)}
|
{...props}
|
||||||
</h1>
|
>
|
||||||
),
|
{processChildrenWithCitations(children, urlMap)}
|
||||||
h2: ({ className, children, ...props }) => (
|
</h1>
|
||||||
<h2
|
);
|
||||||
className={cn(
|
},
|
||||||
"aui-md-h2 mt-8 mb-4 scroll-m-20 font-semibold text-3xl tracking-tight first:mt-0 last:mb-0",
|
h2: function H2({ className, children, ...props }) {
|
||||||
className
|
const urlMap = useCitationUrlMap();
|
||||||
)}
|
return (
|
||||||
{...props}
|
<h2
|
||||||
>
|
className={cn(
|
||||||
{processChildrenWithCitations(children)}
|
"aui-md-h2 mt-8 mb-4 scroll-m-20 font-semibold text-3xl tracking-tight first:mt-0 last:mb-0",
|
||||||
</h2>
|
className
|
||||||
),
|
)}
|
||||||
h3: ({ className, children, ...props }) => (
|
{...props}
|
||||||
<h3
|
>
|
||||||
className={cn(
|
{processChildrenWithCitations(children, urlMap)}
|
||||||
"aui-md-h3 mt-6 mb-4 scroll-m-20 font-semibold text-2xl tracking-tight first:mt-0 last:mb-0",
|
</h2>
|
||||||
className
|
);
|
||||||
)}
|
},
|
||||||
{...props}
|
h3: function H3({ className, children, ...props }) {
|
||||||
>
|
const urlMap = useCitationUrlMap();
|
||||||
{processChildrenWithCitations(children)}
|
return (
|
||||||
</h3>
|
<h3
|
||||||
),
|
className={cn(
|
||||||
h4: ({ className, children, ...props }) => (
|
"aui-md-h3 mt-6 mb-4 scroll-m-20 font-semibold text-2xl tracking-tight first:mt-0 last:mb-0",
|
||||||
<h4
|
className
|
||||||
className={cn(
|
)}
|
||||||
"aui-md-h4 mt-6 mb-4 scroll-m-20 font-semibold text-xl tracking-tight first:mt-0 last:mb-0",
|
{...props}
|
||||||
className
|
>
|
||||||
)}
|
{processChildrenWithCitations(children, urlMap)}
|
||||||
{...props}
|
</h3>
|
||||||
>
|
);
|
||||||
{processChildrenWithCitations(children)}
|
},
|
||||||
</h4>
|
h4: function H4({ className, children, ...props }) {
|
||||||
),
|
const urlMap = useCitationUrlMap();
|
||||||
h5: ({ className, children, ...props }) => (
|
return (
|
||||||
<h5
|
<h4
|
||||||
className={cn("aui-md-h5 my-4 font-semibold text-lg first:mt-0 last:mb-0", className)}
|
className={cn(
|
||||||
{...props}
|
"aui-md-h4 mt-6 mb-4 scroll-m-20 font-semibold text-xl tracking-tight first:mt-0 last:mb-0",
|
||||||
>
|
className
|
||||||
{processChildrenWithCitations(children)}
|
)}
|
||||||
</h5>
|
{...props}
|
||||||
),
|
>
|
||||||
h6: ({ className, children, ...props }) => (
|
{processChildrenWithCitations(children, urlMap)}
|
||||||
<h6 className={cn("aui-md-h6 my-4 font-semibold first:mt-0 last:mb-0", className)} {...props}>
|
</h4>
|
||||||
{processChildrenWithCitations(children)}
|
);
|
||||||
</h6>
|
},
|
||||||
),
|
h5: function H5({ className, children, ...props }) {
|
||||||
p: ({ className, children, ...props }) => (
|
const urlMap = useCitationUrlMap();
|
||||||
<p className={cn("aui-md-p mt-5 mb-5 leading-7 first:mt-0 last:mb-0", className)} {...props}>
|
return (
|
||||||
{processChildrenWithCitations(children)}
|
<h5
|
||||||
</p>
|
className={cn("aui-md-h5 my-4 font-semibold text-lg first:mt-0 last:mb-0", className)}
|
||||||
),
|
{...props}
|
||||||
a: ({ className, children, ...props }) => (
|
>
|
||||||
<a
|
{processChildrenWithCitations(children, urlMap)}
|
||||||
className={cn("aui-md-a font-medium text-primary underline underline-offset-4", className)}
|
</h5>
|
||||||
{...props}
|
);
|
||||||
>
|
},
|
||||||
{processChildrenWithCitations(children)}
|
h6: function H6({ className, children, ...props }) {
|
||||||
</a>
|
const urlMap = useCitationUrlMap();
|
||||||
),
|
return (
|
||||||
blockquote: ({ className, children, ...props }) => (
|
<h6 className={cn("aui-md-h6 my-4 font-semibold first:mt-0 last:mb-0", className)} {...props}>
|
||||||
<blockquote className={cn("aui-md-blockquote border-l-2 pl-6 italic", className)} {...props}>
|
{processChildrenWithCitations(children, urlMap)}
|
||||||
{processChildrenWithCitations(children)}
|
</h6>
|
||||||
</blockquote>
|
);
|
||||||
),
|
},
|
||||||
|
p: function P({ className, children, ...props }) {
|
||||||
|
const urlMap = useCitationUrlMap();
|
||||||
|
return (
|
||||||
|
<p className={cn("aui-md-p mt-5 mb-5 leading-7 first:mt-0 last:mb-0", className)} {...props}>
|
||||||
|
{processChildrenWithCitations(children, urlMap)}
|
||||||
|
</p>
|
||||||
|
);
|
||||||
|
},
|
||||||
|
a: function A({ className, children, ...props }) {
|
||||||
|
const urlMap = useCitationUrlMap();
|
||||||
|
return (
|
||||||
|
<a
|
||||||
|
className={cn(
|
||||||
|
"aui-md-a font-medium text-primary underline underline-offset-4",
|
||||||
|
className
|
||||||
|
)}
|
||||||
|
{...props}
|
||||||
|
>
|
||||||
|
{processChildrenWithCitations(children, urlMap)}
|
||||||
|
</a>
|
||||||
|
);
|
||||||
|
},
|
||||||
|
blockquote: function Blockquote({ className, children, ...props }) {
|
||||||
|
const urlMap = useCitationUrlMap();
|
||||||
|
return (
|
||||||
|
<blockquote className={cn("aui-md-blockquote border-l-2 pl-6 italic", className)} {...props}>
|
||||||
|
{processChildrenWithCitations(children, urlMap)}
|
||||||
|
</blockquote>
|
||||||
|
);
|
||||||
|
},
|
||||||
ul: ({ className, ...props }) => (
|
ul: ({ className, ...props }) => (
|
||||||
<ul className={cn("aui-md-ul my-5 ml-6 list-disc [&>li]:mt-2", className)} {...props} />
|
<ul className={cn("aui-md-ul my-5 ml-6 list-disc [&>li]:mt-2", className)} {...props} />
|
||||||
),
|
),
|
||||||
ol: ({ className, ...props }) => (
|
ol: ({ className, ...props }) => (
|
||||||
<ol className={cn("aui-md-ol my-5 ml-6 list-decimal [&>li]:mt-2", className)} {...props} />
|
<ol className={cn("aui-md-ol my-5 ml-6 list-decimal [&>li]:mt-2", className)} {...props} />
|
||||||
),
|
),
|
||||||
li: ({ className, children, ...props }) => (
|
li: function Li({ className, children, ...props }) {
|
||||||
<li className={cn("aui-md-li", className)} {...props}>
|
const urlMap = useCitationUrlMap();
|
||||||
{processChildrenWithCitations(children)}
|
return (
|
||||||
</li>
|
<li className={cn("aui-md-li", className)} {...props}>
|
||||||
),
|
{processChildrenWithCitations(children, urlMap)}
|
||||||
|
</li>
|
||||||
|
);
|
||||||
|
},
|
||||||
hr: ({ className, ...props }) => (
|
hr: ({ className, ...props }) => (
|
||||||
<hr className={cn("aui-md-hr my-5 border-b", className)} {...props} />
|
<hr className={cn("aui-md-hr my-5 border-b", className)} {...props} />
|
||||||
),
|
),
|
||||||
|
|
@ -422,28 +380,34 @@ const defaultComponents = memoizeMarkdownComponents({
|
||||||
tbody: ({ className, ...props }) => (
|
tbody: ({ className, ...props }) => (
|
||||||
<TableBody className={cn("aui-md-tbody", className)} {...props} />
|
<TableBody className={cn("aui-md-tbody", className)} {...props} />
|
||||||
),
|
),
|
||||||
th: ({ className, children, ...props }) => (
|
th: function Th({ className, children, ...props }) {
|
||||||
<TableHead
|
const urlMap = useCitationUrlMap();
|
||||||
className={cn(
|
return (
|
||||||
"aui-md-th bg-muted/50 whitespace-normal [[align=center]]:text-center [[align=right]]:text-right",
|
<TableHead
|
||||||
className
|
className={cn(
|
||||||
)}
|
"aui-md-th bg-muted/50 whitespace-normal [[align=center]]:text-center [[align=right]]:text-right",
|
||||||
{...props}
|
className
|
||||||
>
|
)}
|
||||||
{processChildrenWithCitations(children)}
|
{...props}
|
||||||
</TableHead>
|
>
|
||||||
),
|
{processChildrenWithCitations(children, urlMap)}
|
||||||
td: ({ className, children, ...props }) => (
|
</TableHead>
|
||||||
<TableCell
|
);
|
||||||
className={cn(
|
},
|
||||||
"aui-md-td whitespace-normal [[align=center]]:text-center [[align=right]]:text-right",
|
td: function Td({ className, children, ...props }) {
|
||||||
className
|
const urlMap = useCitationUrlMap();
|
||||||
)}
|
return (
|
||||||
{...props}
|
<TableCell
|
||||||
>
|
className={cn(
|
||||||
{processChildrenWithCitations(children)}
|
"aui-md-td whitespace-normal [[align=center]]:text-center [[align=right]]:text-right",
|
||||||
</TableCell>
|
className
|
||||||
),
|
)}
|
||||||
|
{...props}
|
||||||
|
>
|
||||||
|
{processChildrenWithCitations(children, urlMap)}
|
||||||
|
</TableCell>
|
||||||
|
);
|
||||||
|
},
|
||||||
tr: ({ className, ...props }) => <TableRow className={cn("aui-md-tr", className)} {...props} />,
|
tr: ({ className, ...props }) => <TableRow className={cn("aui-md-tr", className)} {...props} />,
|
||||||
sup: ({ className, ...props }) => (
|
sup: ({ className, ...props }) => (
|
||||||
<sup className={cn("aui-md-sup [&>a]:text-xs [&>a]:no-underline", className)} {...props} />
|
<sup className={cn("aui-md-sup [&>a]:text-xs [&>a]:no-underline", className)} {...props} />
|
||||||
|
|
@ -552,16 +516,22 @@ const defaultComponents = memoizeMarkdownComponents({
|
||||||
/>
|
/>
|
||||||
);
|
);
|
||||||
},
|
},
|
||||||
strong: ({ className, children, ...props }) => (
|
strong: function Strong({ className, children, ...props }) {
|
||||||
<strong className={cn("aui-md-strong font-semibold", className)} {...props}>
|
const urlMap = useCitationUrlMap();
|
||||||
{processChildrenWithCitations(children)}
|
return (
|
||||||
</strong>
|
<strong className={cn("aui-md-strong font-semibold", className)} {...props}>
|
||||||
),
|
{processChildrenWithCitations(children, urlMap)}
|
||||||
em: ({ className, children, ...props }) => (
|
</strong>
|
||||||
<em className={cn("aui-md-em", className)} {...props}>
|
);
|
||||||
{processChildrenWithCitations(children)}
|
},
|
||||||
</em>
|
em: function Em({ className, children, ...props }) {
|
||||||
),
|
const urlMap = useCitationUrlMap();
|
||||||
|
return (
|
||||||
|
<em className={cn("aui-md-em", className)} {...props}>
|
||||||
|
{processChildrenWithCitations(children, urlMap)}
|
||||||
|
</em>
|
||||||
|
);
|
||||||
|
},
|
||||||
img: ({ src, alt }) => (
|
img: ({ src, alt }) => (
|
||||||
<MarkdownImage src={typeof src === "string" ? src : undefined} alt={alt} />
|
<MarkdownImage src={typeof src === "string" ? src : undefined} alt={alt} />
|
||||||
),
|
),
|
||||||
|
|
|
||||||
|
|
@ -169,7 +169,7 @@ export const CitationPanelContent: FC<CitationPanelContentProps> = ({ chunkId, o
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
<div className="text-sm">
|
<div className="text-sm">
|
||||||
<MarkdownViewer content={chunk.content} />
|
<MarkdownViewer content={chunk.content} enableCitations />
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
|
|
|
||||||
79
surfsense_web/components/citations/citation-renderer.tsx
Normal file
79
surfsense_web/components/citations/citation-renderer.tsx
Normal file
|
|
@ -0,0 +1,79 @@
|
||||||
|
"use client";
|
||||||
|
|
||||||
|
import type { ReactNode } from "react";
|
||||||
|
import { InlineCitation, UrlCitation } from "@/components/assistant-ui/inline-citation";
|
||||||
|
import {
|
||||||
|
type CitationToken,
|
||||||
|
type CitationUrlMap,
|
||||||
|
parseTextWithCitations,
|
||||||
|
} from "@/lib/citations/citation-parser";
|
||||||
|
|
||||||
|
/**
 * Turn one parsed citation token into its badge element.
 *
 * URL tokens become a `UrlCitation`; every other token is rendered as an
 * `InlineCitation` for the token's chunk id.
 *
 * @param token - Parsed citation token to render.
 * @param ordinalKey - Counter supplied by the caller and appended to the
 * React `key`, so two identical citations under the same parent still get
 * distinct keys.
 * @returns The citation element for `token`.
 */
export function renderCitationToken(token: CitationToken, ordinalKey: number): ReactNode {
	if (token.kind !== "url") {
		// Docs chunks and regular chunks can share a numeric id, so the key
		// carries a "doc-" prefix to tell them apart.
		const docPrefix = token.isDocsChunk ? "doc-" : "";
		return (
			<InlineCitation
				key={`citation-${docPrefix}${token.chunkId}-${ordinalKey}`}
				chunkId={token.chunkId}
				isDocsChunk={token.isDocsChunk}
			/>
		);
	}

	return <UrlCitation key={`citation-url-${ordinalKey}`} url={token.url} />;
}
|
||||||
|
|
||||||
|
/**
 * Replace `[citation:...]` tokens found in string children with citation
 * badges, leaving every other kind of child untouched.
 *
 * Intended for markdown `components` overrides that receive `children` from
 * the renderer:
 * - a single string is parsed and, if it contains citations, returned as an
 *   array of text and badge nodes;
 * - an array is mapped child by child; string children that contain
 *   citations are wrapped in a keyed `<span>`, all other children pass
 *   through unchanged;
 * - anything else is returned as-is.
 *
 * @param children - The node(s) handed to the component override.
 * @param urlMap - Placeholder-to-URL map forwarded to `parseTextWithCitations`.
 * @returns `children` with citation tokens swapped for badge elements, or the
 * original value when nothing needed replacing.
 */
export function processChildrenWithCitations(
	children: ReactNode,
	urlMap: CitationUrlMap
): ReactNode {
	// One running counter per call keeps badge keys unique across all string
	// children of the same parent.
	let nextOrdinal = 0;

	// Parse `text`; returns null when it is plain text with no citation in it,
	// otherwise the mixed list of text fragments and badge elements.
	const toCitationNodes = (text: string): ReactNode[] | null => {
		const segments = parseTextWithCitations(text, urlMap);
		const isPlainText = segments.length === 1 && typeof segments[0] === "string";
		if (isPlainText) {
			return null;
		}
		return segments.map((segment) =>
			typeof segment === "string" ? segment : renderCitationToken(segment, nextOrdinal++)
		);
	};

	if (typeof children === "string") {
		return toCitationNodes(children) ?? children;
	}

	if (!Array.isArray(children)) {
		return children;
	}

	return children.map((child, childIndex) => {
		if (typeof child !== "string") {
			return child;
		}
		const nodes = toCitationNodes(child);
		if (nodes === null) {
			return child;
		}
		// Keyed by position rather than by text, so two identical strings in
		// the same parent do not collide.
		return <span key={`citation-seg-${childIndex}`}>{nodes}</span>;
	});
}
|
||||||
|
|
@ -32,7 +32,7 @@ export function DocumentViewer({ title, content, trigger }: DocumentViewerProps)
|
||||||
<DialogTitle>{title}</DialogTitle>
|
<DialogTitle>{title}</DialogTitle>
|
||||||
</DialogHeader>
|
</DialogHeader>
|
||||||
<div className="mt-4">
|
<div className="mt-4">
|
||||||
<MarkdownViewer content={content} />
|
<MarkdownViewer content={content} enableCitations />
|
||||||
</div>
|
</div>
|
||||||
</DialogContent>
|
</DialogContent>
|
||||||
</Dialog>
|
</Dialog>
|
||||||
|
|
|
||||||
|
|
@ -652,7 +652,7 @@ export function EditorPanelContent({
|
||||||
// Plate is heavy on multi-MB docs.
|
// Plate is heavy on multi-MB docs.
|
||||||
<div className="h-full overflow-y-auto px-5 py-4">
|
<div className="h-full overflow-y-auto px-5 py-4">
|
||||||
{largeDocAlert}
|
{largeDocAlert}
|
||||||
<MarkdownViewer content={editorDoc.source_markdown} />
|
<MarkdownViewer content={editorDoc.source_markdown} enableCitations />
|
||||||
</div>
|
</div>
|
||||||
) : renderInPlateEditor ? (
|
) : renderInPlateEditor ? (
|
||||||
// Editable doc (FILE/NOTE) — Plate editing UX.
|
// Editable doc (FILE/NOTE) — Plate editing UX.
|
||||||
|
|
@ -670,12 +670,17 @@ export function EditorPanelContent({
|
||||||
reserveToolbarSpace
|
reserveToolbarSpace
|
||||||
defaultEditing={isEditing}
|
defaultEditing={isEditing}
|
||||||
className="**:[[role=toolbar]]:bg-sidebar!"
|
className="**:[[role=toolbar]]:bg-sidebar!"
|
||||||
|
// Render `[citation:N]` badges in view mode only.
|
||||||
|
// Edit mode keeps raw text so the user can edit/delete
|
||||||
|
// tokens directly. `local_file` never reaches this branch
|
||||||
|
// (handled by the source_code editor above).
|
||||||
|
enableCitations={!isEditing && !isLocalFileMode}
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
) : (
|
) : (
|
||||||
<div className="h-full overflow-y-auto px-5 py-4">
|
<div className="h-full overflow-y-auto px-5 py-4">
|
||||||
<MarkdownViewer content={editorDoc.source_markdown} />
|
<MarkdownViewer content={editorDoc.source_markdown} enableCitations />
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
|
|
|
||||||
|
|
@ -8,9 +8,11 @@ import { useEffect, useMemo, useRef } from "react";
|
||||||
import remarkGfm from "remark-gfm";
|
import remarkGfm from "remark-gfm";
|
||||||
import remarkMath from "remark-math";
|
import remarkMath from "remark-math";
|
||||||
import { EditorSaveContext } from "@/components/editor/editor-save-context";
|
import { EditorSaveContext } from "@/components/editor/editor-save-context";
|
||||||
|
import { CitationKit, injectCitationNodes } from "@/components/editor/plugins/citation-kit";
|
||||||
import { type EditorPreset, presetMap } from "@/components/editor/presets";
|
import { type EditorPreset, presetMap } from "@/components/editor/presets";
|
||||||
import { escapeMdxExpressions } from "@/components/editor/utils/escape-mdx";
|
import { escapeMdxExpressions } from "@/components/editor/utils/escape-mdx";
|
||||||
import { Editor, EditorContainer } from "@/components/ui/editor";
|
import { Editor, EditorContainer } from "@/components/ui/editor";
|
||||||
|
import { preprocessCitationMarkdown } from "@/lib/citations/citation-parser";
|
||||||
|
|
||||||
/** Live editor instance returned by `usePlateEditor`. */
|
/** Live editor instance returned by `usePlateEditor`. */
|
||||||
export type PlateEditorInstance = ReturnType<typeof usePlateEditor>;
|
export type PlateEditorInstance = ReturnType<typeof usePlateEditor>;
|
||||||
|
|
@ -65,6 +67,14 @@ export interface PlateEditorProps {
|
||||||
* without modifying the core editor component.
|
* without modifying the core editor component.
|
||||||
*/
|
*/
|
||||||
extraPlugins?: AnyPluginConfig[];
|
extraPlugins?: AnyPluginConfig[];
|
||||||
|
/**
|
||||||
|
* Render `[citation:N]` and `[citation:URL]` tokens in the deserialized
|
||||||
|
* markdown as interactive citation badges/popovers (mirrors chat). Only
|
||||||
|
* meant for read-only views — when true, `onMarkdownChange` is suppressed
|
||||||
|
* because the in-memory tree contains custom inline-void elements that
|
||||||
|
* have no markdown serialize rule.
|
||||||
|
*/
|
||||||
|
enableCitations?: boolean;
|
||||||
}
|
}
|
||||||
|
|
||||||
function PlateEditorContent({
|
function PlateEditorContent({
|
||||||
|
|
@ -103,6 +113,7 @@ export function PlateEditor({
|
||||||
defaultEditing = false,
|
defaultEditing = false,
|
||||||
preset = "full",
|
preset = "full",
|
||||||
extraPlugins = [],
|
extraPlugins = [],
|
||||||
|
enableCitations = false,
|
||||||
}: PlateEditorProps) {
|
}: PlateEditorProps) {
|
||||||
const lastMarkdownRef = useRef(markdown);
|
const lastMarkdownRef = useRef(markdown);
|
||||||
const lastHtmlRef = useRef(html);
|
const lastHtmlRef = useRef(html);
|
||||||
|
|
@ -145,6 +156,8 @@ export function PlateEditor({
|
||||||
...(onSave ? [SaveShortcutPlugin] : []),
|
...(onSave ? [SaveShortcutPlugin] : []),
|
||||||
// Consumer-provided extra plugins
|
// Consumer-provided extra plugins
|
||||||
...extraPlugins,
|
...extraPlugins,
|
||||||
|
// Citation void inline element (read-only document viewer).
|
||||||
|
...(enableCitations ? CitationKit : []),
|
||||||
MarkdownPlugin.configure({
|
MarkdownPlugin.configure({
|
||||||
options: {
|
options: {
|
||||||
remarkPlugins: [remarkGfm, remarkMath, remarkMdx],
|
remarkPlugins: [remarkGfm, remarkMath, remarkMdx],
|
||||||
|
|
@ -154,8 +167,18 @@ export function PlateEditor({
|
||||||
value: html
|
value: html
|
||||||
? (editor) => editor.api.html.deserialize({ element: html }) as Value
|
? (editor) => editor.api.html.deserialize({ element: html }) as Value
|
||||||
: markdown
|
: markdown
|
||||||
? (editor) =>
|
? (editor) => {
|
||||||
editor.getApi(MarkdownPlugin).markdown.deserialize(escapeMdxExpressions(markdown))
|
if (!enableCitations) {
|
||||||
|
return editor
|
||||||
|
.getApi(MarkdownPlugin)
|
||||||
|
.markdown.deserialize(escapeMdxExpressions(markdown));
|
||||||
|
}
|
||||||
|
const { content: rewritten, urlMap } = preprocessCitationMarkdown(markdown);
|
||||||
|
const value = editor
|
||||||
|
.getApi(MarkdownPlugin)
|
||||||
|
.markdown.deserialize(escapeMdxExpressions(rewritten));
|
||||||
|
return injectCitationNodes(value as Descendant[], urlMap) as Value;
|
||||||
|
}
|
||||||
: undefined,
|
: undefined,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
@ -174,13 +197,22 @@ export function PlateEditor({
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (!html && markdown !== undefined && markdown !== lastMarkdownRef.current) {
|
if (!html && markdown !== undefined && markdown !== lastMarkdownRef.current) {
|
||||||
lastMarkdownRef.current = markdown;
|
lastMarkdownRef.current = markdown;
|
||||||
const newValue = editor
|
let newValue: Descendant[];
|
||||||
.getApi(MarkdownPlugin)
|
if (enableCitations) {
|
||||||
.markdown.deserialize(escapeMdxExpressions(markdown));
|
const { content: rewritten, urlMap } = preprocessCitationMarkdown(markdown);
|
||||||
|
const deserialized = editor
|
||||||
|
.getApi(MarkdownPlugin)
|
||||||
|
.markdown.deserialize(escapeMdxExpressions(rewritten)) as Descendant[];
|
||||||
|
newValue = injectCitationNodes(deserialized, urlMap);
|
||||||
|
} else {
|
||||||
|
newValue = editor
|
||||||
|
.getApi(MarkdownPlugin)
|
||||||
|
.markdown.deserialize(escapeMdxExpressions(markdown)) as Descendant[];
|
||||||
|
}
|
||||||
editor.tf.reset();
|
editor.tf.reset();
|
||||||
editor.tf.setValue(newValue);
|
editor.tf.setValue(newValue as Value);
|
||||||
}
|
}
|
||||||
}, [html, markdown, editor]);
|
}, [html, markdown, editor, enableCitations]);
|
||||||
|
|
||||||
// When not forced read-only, the user can toggle between editing/viewing.
|
// When not forced read-only, the user can toggle between editing/viewing.
|
||||||
const canToggleMode = !readOnly && allowModeToggle;
|
const canToggleMode = !readOnly && allowModeToggle;
|
||||||
|
|
@ -205,6 +237,16 @@ export function PlateEditor({
|
||||||
// (initialized to true via usePlateEditor, toggled via ModeToolbarButton).
|
// (initialized to true via usePlateEditor, toggled via ModeToolbarButton).
|
||||||
{...(readOnly ? { readOnly: true } : {})}
|
{...(readOnly ? { readOnly: true } : {})}
|
||||||
onChange={({ value }) => {
|
onChange={({ value }) => {
|
||||||
|
// View-only citation mode: skip serialization. The custom
|
||||||
|
// `citation` inline-void element has no markdown serialize
|
||||||
|
// rule, so emitting changes here would overwrite
|
||||||
|
// `lastMarkdownRef.current` (and downstream copy-to-clipboard
|
||||||
|
// state in EditorPanelContent) with a tree that loses every
|
||||||
|
// citation token. `enableCitations` is only ever set in
|
||||||
|
// read-only paths, so user input cannot reach this branch
|
||||||
|
// in practice — the guard exists for the initial Plate
|
||||||
|
// normalize emit.
|
||||||
|
if (enableCitations) return;
|
||||||
if (onHtmlChange && html) {
|
if (onHtmlChange && html) {
|
||||||
const serialized = slateToHtml(value as Descendant[]);
|
const serialized = slateToHtml(value as Descendant[]);
|
||||||
onHtmlChange(serialized);
|
onHtmlChange(serialized);
|
||||||
|
|
|
||||||
222
surfsense_web/components/editor/plugins/citation-kit.tsx
Normal file
222
surfsense_web/components/editor/plugins/citation-kit.tsx
Normal file
|
|
@ -0,0 +1,222 @@
|
||||||
|
"use client";
|
||||||
|
|
||||||
|
import { type FC } from "react";
|
||||||
|
import { KEYS, type Descendant } from "platejs";
|
||||||
|
import { createPlatePlugin, type PlateElementProps } from "platejs/react";
|
||||||
|
import { InlineCitation, UrlCitation } from "@/components/assistant-ui/inline-citation";
|
||||||
|
import {
|
||||||
|
CITATION_REGEX,
|
||||||
|
type CitationUrlMap,
|
||||||
|
parseTextWithCitations,
|
||||||
|
} from "@/lib/citations/citation-parser";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Plate inline-void node modeling a single `[citation:...]` reference.
|
||||||
|
*
|
||||||
|
* Modeled after the existing `MentionPlugin` pattern in
|
||||||
|
* `inline-mention-editor.tsx` — the only confirmed pattern in this repo
|
||||||
|
* for non-text inline UI. Inline-void elements satisfy Slate's invariant
|
||||||
|
* that the editor renders both atomic widgets and surrounding text
|
||||||
|
* cleanly without breaking selection / caret semantics.
|
||||||
|
*/
|
||||||
|
export type CitationElementNode = {
|
||||||
|
type: "citation";
|
||||||
|
kind: "chunk" | "doc" | "url";
|
||||||
|
chunkId?: number;
|
||||||
|
url?: string;
|
||||||
|
/** Original `[citation:...]` substring for traceability/debugging. */
|
||||||
|
rawText: string;
|
||||||
|
children: [{ text: "" }];
|
||||||
|
};
|
||||||
|
|
||||||
|
const CITATION_TYPE = "citation";
|
||||||
|
|
||||||
|
/**
 * Renderer for the `citation` inline-void element.
 *
 * URL citations (with a non-empty `url`) render a `UrlCitation`; otherwise,
 * when a `chunkId` is present, an `InlineCitation` is rendered and flagged as
 * a docs chunk when `kind === "doc"`. Anything else renders no badge. The
 * badge lives in a non-editable span and the `children` Slate passes in are
 * rendered after it.
 */
const CitationElement: FC<PlateElementProps<CitationElementNode>> = (props) => {
	const { attributes, children, element } = props;

	// Pick the badge with guard clauses instead of a nested ternary.
	const renderBadge = () => {
		if (element.kind === "url" && element.url) {
			return <UrlCitation url={element.url} />;
		}
		if (element.chunkId !== undefined) {
			return <InlineCitation chunkId={element.chunkId} isDocsChunk={element.kind === "doc"} />;
		}
		return null;
	};

	return (
		<span {...attributes} className="inline-flex align-baseline">
			<span contentEditable={false}>{renderBadge()}</span>
			{children}
		</span>
	);
};
|
||||||
|
|
||||||
|
const CitationPlugin = createPlatePlugin({
|
||||||
|
key: CITATION_TYPE,
|
||||||
|
node: {
|
||||||
|
isElement: true,
|
||||||
|
isInline: true,
|
||||||
|
isVoid: true,
|
||||||
|
type: CITATION_TYPE,
|
||||||
|
component: CitationElement,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
/** Plugin kit shape used elsewhere in the editor. */
|
||||||
|
export const CitationKit = [CitationPlugin];
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Slate value transform — runs after MarkdownPlugin.deserialize
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
// Structural shapes used by the value transform. We cannot use Plate's
|
||||||
|
// generic Element / Text type predicates directly because `Descendant` is a
|
||||||
|
// constrained union and our predicates would over-narrow. Casting through
|
||||||
|
// these row types keeps the walker readable without fighting the types.
|
||||||
|
type SlateText = { text: string } & Record<string, unknown>;
|
||||||
|
type SlateElement = { type?: string; children: Descendant[] } & Record<string, unknown>;
|
||||||
|
|
||||||
|
/** Returns `true` when `node` has a string `text` property (i.e. looks like a text leaf). */
function isText(node: Descendant): boolean {
	const candidate = node as { text?: unknown };
	return typeof candidate.text === "string";
}
|
||||||
|
|
||||||
|
/** Reinterprets `node` as a `SlateText`. Pure cast — performs no runtime check. */
function asText(node: Descendant): SlateText {
	const textNode = node as unknown as SlateText;
	return textNode;
}
|
||||||
|
|
||||||
|
/** Reinterprets `node` as a `SlateElement`. Pure cast — performs no runtime check. */
function asElement(node: Descendant): SlateElement {
	const elementNode = node as unknown as SlateElement;
	return elementNode;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Element types whose subtrees we MUST NOT inject citation void elements
|
||||||
|
* into. Each rationale documented in the citation plan:
|
||||||
|
* - `KEYS.codeBlock` / `code_line` — Plate's schema rejects inline elements
|
||||||
|
* inside code containers; the user expects literal text inside code.
|
||||||
|
* - `KEYS.link` — `<button>` inside `<a>` is invalid HTML and the link
|
||||||
|
* swallows the citation click. Mirrors the `<a>` skip in
|
||||||
|
* `MarkdownViewer`.
|
||||||
|
*/
|
||||||
|
const SKIP_SUBTREE_TYPES = new Set<string>([
|
||||||
|
KEYS.codeBlock,
|
||||||
|
"code_line",
|
||||||
|
KEYS.link,
|
||||||
|
]);
|
||||||
|
|
||||||
|
/**
 * Return every property of a text node except `text` itself, so the same
 * formatting (bold/italic/etc.) can be re-applied to the fragments produced
 * when the node is split around a citation.
 */
function copyMarks(textNode: SlateText): Record<string, unknown> {
	// Shallow-copy first so the caller's node is never mutated.
	const marks: Record<string, unknown> = { ...textNode };
	delete marks.text;
	return marks;
}
|
||||||
|
|
||||||
|
/**
 * Build the inline-void `citation` element for one parsed citation token.
 *
 * @param rawText Original `[citation:...]` literal, stored on the element.
 * @param segment Parsed token: either a URL citation or a chunk citation.
 * @returns A citation element with a single empty text child.
 */
function makeCitationElement(
	rawText: string,
	segment: { kind: "url"; url: string } | { kind: "chunk"; chunkId: number; isDocsChunk: boolean }
): CitationElementNode {
	return segment.kind === "url"
		? {
				type: CITATION_TYPE,
				kind: "url",
				url: segment.url,
				rawText,
				children: [{ text: "" }],
			}
		: {
				type: CITATION_TYPE,
				// Docs chunks are distinguished from regular chunks by `kind`.
				kind: segment.isDocsChunk ? "doc" : "chunk",
				chunkId: segment.chunkId,
				rawText,
				children: [{ text: "" }],
			};
}
|
||||||
|
|
||||||
|
/**
 * Locate every `[citation:...]` literal in `text`, in source order.
 *
 * Each entry carries the matched substring and its start offset so the
 * caller can slice the surrounding plain text and attribute `rawText` to the
 * exact literal that produced a citation element.
 */
function findCitationMatches(text: string): Array<{ raw: string; start: number }> {
	// `matchAll` iterates a clone of the regex seeded with its current
	// `lastIndex`; reset it so a stale offset on the shared global regex can
	// never skip leading matches.
	CITATION_REGEX.lastIndex = 0;
	return Array.from(text.matchAll(CITATION_REGEX), (match) => ({
		raw: match[0],
		start: match.index ?? 0,
	}));
}

/**
 * Split one text leaf into text fragments and `citation` inline-void
 * elements.
 *
 * - Marks on the original leaf are copied onto every text fragment.
 * - Every citation element is surrounded by text nodes (possibly empty) so
 *   the void always has text siblings.
 * - A single literal may yield several tokens (`[citation:1,2]`); each
 *   resulting element records that same literal as its `rawText`.
 * - A literal that yields no token (e.g. an unknown `urlciteN` placeholder)
 *   is kept verbatim in the surrounding text instead of being dropped.
 * - Leaves carrying the inline `code` mark are returned untouched so
 *   citation-shaped strings inside inline code stay literal, matching the
 *   handling of code blocks.
 *
 * @param node Text leaf to transform.
 * @param urlMap Placeholder-to-URL map from `preprocessCitationMarkdown`.
 * @returns The original node in a one-element array when nothing was
 *   injected, otherwise the interleaved text/citation nodes.
 */
function transformTextNode(node: SlateText, urlMap: CitationUrlMap): Descendant[] {
	const unchanged = [node as unknown as Descendant];
	if (node[KEYS.code]) {
		return unchanged;
	}

	// Collect all matches up front: `parseTextWithCitations` below uses the
	// same shared regex, so we must not interleave our own scan with it.
	const matches = findCitationMatches(node.text);
	if (matches.length === 0) {
		return unchanged;
	}

	const marks = copyMarks(node);
	const makeText = (text: string): Descendant => ({ ...marks, text }) as unknown as Descendant;

	const out: Descendant[] = [];
	let cursor = 0;

	for (const { raw, start } of matches) {
		let emitted = false;
		// Parse the literal on its own so every token it yields is tied to
		// exactly this literal (one match can produce 0..n tokens).
		for (const segment of parseTextWithCitations(raw, urlMap)) {
			if (typeof segment === "string") continue;
			// Text before the first token of this match; an empty separator
			// between consecutive tokens of the same match.
			out.push(makeText(emitted ? "" : node.text.slice(cursor, start)));
			out.push(makeCitationElement(raw, segment) as unknown as Descendant);
			emitted = true;
		}
		// Only consume the literal when it actually became an element;
		// otherwise it remains part of the next plain-text slice.
		if (emitted) {
			cursor = start + raw.length;
		}
	}

	if (out.length === 0) {
		return unchanged;
	}

	// Trailing text (possibly empty) so the last void has a text sibling.
	out.push(makeText(node.text.slice(cursor)));
	return out;
}
|
||||||
|
|
||||||
|
/**
 * Recursively rewrite a list of Slate nodes: text leaves go through
 * `transformTextNode`, elements whose type is in `SKIP_SUBTREE_TYPES` are
 * passed through as-is, and every other element is shallow-copied with its
 * children transformed.
 */
function transformChildren(children: Descendant[], urlMap: CitationUrlMap): Descendant[] {
	return children.flatMap((child): Descendant[] => {
		if (isText(child)) {
			return transformTextNode(asText(child), urlMap);
		}

		const element = asElement(child);
		const type = (element.type ?? "") as string;
		// Skipped subtrees keep the original node reference untouched.
		if (type !== "" && SKIP_SUBTREE_TYPES.has(type)) {
			return [child];
		}

		const rebuilt = {
			...element,
			children: transformChildren(element.children, urlMap),
		} as unknown as Descendant;
		return [rebuilt];
	});
}
|
||||||
|
|
||||||
|
/**
 * Public entry point of the citation value transform.
 *
 * Takes a deserialized Slate value and returns a new top-level array in
 * which `[citation:...]` substrings of text leaves are replaced by
 * `citation` inline-void elements. `urlMap` resolves the `urlciteN`
 * placeholders created by `preprocessCitationMarkdown`.
 *
 * Elements whose type is listed in `SKIP_SUBTREE_TYPES` (`code_block`,
 * `code_line`, `link`) are passed through unchanged.
 */
export function injectCitationNodes(value: Descendant[], urlMap: CitationUrlMap): Descendant[] {
	const transformed = transformChildren(value, urlMap);
	return transformed;
}
|
||||||
|
|
@ -7,7 +7,7 @@
|
||||||
// break the MDX parser. This module sanitises them before deserialization.
|
// break the MDX parser. This module sanitises them before deserialization.
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
const FENCED_OR_INLINE_CODE = /(```[\s\S]*?```|`[^`\n]+`)/g;
|
import { FENCED_OR_INLINE_CODE } from "@/lib/markdown/code-regions";
|
||||||
|
|
||||||
// Strip HTML comments that MDX cannot parse.
|
// Strip HTML comments that MDX cannot parse.
|
||||||
// PDF converters emit <!-- PageHeader="..." -->, <!-- PageBreak -->, etc.
|
// PDF converters emit <!-- PageHeader="..." -->, <!-- PageBreak -->, etc.
|
||||||
|
|
|
||||||
|
|
@ -316,10 +316,10 @@ export function DocumentTabContent({ documentId, searchSpaceId, title }: Documen
|
||||||
</Button>
|
</Button>
|
||||||
</AlertDescription>
|
</AlertDescription>
|
||||||
</Alert>
|
</Alert>
|
||||||
<MarkdownViewer content={doc.source_markdown} />
|
<MarkdownViewer content={doc.source_markdown} enableCitations />
|
||||||
</>
|
</>
|
||||||
) : (
|
) : (
|
||||||
<MarkdownViewer content={doc.source_markdown} />
|
<MarkdownViewer content={doc.source_markdown} enableCitations />
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,12 @@ import { createMathPlugin } from "@streamdown/math";
|
||||||
import { Streamdown, type StreamdownProps } from "streamdown";
|
import { Streamdown, type StreamdownProps } from "streamdown";
|
||||||
import "katex/dist/katex.min.css";
|
import "katex/dist/katex.min.css";
|
||||||
import Image from "next/image";
|
import Image from "next/image";
|
||||||
|
import { useMemo } from "react";
|
||||||
|
import { processChildrenWithCitations } from "@/components/citations/citation-renderer";
|
||||||
|
import {
|
||||||
|
type CitationUrlMap,
|
||||||
|
preprocessCitationMarkdown,
|
||||||
|
} from "@/lib/citations/citation-parser";
|
||||||
import { cn } from "@/lib/utils";
|
import { cn } from "@/lib/utils";
|
||||||
|
|
||||||
const code = createCodePlugin({
|
const code = createCodePlugin({
|
||||||
|
|
@ -21,8 +27,21 @@ interface MarkdownViewerProps {
|
||||||
content: string;
|
content: string;
|
||||||
className?: string;
|
className?: string;
|
||||||
maxLength?: number;
|
maxLength?: number;
|
||||||
|
/**
|
||||||
|
* When true, render `[citation:N]` / `[citation:URL]` tokens as the
|
||||||
|
* interactive citation badges/popovers used in chat. Default `false`
|
||||||
|
* so callers that don't need citations are unchanged.
|
||||||
|
*
|
||||||
|
* Note: we deliberately do NOT override `<a>` to inject citations into
|
||||||
|
* link text — that would produce `<button>` inside `<a>` (invalid
|
||||||
|
* HTML). A `[citation:N]` token literally placed inside markdown link
|
||||||
|
* text stays as raw text.
|
||||||
|
*/
|
||||||
|
enableCitations?: boolean;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const EMPTY_URL_MAP: CitationUrlMap = new Map();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* If the entire content is wrapped in a single ```markdown or ```md
|
* If the entire content is wrapped in a single ```markdown or ```md
|
||||||
* code fence, strip the fence so the inner markdown renders properly.
|
* code fence, strip the fence so the inner markdown renders properly.
|
||||||
|
|
@ -85,14 +104,45 @@ function convertLatexDelimiters(content: string): string {
|
||||||
return content;
|
return content;
|
||||||
}
|
}
|
||||||
|
|
||||||
export function MarkdownViewer({ content, className, maxLength }: MarkdownViewerProps) {
|
export function MarkdownViewer({
|
||||||
|
content,
|
||||||
|
className,
|
||||||
|
maxLength,
|
||||||
|
enableCitations = false,
|
||||||
|
}: MarkdownViewerProps) {
|
||||||
const isTruncated = maxLength != null && content.length > maxLength;
|
const isTruncated = maxLength != null && content.length > maxLength;
|
||||||
const displayContent = isTruncated ? content.slice(0, maxLength) : content;
|
const displayContent = isTruncated ? content.slice(0, maxLength) : content;
|
||||||
const processedContent = convertLatexDelimiters(stripOuterMarkdownFence(displayContent));
|
|
||||||
|
// Preprocess for URL placeholders BEFORE LaTeX so GFM autolinks don't
|
||||||
|
// split `[citation:https://…]` apart. The preprocess is code-fence
|
||||||
|
// aware so citations inside fenced code stay literal.
|
||||||
|
const { processedContent, urlMap } = useMemo(() => {
|
||||||
|
const stripped = stripOuterMarkdownFence(displayContent);
|
||||||
|
if (!enableCitations) {
|
||||||
|
return {
|
||||||
|
processedContent: convertLatexDelimiters(stripped),
|
||||||
|
urlMap: EMPTY_URL_MAP,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
const { content: rewritten, urlMap: map } = preprocessCitationMarkdown(stripped);
|
||||||
|
return {
|
||||||
|
processedContent: convertLatexDelimiters(rewritten),
|
||||||
|
urlMap: map,
|
||||||
|
};
|
||||||
|
}, [displayContent, enableCitations]);
|
||||||
|
|
||||||
|
// Phrasing/block renderers wrap their string children through the
|
||||||
|
// citation renderer when `enableCitations` is on. We deliberately do
|
||||||
|
// NOT override `<a>` (would produce <button> inside <a>) and we do
|
||||||
|
// NOT touch the inline/fenced `code` paths (citations stay literal
|
||||||
|
// inside code, matching markdown-text.tsx behavior).
|
||||||
|
const wrap = (children: React.ReactNode): React.ReactNode =>
|
||||||
|
enableCitations ? processChildrenWithCitations(children, urlMap) : children;
|
||||||
|
|
||||||
const components: StreamdownProps["components"] = {
|
const components: StreamdownProps["components"] = {
|
||||||
p: ({ children, ...props }) => (
|
p: ({ children, ...props }) => (
|
||||||
<p className="my-2" {...props}>
|
<p className="my-2" {...props}>
|
||||||
{children}
|
{wrap(children)}
|
||||||
</p>
|
</p>
|
||||||
),
|
),
|
||||||
a: ({ children, ...props }) => (
|
a: ({ children, ...props }) => (
|
||||||
|
|
@ -105,31 +155,49 @@ export function MarkdownViewer({ content, className, maxLength }: MarkdownViewer
|
||||||
{children}
|
{children}
|
||||||
</a>
|
</a>
|
||||||
),
|
),
|
||||||
li: ({ children, ...props }) => <li {...props}>{children}</li>,
|
li: ({ children, ...props }) => <li {...props}>{wrap(children)}</li>,
|
||||||
ul: ({ ...props }) => <ul className="list-disc pl-5 my-2" {...props} />,
|
ul: ({ ...props }) => <ul className="list-disc pl-5 my-2" {...props} />,
|
||||||
ol: ({ ...props }) => <ol className="list-decimal pl-5 my-2" {...props} />,
|
ol: ({ ...props }) => <ol className="list-decimal pl-5 my-2" {...props} />,
|
||||||
h1: ({ children, ...props }) => (
|
h1: ({ children, ...props }) => (
|
||||||
<h1 className="text-2xl font-bold mt-6 mb-2" {...props}>
|
<h1 className="text-2xl font-bold mt-6 mb-2" {...props}>
|
||||||
{children}
|
{wrap(children)}
|
||||||
</h1>
|
</h1>
|
||||||
),
|
),
|
||||||
h2: ({ children, ...props }) => (
|
h2: ({ children, ...props }) => (
|
||||||
<h2 className="text-xl font-bold mt-5 mb-2" {...props}>
|
<h2 className="text-xl font-bold mt-5 mb-2" {...props}>
|
||||||
{children}
|
{wrap(children)}
|
||||||
</h2>
|
</h2>
|
||||||
),
|
),
|
||||||
h3: ({ children, ...props }) => (
|
h3: ({ children, ...props }) => (
|
||||||
<h3 className="text-lg font-bold mt-4 mb-2" {...props}>
|
<h3 className="text-lg font-bold mt-4 mb-2" {...props}>
|
||||||
{children}
|
{wrap(children)}
|
||||||
</h3>
|
</h3>
|
||||||
),
|
),
|
||||||
h4: ({ children, ...props }) => (
|
h4: ({ children, ...props }) => (
|
||||||
<h4 className="text-base font-bold mt-3 mb-1" {...props}>
|
<h4 className="text-base font-bold mt-3 mb-1" {...props}>
|
||||||
{children}
|
{wrap(children)}
|
||||||
</h4>
|
</h4>
|
||||||
),
|
),
|
||||||
blockquote: ({ ...props }) => (
|
h5: ({ children, ...props }) => (
|
||||||
<blockquote className="border-l-4 border-muted pl-4 italic my-2" {...props} />
|
<h5 className="text-sm font-bold mt-3 mb-1" {...props}>
|
||||||
|
{wrap(children)}
|
||||||
|
</h5>
|
||||||
|
),
|
||||||
|
h6: ({ children, ...props }) => (
|
||||||
|
<h6 className="text-xs font-bold mt-3 mb-1" {...props}>
|
||||||
|
{wrap(children)}
|
||||||
|
</h6>
|
||||||
|
),
|
||||||
|
strong: ({ children, ...props }) => (
|
||||||
|
<strong className="font-semibold" {...props}>
|
||||||
|
{wrap(children)}
|
||||||
|
</strong>
|
||||||
|
),
|
||||||
|
em: ({ children, ...props }) => <em {...props}>{wrap(children)}</em>,
|
||||||
|
blockquote: ({ children, ...props }) => (
|
||||||
|
<blockquote className="border-l-4 border-muted pl-4 italic my-2" {...props}>
|
||||||
|
{wrap(children)}
|
||||||
|
</blockquote>
|
||||||
),
|
),
|
||||||
hr: ({ ...props }) => <hr className="my-4 border-muted" {...props} />,
|
hr: ({ ...props }) => <hr className="my-4 border-muted" {...props} />,
|
||||||
img: ({ src, alt, width: _w, height: _h, ...props }) => {
|
img: ({ src, alt, width: _w, height: _h, ...props }) => {
|
||||||
|
|
@ -163,17 +231,21 @@ export function MarkdownViewer({ content, className, maxLength }: MarkdownViewer
|
||||||
<table className="w-full divide-y divide-border" {...props} />
|
<table className="w-full divide-y divide-border" {...props} />
|
||||||
</div>
|
</div>
|
||||||
),
|
),
|
||||||
th: ({ ...props }) => (
|
th: ({ children, ...props }) => (
|
||||||
<th
|
<th
|
||||||
className="px-4 py-2.5 text-left text-sm font-semibold text-muted-foreground/80 bg-muted/30 border-r border-border/40 last:border-r-0"
|
className="px-4 py-2.5 text-left text-sm font-semibold text-muted-foreground/80 bg-muted/30 border-r border-border/40 last:border-r-0"
|
||||||
{...props}
|
{...props}
|
||||||
/>
|
>
|
||||||
|
{wrap(children)}
|
||||||
|
</th>
|
||||||
),
|
),
|
||||||
td: ({ ...props }) => (
|
td: ({ children, ...props }) => (
|
||||||
<td
|
<td
|
||||||
className="px-4 py-2.5 text-sm border-t border-r border-border/40 last:border-r-0"
|
className="px-4 py-2.5 text-sm border-t border-r border-border/40 last:border-r-0"
|
||||||
{...props}
|
{...props}
|
||||||
/>
|
>
|
||||||
|
{wrap(children)}
|
||||||
|
</td>
|
||||||
),
|
),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -516,7 +516,7 @@ export function ReportPanelContent({
|
||||||
) : reportContent.content ? (
|
) : reportContent.content ? (
|
||||||
isReadOnly ? (
|
isReadOnly ? (
|
||||||
<div className="h-full overflow-y-auto px-5 py-4">
|
<div className="h-full overflow-y-auto px-5 py-4">
|
||||||
<MarkdownViewer content={reportContent.content} />
|
<MarkdownViewer content={reportContent.content} enableCitations />
|
||||||
</div>
|
</div>
|
||||||
) : (
|
) : (
|
||||||
<PlateEditor
|
<PlateEditor
|
||||||
|
|
@ -531,6 +531,9 @@ export function ReportPanelContent({
|
||||||
reserveToolbarSpace
|
reserveToolbarSpace
|
||||||
defaultEditing={isEditing}
|
defaultEditing={isEditing}
|
||||||
className="[&_[role=toolbar]]:!bg-sidebar"
|
className="[&_[role=toolbar]]:!bg-sidebar"
|
||||||
|
// Show citation badges in view mode; raw `[citation:N]`
|
||||||
|
// text in edit mode so users can edit/delete tokens.
|
||||||
|
enableCitations={!isEditing}
|
||||||
/>
|
/>
|
||||||
)
|
)
|
||||||
) : (
|
) : (
|
||||||
|
|
|
||||||
134
surfsense_web/lib/citations/citation-parser.ts
Normal file
134
surfsense_web/lib/citations/citation-parser.ts
Normal file
|
|
@ -0,0 +1,134 @@
|
||||||
|
// Pure citation parsing for `[citation:...]` tokens emitted by SurfSense
|
||||||
|
// agents. No React imports — consumed by both the React renderer
|
||||||
|
// (markdown surfaces) and the Plate value transform (document viewer).
|
||||||
|
//
|
||||||
|
// The same logic previously lived inline in
|
||||||
|
// `components/assistant-ui/markdown-text.tsx` with module-level mutable
|
||||||
|
// state. This module exposes a per-call URL map so multiple concurrent
|
||||||
|
// renderers / SSR contexts can't race each other.
|
||||||
|
|
||||||
|
import { FENCED_OR_INLINE_CODE } from "@/lib/markdown/code-regions";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Matches `[citation:...]` with numeric IDs (incl. negative, doc- prefix,
|
||||||
|
* comma-separated), URL-based IDs from live web search, or `urlciteN`
|
||||||
|
* placeholders produced by `preprocessCitationMarkdown`.
|
||||||
|
*
|
||||||
|
* Also matches Chinese brackets 【】 and zero-width spaces that LLMs
|
||||||
|
* sometimes emit.
|
||||||
|
*/
|
||||||
|
export const CITATION_REGEX =
|
||||||
|
/[[【]\u200B?citation:\s*(https?:\/\/[^\]】\u200B]+|urlcite\d+|(?:doc-)?-?\d+(?:\s*,\s*(?:doc-)?-?\d+)*)\s*\u200B?[\]】]/g;
|
||||||
|
|
||||||
|
/** A single parsed citation reference. */
|
||||||
|
export type CitationToken =
|
||||||
|
| { kind: "url"; url: string }
|
||||||
|
| { kind: "chunk"; chunkId: number; isDocsChunk: boolean };
|
||||||
|
|
||||||
|
/** Output of `parseTextWithCitations` — interleaved text + citation tokens. */
|
||||||
|
export type ParsedSegment = string | CitationToken;
|
||||||
|
|
||||||
|
/** Per-call URL placeholder map; key is `urlciteN`, value is the original URL. */
|
||||||
|
export type CitationUrlMap = Map<string, string>;
|
||||||
|
|
||||||
|
/** Result of preprocessing raw markdown for downstream parsing. */
|
||||||
|
export interface PreprocessedCitations {
|
||||||
|
/** Markdown with `[citation:URL]` tokens rewritten to `[citation:urlciteN]`. */
|
||||||
|
content: string;
|
||||||
|
/** Lookup table to recover the original URL from each placeholder. */
|
||||||
|
urlMap: CitationUrlMap;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Pattern matching only URL-form citations (used during preprocessing). */
|
||||||
|
const URL_CITATION_REGEX =
|
||||||
|
/[[【]\u200B?citation:\s*(https?:\/\/[^\]】\u200B]+)\s*\u200B?[\]】]/g;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Replace `[citation:URL]` tokens with `[citation:urlciteN]` placeholders so
|
||||||
|
* GFM autolinks don't split the URL out of the brackets during markdown
|
||||||
|
* parsing. Returns both the rewritten content and a map for later lookup.
|
||||||
|
*
|
||||||
|
* Code-fence aware: skips fenced (``` ``` ```) and inline (`` ` ``) code
|
||||||
|
* regions so citation-shaped strings inside example code remain literal.
|
||||||
|
*
|
||||||
|
* Known limitations: `~~~` fences, 4-space indented code, and LaTeX math
|
||||||
|
* blocks are not skipped. Citation tokens inside those regions are rare in
|
||||||
|
* practice; documented in the plan.
|
||||||
|
*/
|
||||||
|
export function preprocessCitationMarkdown(content: string): PreprocessedCitations {
	const urlMap: CitationUrlMap = new Map();

	// Every placeholder key is unique, so the map size doubles as the
	// running placeholder counter.
	const toPlaceholder = (_match: string, url: string): string => {
		const key = `urlcite${urlMap.size}`;
		urlMap.set(key, url.trim());
		return `[citation:${key}]`;
	};

	// The split pattern has one capture group, so code regions land on odd
	// indexes and the text between them on even indexes. Only the even
	// (non-code) pieces are rewritten; code is copied through verbatim.
	let rewritten = "";
	for (const [index, piece] of content.split(FENCED_OR_INLINE_CODE).entries()) {
		const isCodeRegion = index % 2 === 1;
		rewritten += isCodeRegion ? piece : piece.replace(URL_CITATION_REGEX, toPlaceholder);
	}

	return { content: rewritten, urlMap };
}
|
||||||
|
|
||||||
|
/**
 * Parse a string into an array of plain text segments and citation tokens.
 *
 * Pure data — no React. The renderer module is responsible for mapping
 * tokens to JSX. Negative chunk IDs are forwarded as-is so the consumer
 * can decide how to render anonymous documents.
 *
 * A `[citation:...]` match that resolves to no token (for example a
 * `urlciteN` placeholder missing from `urlMap`) is NOT consumed: it stays
 * inside the surrounding text segment so no input text is ever lost.
 *
 * @param text Text to scan.
 * @param urlMap Placeholder-to-URL map from `preprocessCitationMarkdown`.
 * @returns Interleaved text and tokens in source order; `[text]` when the
 *   input contains no resolvable citation.
 */
export function parseTextWithCitations(
	text: string,
	urlMap: CitationUrlMap
): ParsedSegment[] {
	const segments: ParsedSegment[] = [];
	let cursor = 0;

	// `matchAll` works on a clone seeded with the shared regex's current
	// `lastIndex`; reset it so a stale offset cannot skip leading matches.
	CITATION_REGEX.lastIndex = 0;
	for (const match of text.matchAll(CITATION_REGEX)) {
		const tokens = citationTokensFromCapture(match[1], urlMap);
		// Unresolvable match: leave the cursor alone so the literal is
		// emitted as part of the next plain-text slice.
		if (tokens.length === 0) continue;

		const start = match.index ?? 0;
		if (start > cursor) {
			segments.push(text.substring(cursor, start));
		}
		segments.push(...tokens);
		cursor = start + match[0].length;
	}

	if (cursor < text.length) {
		segments.push(text.substring(cursor));
	}

	return segments.length > 0 ? segments : [text];
}

/**
 * Convert the captured payload of a single `[citation:...]` match into
 * tokens: one URL token for a direct URL or a resolvable `urlciteN`
 * placeholder, one chunk token per comma-separated numeric ID, or an empty
 * array when nothing can be resolved.
 */
function citationTokensFromCapture(captured: string, urlMap: CitationUrlMap): CitationToken[] {
	if (captured.startsWith("http://") || captured.startsWith("https://")) {
		return [{ kind: "url", url: captured.trim() }];
	}

	if (captured.startsWith("urlcite")) {
		const url = urlMap.get(captured);
		return url ? [{ kind: "url", url }] : [];
	}

	const tokens: CitationToken[] = [];
	for (const part of captured.split(",")) {
		const rawId = part.trim();
		const isDocsChunk = rawId.startsWith("doc-");
		const chunkId = Number.parseInt(isDocsChunk ? rawId.slice(4) : rawId, 10);
		if (!Number.isNaN(chunkId)) {
			tokens.push({ kind: "chunk", chunkId, isDocsChunk });
		}
	}
	return tokens;
}
|
||||||
|
|
||||||
|
/**
 * Narrows a `ParsedSegment` to `CitationToken`: any segment that is not a
 * plain string is a citation token.
 */
export function isCitationToken(segment: ParsedSegment): segment is CitationToken {
	const isPlainText = typeof segment === "string";
	return !isPlainText;
}
|
||||||
8
surfsense_web/lib/markdown/code-regions.ts
Normal file
8
surfsense_web/lib/markdown/code-regions.ts
Normal file
|
|
@ -0,0 +1,8 @@
|
||||||
|
// Matches fenced (```...```) and inline (`...`) code regions. Used by MDX
|
||||||
|
// escaping and citation preprocessing — single source of truth so future
|
||||||
|
// edits stay in sync.
|
||||||
|
//
|
||||||
|
// String.split() with this capturing pattern places non-code parts at even
|
||||||
|
// indexes and matched code regions at odd indexes — preserve odd-indexed
|
||||||
|
// segments verbatim when transforming markdown.
|
||||||
|
export const FENCED_OR_INLINE_CODE = /(```[\s\S]*?```|`[^`\n]+`)/g;
|
||||||
Loading…
Add table
Add a link
Reference in a new issue