mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-04 13:22:41 +02:00
feat(markdown): enable citation rendering in MarkdownViewer and related components
- Added `enableCitations` prop to `MarkdownViewer` to support interactive citation badges.
- Updated instances of `MarkdownViewer` across various components to utilize the new citation feature.
- Enhanced citation processing in `PlateEditor` for read-only views, ensuring citations are rendered correctly without affecting markdown serialization.
- Refactored citation handling in `InlineCitation` and `MarkdownText` to improve citation context management.
This commit is contained in:
parent
d335e96ec2
commit
7aeb8bb0a8
14 changed files with 809 additions and 260 deletions
|
|
@ -12,15 +12,26 @@ import { ExternalLinkIcon } from "lucide-react";
|
|||
import dynamic from "next/dynamic";
|
||||
import { useParams } from "next/navigation";
|
||||
import { useTheme } from "next-themes";
|
||||
import { memo, type ReactNode } from "react";
|
||||
import {
|
||||
createContext,
|
||||
memo,
|
||||
type ReactNode,
|
||||
useCallback,
|
||||
useContext,
|
||||
useRef,
|
||||
} from "react";
|
||||
import rehypeKatex from "rehype-katex";
|
||||
import remarkGfm from "remark-gfm";
|
||||
import remarkMath from "remark-math";
|
||||
import { openEditorPanelAtom } from "@/atoms/editor/editor-panel.atom";
|
||||
import { ImagePreview, ImageRoot, ImageZoom } from "@/components/assistant-ui/image";
|
||||
import "katex/dist/katex.min.css";
|
||||
import { InlineCitation, UrlCitation } from "@/components/assistant-ui/inline-citation";
|
||||
import { processChildrenWithCitations } from "@/components/citations/citation-renderer";
|
||||
import { Skeleton } from "@/components/ui/skeleton";
|
||||
import {
|
||||
type CitationUrlMap,
|
||||
preprocessCitationMarkdown,
|
||||
} from "@/lib/citations/citation-parser";
|
||||
import {
|
||||
Table,
|
||||
TableBody,
|
||||
|
|
@ -59,31 +70,30 @@ const LazyMarkdownCodeBlock = dynamic(
|
|||
}
|
||||
);
|
||||
|
||||
// Storage for URL citations replaced during preprocess to avoid GFM autolink interference.
|
||||
// Populated in preprocessMarkdown, consumed in parseTextWithCitations.
|
||||
let _pendingUrlCitations = new Map<string, string>();
|
||||
let _urlCiteIdx = 0;
|
||||
// Per-render URL placeholder map propagated to component overrides via
|
||||
// React Context. Replaces the previous module-level `_pendingUrlCitations`
|
||||
// state, which was unsafe under concurrent renders / SSR.
|
||||
type CitationUrlMapRef = { current: CitationUrlMap };
|
||||
const EMPTY_URL_MAP: CitationUrlMap = new Map();
|
||||
const CitationUrlMapContext = createContext<CitationUrlMapRef>({ current: EMPTY_URL_MAP });
|
||||
|
||||
/**
 * Hook returning the URL placeholder map currently held by the ref object
 * that `CitationUrlMapContext` provides.
 *
 * @returns The map stored in the provided ref's `current` field.
 */
function useCitationUrlMap(): CitationUrlMap {
	const mapRef = useContext(CitationUrlMapContext);
	return mapRef.current;
}
|
||||
|
||||
/**
|
||||
* Preprocess raw markdown before it reaches the remark/rehype pipeline.
|
||||
* - Replaces URL-based citations with safe placeholders (prevents GFM autolinks)
|
||||
* - Normalises LaTeX delimiters to dollar-sign syntax for remark-math
|
||||
*/
|
||||
function preprocessMarkdown(content: string): string {
|
||||
function preprocessMarkdown(content: string, urlMapRef: CitationUrlMapRef): string {
|
||||
// Replace URL-based citations with safe placeholders BEFORE markdown parsing.
|
||||
// GFM autolinks would otherwise convert the https://... inside [citation:URL]
|
||||
// into an <a> element, splitting the text and preventing our citation regex
|
||||
// from matching the full pattern.
|
||||
_pendingUrlCitations = new Map();
|
||||
_urlCiteIdx = 0;
|
||||
content = content.replace(
|
||||
/[[【]\u200B?citation:\s*(https?:\/\/[^\]】\u200B]+)\s*\u200B?[\]】]/g,
|
||||
(_, url) => {
|
||||
const key = `urlcite${_urlCiteIdx++}`;
|
||||
_pendingUrlCitations.set(key, url.trim());
|
||||
return `[citation:${key}]`;
|
||||
}
|
||||
);
|
||||
const { content: rewritten, urlMap } = preprocessCitationMarkdown(content);
|
||||
urlMapRef.current = urlMap;
|
||||
content = rewritten;
|
||||
|
||||
// All math forms are normalised to $$...$$ so we can disable single-dollar
|
||||
// inline math in remark-math (otherwise currency like "$3,120.00 and $0.00"
|
||||
|
|
@ -116,113 +126,28 @@ function preprocessMarkdown(content: string): string {
|
|||
return content;
|
||||
}
|
||||
|
||||
// Matches [citation:...] with numeric IDs (incl. negative, doc- prefix, comma-separated),
|
||||
// URL-based IDs from live web search, or urlciteN placeholders from preprocess.
|
||||
// Also matches Chinese brackets 【】 and handles zero-width spaces that LLM sometimes inserts.
|
||||
const CITATION_REGEX =
|
||||
/[[【]\u200B?citation:\s*(https?:\/\/[^\]】\u200B]+|urlcite\d+|(?:doc-)?-?\d+(?:\s*,\s*(?:doc-)?-?\d+)*)\s*\u200B?[\]】]/g;
|
||||
|
||||
/**
 * Splits a text run into plain strings and citation components.
 *
 * Every `[citation:...]` match of `CITATION_REGEX` is replaced as follows:
 * - a literal `http://` / `https://` target becomes a `UrlCitation`;
 * - a `urlciteN` placeholder is looked up in `_pendingUrlCitations` and becomes
 *   a `UrlCitation` when a URL is stored for it (otherwise nothing is emitted
 *   for that match);
 * - anything else is treated as a comma-separated list of chunk IDs, each
 *   optionally prefixed with `doc-`, and yields one `InlineCitation` per ID.
 *
 * Text between matches is kept as plain strings, in order.
 *
 * @param text - The text to scan.
 * @returns The resulting nodes, or `[text]` when no node was produced.
 */
function parseTextWithCitations(text: string): ReactNode[] {
	const nodes: ReactNode[] = [];
	let cursor = 0;
	// Running counter that keeps the React keys of emitted citations unique.
	let seq = 0;

	// The regex is global and shared, so rewind it before scanning.
	CITATION_REGEX.lastIndex = 0;

	for (
		let hit = CITATION_REGEX.exec(text);
		hit !== null;
		hit = CITATION_REGEX.exec(text)
	) {
		const start = hit.index;
		if (start > cursor) {
			nodes.push(text.substring(cursor, start));
		}

		const target = hit[1];
		const isDirectUrl = target.startsWith("http://") || target.startsWith("https://");

		if (isDirectUrl) {
			nodes.push(<UrlCitation key={`citation-url-${seq}`} url={target.trim()} />);
			seq++;
		} else if (target.startsWith("urlcite")) {
			const resolvedUrl = _pendingUrlCitations.get(target);
			if (resolvedUrl) {
				nodes.push(<UrlCitation key={`citation-url-${seq}`} url={resolvedUrl} />);
			}
			// The counter advances even when the placeholder could not be resolved.
			seq++;
		} else {
			for (const piece of target.split(",")) {
				const rawId = piece.trim();
				const isDocsChunk = rawId.startsWith("doc-");
				const numericPart = isDocsChunk ? rawId.slice(4) : rawId;
				const chunkId = Number.parseInt(numericPart, 10);
				nodes.push(
					<InlineCitation
						key={`citation-${isDocsChunk ? "doc-" : ""}${chunkId}-${seq}`}
						chunkId={chunkId}
						isDocsChunk={isDocsChunk}
					/>
				);
				seq++;
			}
		}

		cursor = start + hit[0].length;
	}

	// Trailing text after the last match (or the whole text when nothing matched).
	if (cursor < text.length) {
		nodes.push(text.substring(cursor));
	}

	if (nodes.length > 0) {
		return nodes;
	}
	return [text];
}
|
||||
|
||||
const MarkdownTextImpl = () => {
|
||||
const urlMapRef = useRef<CitationUrlMap>(EMPTY_URL_MAP);
|
||||
const preprocess = useCallback(
|
||||
(content: string) => preprocessMarkdown(content, urlMapRef),
|
||||
[]
|
||||
);
|
||||
return (
|
||||
<MarkdownTextPrimitive
|
||||
smooth={false}
|
||||
remarkPlugins={[remarkGfm, [remarkMath, { singleDollarTextMath: false }]]}
|
||||
rehypePlugins={[rehypeKatex]}
|
||||
className="aui-md"
|
||||
components={defaultComponents}
|
||||
preprocess={preprocessMarkdown}
|
||||
/>
|
||||
<CitationUrlMapContext.Provider value={urlMapRef}>
|
||||
<MarkdownTextPrimitive
|
||||
smooth={false}
|
||||
remarkPlugins={[remarkGfm, [remarkMath, { singleDollarTextMath: false }]]}
|
||||
rehypePlugins={[rehypeKatex]}
|
||||
className="aui-md"
|
||||
components={defaultComponents}
|
||||
preprocess={preprocess}
|
||||
/>
|
||||
</CitationUrlMapContext.Provider>
|
||||
);
|
||||
};
|
||||
|
||||
export const MarkdownText = memo(MarkdownTextImpl);
|
||||
|
||||
/**
 * Replaces citation patterns found in string children with citation components.
 *
 * - A string is run through `parseTextWithCitations`; when the result is a
 *   single plain string, the original string is returned untouched.
 * - For an array, each string element is handled the same way and, when it
 *   produced more than a single plain string, wrapped in a keyed `<span>`;
 *   non-string elements pass through as they are.
 * - Any other value is returned unchanged.
 *
 * @param children - The React children of a markdown element.
 * @returns The children with citation patterns substituted.
 */
function processChildrenWithCitations(children: ReactNode): ReactNode {
	if (typeof children === "string") {
		const parsed = parseTextWithCitations(children);
		return parsed.length === 1 && typeof parsed[0] === "string" ? children : parsed;
	}

	if (Array.isArray(children)) {
		return children.map((child, index) => {
			if (typeof child !== "string") {
				return child;
			}

			const parsed = parseTextWithCitations(child);
			if (parsed.length === 1 && typeof parsed[0] === "string") {
				return child;
			}

			// Key by position rather than by the text itself: two identical string
			// siblings (e.g. the same citation repeated around an inline element)
			// would otherwise end up with the same React key.
			return <span key={`citation-text-${index}`}>{parsed}</span>;
		});
	}

	return children;
}
|
||||
|
||||
function extractDomain(url: string): string {
|
||||
try {
|
||||
const parsed = new URL(url);
|
||||
|
|
@ -322,92 +247,125 @@ function MarkdownImage({ src, alt }: { src?: string; alt?: string }) {
|
|||
}
|
||||
|
||||
const defaultComponents = memoizeMarkdownComponents({
|
||||
h1: ({ className, children, ...props }) => (
|
||||
<h1
|
||||
className={cn(
|
||||
"aui-md-h1 mb-8 scroll-m-20 font-extrabold text-4xl tracking-tight last:mb-0",
|
||||
className
|
||||
)}
|
||||
{...props}
|
||||
>
|
||||
{processChildrenWithCitations(children)}
|
||||
</h1>
|
||||
),
|
||||
h2: ({ className, children, ...props }) => (
|
||||
<h2
|
||||
className={cn(
|
||||
"aui-md-h2 mt-8 mb-4 scroll-m-20 font-semibold text-3xl tracking-tight first:mt-0 last:mb-0",
|
||||
className
|
||||
)}
|
||||
{...props}
|
||||
>
|
||||
{processChildrenWithCitations(children)}
|
||||
</h2>
|
||||
),
|
||||
h3: ({ className, children, ...props }) => (
|
||||
<h3
|
||||
className={cn(
|
||||
"aui-md-h3 mt-6 mb-4 scroll-m-20 font-semibold text-2xl tracking-tight first:mt-0 last:mb-0",
|
||||
className
|
||||
)}
|
||||
{...props}
|
||||
>
|
||||
{processChildrenWithCitations(children)}
|
||||
</h3>
|
||||
),
|
||||
h4: ({ className, children, ...props }) => (
|
||||
<h4
|
||||
className={cn(
|
||||
"aui-md-h4 mt-6 mb-4 scroll-m-20 font-semibold text-xl tracking-tight first:mt-0 last:mb-0",
|
||||
className
|
||||
)}
|
||||
{...props}
|
||||
>
|
||||
{processChildrenWithCitations(children)}
|
||||
</h4>
|
||||
),
|
||||
h5: ({ className, children, ...props }) => (
|
||||
<h5
|
||||
className={cn("aui-md-h5 my-4 font-semibold text-lg first:mt-0 last:mb-0", className)}
|
||||
{...props}
|
||||
>
|
||||
{processChildrenWithCitations(children)}
|
||||
</h5>
|
||||
),
|
||||
h6: ({ className, children, ...props }) => (
|
||||
<h6 className={cn("aui-md-h6 my-4 font-semibold first:mt-0 last:mb-0", className)} {...props}>
|
||||
{processChildrenWithCitations(children)}
|
||||
</h6>
|
||||
),
|
||||
p: ({ className, children, ...props }) => (
|
||||
<p className={cn("aui-md-p mt-5 mb-5 leading-7 first:mt-0 last:mb-0", className)} {...props}>
|
||||
{processChildrenWithCitations(children)}
|
||||
</p>
|
||||
),
|
||||
a: ({ className, children, ...props }) => (
|
||||
<a
|
||||
className={cn("aui-md-a font-medium text-primary underline underline-offset-4", className)}
|
||||
{...props}
|
||||
>
|
||||
{processChildrenWithCitations(children)}
|
||||
</a>
|
||||
),
|
||||
blockquote: ({ className, children, ...props }) => (
|
||||
<blockquote className={cn("aui-md-blockquote border-l-2 pl-6 italic", className)} {...props}>
|
||||
{processChildrenWithCitations(children)}
|
||||
</blockquote>
|
||||
),
|
||||
h1: function H1({ className, children, ...props }) {
|
||||
const urlMap = useCitationUrlMap();
|
||||
return (
|
||||
<h1
|
||||
className={cn(
|
||||
"aui-md-h1 mb-8 scroll-m-20 font-extrabold text-4xl tracking-tight last:mb-0",
|
||||
className
|
||||
)}
|
||||
{...props}
|
||||
>
|
||||
{processChildrenWithCitations(children, urlMap)}
|
||||
</h1>
|
||||
);
|
||||
},
|
||||
h2: function H2({ className, children, ...props }) {
|
||||
const urlMap = useCitationUrlMap();
|
||||
return (
|
||||
<h2
|
||||
className={cn(
|
||||
"aui-md-h2 mt-8 mb-4 scroll-m-20 font-semibold text-3xl tracking-tight first:mt-0 last:mb-0",
|
||||
className
|
||||
)}
|
||||
{...props}
|
||||
>
|
||||
{processChildrenWithCitations(children, urlMap)}
|
||||
</h2>
|
||||
);
|
||||
},
|
||||
h3: function H3({ className, children, ...props }) {
|
||||
const urlMap = useCitationUrlMap();
|
||||
return (
|
||||
<h3
|
||||
className={cn(
|
||||
"aui-md-h3 mt-6 mb-4 scroll-m-20 font-semibold text-2xl tracking-tight first:mt-0 last:mb-0",
|
||||
className
|
||||
)}
|
||||
{...props}
|
||||
>
|
||||
{processChildrenWithCitations(children, urlMap)}
|
||||
</h3>
|
||||
);
|
||||
},
|
||||
h4: function H4({ className, children, ...props }) {
|
||||
const urlMap = useCitationUrlMap();
|
||||
return (
|
||||
<h4
|
||||
className={cn(
|
||||
"aui-md-h4 mt-6 mb-4 scroll-m-20 font-semibold text-xl tracking-tight first:mt-0 last:mb-0",
|
||||
className
|
||||
)}
|
||||
{...props}
|
||||
>
|
||||
{processChildrenWithCitations(children, urlMap)}
|
||||
</h4>
|
||||
);
|
||||
},
|
||||
h5: function H5({ className, children, ...props }) {
|
||||
const urlMap = useCitationUrlMap();
|
||||
return (
|
||||
<h5
|
||||
className={cn("aui-md-h5 my-4 font-semibold text-lg first:mt-0 last:mb-0", className)}
|
||||
{...props}
|
||||
>
|
||||
{processChildrenWithCitations(children, urlMap)}
|
||||
</h5>
|
||||
);
|
||||
},
|
||||
h6: function H6({ className, children, ...props }) {
|
||||
const urlMap = useCitationUrlMap();
|
||||
return (
|
||||
<h6 className={cn("aui-md-h6 my-4 font-semibold first:mt-0 last:mb-0", className)} {...props}>
|
||||
{processChildrenWithCitations(children, urlMap)}
|
||||
</h6>
|
||||
);
|
||||
},
|
||||
p: function P({ className, children, ...props }) {
|
||||
const urlMap = useCitationUrlMap();
|
||||
return (
|
||||
<p className={cn("aui-md-p mt-5 mb-5 leading-7 first:mt-0 last:mb-0", className)} {...props}>
|
||||
{processChildrenWithCitations(children, urlMap)}
|
||||
</p>
|
||||
);
|
||||
},
|
||||
a: function A({ className, children, ...props }) {
|
||||
const urlMap = useCitationUrlMap();
|
||||
return (
|
||||
<a
|
||||
className={cn(
|
||||
"aui-md-a font-medium text-primary underline underline-offset-4",
|
||||
className
|
||||
)}
|
||||
{...props}
|
||||
>
|
||||
{processChildrenWithCitations(children, urlMap)}
|
||||
</a>
|
||||
);
|
||||
},
|
||||
blockquote: function Blockquote({ className, children, ...props }) {
|
||||
const urlMap = useCitationUrlMap();
|
||||
return (
|
||||
<blockquote className={cn("aui-md-blockquote border-l-2 pl-6 italic", className)} {...props}>
|
||||
{processChildrenWithCitations(children, urlMap)}
|
||||
</blockquote>
|
||||
);
|
||||
},
|
||||
ul: ({ className, ...props }) => (
|
||||
<ul className={cn("aui-md-ul my-5 ml-6 list-disc [&>li]:mt-2", className)} {...props} />
|
||||
),
|
||||
ol: ({ className, ...props }) => (
|
||||
<ol className={cn("aui-md-ol my-5 ml-6 list-decimal [&>li]:mt-2", className)} {...props} />
|
||||
),
|
||||
li: ({ className, children, ...props }) => (
|
||||
<li className={cn("aui-md-li", className)} {...props}>
|
||||
{processChildrenWithCitations(children)}
|
||||
</li>
|
||||
),
|
||||
li: function Li({ className, children, ...props }) {
|
||||
const urlMap = useCitationUrlMap();
|
||||
return (
|
||||
<li className={cn("aui-md-li", className)} {...props}>
|
||||
{processChildrenWithCitations(children, urlMap)}
|
||||
</li>
|
||||
);
|
||||
},
|
||||
hr: ({ className, ...props }) => (
|
||||
<hr className={cn("aui-md-hr my-5 border-b", className)} {...props} />
|
||||
),
|
||||
|
|
@ -422,28 +380,34 @@ const defaultComponents = memoizeMarkdownComponents({
|
|||
tbody: ({ className, ...props }) => (
|
||||
<TableBody className={cn("aui-md-tbody", className)} {...props} />
|
||||
),
|
||||
th: ({ className, children, ...props }) => (
|
||||
<TableHead
|
||||
className={cn(
|
||||
"aui-md-th bg-muted/50 whitespace-normal [[align=center]]:text-center [[align=right]]:text-right",
|
||||
className
|
||||
)}
|
||||
{...props}
|
||||
>
|
||||
{processChildrenWithCitations(children)}
|
||||
</TableHead>
|
||||
),
|
||||
td: ({ className, children, ...props }) => (
|
||||
<TableCell
|
||||
className={cn(
|
||||
"aui-md-td whitespace-normal [[align=center]]:text-center [[align=right]]:text-right",
|
||||
className
|
||||
)}
|
||||
{...props}
|
||||
>
|
||||
{processChildrenWithCitations(children)}
|
||||
</TableCell>
|
||||
),
|
||||
th: function Th({ className, children, ...props }) {
|
||||
const urlMap = useCitationUrlMap();
|
||||
return (
|
||||
<TableHead
|
||||
className={cn(
|
||||
"aui-md-th bg-muted/50 whitespace-normal [[align=center]]:text-center [[align=right]]:text-right",
|
||||
className
|
||||
)}
|
||||
{...props}
|
||||
>
|
||||
{processChildrenWithCitations(children, urlMap)}
|
||||
</TableHead>
|
||||
);
|
||||
},
|
||||
td: function Td({ className, children, ...props }) {
|
||||
const urlMap = useCitationUrlMap();
|
||||
return (
|
||||
<TableCell
|
||||
className={cn(
|
||||
"aui-md-td whitespace-normal [[align=center]]:text-center [[align=right]]:text-right",
|
||||
className
|
||||
)}
|
||||
{...props}
|
||||
>
|
||||
{processChildrenWithCitations(children, urlMap)}
|
||||
</TableCell>
|
||||
);
|
||||
},
|
||||
tr: ({ className, ...props }) => <TableRow className={cn("aui-md-tr", className)} {...props} />,
|
||||
sup: ({ className, ...props }) => (
|
||||
<sup className={cn("aui-md-sup [&>a]:text-xs [&>a]:no-underline", className)} {...props} />
|
||||
|
|
@ -552,16 +516,22 @@ const defaultComponents = memoizeMarkdownComponents({
|
|||
/>
|
||||
);
|
||||
},
|
||||
strong: ({ className, children, ...props }) => (
|
||||
<strong className={cn("aui-md-strong font-semibold", className)} {...props}>
|
||||
{processChildrenWithCitations(children)}
|
||||
</strong>
|
||||
),
|
||||
em: ({ className, children, ...props }) => (
|
||||
<em className={cn("aui-md-em", className)} {...props}>
|
||||
{processChildrenWithCitations(children)}
|
||||
</em>
|
||||
),
|
||||
strong: function Strong({ className, children, ...props }) {
|
||||
const urlMap = useCitationUrlMap();
|
||||
return (
|
||||
<strong className={cn("aui-md-strong font-semibold", className)} {...props}>
|
||||
{processChildrenWithCitations(children, urlMap)}
|
||||
</strong>
|
||||
);
|
||||
},
|
||||
em: function Em({ className, children, ...props }) {
|
||||
const urlMap = useCitationUrlMap();
|
||||
return (
|
||||
<em className={cn("aui-md-em", className)} {...props}>
|
||||
{processChildrenWithCitations(children, urlMap)}
|
||||
</em>
|
||||
);
|
||||
},
|
||||
img: ({ src, alt }) => (
|
||||
<MarkdownImage src={typeof src === "string" ? src : undefined} alt={alt} />
|
||||
),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue