"use client"; import "@assistant-ui/react-markdown/styles/dot.css"; import { type CodeHeaderProps, MarkdownTextPrimitive, unstable_memoizeMarkdownComponents as memoizeMarkdownComponents, useIsMarkdownCodeBlock, } from "@assistant-ui/react-markdown"; import { CheckIcon, CopyIcon } from "lucide-react"; import { type FC, memo, type ReactNode, useState } from "react"; import rehypeKatex from "rehype-katex"; import remarkGfm from "remark-gfm"; import remarkMath from "remark-math"; import "katex/dist/katex.min.css"; import { InlineCitation, UrlCitation } from "@/components/assistant-ui/inline-citation"; import { TooltipIconButton } from "@/components/assistant-ui/tooltip-icon-button"; import { cn } from "@/lib/utils"; // Storage for URL citations replaced during preprocess to avoid GFM autolink interference. // Populated in preprocessMarkdown, consumed in parseTextWithCitations. let _pendingUrlCitations = new Map(); let _urlCiteIdx = 0; /** * Preprocess raw markdown before it reaches the remark/rehype pipeline. * - Replaces URL-based citations with safe placeholders (prevents GFM autolinks) * - Normalises LaTeX delimiters to dollar-sign syntax for remark-math */ function preprocessMarkdown(content: string): string { // Replace URL-based citations with safe placeholders BEFORE markdown parsing. // GFM autolinks would otherwise convert the https://... inside [citation:URL] // into an element, splitting the text and preventing our citation regex // from matching the full pattern. _pendingUrlCitations = new Map(); _urlCiteIdx = 0; content = content.replace( /[[【]\u200B?citation:\s*(https?:\/\/[^\]】\u200B]+)\s*\u200B?[\]】]/g, (_, url) => { const key = `urlcite${_urlCiteIdx++}`; _pendingUrlCitations.set(key, url.trim()); return `[citation:${key}]`; } ); // 1. Block math: \[...\] → $$...$$ content = content.replace(/\\\[([\s\S]*?)\\\]/g, (_, inner) => `$$${inner}$$`); // 2. 
Inline math: \(...\) → $...$ content = content.replace(/\\\(([\s\S]*?)\\\)/g, (_, inner) => `$${inner}$`); // 3. Block: \begin{equation}...\end{equation} → $$...$$ content = content.replace( /\\begin\{equation\}([\s\S]*?)\\end\{equation\}/g, (_, inner) => `$$${inner}$$` ); // 4. Block: \begin{displaymath}...\end{displaymath} → $$...$$ content = content.replace( /\\begin\{displaymath\}([\s\S]*?)\\end\{displaymath\}/g, (_, inner) => `$$${inner}$$` ); // 5. Inline: \begin{math}...\end{math} → $...$ content = content.replace(/\\begin\{math\}([\s\S]*?)\\end\{math\}/g, (_, inner) => `$${inner}$`); // 6. Strip backtick wrapping around math: `$$...$$` → $$...$$ and `$...$` → $...$ content = content.replace(/`(\${1,2})((?:(?!\1).)+)\1`/g, "$1$2$1"); // Ensure markdown headings (## ...) always start on their own line. content = content.replace(/([^\n])(#{1,6}\s)/g, "$1\n\n$2"); return content; } // Matches [citation:...] with numeric IDs (incl. doc- prefix, comma-separated), // URL-based IDs from live web search, or urlciteN placeholders from preprocess. // Also matches Chinese brackets 【】 and handles zero-width spaces that LLM sometimes inserts. const CITATION_REGEX = /[[【]\u200B?citation:\s*(https?:\/\/[^\]】\u200B]+|urlcite\d+|(?:doc-)?\d+(?:\s*,\s*(?:doc-)?\d+)*)\s*\u200B?[\]】]/g; /** * Parses text and replaces [citation:XXX] patterns with citation components. 
 * Supports:
 * - Numeric chunk IDs: [citation:123]
 * - Doc-prefixed IDs: [citation:doc-123]
 * - Comma-separated IDs: [citation:4149, 4150, 4151]
 * - URL-based citations from live search: [citation:https://example.com/page]
 *
 * Plain text between matches is pushed through unchanged. When nothing was
 * pushed at all, the result is a single-element array holding `text`.
 *
 * NOTE(review): the `parts.push(...)` calls for citations carry no argument in
 * this view; the citation elements appear to be missing — confirm against the
 * original file.
 */
function parseTextWithCitations(text: string): ReactNode[] {
	const parts: ReactNode[] = [];
	let lastIndex = 0;
	let match: RegExpExecArray | null;
	// Incremented once per citation (once per ID for comma-separated lists).
	let instanceIndex = 0;

	// CITATION_REGEX is a shared global-flag regex, so its cursor must be rewound
	// before each scan.
	CITATION_REGEX.lastIndex = 0;
	match = CITATION_REGEX.exec(text);
	while (match !== null) {
		// Keep the literal text that sits between the previous match and this one.
		if (match.index > lastIndex) {
			parts.push(text.substring(lastIndex, match.index));
		}

		const captured = match[1];
		if (captured.startsWith("http://") || captured.startsWith("https://")) {
			// Direct URL citation.
			parts.push();
			instanceIndex++;
		} else if (captured.startsWith("urlcite")) {
			// Placeholder key: look the URL up in the module-level map.
			const url = _pendingUrlCitations.get(captured);
			if (url) {
				parts.push();
			}
			// The counter advances even when the lookup found nothing.
			instanceIndex++;
		} else {
			// One or more comma-separated IDs, each optionally prefixed with "doc-".
			const rawIds = captured.split(",").map((s) => s.trim());
			for (const rawId of rawIds) {
				const isDocsChunk = rawId.startsWith("doc-");
				// slice(4) drops the "doc-" prefix before parsing as base-10.
				const chunkId = Number.parseInt(isDocsChunk ? rawId.slice(4) : rawId, 10);
				parts.push( );
				instanceIndex++;
			}
		}

		lastIndex = match.index + match[0].length;
		match = CITATION_REGEX.exec(text);
	}

	// Trailing text after the last citation.
	if (lastIndex < text.length) {
		parts.push(text.substring(lastIndex));
	}

	return parts.length > 0 ? parts : [text];
}

// NOTE(review): the returned markup is empty in this view — confirm against the
// original file.
const MarkdownTextImpl = () => {
	return ( );
};

// Memoized export of MarkdownTextImpl.
export const MarkdownText = memo(MarkdownTextImpl);

/*
 * Header for a code block, receiving `language` and `code`.
 * `onCopy` does nothing when `code` is empty or while `isCopied` is still true;
 * otherwise it hands `code` to `copyToClipboard`.
 */
const CodeHeader: FC = ({ language, code }) => {
	const { isCopied, copyToClipboard } = useCopyToClipboard();
	const onCopy = () => {
		if (!code || isCopied) return;
		copyToClipboard(code);
	};
	return (
{language} {!isCopied && } {isCopied && }
	);
};

/*
 * Hook exposing `isCopied` state and a `copyToClipboard(value)` function.
 * - `copiedDuration` (default 3000) is the delay in milliseconds before
 *   `isCopied` is set back to false.
 * - An empty `value` is ignored.
 * - `isCopied` becomes true only after `navigator.clipboard.writeText` resolves.
 *
 * NOTE(review): a rejected `writeText` promise is not handled here, and the
 * reset timer is never cleared — confirm whether that is intended.
 */
const useCopyToClipboard = ({ copiedDuration = 3000 }: { copiedDuration?: number } = {}) => {
	const [isCopied, setIsCopied] = useState(false);
	const copyToClipboard = (value: string) => {
		if (!value) return;
		navigator.clipboard.writeText(value).then(() => {
			setIsCopied(true);
			setTimeout(() => setIsCopied(false), copiedDuration);
		});
	};
	return { isCopied, copyToClipboard };
};

/**
 * Helper to process children and replace citation patterns with components.
 *
 * - A string child is run through parseTextWithCitations; if the result is a
 *   single string (no citation found) the original string is returned as-is.
 * - An array of children is mapped element by element, applying the same rule
 *   to each string element and passing every other element through untouched.
 * - Any other value is returned unchanged.
 *
 * NOTE(review): the wrapping markup around `{parsed}` looks incomplete in this
 * view — confirm against the original file.
 */
function processChildrenWithCitations(children: ReactNode): ReactNode {
	if (typeof children === "string") {
		const parsed = parseTextWithCitations(children);
		return parsed.length === 1 && typeof parsed[0] === "string" ? children : <>{parsed};
	}
	if (Array.isArray(children)) {
		return children.map((child, index) => {
			if (typeof child === "string") {
				const parsed = parseTextWithCitations(child);
				return parsed.length === 1 && typeof parsed[0] === "string" ? (
					child
				) : (
					{parsed}
				);
			}
			return child;
		});
	}
	return children;
}

const defaultComponents = memoizeMarkdownComponents({
	h1: ({ className, children, ...props }) => (

{processChildrenWithCitations(children)}

), h2: ({ className, children, ...props }) => (

{processChildrenWithCitations(children)}

), h3: ({ className, children, ...props }) => (

{processChildrenWithCitations(children)}

), h4: ({ className, children, ...props }) => (

{processChildrenWithCitations(children)}

), h5: ({ className, children, ...props }) => (
{processChildrenWithCitations(children)}
), h6: ({ className, children, ...props }) => (
{processChildrenWithCitations(children)}
), p: ({ className, children, ...props }) => (

{processChildrenWithCitations(children)}

), a: ({ className, children, ...props }) => (
{processChildrenWithCitations(children)} ), blockquote: ({ className, children, ...props }) => (
{processChildrenWithCitations(children)}
), ul: ({ className, ...props }) => (