/**
 * Normalise LLM-generated markdown so that remark-math can parse the LaTeX in it.
 *
 * Every LaTeX delimiter style is rewritten to dollar-sign syntax:
 *
 *   \[...\]                                  → $$...$$  (block math)
 *   \(...\)                                  → $...$    (inline math)
 *   \begin{equation}...\end{equation}        → $$...$$  (block math)
 *   \begin{displaymath}...\end{displaymath}  → $$...$$  (block math)
 *   \begin{math}...\end{math}                → $...$    (inline math)
 *   `$$...$$` and `$...$`                    → wrapping backticks removed
 *
 * In addition, a heading marker that is glued to the preceding text
 * ("Done.## Next") is pushed onto its own paragraph.
 *
 * Fenced code blocks (``` ... ```) are copied through untouched, including a
 * fence that has not been closed yet. Rewriting inside them can never produce
 * rendered math; it only corrupts the code sample (for example a Python
 * "# comment" would be split off as a heading).
 *
 * Known limitation: a "#" that follows ordinary text and is itself followed by
 * whitespace (e.g. "C# is") is still treated as a glued heading.
 *
 * @param content Raw markdown text.
 * @returns The markdown with math delimiters and glued headings normalised.
 */
function convertLatexDelimiters(content: string): string {
	const asBlock = (_match: string, inner: string): string => `$$${inner}$$`;
	const asInline = (_match: string, inner: string): string => `$${inner}$`;

	const normaliseProse = (text: string, followsFence: boolean): string => {
		let out = text
			// 1. Block math: \[...\] → $$...$$
			.replace(/\\\[([\s\S]*?)\\\]/g, asBlock)
			// 2. Inline math: \(...\) → $...$
			.replace(/\\\(([\s\S]*?)\\\)/g, asInline)
			// 3. Block: \begin{equation}...\end{equation} → $$...$$
			.replace(/\\begin\{equation\}([\s\S]*?)\\end\{equation\}/g, asBlock)
			// 4. Block: \begin{displaymath}...\end{displaymath} → $$...$$
			.replace(/\\begin\{displaymath\}([\s\S]*?)\\end\{displaymath\}/g, asBlock)
			// 5. Inline: \begin{math}...\end{math} → $...$
			.replace(/\\begin\{math\}([\s\S]*?)\\end\{math\}/g, asInline)
			// 6. Strip backtick wrapping around math: `$$...$$` → $$...$$ and `$...$` → $...$
			.replace(/`(\${1,2})((?:(?!\1).)+)\1`/g, "$1$2$1");

		// Ensure markdown headings (## ...) always start on their own line.
		// "#" is excluded from the preceding character so the whole run of
		// hashes is moved together; otherwise "## Title" at the start of a
		// line would be torn apart into "#" and "# Title".
		out = out.replace(/([^\n#])(#{1,6}\s)/g, "$1\n\n$2");

		// A heading glued directly to a closing fence has no preceding
		// character inside this segment, so separate it explicitly.
		if (followsFence && /^#{1,6}\s/.test(out)) {
			out = `\n\n${out}`;
		}
		return out;
	};

	// Splitting on a pattern with a capture group keeps the separators:
	// even indices are prose, odd indices are fenced code blocks. The "|$"
	// alternative lets an unterminated fence run to the end of the input.
	return content
		.split(/(```[\s\S]*?(?:```|$))/)
		.map((segment, index) => (index % 2 === 1 ? segment : normaliseProse(segment, index > 0)))
		.join("");
}
/**
 * Convert all LaTeX delimiter styles to the dollar-sign syntax that
 * remark-math understands, and normalise edge-cases that commonly appear in
 * LLM-generated markdown.
 *
 *   \[...\]                                  → $$ ... $$  (block / display math)
 *   \(...\)                                  → $ ... $    (inline math)
 *   \begin{equation}...\end{equation}        → $$ ... $$  (block math)
 *   \begin{displaymath}...\end{displaymath}  → $$ ... $$  (block math)
 *   \begin{math}...\end{math}                → $ ... $    (inline math)
 *   `$$ … $$` and `$ … $`                    → wrapping backticks removed
 *   same-line $$…$$                          → $ ... $    (inline math, because
 *                                              display math can't live inside
 *                                              table cells)
 *
 * Fenced code blocks (``` ... ```) are copied through untouched, including a
 * fence that has not been closed. Rewriting inside them never yields rendered
 * math; it only corrupts the sample (for example shell "kill $$ && echo $$"
 * or a regex containing "\(" and "\)").
 *
 * @param content Raw markdown text.
 * @returns The markdown with every supported math delimiter normalised.
 */
function convertLatexDelimiters(content: string): string {
	const asBlock = (_match: string, inner: string): string => `$$${inner}$$`;
	const asInline = (_match: string, inner: string): string => `$${inner}$`;

	const convertProse = (text: string): string =>
		text
			// 1. Block math: \[...\] → $$...$$
			.replace(/\\\[([\s\S]*?)\\\]/g, asBlock)
			// 2. Inline math: \(...\) → $...$
			.replace(/\\\(([\s\S]*?)\\\)/g, asInline)
			// 3. Block: \begin{equation}...\end{equation} → $$...$$
			.replace(/\\begin\{equation\}([\s\S]*?)\\end\{equation\}/g, asBlock)
			// 4. Block: \begin{displaymath}...\end{displaymath} → $$...$$
			.replace(/\\begin\{displaymath\}([\s\S]*?)\\end\{displaymath\}/g, asBlock)
			// 5. Inline: \begin{math}...\end{math} → $...$
			.replace(/\\begin\{math\}([\s\S]*?)\\end\{math\}/g, asInline)
			// 6. Strip backtick wrapping around math: `$$...$$` → $$...$$ and `$...$` → $...$
			.replace(/`(\${1,2})((?:(?!\1).)+)\1`/g, "$1$2$1")
			// 7. Same-line $$...$$ → $...$ (inline math) so it works inside table cells.
			//    True display math has $$ on its own line, so this only affects inline usage.
			.replace(/\$\$([^\n]+?)\$\$/g, asInline);

	// Splitting on a pattern with a capture group keeps the separators:
	// even indices are prose, odd indices are fenced code blocks. The "|$"
	// alternative lets an unterminated fence run to the end of the input.
	return content
		.split(/(```[\s\S]*?(?:```|$))/)
		.map((segment, index) => (index % 2 === 1 ? segment : convertProse(segment)))
		.join("");
}