mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-29 10:56:24 +02:00
feat: add support for LaTeX rendering in Markdown components with rehype-katex and remark-math
This commit is contained in:
parent
a2dd5fb671
commit
3d712e391b
4 changed files with 82 additions and 20 deletions
|
|
@ -11,10 +11,48 @@ import {
|
||||||
import { CheckIcon, CopyIcon } from "lucide-react";
|
import { CheckIcon, CopyIcon } from "lucide-react";
|
||||||
import { type FC, memo, type ReactNode, useState } from "react";
|
import { type FC, memo, type ReactNode, useState } from "react";
|
||||||
import remarkGfm from "remark-gfm";
|
import remarkGfm from "remark-gfm";
|
||||||
|
import remarkMath from "remark-math";
|
||||||
|
import rehypeKatex from "rehype-katex";
|
||||||
|
import "katex/dist/katex.min.css";
|
||||||
import { InlineCitation } from "@/components/assistant-ui/inline-citation";
|
import { InlineCitation } from "@/components/assistant-ui/inline-citation";
|
||||||
import { TooltipIconButton } from "@/components/assistant-ui/tooltip-icon-button";
|
import { TooltipIconButton } from "@/components/assistant-ui/tooltip-icon-button";
|
||||||
import { cn } from "@/lib/utils";
|
import { cn } from "@/lib/utils";
|
||||||
|
|
||||||
|
/**
 * Prepare LLM-generated markdown for remark-math.
 *
 * Every supported LaTeX delimiter style is rewritten to the dollar-sign
 * syntax that remark-math parses:
 *
 *   \[...\]                                  → $$...$$  (block)
 *   \(...\)                                  → $...$    (inline)
 *   \begin{equation}...\end{equation}        → $$...$$  (block)
 *   \begin{displaymath}...\end{displaymath}  → $$...$$  (block)
 *   \begin{math}...\end{math}                → $...$    (inline)
 *   `$...$` / `$$...$$`                      → wrapping backticks removed
 *
 * A heading marker glued to the preceding text ("text.## Title") is also
 * moved onto its own line.
 *
 * Fenced code blocks (``` ... ```) are passed through untouched, so code
 * samples containing `\(`, `$$` or trailing `# comments` are not rewritten.
 *
 * @param content - Raw markdown text.
 * @returns The markdown with math delimiters and headings normalised.
 */
function convertLatexDelimiters(content: string): string {
	const toBlock = (_: string, inner: string): string => `$$${inner}$$`;
	const toInline = (_: string, inner: string): string => `$${inner}$`;

	// Applies every rewrite to a stretch of markdown that contains no fenced code.
	const normaliseProse = (text: string): string =>
		text
			// 1. Block math: \[...\] → $$...$$
			.replace(/\\\[([\s\S]*?)\\\]/g, toBlock)
			// 2. Inline math: \(...\) → $...$
			.replace(/\\\(([\s\S]*?)\\\)/g, toInline)
			// 3. Block: \begin{equation}...\end{equation} → $$...$$
			.replace(/\\begin\{equation\}([\s\S]*?)\\end\{equation\}/g, toBlock)
			// 4. Block: \begin{displaymath}...\end{displaymath} → $$...$$
			.replace(/\\begin\{displaymath\}([\s\S]*?)\\end\{displaymath\}/g, toBlock)
			// 5. Inline: \begin{math}...\end{math} → $...$
			.replace(/\\begin\{math\}([\s\S]*?)\\end\{math\}/g, toInline)
			// 6. Strip backtick wrapping around math: `$$...$$` → $$...$$ and `$...$` → $...$
			.replace(/`(\${1,2})((?:(?!\1).)+)\1`/g, "$1$2$1")
			// 7. Ensure markdown headings (## ...) always start on their own line.
			//    The preceding character must not itself be '#', otherwise the
			//    marker of a well-formed "## Title" would be split in two.
			.replace(/([^\n#])(#{1,6}\s)/g, "$1\n\n$2");

	// The capture group makes split() keep the fenced blocks, which therefore
	// land on the odd indices. `(?:```|$)` also covers a fence that is still
	// open at the end of the text (e.g. while a response is streaming).
	const segments = content.split(/(```[\s\S]*?(?:```|$))/);

	return segments
		.map((segment, index) => (index % 2 === 1 ? segment : normaliseProse(segment)))
		.join("");
}
|
||||||
|
|
||||||
// Citation pattern: [citation:CHUNK_ID] or [citation:doc-CHUNK_ID]
|
// Citation pattern: [citation:CHUNK_ID] or [citation:doc-CHUNK_ID]
|
||||||
// Also matches Chinese brackets 【】 and handles zero-width spaces that LLM sometimes inserts
|
// Also matches Chinese brackets 【】 and handles zero-width spaces that LLM sometimes inserts
|
||||||
const CITATION_REGEX = /[[【]\u200B?citation:(doc-)?(\d+)\u200B?[\]】]/g;
|
const CITATION_REGEX = /[[【]\u200B?citation:(doc-)?(\d+)\u200B?[\]】]/g;
|
||||||
|
|
@ -59,7 +97,8 @@ function parseTextWithCitations(text: string): ReactNode[] {
|
||||||
// Reset regex state
|
// Reset regex state
|
||||||
CITATION_REGEX.lastIndex = 0;
|
CITATION_REGEX.lastIndex = 0;
|
||||||
|
|
||||||
while ((match = CITATION_REGEX.exec(text)) !== null) {
|
match = CITATION_REGEX.exec(text);
|
||||||
|
while (match !== null) {
|
||||||
// Add text before the citation
|
// Add text before the citation
|
||||||
if (match.index > lastIndex) {
|
if (match.index > lastIndex) {
|
||||||
parts.push(text.substring(lastIndex, match.index));
|
parts.push(text.substring(lastIndex, match.index));
|
||||||
|
|
@ -80,6 +119,7 @@ function parseTextWithCitations(text: string): ReactNode[] {
|
||||||
|
|
||||||
lastIndex = match.index + match[0].length;
|
lastIndex = match.index + match[0].length;
|
||||||
instanceIndex++;
|
instanceIndex++;
|
||||||
|
match = CITATION_REGEX.exec(text);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add any remaining text after the last citation
|
// Add any remaining text after the last citation
|
||||||
|
|
@ -93,9 +133,11 @@ function parseTextWithCitations(text: string): ReactNode[] {
|
||||||
// Renders assistant message markdown with GFM and KaTeX math support.
// The text is passed through convertLatexDelimiters before parsing, with
// remark-math and rehype-katex registered as plugins.
const MarkdownTextImpl = () => (
	<MarkdownTextPrimitive
		className="aui-md"
		components={defaultComponents}
		preprocess={convertLatexDelimiters}
		remarkPlugins={[remarkGfm, remarkMath]}
		rehypePlugins={[rehypeKatex]}
	/>
);
|
||||||
|
|
|
||||||
|
|
@ -29,12 +29,15 @@ function stripOuterMarkdownFence(content: string): string {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Convert various LaTeX delimiter styles to the dollar-sign syntax
|
* Convert all LaTeX delimiter styles to the dollar-sign syntax
|
||||||
* that remark-math understands, and normalise edge-cases that
|
* that remark-math understands, and normalise edge-cases that
|
||||||
* commonly appear in LLM-generated markdown.
|
* commonly appear in LLM-generated markdown.
|
||||||
*
|
*
|
||||||
* \[...\] → $$ ... $$ (block / display math)
|
* \[...\] → $$ ... $$ (block / display math)
|
||||||
* \(...\) → $ ... $ (inline math)
|
* \(...\) → $ ... $ (inline math)
|
||||||
|
* \begin{equation}...\end{equation} → $$ ... $$ (block math)
|
||||||
|
* \begin{displaymath}...\end{displaymath} → $$ ... $$ (block math)
|
||||||
|
* \begin{math}...\end{math} → $ ... $ (inline math)
|
||||||
* same-line $$…$$ → $ ... $ (inline math — display math
|
* same-line $$…$$ → $ ... $ (inline math — display math
|
||||||
* can't live inside table cells)
|
* can't live inside table cells)
|
||||||
* `$$ … $$` → $$ … $$ (strip wrapping backtick code)
|
* `$$ … $$` → $$ … $$ (strip wrapping backtick code)
|
||||||
|
|
@ -42,20 +45,29 @@ function stripOuterMarkdownFence(content: string): string {
|
||||||
*/
|
*/
|
||||||
function convertLatexDelimiters(content: string): string {
	type Replacer = (match: string, ...groups: string[]) => string;

	// Wrap a captured math body in display ($$) or inline ($) delimiters.
	const display: Replacer = (_match, body) => "$$" + body + "$$";
	const inline: Replacer = (_match, body) => "$" + body + "$";
	// Re-emit the captured dollar delimiter around the body, dropping the backticks.
	const unwrapCode: Replacer = (_match, delimiter, body) => delimiter + body + delimiter;

	// Rules run strictly in this order; the last one relies on the earlier
	// ones having already produced $$...$$.
	const rules: ReadonlyArray<readonly [RegExp, Replacer]> = [
		// 1. Block math: \[...\] → $$...$$
		[/\\\[([\s\S]*?)\\\]/g, display],
		// 2. Inline math: \(...\) → $...$
		[/\\\(([\s\S]*?)\\\)/g, inline],
		// 3. Block: \begin{equation}...\end{equation} → $$...$$
		[/\\begin\{equation\}([\s\S]*?)\\end\{equation\}/g, display],
		// 4. Block: \begin{displaymath}...\end{displaymath} → $$...$$
		[/\\begin\{displaymath\}([\s\S]*?)\\end\{displaymath\}/g, display],
		// 5. Inline: \begin{math}...\end{math} → $...$
		[/\\begin\{math\}([\s\S]*?)\\end\{math\}/g, inline],
		// 6. Strip backtick wrapping around math: `$$...$$` → $$...$$ and `$...$` → $...$
		[/`(\${1,2})((?:(?!\1).)+)\1`/g, unwrapCode],
		// 7. Same-line $$...$$ → $...$ (inline math) so it works inside table cells.
		//    True display math has $$ on its own line, so this only affects inline usage.
		[/\$\$([^\n]+?)\$\$/g, inline],
	];

	let result = content;
	for (const [pattern, replacer] of rules) {
		result = result.replace(pattern, replacer);
	}
	return result;
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -99,9 +99,11 @@
|
||||||
"react-json-view-lite": "^2.4.1",
|
"react-json-view-lite": "^2.4.1",
|
||||||
"react-syntax-highlighter": "^15.6.1",
|
"react-syntax-highlighter": "^15.6.1",
|
||||||
"react-wrap-balancer": "^1.1.1",
|
"react-wrap-balancer": "^1.1.1",
|
||||||
|
"rehype-katex": "^7.0.1",
|
||||||
"rehype-raw": "^7.0.0",
|
"rehype-raw": "^7.0.0",
|
||||||
"rehype-sanitize": "^6.0.0",
|
"rehype-sanitize": "^6.0.0",
|
||||||
"remark-gfm": "^4.0.1",
|
"remark-gfm": "^4.0.1",
|
||||||
|
"remark-math": "^6.0.0",
|
||||||
"server-only": "^0.0.1",
|
"server-only": "^0.0.1",
|
||||||
"sonner": "^2.0.6",
|
"sonner": "^2.0.6",
|
||||||
"streamdown": "^2.2.0",
|
"streamdown": "^2.2.0",
|
||||||
|
|
|
||||||
6
surfsense_web/pnpm-lock.yaml
generated
6
surfsense_web/pnpm-lock.yaml
generated
|
|
@ -242,6 +242,9 @@ importers:
|
||||||
react-wrap-balancer:
|
react-wrap-balancer:
|
||||||
specifier: ^1.1.1
|
specifier: ^1.1.1
|
||||||
version: 1.1.1(react@19.2.3)
|
version: 1.1.1(react@19.2.3)
|
||||||
|
rehype-katex:
|
||||||
|
specifier: ^7.0.1
|
||||||
|
version: 7.0.1
|
||||||
rehype-raw:
|
rehype-raw:
|
||||||
specifier: ^7.0.0
|
specifier: ^7.0.0
|
||||||
version: 7.0.0
|
version: 7.0.0
|
||||||
|
|
@ -251,6 +254,9 @@ importers:
|
||||||
remark-gfm:
|
remark-gfm:
|
||||||
specifier: ^4.0.1
|
specifier: ^4.0.1
|
||||||
version: 4.0.1
|
version: 4.0.1
|
||||||
|
remark-math:
|
||||||
|
specifier: ^6.0.0
|
||||||
|
version: 6.0.0
|
||||||
server-only:
|
server-only:
|
||||||
specifier: ^0.0.1
|
specifier: ^0.0.1
|
||||||
version: 0.0.1
|
version: 0.0.1
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue