diff --git a/surfsense_web/components/assistant-ui/inline-citation.tsx b/surfsense_web/components/assistant-ui/inline-citation.tsx index 2aeba89ca..e299f2373 100644 --- a/surfsense_web/components/assistant-ui/inline-citation.tsx +++ b/surfsense_web/components/assistant-ui/inline-citation.tsx @@ -3,11 +3,11 @@ import { useQuery } from "@tanstack/react-query"; import { useSetAtom } from "jotai"; import { ExternalLink, FileText } from "lucide-react"; +import dynamic from "next/dynamic"; import type { FC } from "react"; import { useCallback, useEffect, useRef, useState } from "react"; import { openCitationPanelAtom } from "@/atoms/citation/citation-panel.atom"; import { useCitationMetadata } from "@/components/assistant-ui/citation-metadata-context"; -import { MarkdownViewer } from "@/components/markdown-viewer"; import { Citation } from "@/components/tool-ui/citation"; import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover"; import { Spinner } from "@/components/ui/spinner"; @@ -15,6 +15,16 @@ import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip import { documentsApiService } from "@/lib/apis/documents-api.service"; import { cacheKeys } from "@/lib/query-client/cache-keys"; +// Lazily load MarkdownViewer here to break the static import cycle: +// `markdown-viewer.tsx` → `citation-renderer.tsx` → `inline-citation.tsx` +// would otherwise pull `markdown-viewer.tsx` back in at module-init time. +// Only `SurfsenseDocCitation` (popover body) ever renders this viewer, so +// the lazy boundary is invisible to most call paths. +const MarkdownViewer = dynamic( + () => import("@/components/markdown-viewer").then((m) => m.MarkdownViewer), + { ssr: false, loading: () => } +); + interface InlineCitationProps { chunkId: number; isDocsChunk?: boolean; @@ -172,7 +182,11 @@ const SurfsenseDocCitation: FC<{ chunkId: number }> = ({ chunkId }) => {

)} {!isLoading && !error && citedChunk?.content && ( - + )} {!isLoading && !error && !citedChunk?.content && (

No content available.

diff --git a/surfsense_web/components/assistant-ui/markdown-text.tsx b/surfsense_web/components/assistant-ui/markdown-text.tsx index 7655e10cc..2b788e88b 100644 --- a/surfsense_web/components/assistant-ui/markdown-text.tsx +++ b/surfsense_web/components/assistant-ui/markdown-text.tsx @@ -12,15 +12,26 @@ import { ExternalLinkIcon } from "lucide-react"; import dynamic from "next/dynamic"; import { useParams } from "next/navigation"; import { useTheme } from "next-themes"; -import { memo, type ReactNode } from "react"; +import { + createContext, + memo, + type ReactNode, + useCallback, + useContext, + useRef, +} from "react"; import rehypeKatex from "rehype-katex"; import remarkGfm from "remark-gfm"; import remarkMath from "remark-math"; import { openEditorPanelAtom } from "@/atoms/editor/editor-panel.atom"; import { ImagePreview, ImageRoot, ImageZoom } from "@/components/assistant-ui/image"; import "katex/dist/katex.min.css"; -import { InlineCitation, UrlCitation } from "@/components/assistant-ui/inline-citation"; +import { processChildrenWithCitations } from "@/components/citations/citation-renderer"; import { Skeleton } from "@/components/ui/skeleton"; +import { + type CitationUrlMap, + preprocessCitationMarkdown, +} from "@/lib/citations/citation-parser"; import { Table, TableBody, @@ -59,31 +70,30 @@ const LazyMarkdownCodeBlock = dynamic( } ); -// Storage for URL citations replaced during preprocess to avoid GFM autolink interference. -// Populated in preprocessMarkdown, consumed in parseTextWithCitations. -let _pendingUrlCitations = new Map(); -let _urlCiteIdx = 0; +// Per-render URL placeholder map propagated to component overrides via +// React Context. Replaces the previous module-level `_pendingUrlCitations` +// state, which was unsafe under concurrent renders / SSR. +type CitationUrlMapRef = { current: CitationUrlMap }; +const EMPTY_URL_MAP: CitationUrlMap = new Map(); +const CitationUrlMapContext = createContext({ current: EMPTY_URL_MAP }); + +function useCitationUrlMap(): CitationUrlMap { + return useContext(CitationUrlMapContext).current; +} /** * Preprocess raw markdown before it reaches the remark/rehype pipeline. * - Replaces URL-based citations with safe placeholders (prevents GFM autolinks) * - Normalises LaTeX delimiters to dollar-sign syntax for remark-math */ -function preprocessMarkdown(content: string): string { +function preprocessMarkdown(content: string, urlMapRef: CitationUrlMapRef): string { // Replace URL-based citations with safe placeholders BEFORE markdown parsing. // GFM autolinks would otherwise convert the https://... inside [citation:URL] // into an element, splitting the text and preventing our citation regex // from matching the full pattern. - _pendingUrlCitations = new Map(); - _urlCiteIdx = 0; - content = content.replace( - /[[【]\u200B?citation:\s*(https?:\/\/[^\]】\u200B]+)\s*\u200B?[\]】]/g, - (_, url) => { - const key = `urlcite${_urlCiteIdx++}`; - _pendingUrlCitations.set(key, url.trim()); - return `[citation:${key}]`; - } - ); + const { content: rewritten, urlMap } = preprocessCitationMarkdown(content); + urlMapRef.current = urlMap; + content = rewritten; // All math forms are normalised to $$...$$ so we can disable single-dollar // inline math in remark-math (otherwise currency like "$3,120.00 and $0.00" @@ -116,113 +126,28 @@ function preprocessMarkdown(content: string): string { return content; } -// Matches [citation:...] with numeric IDs (incl. negative, doc- prefix, comma-separated), -// URL-based IDs from live web search, or urlciteN placeholders from preprocess. -// Also matches Chinese brackets 【】 and handles zero-width spaces that LLM sometimes inserts. -const CITATION_REGEX = - /[[【]\u200B?citation:\s*(https?:\/\/[^\]】\u200B]+|urlcite\d+|(?:doc-)?-?\d+(?:\s*,\s*(?:doc-)?-?\d+)*)\s*\u200B?[\]】]/g; - -/** - * Parses text and replaces [citation:XXX] patterns with citation components. - * Supports: - * - Numeric chunk IDs: [citation:123] - * - Doc-prefixed IDs: [citation:doc-123] - * - Comma-separated IDs: [citation:4149, 4150, 4151] - * - URL-based citations from live search: [citation:https://example.com/page] - */ -function parseTextWithCitations(text: string): ReactNode[] { - const parts: ReactNode[] = []; - let lastIndex = 0; - let match: RegExpExecArray | null; - let instanceIndex = 0; - - CITATION_REGEX.lastIndex = 0; - - match = CITATION_REGEX.exec(text); - while (match !== null) { - if (match.index > lastIndex) { - parts.push(text.substring(lastIndex, match.index)); - } - - const captured = match[1]; - - if (captured.startsWith("http://") || captured.startsWith("https://")) { - parts.push(); - instanceIndex++; - } else if (captured.startsWith("urlcite")) { - const url = _pendingUrlCitations.get(captured); - if (url) { - parts.push(); - } - instanceIndex++; - } else { - const rawIds = captured.split(",").map((s) => s.trim()); - for (const rawId of rawIds) { - const isDocsChunk = rawId.startsWith("doc-"); - const chunkId = Number.parseInt(isDocsChunk ? rawId.slice(4) : rawId, 10); - parts.push( - - ); - instanceIndex++; - } - } - - lastIndex = match.index + match[0].length; - match = CITATION_REGEX.exec(text); - } - - if (lastIndex < text.length) { - parts.push(text.substring(lastIndex)); - } - - return parts.length > 0 ? parts : [text]; -} - const MarkdownTextImpl = () => { + const urlMapRef = useRef(EMPTY_URL_MAP); + const preprocess = useCallback( + (content: string) => preprocessMarkdown(content, urlMapRef), + [] + ); return ( - + + + ); }; export const MarkdownText = memo(MarkdownTextImpl); -/** - * Helper to process children and replace citation patterns with components - */ -function processChildrenWithCitations(children: ReactNode): ReactNode { - if (typeof children === "string") { - const parsed = parseTextWithCitations(children); - return parsed.length === 1 && typeof parsed[0] === "string" ? children : parsed; - } - - if (Array.isArray(children)) { - return children.map((child) => { - if (typeof child === "string") { - const parsed = parseTextWithCitations(child); - return parsed.length === 1 && typeof parsed[0] === "string" ? ( - child - ) : ( - {parsed} - ); - } - return child; - }); - } - - return children; -} - function extractDomain(url: string): string { try { const parsed = new URL(url); @@ -322,92 +247,125 @@ function MarkdownImage({ src, alt }: { src?: string; alt?: string }) { } const defaultComponents = memoizeMarkdownComponents({ - h1: ({ className, children, ...props }) => ( -

- {processChildrenWithCitations(children)} -

- ), - h2: ({ className, children, ...props }) => ( -

- {processChildrenWithCitations(children)} -

- ), - h3: ({ className, children, ...props }) => ( -

- {processChildrenWithCitations(children)} -

- ), - h4: ({ className, children, ...props }) => ( -

- {processChildrenWithCitations(children)} -

- ), - h5: ({ className, children, ...props }) => ( -
- {processChildrenWithCitations(children)} -
- ), - h6: ({ className, children, ...props }) => ( -
- {processChildrenWithCitations(children)} -
- ), - p: ({ className, children, ...props }) => ( -

- {processChildrenWithCitations(children)} -

- ), - a: ({ className, children, ...props }) => ( -
- {processChildrenWithCitations(children)} - - ), - blockquote: ({ className, children, ...props }) => ( -
- {processChildrenWithCitations(children)} -
- ), + h1: function H1({ className, children, ...props }) { + const urlMap = useCitationUrlMap(); + return ( +

+ {processChildrenWithCitations(children, urlMap)} +

+ ); + }, + h2: function H2({ className, children, ...props }) { + const urlMap = useCitationUrlMap(); + return ( +

+ {processChildrenWithCitations(children, urlMap)} +

+ ); + }, + h3: function H3({ className, children, ...props }) { + const urlMap = useCitationUrlMap(); + return ( +

+ {processChildrenWithCitations(children, urlMap)} +

+ ); + }, + h4: function H4({ className, children, ...props }) { + const urlMap = useCitationUrlMap(); + return ( +

+ {processChildrenWithCitations(children, urlMap)} +

+ ); + }, + h5: function H5({ className, children, ...props }) { + const urlMap = useCitationUrlMap(); + return ( +
+ {processChildrenWithCitations(children, urlMap)} +
+ ); + }, + h6: function H6({ className, children, ...props }) { + const urlMap = useCitationUrlMap(); + return ( +
+ {processChildrenWithCitations(children, urlMap)} +
+ ); + }, + p: function P({ className, children, ...props }) { + const urlMap = useCitationUrlMap(); + return ( +

+ {processChildrenWithCitations(children, urlMap)} +

+ ); + }, + a: function A({ className, children, ...props }) { + const urlMap = useCitationUrlMap(); + return ( + + {processChildrenWithCitations(children, urlMap)} + + ); + }, + blockquote: function Blockquote({ className, children, ...props }) { + const urlMap = useCitationUrlMap(); + return ( +
+ {processChildrenWithCitations(children, urlMap)} +
+ ); + }, ul: ({ className, ...props }) => (