Merge pull request #816 from AnishSarkar22/feat/report-artifact

fix: fix docker setup of report artifact & some improvements
This commit is contained in:
Rohan Verma 2026-02-14 15:01:23 -08:00 committed by GitHub
commit f489f2c030
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 3501 additions and 3338 deletions

View file

@ -129,16 +129,6 @@ RUN ARCH=$(dpkg --print-architecture) && \
dpkg -i /tmp/pandoc.deb && \ dpkg -i /tmp/pandoc.deb && \
rm /tmp/pandoc.deb rm /tmp/pandoc.deb
# Install Typst for PDF rendering (Typst has built-in professional styling
# for tables, headings, code blocks, etc., no CSS needed).
RUN ARCH=$(dpkg --print-architecture) && \
if [ "$ARCH" = "amd64" ]; then TYPST_ARCH="x86_64-unknown-linux-musl"; \
else TYPST_ARCH="aarch64-unknown-linux-musl"; fi && \
wget -qO /tmp/typst.tar.xz "https://github.com/typst/typst/releases/download/v0.14.2/typst-${TYPST_ARCH}.tar.xz" && \
tar -xf /tmp/typst.tar.xz -C /tmp && \
cp /tmp/typst-*/typst /usr/local/bin/typst && \
rm -rf /tmp/typst* && \
typst --version
# Install Node.js 20.x (for running frontend) # Install Node.js 20.x (for running frontend)
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \ RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \

View file

@ -30,10 +30,6 @@ RUN ARCH=$(dpkg --print-architecture) && \
dpkg -i /tmp/pandoc.deb && \ dpkg -i /tmp/pandoc.deb && \
rm /tmp/pandoc.deb rm /tmp/pandoc.deb
# NOTE: Typst CLI is NOT installed here. PDF rendering uses the `typst` Python
# library (pip package) which bundles the compiler as a native extension.
# This avoids architecture-specific binary downloads and works cross-platform.
# Update certificates and install SSL tools # Update certificates and install SSL tools
RUN update-ca-certificates RUN update-ca-certificates
RUN pip install --upgrade certifi pip-system-certs RUN pip install --upgrade certifi pip-system-certs

View file

@ -21,6 +21,8 @@ You are SurfSense, a reasoning and acting AI agent designed to answer user quest
Today's date (UTC): {resolved_today} Today's date (UTC): {resolved_today}
When writing mathematical formulas or equations, ALWAYS use LaTeX notation. NEVER use backtick code spans or Unicode symbols for math.
</system_instruction> </system_instruction>
""" """
@ -33,6 +35,8 @@ In this team thread, each message is prefixed with **[DisplayName of the author]
Today's date (UTC): {resolved_today} Today's date (UTC): {resolved_today}
When writing mathematical formulas or equations, ALWAYS use LaTeX notation. NEVER use backtick code spans or Unicode symbols for math.
</system_instruction> </system_instruction>
""" """

View file

@ -124,16 +124,15 @@ BUILTIN_TOOLS: list[ToolDefinition] = [
), ),
requires=["search_space_id", "db_session", "thread_id"], requires=["search_space_id", "db_session", "thread_id"],
), ),
# Report generation tool (inline, no Celery) # Report generation tool (inline, short-lived sessions for DB ops)
ToolDefinition( ToolDefinition(
name="generate_report", name="generate_report",
description="Generate a structured Markdown report from provided content", description="Generate a structured Markdown report from provided content",
factory=lambda deps: create_generate_report_tool( factory=lambda deps: create_generate_report_tool(
search_space_id=deps["search_space_id"], search_space_id=deps["search_space_id"],
db_session=deps["db_session"],
thread_id=deps["thread_id"], thread_id=deps["thread_id"],
), ),
requires=["search_space_id", "db_session", "thread_id"], requires=["search_space_id", "thread_id"],
), ),
# Link preview tool - fetches Open Graph metadata for URLs # Link preview tool - fetches Open Graph metadata for URLs
ToolDefinition( ToolDefinition(

View file

@ -6,18 +6,20 @@ that generates a structured Markdown report inline (no Celery). The LLM is
called within the tool, the result is saved to the database, and the tool called within the tool, the result is saved to the database, and the tool
returns immediately with a ready status. returns immediately with a ready status.
This follows the same inline pattern as generate_image and display_image, Uses short-lived database sessions to avoid holding connections during long
NOT the Celery-based podcast pattern. LLM calls (30-120+ seconds). Each DB operation (read config, save report)
opens and closes its own session, ensuring no connection is held idle during
the LLM API call.
""" """
import logging import logging
import re import re
from typing import Any from typing import Any
from langchain_core.messages import HumanMessage
from langchain_core.tools import tool from langchain_core.tools import tool
from sqlalchemy.ext.asyncio import AsyncSession
from app.db import Report from app.db import Report, async_session_maker
from app.services.llm_service import get_document_summary_llm from app.services.llm_service import get_document_summary_llm
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -53,6 +55,7 @@ _REPORT_PROMPT = """You are an expert report writer. Generate a well-structured,
A[Source Code] --> B[Compiler] A[Source Code] --> B[Compiler]
B --> C[Bytecode] B --> C[Bytecode]
``` ```
10. When including mathematical formulas or equations, ALWAYS use LaTeX notation. NEVER use backtick code spans or Unicode symbols for math.
Write the report now: Write the report now:
""" """
@ -96,7 +99,6 @@ def _extract_metadata(content: str) -> dict[str, Any]:
def create_generate_report_tool( def create_generate_report_tool(
search_space_id: int, search_space_id: int,
db_session: AsyncSession,
thread_id: int | None = None, thread_id: int | None = None,
): ):
""" """
@ -105,9 +107,11 @@ def create_generate_report_tool(
The tool generates a Markdown report inline using the search space's The tool generates a Markdown report inline using the search space's
document summary LLM, saves it to the database, and returns immediately. document summary LLM, saves it to the database, and returns immediately.
Uses short-lived database sessions for each DB operation so no connection
is held during the long LLM API call.
Args: Args:
search_space_id: The user's search space ID search_space_id: The user's search space ID
db_session: Database session for creating the report record
thread_id: The chat thread ID for associating the report thread_id: The chat thread ID for associating the report
Returns: Returns:
@ -197,14 +201,17 @@ def create_generate_report_tool(
User: "Rewrite the report in a more formal tone" parent_report_id = <previous report_id> User: "Rewrite the report in a more formal tone" parent_report_id = <previous report_id>
User: "I want more details about pricing in here" parent_report_id = <previous report_id> User: "I want more details about pricing in here" parent_report_id = <previous report_id>
User: "Include more examples" parent_report_id = <previous report_id> User: "Include more examples" parent_report_id = <previous report_id>
User: "Can you also cover security in this?" parent_report_id = <previous report_id> User: "Can you also cover nutrition in this?" parent_report_id = <previous report_id>
User: "Make it more detailed" parent_report_id = <previous report_id> User: "Make it more detailed" parent_report_id = <previous report_id>
User: "I want more about X for in here" parent_report_id = <previous report_id> User: "Not bad, but expand on the budget section" parent_report_id = <previous report_id>
User: "Also mention the competitor landscape" parent_report_id = <previous report_id>
Examples of when to LEAVE parent_report_id as None: Examples of when to LEAVE parent_report_id as None:
User: "Generate a report on climate change" parent_report_id = None (new topic) User: "Generate a report on climate change" parent_report_id = None (new topic)
User: "Write me a report about the budget" parent_report_id = None (new topic) User: "Write me a report about the budget" parent_report_id = None (new topic)
User: "Create another report, this time about marketing" parent_report_id = None User: "Create another report, this time about marketing" parent_report_id = None
User: "Now write one about travel trends in Europe" parent_report_id = None (new topic despite "now")
User: "Do the same kind of report but for the fitness industry" parent_report_id = None (new topic, different subject)
Args: Args:
topic: A short, concise title for the report (maximum 8 words). Keep it brief and descriptive e.g. "AI in Healthcare Analysis: A Comprehensive Report" instead of "Comprehensive Analysis of Artificial Intelligence Applications in Modern Healthcare Systems". topic: A short, concise title for the report (maximum 8 words). Keep it brief and descriptive e.g. "AI in Healthcare Analysis: A Comprehensive Report" instead of "Comprehensive Analysis of Artificial Intelligence Applications in Modern Healthcare Systems".
@ -225,50 +232,37 @@ def create_generate_report_tool(
- word_count: Number of words in the report - word_count: Number of words in the report
- message: Status message (or "error" field if failed) - message: Status message (or "error" field if failed)
""" """
# Resolve the parent report and its group (if versioning) # Initialize version tracking variables (used by _save_failed_report closure)
parent_report: Report | None = None parent_report_content: str | None = None
report_group_id: int | None = None report_group_id: int | None = None
if parent_report_id:
parent_report = await db_session.get(Report, parent_report_id)
if parent_report:
report_group_id = parent_report.report_group_id
logger.info(
f"[generate_report] Creating new version from parent {parent_report_id} "
f"(group {report_group_id})"
)
else:
logger.warning(
f"[generate_report] parent_report_id={parent_report_id} not found, "
"creating standalone report"
)
async def _save_failed_report(error_msg: str) -> int | None: async def _save_failed_report(error_msg: str) -> int | None:
"""Persist a failed report row so the error is visible later.""" """Persist a failed report row using a short-lived session."""
try: try:
failed_report = Report( async with async_session_maker() as session:
title=topic, failed_report = Report(
content=None, title=topic,
report_metadata={ content=None,
"status": "failed", report_metadata={
"error_message": error_msg, "status": "failed",
}, "error_message": error_msg,
report_style=report_style, },
search_space_id=search_space_id, report_style=report_style,
thread_id=thread_id, search_space_id=search_space_id,
report_group_id=report_group_id, thread_id=thread_id,
) report_group_id=report_group_id,
db_session.add(failed_report) )
await db_session.commit() session.add(failed_report)
await db_session.refresh(failed_report) await session.commit()
# If this is a new group (v1 failed), set group to self await session.refresh(failed_report)
if not failed_report.report_group_id: # If this is a new group (v1 failed), set group to self
failed_report.report_group_id = failed_report.id if not failed_report.report_group_id:
await db_session.commit() failed_report.report_group_id = failed_report.id
logger.info( await session.commit()
f"[generate_report] Saved failed report {failed_report.id}: {error_msg}" logger.info(
) f"[generate_report] Saved failed report {failed_report.id}: {error_msg}"
return failed_report.id )
return failed_report.id
except Exception: except Exception:
logger.exception( logger.exception(
"[generate_report] Could not persist failed report row" "[generate_report] Could not persist failed report row"
@ -276,8 +270,28 @@ def create_generate_report_tool(
return None return None
try: try:
# Get the LLM instance for this search space # ── Phase 1: READ (short-lived session) ──────────────────────
llm = await get_document_summary_llm(db_session, search_space_id) # Fetch parent report and LLM config, then close the session
# so no DB connection is held during the long LLM call.
async with async_session_maker() as read_session:
if parent_report_id:
parent_report = await read_session.get(Report, parent_report_id)
if parent_report:
report_group_id = parent_report.report_group_id
parent_report_content = parent_report.content
logger.info(
f"[generate_report] Creating new version from parent {parent_report_id} "
f"(group {report_group_id})"
)
else:
logger.warning(
f"[generate_report] parent_report_id={parent_report_id} not found, "
"creating standalone report"
)
llm = await get_document_summary_llm(read_session, search_space_id)
# read_session closed — connection returned to pool
if not llm: if not llm:
error_msg = ( error_msg = (
"No LLM configured. Please configure a language model in Settings." "No LLM configured. Please configure a language model in Settings."
@ -299,11 +313,11 @@ def create_generate_report_tool(
# If revising, include previous version content # If revising, include previous version content
previous_version_section = "" previous_version_section = ""
if parent_report and parent_report.content: if parent_report_content:
previous_version_section = ( previous_version_section = (
"**Previous Version of This Report (refine this based on the instructions above — " "**Previous Version of This Report (refine this based on the instructions above — "
"preserve structure and quality, apply only the requested changes):**\n\n" "preserve structure and quality, apply only the requested changes):**\n\n"
f"{parent_report.content}" f"{parent_report_content}"
) )
prompt = _REPORT_PROMPT.format( prompt = _REPORT_PROMPT.format(
@ -314,9 +328,7 @@ def create_generate_report_tool(
source_content=source_content[:100000], # Cap source content source_content=source_content[:100000], # Cap source content
) )
# Call the LLM inline # ── Phase 2: LLM CALL (no DB connection held) ────────────────
from langchain_core.messages import HumanMessage
response = await llm.ainvoke([HumanMessage(content=prompt)]) response = await llm.ainvoke([HumanMessage(content=prompt)])
report_content = response.content report_content = response.content
@ -347,35 +359,41 @@ def create_generate_report_tool(
# Extract metadata (includes "status": "ready") # Extract metadata (includes "status": "ready")
metadata = _extract_metadata(report_content) metadata = _extract_metadata(report_content)
# Save to database # ── Phase 3: WRITE (short-lived session) ─────────────────────
report = Report( # Save the report to the database, then close the session.
title=topic, async with async_session_maker() as write_session:
content=report_content, report = Report(
report_metadata=metadata, title=topic,
report_style=report_style, content=report_content,
search_space_id=search_space_id, report_metadata=metadata,
thread_id=thread_id, report_style=report_style,
report_group_id=report_group_id, # None for v1, inherited for v2+ search_space_id=search_space_id,
) thread_id=thread_id,
db_session.add(report) report_group_id=report_group_id,
await db_session.commit() )
await db_session.refresh(report) write_session.add(report)
await write_session.commit()
await write_session.refresh(report)
# If this is a brand-new report (v1), set report_group_id = own id # If this is a brand-new report (v1), set report_group_id = own id
if not report.report_group_id: if not report.report_group_id:
report.report_group_id = report.id report.report_group_id = report.id
await db_session.commit() await write_session.commit()
saved_report_id = report.id
saved_group_id = report.report_group_id
# write_session closed — connection returned to pool
logger.info( logger.info(
f"[generate_report] Created report {report.id} " f"[generate_report] Created report {saved_report_id} "
f"(group={report.report_group_id}): " f"(group={saved_group_id}): "
f"{metadata.get('word_count', 0)} words, " f"{metadata.get('word_count', 0)} words, "
f"{metadata.get('section_count', 0)} sections" f"{metadata.get('section_count', 0)} sections"
) )
return { return {
"status": "ready", "status": "ready",
"report_id": report.id, "report_id": saved_report_id,
"title": topic, "title": topic,
"word_count": metadata.get("word_count", 0), "word_count": metadata.get("word_count", 0),
"message": f"Report generated successfully: {topic}", "message": f"Report generated successfully: {topic}",

View file

@ -66,6 +66,62 @@ def _strip_wrapping_code_fences(text: str) -> str:
return stripped return stripped
def _normalize_latex_delimiters(text: str) -> str:
"""Convert all LaTeX math delimiters to dollar-sign form.
Pandoc's ``tex_math_dollars`` extension (on the ``gfm`` reader) handles
``$$`` and ``$$$$`` natively. This function converts every other
delimiter style that LLMs produce into dollar-sign form so pandoc can
parse them as math.
Supported conversions:
\\[\\] $$$$ (display math)
\\(\\) $$ (inline math)
\\begin{equation}\\end{equation} $$$$ (display math)
\\begin{displaymath}\\end{displaymath} $$$$ (display math)
\\begin{math}\\end{math} $$ (inline math)
`$$$$` / `$$` strip wrapping backticks
"""
# 1. Block math: \[...\] → $$...$$
text = re.sub(r"\\\[([\s\S]*?)\\\]", lambda m: f"$${m.group(1)}$$", text)
# 2. Inline math: \(...\) → $...$
text = re.sub(r"\\\(([\s\S]*?)\\\)", lambda m: f"${m.group(1)}$", text)
# 3. \begin{equation}...\end{equation} → $$...$$
text = re.sub(
r"\\begin\{equation\}([\s\S]*?)\\end\{equation\}",
lambda m: f"$${m.group(1)}$$",
text,
)
# 4. \begin{displaymath}...\end{displaymath} → $$...$$
text = re.sub(
r"\\begin\{displaymath\}([\s\S]*?)\\end\{displaymath\}",
lambda m: f"$${m.group(1)}$$",
text,
)
# 5. \begin{math}...\end{math} → $...$
text = re.sub(
r"\\begin\{math\}([\s\S]*?)\\end\{math\}",
lambda m: f"${m.group(1)}$",
text,
)
# 6. Strip backtick wrapping around math: `$$...$$` → $$...$$ and `$...$` → $...$
text = re.sub(r"`(\${1,2})((?:(?!\1).)+)\1`", r"\1\2\1", text)
# 7. Trim whitespace inside inline math $...$.
# Pandoc's tex_math_dollars requires NO space after the opening $ and
# NO space before the closing $. LLMs frequently produce "$ e^x $"
# or "\( e^x \)" (which step 2 converts to "$ e^x $"). Without
# trimming, pandoc treats these as literal dollar-sign text.
# We require spaces on BOTH sides to avoid false-positives on
# currency like "$50" or "$50 and $100".
def _trim_inline_math(m: re.Match) -> str:
inner = m.group(1).strip()
return f"${inner}$" if inner else m.group(0)
text = re.sub(r"(?<!\$)\$(?!\$) +(.+?) +\$(?!\$)", _trim_inline_math, text)
return text
async def _get_report_with_access( async def _get_report_with_access(
report_id: int, report_id: int,
session: AsyncSession, session: AsyncSession,
@ -227,6 +283,10 @@ async def export_report(
# Without this, pandoc treats the entire content as a code block. # Without this, pandoc treats the entire content as a code block.
markdown_content = _strip_wrapping_code_fences(report.content) markdown_content = _strip_wrapping_code_fences(report.content)
# Normalise all LaTeX math delimiters (\(\), \[\], \begin{equation},
# etc.) into $/$$ form that pandoc's tex_math_dollars extension can parse.
markdown_content = _normalize_latex_delimiters(markdown_content)
# Convert Markdown to the requested format. # Convert Markdown to the requested format.
# #
# DOCX: pypandoc (pandoc) handles the full conversion directly. # DOCX: pypandoc (pandoc) handles the full conversion directly.
@ -237,8 +297,9 @@ async def export_report(
# bundles the compiler as a native extension. Typst produces # bundles the compiler as a native extension. Typst produces
# professional styling for tables, headings, code blocks, etc. # professional styling for tables, headings, code blocks, etc.
# #
# Use "gfm" as the input format because LLM output uses GFM-style # Use "gfm" as the base input format because LLM output uses GFM-style
# pipe tables that pandoc's stricter default "markdown" may mangle. # pipe tables that pandoc's stricter default "markdown" may mangle.
# The +tex_math_dollars extension enables $/$$ math recognition.
def _convert_and_read() -> bytes: def _convert_and_read() -> bytes:
"""Run all blocking I/O (tempfile, pandoc/typst, file read, cleanup) in a thread.""" """Run all blocking I/O (tempfile, pandoc/typst, file read, cleanup) in a thread."""
@ -253,7 +314,7 @@ async def export_report(
typst_markup: str = pypandoc.convert_text( typst_markup: str = pypandoc.convert_text(
markdown_content, markdown_content,
"typst", "typst",
format="gfm", format="gfm+tex_math_dollars",
extra_args=[ extra_args=[
"--standalone", "--standalone",
"-V", "-V",
@ -273,7 +334,7 @@ async def export_report(
pypandoc.convert_text( pypandoc.convert_text(
markdown_content, markdown_content,
format.value, format.value,
format="gfm", format="gfm+tex_math_dollars",
extra_args=["--standalone"], extra_args=["--standalone"],
outputfile=tmp_path, outputfile=tmp_path,
) )

6461
surfsense_backend/uv.lock generated

File diff suppressed because it is too large Load diff

View file

@ -11,10 +11,45 @@ import {
import { CheckIcon, CopyIcon } from "lucide-react"; import { CheckIcon, CopyIcon } from "lucide-react";
import { type FC, memo, type ReactNode, useState } from "react"; import { type FC, memo, type ReactNode, useState } from "react";
import remarkGfm from "remark-gfm"; import remarkGfm from "remark-gfm";
import remarkMath from "remark-math";
import rehypeKatex from "rehype-katex";
import "katex/dist/katex.min.css";
import { InlineCitation } from "@/components/assistant-ui/inline-citation"; import { InlineCitation } from "@/components/assistant-ui/inline-citation";
import { TooltipIconButton } from "@/components/assistant-ui/tooltip-icon-button"; import { TooltipIconButton } from "@/components/assistant-ui/tooltip-icon-button";
import { cn } from "@/lib/utils"; import { cn } from "@/lib/utils";
/**
 * Matches a fenced code block (``` or ~~~ fences, optionally indented).
 * An unterminated fence, common while a response is still streaming,
 * runs to the end of the input.
 */
const FENCED_CODE_BLOCK = /^[ \t]*(`{3,}|~{3,})[^`\n]*\n[\s\S]*?(?:^[ \t]*\1[ \t]*$|(?![\s\S]))/gm;

/**
 * Apply the delimiter and heading normalisation to text that contains
 * no fenced code block.
 */
function convertOutsideCode(content: string): string {
	// 1. Block math: \[...\] → $$...$$
	content = content.replace(/\\\[([\s\S]*?)\\\]/g, (_, inner) => `$$${inner}$$`);
	// 2. Inline math: \(...\) → $...$
	content = content.replace(/\\\(([\s\S]*?)\\\)/g, (_, inner) => `$${inner}$`);
	// 3. Block: \begin{equation}...\end{equation} → $$...$$
	content = content.replace(
		/\\begin\{equation\}([\s\S]*?)\\end\{equation\}/g,
		(_, inner) => `$$${inner}$$`
	);
	// 4. Block: \begin{displaymath}...\end{displaymath} → $$...$$
	content = content.replace(
		/\\begin\{displaymath\}([\s\S]*?)\\end\{displaymath\}/g,
		(_, inner) => `$$${inner}$$`
	);
	// 5. Inline: \begin{math}...\end{math} → $...$
	content = content.replace(/\\begin\{math\}([\s\S]*?)\\end\{math\}/g, (_, inner) => `$${inner}$`);
	// 6. Strip backtick wrapping around math: `$$...$$` → $$...$$ and `$...$` → $...$
	content = content.replace(/`(\${1,2})((?:(?!\1).)+)\1`/g, "$1$2$1");
	// 7. Ensure markdown headings (## ...) glued to preceding text start on
	//    their own line. The preceding character must not itself be "#":
	//    otherwise the first "#" of "## Title" is taken as the preceding
	//    text and the heading is split into "#\n\n# Title".
	//    NOTE(review): prose such as "C# is" and inline code spans are still
	//    split by this rule — confirm whether a stricter trigger is wanted.
	content = content.replace(/([^\n#])(#{1,6}\s)/g, "$1\n\n$2");
	return content;
}

/**
 * Convert all LaTeX delimiter styles to the dollar-sign syntax
 * that remark-math understands. LLMs use various delimiters
 * (\(...\), \[...\], \begin{equation}, etc.) and we need to
 * normalise them all to $ / $$ before the markdown parser runs.
 *
 * Fenced code blocks are copied through unchanged so that code
 * comments ("    # note") and escapes such as "\(" inside code are
 * never rewritten.
 *
 * @param content Raw markdown text produced by the model.
 * @returns The markdown with math delimiters and glued headings normalised.
 */
function convertLatexDelimiters(content: string): string {
	let result = "";
	let cursor = 0;
	for (const fence of content.matchAll(FENCED_CODE_BLOCK)) {
		const start = fence.index ?? cursor;
		// Normalise the prose before the fence, then keep the fence verbatim.
		result += convertOutsideCode(content.slice(cursor, start)) + fence[0];
		cursor = start + fence[0].length;
	}
	return result + convertOutsideCode(content.slice(cursor));
}
// Citation pattern: [citation:CHUNK_ID] or [citation:doc-CHUNK_ID] // Citation pattern: [citation:CHUNK_ID] or [citation:doc-CHUNK_ID]
// Also matches Chinese brackets 【】 and handles zero-width spaces that LLM sometimes inserts // Also matches Chinese brackets 【】 and handles zero-width spaces that LLM sometimes inserts
const CITATION_REGEX = /[[【]\u200B?citation:(doc-)?(\d+)\u200B?[\]】]/g; const CITATION_REGEX = /[[【]\u200B?citation:(doc-)?(\d+)\u200B?[\]】]/g;
@ -59,7 +94,8 @@ function parseTextWithCitations(text: string): ReactNode[] {
// Reset regex state // Reset regex state
CITATION_REGEX.lastIndex = 0; CITATION_REGEX.lastIndex = 0;
while ((match = CITATION_REGEX.exec(text)) !== null) { match = CITATION_REGEX.exec(text);
while (match !== null) {
// Add text before the citation // Add text before the citation
if (match.index > lastIndex) { if (match.index > lastIndex) {
parts.push(text.substring(lastIndex, match.index)); parts.push(text.substring(lastIndex, match.index));
@ -80,6 +116,7 @@ function parseTextWithCitations(text: string): ReactNode[] {
lastIndex = match.index + match[0].length; lastIndex = match.index + match[0].length;
instanceIndex++; instanceIndex++;
match = CITATION_REGEX.exec(text);
} }
// Add any remaining text after the last citation // Add any remaining text after the last citation
@ -93,9 +130,11 @@ function parseTextWithCitations(text: string): ReactNode[] {
const MarkdownTextImpl = () => { const MarkdownTextImpl = () => {
return ( return (
<MarkdownTextPrimitive <MarkdownTextPrimitive
remarkPlugins={[remarkGfm]} remarkPlugins={[remarkGfm, remarkMath]}
rehypePlugins={[rehypeKatex]}
className="aui-md" className="aui-md"
components={defaultComponents} components={defaultComponents}
preprocess={convertLatexDelimiters}
/> />
); );
}; };

View file

@ -29,33 +29,53 @@ function stripOuterMarkdownFence(content: string): string {
} }
/** /**
* Convert various LaTeX delimiter styles to the dollar-sign syntax * Convert all LaTeX delimiter styles to the double-dollar syntax
* that remark-math understands, and normalise edge-cases that * that Streamdown's @streamdown/math plugin understands.
* commonly appear in LLM-generated markdown.
* *
* \[...\] $$ ... $$ (block / display math) * Streamdown math conventions (different from remark-math!):
* \(...\) $ ... $ (inline math) * $$...$$ on the SAME line inline math
* same-line $$$$ $ ... $ (inline math display math * $$\n...\n$$ on SEPARATE lines block (display) math
* can't live inside table cells) *
* `$$$$` $$ $$ (strip wrapping backtick code) * Conversions performed:
* `$$` $ $ (strip wrapping backtick code) * \[...\] $$\n ... \n$$ (block math)
* \(...\) $$...$$ (inline math, same line)
* \begin{equation}...\end{equation} $$\n ... \n$$ (block math)
* \begin{displaymath}...\end{displaymath} $$\n ... \n$$ (block math)
* \begin{math}...\end{math} $$...$$ (inline math, same line)
* `$$$$` $$ $$ (strip wrapping backtick code)
* `$$` $ $ (strip wrapping backtick code)
* $...$ $$...$$ (normalise single-$ to double-$$)
*/ */
function convertLatexDelimiters(content: string): string { function convertLatexDelimiters(content: string): string {
// 1. Block math: \[...\] → $$...$$ // 1. Block math: \[...\] → $$\n...\n$$ (display math on separate lines)
content = content.replace(/\\\[([\s\S]*?)\\\]/g, (_match, inner) => { content = content.replace(/\\\[([\s\S]*?)\\\]/g, (_, inner) => `\n$$\n${inner.trim()}\n$$\n`);
return `$$${inner}$$`; // 2. Inline math: \(...\) → $$...$$ (inline math on same line)
}); content = content.replace(/\\\(([\s\S]*?)\\\)/g, (_, inner) => `$$${inner.trim()}$$`);
// 2. Inline math: \(...\) → $...$ // 3. Block: \begin{equation}...\end{equation} → $$\n...\n$$
content = content.replace(/\\\(([\s\S]*?)\\\)/g, (_match, inner) => { content = content.replace(
return `$${inner}$`; /\\begin\{equation\}([\s\S]*?)\\end\{equation\}/g,
}); (_, inner) => `\n$$\n${inner.trim()}\n$$\n`
// 3. Strip backtick wrapping around math: `$$...$$` → $$...$$ and `$...$` → $...$ );
// 4. Block: \begin{displaymath}...\end{displaymath} → $$\n...\n$$
content = content.replace(
/\\begin\{displaymath\}([\s\S]*?)\\end\{displaymath\}/g,
(_, inner) => `\n$$\n${inner.trim()}\n$$\n`
);
// 5. Inline: \begin{math}...\end{math} → $$...$$
content = content.replace(
/\\begin\{math\}([\s\S]*?)\\end\{math\}/g,
(_, inner) => `$$${inner.trim()}$$`
);
// 6. Strip backtick wrapping around math: `$$...$$` → $$...$$ and `$...$` → $...$
content = content.replace(/`(\${1,2})((?:(?!\1).)+)\1`/g, "$1$2$1"); content = content.replace(/`(\${1,2})((?:(?!\1).)+)\1`/g, "$1$2$1");
// 4. Same-line $$...$$ → $...$ (inline math) so it works inside table cells. // 7. Normalise single-dollar $...$ to double-dollar $$...$$ so they render
// True display math has $$ on its own line, so this only affects inline usage. // reliably in Streamdown (single-$ has strict no-space rules that often fail).
content = content.replace(/\$\$([^\n]+?)\$\$/g, (_match, inner) => { // We match $…$ where the content starts with a backslash (LaTeX command)
return `$${inner}$`; // to avoid converting currency like $50.
}); content = content.replace(
/(?<!\$)\$(?!\$)(\\[a-zA-Z][\s\S]*?)(?<!\$)\$(?!\$)/g,
(_, inner) => `$$${inner.trim()}$$`
);
return content; return content;
} }

View file

@ -14,7 +14,6 @@ import {
DropdownMenuItem, DropdownMenuItem,
DropdownMenuTrigger, DropdownMenuTrigger,
} from "@/components/ui/dropdown-menu"; } from "@/components/ui/dropdown-menu";
import { Spinner } from "@/components/ui/spinner";
import { useMediaQuery } from "@/hooks/use-media-query"; import { useMediaQuery } from "@/hooks/use-media-query";
import { baseApiService } from "@/lib/apis/base-api.service"; import { baseApiService } from "@/lib/apis/base-api.service";
import { authenticatedFetch } from "@/lib/auth-utils"; import { authenticatedFetch } from "@/lib/auth-utils";
@ -298,14 +297,12 @@ function ReportPanelContent({
onClick={() => handleExport("pdf")} onClick={() => handleExport("pdf")}
disabled={exporting !== null} disabled={exporting !== null}
> >
{exporting === "pdf" && <Spinner size="xs" />}
Download PDF Download PDF
</DropdownMenuItem> </DropdownMenuItem>
<DropdownMenuItem <DropdownMenuItem
onClick={() => handleExport("docx")} onClick={() => handleExport("docx")}
disabled={exporting !== null} disabled={exporting !== null}
> >
{exporting === "docx" && <Spinner size="xs" />}
Download DOCX Download DOCX
</DropdownMenuItem> </DropdownMenuItem>
</> </>

View file

@ -99,9 +99,11 @@
"react-json-view-lite": "^2.4.1", "react-json-view-lite": "^2.4.1",
"react-syntax-highlighter": "^15.6.1", "react-syntax-highlighter": "^15.6.1",
"react-wrap-balancer": "^1.1.1", "react-wrap-balancer": "^1.1.1",
"rehype-katex": "^7.0.1",
"rehype-raw": "^7.0.0", "rehype-raw": "^7.0.0",
"rehype-sanitize": "^6.0.0", "rehype-sanitize": "^6.0.0",
"remark-gfm": "^4.0.1", "remark-gfm": "^4.0.1",
"remark-math": "^6.0.0",
"server-only": "^0.0.1", "server-only": "^0.0.1",
"sonner": "^2.0.6", "sonner": "^2.0.6",
"streamdown": "^2.2.0", "streamdown": "^2.2.0",

View file

@ -242,6 +242,9 @@ importers:
react-wrap-balancer: react-wrap-balancer:
specifier: ^1.1.1 specifier: ^1.1.1
version: 1.1.1(react@19.2.3) version: 1.1.1(react@19.2.3)
rehype-katex:
specifier: ^7.0.1
version: 7.0.1
rehype-raw: rehype-raw:
specifier: ^7.0.0 specifier: ^7.0.0
version: 7.0.0 version: 7.0.0
@ -251,6 +254,9 @@ importers:
remark-gfm: remark-gfm:
specifier: ^4.0.1 specifier: ^4.0.1
version: 4.0.1 version: 4.0.1
remark-math:
specifier: ^6.0.0
version: 6.0.0
server-only: server-only:
specifier: ^0.0.1 specifier: ^0.0.1
version: 0.0.1 version: 0.0.1