diff --git a/surfsense_backend/app/agents/new_chat/system_prompt.py b/surfsense_backend/app/agents/new_chat/system_prompt.py index 2bb5d5bb2..14dfe1b2e 100644 --- a/surfsense_backend/app/agents/new_chat/system_prompt.py +++ b/surfsense_backend/app/agents/new_chat/system_prompt.py @@ -21,6 +21,8 @@ You are SurfSense, a reasoning and acting AI agent designed to answer user quest Today's date (UTC): {resolved_today} +When writing mathematical formulas or equations, ALWAYS use LaTeX notation. NEVER use backtick code spans or Unicode symbols for math. + """ @@ -33,6 +35,8 @@ In this team thread, each message is prefixed with **[DisplayName of the author] Today's date (UTC): {resolved_today} +When writing mathematical formulas or equations, ALWAYS use LaTeX notation. NEVER use backtick code spans or Unicode symbols for math. + """ diff --git a/surfsense_backend/app/agents/new_chat/tools/report.py b/surfsense_backend/app/agents/new_chat/tools/report.py index 53c0c9cd1..85449f8e3 100644 --- a/surfsense_backend/app/agents/new_chat/tools/report.py +++ b/surfsense_backend/app/agents/new_chat/tools/report.py @@ -53,6 +53,7 @@ _REPORT_PROMPT = """You are an expert report writer. Generate a well-structured, A[Source Code] --> B[Compiler] B --> C[Bytecode] ``` +10. When including mathematical formulas or equations, ALWAYS use LaTeX notation. NEVER use backtick code spans or Unicode symbols for math. Write the report now: """ diff --git a/surfsense_backend/app/routes/reports_routes.py b/surfsense_backend/app/routes/reports_routes.py index 9456dc374..ce5002bbc 100644 --- a/surfsense_backend/app/routes/reports_routes.py +++ b/surfsense_backend/app/routes/reports_routes.py @@ -66,6 +66,61 @@ def _strip_wrapping_code_fences(text: str) -> str: return stripped +def _normalize_latex_delimiters(text: str) -> str: + """Convert all LaTeX math delimiters to dollar-sign form. + + Pandoc's ``tex_math_dollars`` extension (on the ``gfm`` reader) handles + ``$…$`` and ``$$…$$`` natively. This function converts every other + delimiter style that LLMs produce into dollar-sign form so pandoc can + parse them as math. + + Supported conversions: + \\[…\\] → $$…$$ (display math) + \\(…\\) → $…$ (inline math) + \\begin{equation}…\\end{equation} → $$…$$ (display math) + \\begin{displaymath}…\\end{displaymath}→ $$…$$ (display math) + \\begin{math}…\\end{math} → $…$ (inline math) + `$$…$$` / `$…$` → strip wrapping backticks + """ + # 1. Block math: \[...\] → $$...$$ + text = re.sub(r"\\\[([\s\S]*?)\\\]", lambda m: f"$${m.group(1)}$$", text) + # 2. Inline math: \(...\) → $...$ + text = re.sub(r"\\\(([\s\S]*?)\\\)", lambda m: f"${m.group(1)}$", text) + # 3. \begin{equation}...\end{equation} → $$...$$ + text = re.sub( + r"\\begin\{equation\}([\s\S]*?)\\end\{equation\}", + lambda m: f"$${m.group(1)}$$", + text, + ) + # 4. \begin{displaymath}...\end{displaymath} → $$...$$ + text = re.sub( + r"\\begin\{displaymath\}([\s\S]*?)\\end\{displaymath\}", + lambda m: f"$${m.group(1)}$$", + text, + ) + # 5. \begin{math}...\end{math} → $...$ + text = re.sub( + r"\\begin\{math\}([\s\S]*?)\\end\{math\}", + lambda m: f"${m.group(1)}$", + text, + ) + # 6. Strip backtick wrapping around math: `$$...$$` → $$...$$ and `$...$` → $...$ + text = re.sub(r"`(\${1,2})((?:(?!\1).)+)\1`", r"\1\2\1", text) + # 7. Trim whitespace inside inline math $...$. + # Pandoc's tex_math_dollars requires NO space after the opening $ and + # NO space before the closing $. LLMs frequently produce "$ e^x $" + # or "\( e^x \)" (which step 2 converts to "$ e^x $"). Without + # trimming, pandoc treats these as literal dollar-sign text. + # We require spaces on BOTH sides to avoid false-positives on + # currency like "$50" or "$50 and $100". + def _trim_inline_math(m: re.Match) -> str: + inner = m.group(1).strip() + return f"${inner}$" if inner else m.group(0) + + text = re.sub(r"(? bytes: """Run all blocking I/O (tempfile, pandoc/typst, file read, cleanup) in a thread.""" @@ -253,7 +313,7 @@ async def export_report( typst_markup: str = pypandoc.convert_text( markdown_content, "typst", - format="gfm", + format="gfm+tex_math_dollars", extra_args=[ "--standalone", "-V", @@ -273,7 +333,7 @@ async def export_report( pypandoc.convert_text( markdown_content, format.value, - format="gfm", + format="gfm+tex_math_dollars", extra_args=["--standalone"], outputfile=tmp_path, )