mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-12 01:02:39 +02:00
Merge pull request #816 from AnishSarkar22/feat/report-artifact
fix: fix docker setup of report artifact & some improvements
This commit is contained in:
commit
f489f2c030
12 changed files with 3501 additions and 3338 deletions
|
|
@ -129,16 +129,6 @@ RUN ARCH=$(dpkg --print-architecture) && \
|
|||
dpkg -i /tmp/pandoc.deb && \
|
||||
rm /tmp/pandoc.deb
|
||||
|
||||
# Install Typst for PDF rendering (Typst has built-in professional styling
|
||||
# for tables, headings, code blocks, etc., no CSS needed).
|
||||
RUN ARCH=$(dpkg --print-architecture) && \
|
||||
if [ "$ARCH" = "amd64" ]; then TYPST_ARCH="x86_64-unknown-linux-musl"; \
|
||||
else TYPST_ARCH="aarch64-unknown-linux-musl"; fi && \
|
||||
wget -qO /tmp/typst.tar.xz "https://github.com/typst/typst/releases/download/v0.14.2/typst-${TYPST_ARCH}.tar.xz" && \
|
||||
tar -xf /tmp/typst.tar.xz -C /tmp && \
|
||||
cp /tmp/typst-*/typst /usr/local/bin/typst && \
|
||||
rm -rf /tmp/typst* && \
|
||||
typst --version
|
||||
|
||||
# Install Node.js 20.x (for running frontend)
|
||||
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
|
||||
|
|
|
|||
|
|
@ -30,10 +30,6 @@ RUN ARCH=$(dpkg --print-architecture) && \
|
|||
dpkg -i /tmp/pandoc.deb && \
|
||||
rm /tmp/pandoc.deb
|
||||
|
||||
# NOTE: Typst CLI is NOT installed here. PDF rendering uses the `typst` Python
|
||||
# library (pip package) which bundles the compiler as a native extension.
|
||||
# This avoids architecture-specific binary downloads and works cross-platform.
|
||||
|
||||
# Update certificates and install SSL tools
|
||||
RUN update-ca-certificates
|
||||
RUN pip install --upgrade certifi pip-system-certs
|
||||
|
|
|
|||
|
|
@ -21,6 +21,8 @@ You are SurfSense, a reasoning and acting AI agent designed to answer user quest
|
|||
|
||||
Today's date (UTC): {resolved_today}
|
||||
|
||||
When writing mathematical formulas or equations, ALWAYS use LaTeX notation. NEVER use backtick code spans or Unicode symbols for math.
|
||||
|
||||
</system_instruction>
|
||||
"""
|
||||
|
||||
|
|
@ -33,6 +35,8 @@ In this team thread, each message is prefixed with **[DisplayName of the author]
|
|||
|
||||
Today's date (UTC): {resolved_today}
|
||||
|
||||
When writing mathematical formulas or equations, ALWAYS use LaTeX notation. NEVER use backtick code spans or Unicode symbols for math.
|
||||
|
||||
</system_instruction>
|
||||
"""
|
||||
|
||||
|
|
|
|||
|
|
@ -124,16 +124,15 @@ BUILTIN_TOOLS: list[ToolDefinition] = [
|
|||
),
|
||||
requires=["search_space_id", "db_session", "thread_id"],
|
||||
),
|
||||
# Report generation tool (inline, no Celery)
|
||||
# Report generation tool (inline, short-lived sessions for DB ops)
|
||||
ToolDefinition(
|
||||
name="generate_report",
|
||||
description="Generate a structured Markdown report from provided content",
|
||||
factory=lambda deps: create_generate_report_tool(
|
||||
search_space_id=deps["search_space_id"],
|
||||
db_session=deps["db_session"],
|
||||
thread_id=deps["thread_id"],
|
||||
),
|
||||
requires=["search_space_id", "db_session", "thread_id"],
|
||||
requires=["search_space_id", "thread_id"],
|
||||
),
|
||||
# Link preview tool - fetches Open Graph metadata for URLs
|
||||
ToolDefinition(
|
||||
|
|
|
|||
|
|
@ -6,18 +6,20 @@ that generates a structured Markdown report inline (no Celery). The LLM is
|
|||
called within the tool, the result is saved to the database, and the tool
|
||||
returns immediately with a ready status.
|
||||
|
||||
This follows the same inline pattern as generate_image and display_image,
|
||||
NOT the Celery-based podcast pattern.
|
||||
Uses short-lived database sessions to avoid holding connections during long
|
||||
LLM calls (30-120+ seconds). Each DB operation (read config, save report)
|
||||
opens and closes its own session, ensuring no connection is held idle during
|
||||
the LLM API call.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
from langchain_core.messages import HumanMessage
|
||||
from langchain_core.tools import tool
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.db import Report
|
||||
from app.db import Report, async_session_maker
|
||||
from app.services.llm_service import get_document_summary_llm
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -53,6 +55,7 @@ _REPORT_PROMPT = """You are an expert report writer. Generate a well-structured,
|
|||
A[Source Code] --> B[Compiler]
|
||||
B --> C[Bytecode]
|
||||
```
|
||||
10. When including mathematical formulas or equations, ALWAYS use LaTeX notation. NEVER use backtick code spans or Unicode symbols for math.
|
||||
|
||||
Write the report now:
|
||||
"""
|
||||
|
|
@ -96,7 +99,6 @@ def _extract_metadata(content: str) -> dict[str, Any]:
|
|||
|
||||
def create_generate_report_tool(
|
||||
search_space_id: int,
|
||||
db_session: AsyncSession,
|
||||
thread_id: int | None = None,
|
||||
):
|
||||
"""
|
||||
|
|
@ -105,9 +107,11 @@ def create_generate_report_tool(
|
|||
The tool generates a Markdown report inline using the search space's
|
||||
document summary LLM, saves it to the database, and returns immediately.
|
||||
|
||||
Uses short-lived database sessions for each DB operation so no connection
|
||||
is held during the long LLM API call.
|
||||
|
||||
Args:
|
||||
search_space_id: The user's search space ID
|
||||
db_session: Database session for creating the report record
|
||||
thread_id: The chat thread ID for associating the report
|
||||
|
||||
Returns:
|
||||
|
|
@ -197,14 +201,17 @@ def create_generate_report_tool(
|
|||
User: "Rewrite the report in a more formal tone" → parent_report_id = <previous report_id>
|
||||
User: "I want more details about pricing in here" → parent_report_id = <previous report_id>
|
||||
User: "Include more examples" → parent_report_id = <previous report_id>
|
||||
User: "Can you also cover security in this?" → parent_report_id = <previous report_id>
|
||||
User: "Can you also cover nutrition in this?" → parent_report_id = <previous report_id>
|
||||
User: "Make it more detailed" → parent_report_id = <previous report_id>
|
||||
User: "I want more about X for in here" → parent_report_id = <previous report_id>
|
||||
User: "Not bad, but expand on the budget section" → parent_report_id = <previous report_id>
|
||||
User: "Also mention the competitor landscape" → parent_report_id = <previous report_id>
|
||||
|
||||
Examples of when to LEAVE parent_report_id as None:
|
||||
User: "Generate a report on climate change" → parent_report_id = None (new topic)
|
||||
User: "Write me a report about the budget" → parent_report_id = None (new topic)
|
||||
User: "Create another report, this time about marketing" → parent_report_id = None
|
||||
User: "Now write one about travel trends in Europe" → parent_report_id = None (new topic despite "now")
|
||||
User: "Do the same kind of report but for the fitness industry" → parent_report_id = None (new topic, different subject)
|
||||
|
||||
Args:
|
||||
topic: A short, concise title for the report (maximum 8 words). Keep it brief and descriptive — e.g. "AI in Healthcare Analysis: A Comprehensive Report" instead of "Comprehensive Analysis of Artificial Intelligence Applications in Modern Healthcare Systems".
|
||||
|
|
@ -225,50 +232,37 @@ def create_generate_report_tool(
|
|||
- word_count: Number of words in the report
|
||||
- message: Status message (or "error" field if failed)
|
||||
"""
|
||||
# Resolve the parent report and its group (if versioning)
|
||||
parent_report: Report | None = None
|
||||
# Initialize version tracking variables (used by _save_failed_report closure)
|
||||
parent_report_content: str | None = None
|
||||
report_group_id: int | None = None
|
||||
|
||||
if parent_report_id:
|
||||
parent_report = await db_session.get(Report, parent_report_id)
|
||||
if parent_report:
|
||||
report_group_id = parent_report.report_group_id
|
||||
logger.info(
|
||||
f"[generate_report] Creating new version from parent {parent_report_id} "
|
||||
f"(group {report_group_id})"
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
f"[generate_report] parent_report_id={parent_report_id} not found, "
|
||||
"creating standalone report"
|
||||
)
|
||||
|
||||
async def _save_failed_report(error_msg: str) -> int | None:
|
||||
"""Persist a failed report row so the error is visible later."""
|
||||
"""Persist a failed report row using a short-lived session."""
|
||||
try:
|
||||
failed_report = Report(
|
||||
title=topic,
|
||||
content=None,
|
||||
report_metadata={
|
||||
"status": "failed",
|
||||
"error_message": error_msg,
|
||||
},
|
||||
report_style=report_style,
|
||||
search_space_id=search_space_id,
|
||||
thread_id=thread_id,
|
||||
report_group_id=report_group_id,
|
||||
)
|
||||
db_session.add(failed_report)
|
||||
await db_session.commit()
|
||||
await db_session.refresh(failed_report)
|
||||
# If this is a new group (v1 failed), set group to self
|
||||
if not failed_report.report_group_id:
|
||||
failed_report.report_group_id = failed_report.id
|
||||
await db_session.commit()
|
||||
logger.info(
|
||||
f"[generate_report] Saved failed report {failed_report.id}: {error_msg}"
|
||||
)
|
||||
return failed_report.id
|
||||
async with async_session_maker() as session:
|
||||
failed_report = Report(
|
||||
title=topic,
|
||||
content=None,
|
||||
report_metadata={
|
||||
"status": "failed",
|
||||
"error_message": error_msg,
|
||||
},
|
||||
report_style=report_style,
|
||||
search_space_id=search_space_id,
|
||||
thread_id=thread_id,
|
||||
report_group_id=report_group_id,
|
||||
)
|
||||
session.add(failed_report)
|
||||
await session.commit()
|
||||
await session.refresh(failed_report)
|
||||
# If this is a new group (v1 failed), set group to self
|
||||
if not failed_report.report_group_id:
|
||||
failed_report.report_group_id = failed_report.id
|
||||
await session.commit()
|
||||
logger.info(
|
||||
f"[generate_report] Saved failed report {failed_report.id}: {error_msg}"
|
||||
)
|
||||
return failed_report.id
|
||||
except Exception:
|
||||
logger.exception(
|
||||
"[generate_report] Could not persist failed report row"
|
||||
|
|
@ -276,8 +270,28 @@ def create_generate_report_tool(
|
|||
return None
|
||||
|
||||
try:
|
||||
# Get the LLM instance for this search space
|
||||
llm = await get_document_summary_llm(db_session, search_space_id)
|
||||
# ── Phase 1: READ (short-lived session) ──────────────────────
|
||||
# Fetch parent report and LLM config, then close the session
|
||||
# so no DB connection is held during the long LLM call.
|
||||
async with async_session_maker() as read_session:
|
||||
if parent_report_id:
|
||||
parent_report = await read_session.get(Report, parent_report_id)
|
||||
if parent_report:
|
||||
report_group_id = parent_report.report_group_id
|
||||
parent_report_content = parent_report.content
|
||||
logger.info(
|
||||
f"[generate_report] Creating new version from parent {parent_report_id} "
|
||||
f"(group {report_group_id})"
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
f"[generate_report] parent_report_id={parent_report_id} not found, "
|
||||
"creating standalone report"
|
||||
)
|
||||
|
||||
llm = await get_document_summary_llm(read_session, search_space_id)
|
||||
# read_session closed — connection returned to pool
|
||||
|
||||
if not llm:
|
||||
error_msg = (
|
||||
"No LLM configured. Please configure a language model in Settings."
|
||||
|
|
@ -299,11 +313,11 @@ def create_generate_report_tool(
|
|||
|
||||
# If revising, include previous version content
|
||||
previous_version_section = ""
|
||||
if parent_report and parent_report.content:
|
||||
if parent_report_content:
|
||||
previous_version_section = (
|
||||
"**Previous Version of This Report (refine this based on the instructions above — "
|
||||
"preserve structure and quality, apply only the requested changes):**\n\n"
|
||||
f"{parent_report.content}"
|
||||
f"{parent_report_content}"
|
||||
)
|
||||
|
||||
prompt = _REPORT_PROMPT.format(
|
||||
|
|
@ -314,9 +328,7 @@ def create_generate_report_tool(
|
|||
source_content=source_content[:100000], # Cap source content
|
||||
)
|
||||
|
||||
# Call the LLM inline
|
||||
from langchain_core.messages import HumanMessage
|
||||
|
||||
# ── Phase 2: LLM CALL (no DB connection held) ────────────────
|
||||
response = await llm.ainvoke([HumanMessage(content=prompt)])
|
||||
report_content = response.content
|
||||
|
||||
|
|
@ -347,35 +359,41 @@ def create_generate_report_tool(
|
|||
# Extract metadata (includes "status": "ready")
|
||||
metadata = _extract_metadata(report_content)
|
||||
|
||||
# Save to database
|
||||
report = Report(
|
||||
title=topic,
|
||||
content=report_content,
|
||||
report_metadata=metadata,
|
||||
report_style=report_style,
|
||||
search_space_id=search_space_id,
|
||||
thread_id=thread_id,
|
||||
report_group_id=report_group_id, # None for v1, inherited for v2+
|
||||
)
|
||||
db_session.add(report)
|
||||
await db_session.commit()
|
||||
await db_session.refresh(report)
|
||||
# ── Phase 3: WRITE (short-lived session) ─────────────────────
|
||||
# Save the report to the database, then close the session.
|
||||
async with async_session_maker() as write_session:
|
||||
report = Report(
|
||||
title=topic,
|
||||
content=report_content,
|
||||
report_metadata=metadata,
|
||||
report_style=report_style,
|
||||
search_space_id=search_space_id,
|
||||
thread_id=thread_id,
|
||||
report_group_id=report_group_id,
|
||||
)
|
||||
write_session.add(report)
|
||||
await write_session.commit()
|
||||
await write_session.refresh(report)
|
||||
|
||||
# If this is a brand-new report (v1), set report_group_id = own id
|
||||
if not report.report_group_id:
|
||||
report.report_group_id = report.id
|
||||
await db_session.commit()
|
||||
# If this is a brand-new report (v1), set report_group_id = own id
|
||||
if not report.report_group_id:
|
||||
report.report_group_id = report.id
|
||||
await write_session.commit()
|
||||
|
||||
saved_report_id = report.id
|
||||
saved_group_id = report.report_group_id
|
||||
# write_session closed — connection returned to pool
|
||||
|
||||
logger.info(
|
||||
f"[generate_report] Created report {report.id} "
|
||||
f"(group={report.report_group_id}): "
|
||||
f"[generate_report] Created report {saved_report_id} "
|
||||
f"(group={saved_group_id}): "
|
||||
f"{metadata.get('word_count', 0)} words, "
|
||||
f"{metadata.get('section_count', 0)} sections"
|
||||
)
|
||||
|
||||
return {
|
||||
"status": "ready",
|
||||
"report_id": report.id,
|
||||
"report_id": saved_report_id,
|
||||
"title": topic,
|
||||
"word_count": metadata.get("word_count", 0),
|
||||
"message": f"Report generated successfully: {topic}",
|
||||
|
|
|
|||
|
|
@ -66,6 +66,62 @@ def _strip_wrapping_code_fences(text: str) -> str:
|
|||
return stripped
|
||||
|
||||
|
||||
def _normalize_latex_delimiters(text: str) -> str:
|
||||
"""Convert all LaTeX math delimiters to dollar-sign form.
|
||||
|
||||
Pandoc's ``tex_math_dollars`` extension (on the ``gfm`` reader) handles
|
||||
``$…$`` and ``$$…$$`` natively. This function converts every other
|
||||
delimiter style that LLMs produce into dollar-sign form so pandoc can
|
||||
parse them as math.
|
||||
|
||||
Supported conversions:
|
||||
\\[…\\] → $$…$$ (display math)
|
||||
\\(…\\) → $…$ (inline math)
|
||||
\\begin{equation}…\\end{equation} → $$…$$ (display math)
|
||||
\\begin{displaymath}…\\end{displaymath}→ $$…$$ (display math)
|
||||
\\begin{math}…\\end{math} → $…$ (inline math)
|
||||
`$$…$$` / `$…$` → strip wrapping backticks
|
||||
"""
|
||||
# 1. Block math: \[...\] → $$...$$
|
||||
text = re.sub(r"\\\[([\s\S]*?)\\\]", lambda m: f"$${m.group(1)}$$", text)
|
||||
# 2. Inline math: \(...\) → $...$
|
||||
text = re.sub(r"\\\(([\s\S]*?)\\\)", lambda m: f"${m.group(1)}$", text)
|
||||
# 3. \begin{equation}...\end{equation} → $$...$$
|
||||
text = re.sub(
|
||||
r"\\begin\{equation\}([\s\S]*?)\\end\{equation\}",
|
||||
lambda m: f"$${m.group(1)}$$",
|
||||
text,
|
||||
)
|
||||
# 4. \begin{displaymath}...\end{displaymath} → $$...$$
|
||||
text = re.sub(
|
||||
r"\\begin\{displaymath\}([\s\S]*?)\\end\{displaymath\}",
|
||||
lambda m: f"$${m.group(1)}$$",
|
||||
text,
|
||||
)
|
||||
# 5. \begin{math}...\end{math} → $...$
|
||||
text = re.sub(
|
||||
r"\\begin\{math\}([\s\S]*?)\\end\{math\}",
|
||||
lambda m: f"${m.group(1)}$",
|
||||
text,
|
||||
)
|
||||
# 6. Strip backtick wrapping around math: `$$...$$` → $$...$$ and `$...$` → $...$
|
||||
text = re.sub(r"`(\${1,2})((?:(?!\1).)+)\1`", r"\1\2\1", text)
|
||||
|
||||
# 7. Trim whitespace inside inline math $...$.
|
||||
# Pandoc's tex_math_dollars requires NO space after the opening $ and
|
||||
# NO space before the closing $. LLMs frequently produce "$ e^x $"
|
||||
# or "\( e^x \)" (which step 2 converts to "$ e^x $"). Without
|
||||
# trimming, pandoc treats these as literal dollar-sign text.
|
||||
# We require spaces on BOTH sides to avoid false-positives on
|
||||
# currency like "$50" or "$50 and $100".
|
||||
def _trim_inline_math(m: re.Match) -> str:
|
||||
inner = m.group(1).strip()
|
||||
return f"${inner}$" if inner else m.group(0)
|
||||
|
||||
text = re.sub(r"(?<!\$)\$(?!\$) +(.+?) +\$(?!\$)", _trim_inline_math, text)
|
||||
return text
|
||||
|
||||
|
||||
async def _get_report_with_access(
|
||||
report_id: int,
|
||||
session: AsyncSession,
|
||||
|
|
@ -227,6 +283,10 @@ async def export_report(
|
|||
# Without this, pandoc treats the entire content as a code block.
|
||||
markdown_content = _strip_wrapping_code_fences(report.content)
|
||||
|
||||
# Normalise all LaTeX math delimiters (\(\), \[\], \begin{equation},
|
||||
# etc.) into $/$$ form that pandoc's tex_math_dollars extension can parse.
|
||||
markdown_content = _normalize_latex_delimiters(markdown_content)
|
||||
|
||||
# Convert Markdown to the requested format.
|
||||
#
|
||||
# DOCX: pypandoc (pandoc) handles the full conversion directly.
|
||||
|
|
@ -237,8 +297,9 @@ async def export_report(
|
|||
# bundles the compiler as a native extension. Typst produces
|
||||
# professional styling for tables, headings, code blocks, etc.
|
||||
#
|
||||
# Use "gfm" as the input format because LLM output uses GFM-style
|
||||
# Use "gfm" as the base input format because LLM output uses GFM-style
|
||||
# pipe tables that pandoc's stricter default "markdown" may mangle.
|
||||
# The +tex_math_dollars extension enables $/$$ math recognition.
|
||||
|
||||
def _convert_and_read() -> bytes:
|
||||
"""Run all blocking I/O (tempfile, pandoc/typst, file read, cleanup) in a thread."""
|
||||
|
|
@ -253,7 +314,7 @@ async def export_report(
|
|||
typst_markup: str = pypandoc.convert_text(
|
||||
markdown_content,
|
||||
"typst",
|
||||
format="gfm",
|
||||
format="gfm+tex_math_dollars",
|
||||
extra_args=[
|
||||
"--standalone",
|
||||
"-V",
|
||||
|
|
@ -273,7 +334,7 @@ async def export_report(
|
|||
pypandoc.convert_text(
|
||||
markdown_content,
|
||||
format.value,
|
||||
format="gfm",
|
||||
format="gfm+tex_math_dollars",
|
||||
extra_args=["--standalone"],
|
||||
outputfile=tmp_path,
|
||||
)
|
||||
|
|
|
|||
6461
surfsense_backend/uv.lock
generated
6461
surfsense_backend/uv.lock
generated
File diff suppressed because it is too large
Load diff
|
|
@ -11,10 +11,45 @@ import {
|
|||
import { CheckIcon, CopyIcon } from "lucide-react";
|
||||
import { type FC, memo, type ReactNode, useState } from "react";
|
||||
import remarkGfm from "remark-gfm";
|
||||
import remarkMath from "remark-math";
|
||||
import rehypeKatex from "rehype-katex";
|
||||
import "katex/dist/katex.min.css";
|
||||
import { InlineCitation } from "@/components/assistant-ui/inline-citation";
|
||||
import { TooltipIconButton } from "@/components/assistant-ui/tooltip-icon-button";
|
||||
import { cn } from "@/lib/utils";
|
||||
|
||||
/**
 * Convert all LaTeX delimiter styles to the dollar-sign syntax
 * that remark-math understands. LLMs use various delimiters
 * (\(...\), \[...\], \begin{equation}, etc.) and we need to
 * normalise them all to $ / $$ before the markdown parser runs.
 *
 * Conversions performed, in order:
 *   \[...\]                                  → $$...$$
 *   \(...\)                                  → $...$
 *   \begin{equation}...\end{equation}        → $$...$$
 *   \begin{displaymath}...\end{displaymath}  → $$...$$
 *   \begin{math}...\end{math}                → $...$
 *   `$$...$$` / `$...$`                      → wrapping backticks removed
 *   "text## Heading"                         → heading moved onto its own line
 *
 * @param content Raw markdown text as produced by the model.
 * @returns The markdown with math delimiters and glued headings normalised.
 */
function convertLatexDelimiters(content: string): string {
	// Replacer functions (rather than replacement strings) are used wherever
	// captured LaTeX is re-emitted, so "$" sequences in the output are never
	// interpreted as replacement patterns.

	// 1. Block math: \[...\] → $$...$$
	content = content.replace(/\\\[([\s\S]*?)\\\]/g, (_, inner) => `$$${inner}$$`);
	// 2. Inline math: \(...\) → $...$
	content = content.replace(/\\\(([\s\S]*?)\\\)/g, (_, inner) => `$${inner}$`);
	// 3. Block: \begin{equation}...\end{equation} → $$...$$
	content = content.replace(
		/\\begin\{equation\}([\s\S]*?)\\end\{equation\}/g,
		(_, inner) => `$$${inner}$$`
	);
	// 4. Block: \begin{displaymath}...\end{displaymath} → $$...$$
	content = content.replace(
		/\\begin\{displaymath\}([\s\S]*?)\\end\{displaymath\}/g,
		(_, inner) => `$$${inner}$$`
	);
	// 5. Inline: \begin{math}...\end{math} → $...$
	content = content.replace(/\\begin\{math\}([\s\S]*?)\\end\{math\}/g, (_, inner) => `$${inner}$`);
	// 6. Strip backtick wrapping around math: `$$...$$` → $$...$$ and `$...$` → $...$
	content = content.replace(/`(\${1,2})((?:(?!\1).)+)\1`/g, "$1$2$1");

	// 7. Ensure markdown headings (## ...) always start on their own line.
	//    The character before the hashes must be neither a newline nor another
	//    "#": otherwise the first "#" of an existing "## Title" would itself be
	//    taken as the preceding character and the heading would be split into
	//    "#\n\n# Title".
	//    NOTE(review): this still fires on " # " inside code or prose
	//    (e.g. "x = 1 # comment"); confirm whether that is acceptable.
	content = content.replace(/([^\n#])(#{1,6}\s)/g, "$1\n\n$2");

	return content;
}
|
||||
|
||||
// Citation pattern: [citation:CHUNK_ID] or [citation:doc-CHUNK_ID]
|
||||
// Also matches Chinese brackets 【】 and handles zero-width spaces that LLM sometimes inserts
|
||||
const CITATION_REGEX = /[[【]\u200B?citation:(doc-)?(\d+)\u200B?[\]】]/g;
|
||||
|
|
@ -59,7 +94,8 @@ function parseTextWithCitations(text: string): ReactNode[] {
|
|||
// Reset regex state
|
||||
CITATION_REGEX.lastIndex = 0;
|
||||
|
||||
while ((match = CITATION_REGEX.exec(text)) !== null) {
|
||||
match = CITATION_REGEX.exec(text);
|
||||
while (match !== null) {
|
||||
// Add text before the citation
|
||||
if (match.index > lastIndex) {
|
||||
parts.push(text.substring(lastIndex, match.index));
|
||||
|
|
@ -80,6 +116,7 @@ function parseTextWithCitations(text: string): ReactNode[] {
|
|||
|
||||
lastIndex = match.index + match[0].length;
|
||||
instanceIndex++;
|
||||
match = CITATION_REGEX.exec(text);
|
||||
}
|
||||
|
||||
// Add any remaining text after the last citation
|
||||
|
|
@ -93,9 +130,11 @@ function parseTextWithCitations(text: string): ReactNode[] {
|
|||
const MarkdownTextImpl = () => {
|
||||
return (
|
||||
<MarkdownTextPrimitive
|
||||
remarkPlugins={[remarkGfm]}
|
||||
remarkPlugins={[remarkGfm, remarkMath]}
|
||||
rehypePlugins={[rehypeKatex]}
|
||||
className="aui-md"
|
||||
components={defaultComponents}
|
||||
preprocess={convertLatexDelimiters}
|
||||
/>
|
||||
);
|
||||
};
|
||||
|
|
|
|||
|
|
@ -29,33 +29,53 @@ function stripOuterMarkdownFence(content: string): string {
|
|||
}
|
||||
|
||||
/**
|
||||
* Convert various LaTeX delimiter styles to the dollar-sign syntax
|
||||
* that remark-math understands, and normalise edge-cases that
|
||||
* commonly appear in LLM-generated markdown.
|
||||
* Convert all LaTeX delimiter styles to the double-dollar syntax
|
||||
* that Streamdown's @streamdown/math plugin understands.
|
||||
*
|
||||
* \[...\] → $$ ... $$ (block / display math)
|
||||
* \(...\) → $ ... $ (inline math)
|
||||
* same-line $$…$$ → $ ... $ (inline math — display math
|
||||
* can't live inside table cells)
|
||||
* `$$ … $$` → $$ … $$ (strip wrapping backtick code)
|
||||
* `$ … $` → $ … $ (strip wrapping backtick code)
|
||||
* Streamdown math conventions (different from remark-math!):
|
||||
* $$...$$ on the SAME line → inline math
|
||||
* $$\n...\n$$ on SEPARATE lines → block (display) math
|
||||
*
|
||||
* Conversions performed:
|
||||
* \[...\] → $$\n ... \n$$ (block math)
|
||||
* \(...\) → $$...$$ (inline math, same line)
|
||||
* \begin{equation}...\end{equation} → $$\n ... \n$$ (block math)
|
||||
* \begin{displaymath}...\end{displaymath} → $$\n ... \n$$ (block math)
|
||||
* \begin{math}...\end{math} → $$...$$ (inline math, same line)
|
||||
* `$$ … $$` → $$ … $$ (strip wrapping backtick code)
|
||||
* `$ … $` → $ … $ (strip wrapping backtick code)
|
||||
* $...$ → $$...$$ (normalise single-$ to double-$$)
|
||||
*/
|
||||
function convertLatexDelimiters(content: string): string {
|
||||
// 1. Block math: \[...\] → $$...$$
|
||||
content = content.replace(/\\\[([\s\S]*?)\\\]/g, (_match, inner) => {
|
||||
return `$$${inner}$$`;
|
||||
});
|
||||
// 2. Inline math: \(...\) → $...$
|
||||
content = content.replace(/\\\(([\s\S]*?)\\\)/g, (_match, inner) => {
|
||||
return `$${inner}$`;
|
||||
});
|
||||
// 3. Strip backtick wrapping around math: `$$...$$` → $$...$$ and `$...$` → $...$
|
||||
// 1. Block math: \[...\] → $$\n...\n$$ (display math on separate lines)
|
||||
content = content.replace(/\\\[([\s\S]*?)\\\]/g, (_, inner) => `\n$$\n${inner.trim()}\n$$\n`);
|
||||
// 2. Inline math: \(...\) → $$...$$ (inline math on same line)
|
||||
content = content.replace(/\\\(([\s\S]*?)\\\)/g, (_, inner) => `$$${inner.trim()}$$`);
|
||||
// 3. Block: \begin{equation}...\end{equation} → $$\n...\n$$
|
||||
content = content.replace(
|
||||
/\\begin\{equation\}([\s\S]*?)\\end\{equation\}/g,
|
||||
(_, inner) => `\n$$\n${inner.trim()}\n$$\n`
|
||||
);
|
||||
// 4. Block: \begin{displaymath}...\end{displaymath} → $$\n...\n$$
|
||||
content = content.replace(
|
||||
/\\begin\{displaymath\}([\s\S]*?)\\end\{displaymath\}/g,
|
||||
(_, inner) => `\n$$\n${inner.trim()}\n$$\n`
|
||||
);
|
||||
// 5. Inline: \begin{math}...\end{math} → $$...$$
|
||||
content = content.replace(
|
||||
/\\begin\{math\}([\s\S]*?)\\end\{math\}/g,
|
||||
(_, inner) => `$$${inner.trim()}$$`
|
||||
);
|
||||
// 6. Strip backtick wrapping around math: `$$...$$` → $$...$$ and `$...$` → $...$
|
||||
content = content.replace(/`(\${1,2})((?:(?!\1).)+)\1`/g, "$1$2$1");
|
||||
// 4. Same-line $$...$$ → $...$ (inline math) so it works inside table cells.
|
||||
// True display math has $$ on its own line, so this only affects inline usage.
|
||||
content = content.replace(/\$\$([^\n]+?)\$\$/g, (_match, inner) => {
|
||||
return `$${inner}$`;
|
||||
});
|
||||
// 7. Normalise single-dollar $...$ to double-dollar $$...$$ so they render
|
||||
// reliably in Streamdown (single-$ has strict no-space rules that often fail).
|
||||
// We match $…$ where the content starts with a backslash (LaTeX command)
|
||||
// to avoid converting currency like $50.
|
||||
content = content.replace(
|
||||
/(?<!\$)\$(?!\$)(\\[a-zA-Z][\s\S]*?)(?<!\$)\$(?!\$)/g,
|
||||
(_, inner) => `$$${inner.trim()}$$`
|
||||
);
|
||||
return content;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@ import {
|
|||
DropdownMenuItem,
|
||||
DropdownMenuTrigger,
|
||||
} from "@/components/ui/dropdown-menu";
|
||||
import { Spinner } from "@/components/ui/spinner";
|
||||
import { useMediaQuery } from "@/hooks/use-media-query";
|
||||
import { baseApiService } from "@/lib/apis/base-api.service";
|
||||
import { authenticatedFetch } from "@/lib/auth-utils";
|
||||
|
|
@ -298,14 +297,12 @@ function ReportPanelContent({
|
|||
onClick={() => handleExport("pdf")}
|
||||
disabled={exporting !== null}
|
||||
>
|
||||
{exporting === "pdf" && <Spinner size="xs" />}
|
||||
Download PDF
|
||||
</DropdownMenuItem>
|
||||
<DropdownMenuItem
|
||||
onClick={() => handleExport("docx")}
|
||||
disabled={exporting !== null}
|
||||
>
|
||||
{exporting === "docx" && <Spinner size="xs" />}
|
||||
Download DOCX
|
||||
</DropdownMenuItem>
|
||||
</>
|
||||
|
|
|
|||
|
|
@ -99,9 +99,11 @@
|
|||
"react-json-view-lite": "^2.4.1",
|
||||
"react-syntax-highlighter": "^15.6.1",
|
||||
"react-wrap-balancer": "^1.1.1",
|
||||
"rehype-katex": "^7.0.1",
|
||||
"rehype-raw": "^7.0.0",
|
||||
"rehype-sanitize": "^6.0.0",
|
||||
"remark-gfm": "^4.0.1",
|
||||
"remark-math": "^6.0.0",
|
||||
"server-only": "^0.0.1",
|
||||
"sonner": "^2.0.6",
|
||||
"streamdown": "^2.2.0",
|
||||
|
|
|
|||
6
surfsense_web/pnpm-lock.yaml
generated
6
surfsense_web/pnpm-lock.yaml
generated
|
|
@ -242,6 +242,9 @@ importers:
|
|||
react-wrap-balancer:
|
||||
specifier: ^1.1.1
|
||||
version: 1.1.1(react@19.2.3)
|
||||
rehype-katex:
|
||||
specifier: ^7.0.1
|
||||
version: 7.0.1
|
||||
rehype-raw:
|
||||
specifier: ^7.0.0
|
||||
version: 7.0.0
|
||||
|
|
@ -251,6 +254,9 @@ importers:
|
|||
remark-gfm:
|
||||
specifier: ^4.0.1
|
||||
version: 4.0.1
|
||||
remark-math:
|
||||
specifier: ^6.0.0
|
||||
version: 6.0.0
|
||||
server-only:
|
||||
specifier: ^0.0.1
|
||||
version: 0.0.1
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue