feat: expand report export functionality to support multiple formats (PDF, DOCX, HTML, EPUB, etc.) and enhance UI for format selection

2026-07-20 23:21:06 +02:00 · 2026-03-09 18:31:14 -07:00 · 2026-03-09 18:31:14 -07:00 · 11b84a3fb4
commit 11b84a3fb4
parent ddb070bca8
10 changed files with 5375 additions and 4302 deletions
--- a/surfsense_backend/.gitignore
+++ b/surfsense_backend/.gitignore
@ -12,4 +12,5 @@ celerybeat-schedule*
 celerybeat-schedule.*
 celerybeat-schedule.dir
 celerybeat-schedule.bak
-global_llm_config.yaml
+global_llm_config.yaml
+app/templates/_generated/
--- a/surfsense_backend/app/routes/reports_routes.py
+++ b/surfsense_backend/app/routes/reports_routes.py
@ -4,8 +4,9 @@ Report routes for read, update, export (PDF/DOCX), and delete operations.
 Reports are generated inline by the agent tool during chat and stored as
 Markdown in the database.  Users can edit report content via the Plate editor
 and save changes through the PUT endpoint.
-Export to PDF/DOCX is on-demand — PDF uses pypandoc (Markdown→Typst) + typst-py
-(Typst→PDF); DOCX uses pypandoc directly.
+Export is on-demand in multiple formats (PDF, DOCX, HTML, LaTeX, EPUB, ODT,
+plain text).  PDF uses pypandoc (Markdown->Typst) + typst-py; the rest use
+pypandoc directly with format-specific templates and options.

 Authorization: lightweight search-space membership checks (no granular RBAC)
 since reports are chat-generated artifacts, not standalone managed resources.
@ -36,6 +37,11 @@ from app.db import (
 )
 from app.schemas import ReportContentRead, ReportContentUpdate, ReportRead
 from app.schemas.reports import ReportVersionInfo
+from app.templates.export_helpers import (
+    get_html_css_path,
+    get_reference_docx_path,
+    get_typst_template_path,
+)
 from app.users import current_active_user
 from app.utils.rbac import check_search_space_access

@ -49,6 +55,32 @@ MAX_REPORT_LIST_LIMIT = 500
 class ExportFormat(StrEnum):
+    # Accepted values of the export endpoint's ``format`` query parameter.
+    # Each member needs matching entries in ``_MEDIA_TYPES`` and
+    # ``_FILE_EXTENSIONS``, which are indexed by member when the response
+    # is built.
     PDF = "pdf"
     DOCX = "docx"
+    HTML = "html"
+    LATEX = "latex"
+    EPUB = "epub"
+    ODT = "odt"
+    PLAIN = "plain"
+
+
+# Content-Type sent with the exported file, keyed by export format.  The two
+# text formats declare UTF-8 explicitly because their bytes are produced with
+# ``str.encode("utf-8")``.
+_MEDIA_TYPES: dict[ExportFormat, str] = {
+    ExportFormat.PDF: "application/pdf",
+    ExportFormat.DOCX: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+    ExportFormat.HTML: "text/html; charset=utf-8",
+    ExportFormat.LATEX: "application/x-tex",
+    ExportFormat.EPUB: "application/epub+zip",
+    ExportFormat.ODT: "application/vnd.oasis.opendocument.text",
+    ExportFormat.PLAIN: "text/plain; charset=utf-8",
+}
+
+# Extension used in the download filename.  Kept separate from the enum value
+# because the two differ for some formats ("latex" -> "tex", "plain" -> "txt").
+_FILE_EXTENSIONS: dict[ExportFormat, str] = {
+    ExportFormat.PDF: "pdf",
+    ExportFormat.DOCX: "docx",
+    ExportFormat.HTML: "html",
+    ExportFormat.LATEX: "tex",
+    ExportFormat.EPUB: "epub",
+    ExportFormat.ODT: "odt",
+    ExportFormat.PLAIN: "txt",
+}


 # ---------------------------------------------------------------------------
@ -305,13 +337,14 @@ async def update_report_content(
 async def export_report(
    report_id: int,
    format: ExportFormat = Query(
-        ExportFormat.PDF, description="Export format: pdf or docx"
+        ExportFormat.PDF,
+        description="Export format: pdf, docx, html, pptx, latex, epub, odt, or plain",
    ),
    session: AsyncSession = Depends(get_async_session),
    user: User = Depends(current_active_user),
 ):
    """
-    Export a report as PDF or DOCX.
+    Export a report in the requested format.
    """
    try:
        report = await _get_report_with_access(report_id, session, user)
@ -329,83 +362,124 @@ async def export_report(
        # etc.) into $/$$ form that pandoc's tex_math_dollars extension can parse.
        markdown_content = _normalize_latex_delimiters(markdown_content)

-        # Convert Markdown to the requested format.
-        #
-        # DOCX: pypandoc (pandoc) handles the full conversion directly.
-        #
-        # PDF: two-step pipeline — pypandoc converts Markdown → Typst markup,
-        # then the `typst` Python library compiles Typst → PDF.  This avoids
-        # requiring the Typst CLI on the system PATH; the typst pip package
-        # bundles the compiler as a native extension.  Typst produces
-        # professional styling for tables, headings, code blocks, etc.
-        #
        # Use "gfm" as the base input format because LLM output uses GFM-style
        # pipe tables that pandoc's stricter default "markdown" may mangle.
        # The +tex_math_dollars extension enables $/$$ math recognition.

+        formatted_date = report.created_at.strftime("%B %d, %Y")
+        report_title = report.title or "Report"
+        input_fmt = "gfm+tex_math_dollars"
+        meta_args = ["-M", f"title:{report_title}", "-M", f"date:{formatted_date}"]
+
        def _convert_and_read() -> bytes:
-            """Run all blocking I/O (tempfile, pandoc/typst, file read, cleanup) in a thread."""
+            """Run all blocking I/O in a thread."""
+
+            # -- PDF: Markdown -> Typst markup -> typst-py -> PDF bytes ------
            if format == ExportFormat.PDF:
-                # Step 1: Markdown → Typst markup via pandoc.
-                # We must set mainfont / monofont so the generated template's
-                # `font` parameter is non-empty; without it pandoc emits
-                # `font: ()` which makes Typst error with
-                # "font fallback list must not be empty".
-                # We use fonts that ship embedded inside typst-py so this
-                # works even on systems with no fonts installed.
+                typst_template = str(get_typst_template_path())
                typst_markup: str = pypandoc.convert_text(
                    markdown_content,
                    "typst",
-                    format="gfm+tex_math_dollars",
+                    format=input_fmt,
                    extra_args=[
                        "--standalone",
+                        f"--template={typst_template}",
                        "-V",
                        "mainfont:Libertinus Serif",
                        "-V",
-                        "monofont:DejaVu Sans Mono",
+                        "codefont:DejaVu Sans Mono",
+                        *meta_args,
                    ],
                )
-                # Step 2: Typst markup → PDF via typst Python library
-                pdf_bytes: bytes = typst.compile(typst_markup.encode("utf-8"))
-                return pdf_bytes
-            else:
-                # DOCX: let pandoc handle the full conversion
-                fd, tmp_path = tempfile.mkstemp(suffix=f".{format.value}")
-                os.close(fd)
-                try:
-                    pypandoc.convert_text(
-                        markdown_content,
-                        format.value,
-                        format="gfm+tex_math_dollars",
-                        extra_args=["--standalone"],
-                        outputfile=tmp_path,
-                    )
-                    with open(tmp_path, "rb") as f:
-                        return f.read()
-                finally:
-                    os.unlink(tmp_path)
+                return typst.compile(typst_markup.encode("utf-8"))
+
+            # -- DOCX: styled reference doc ----------------------------------
+            if format == ExportFormat.DOCX:
+                return _pandoc_to_tempfile(
+                    format.value,
+                    [
+                        "--standalone",
+                        f"--reference-doc={get_reference_docx_path()}",
+                        *meta_args,
+                    ],
+                )
+
+            # -- HTML: self-contained with custom CSS ------------------------
+            if format == ExportFormat.HTML:
+                html_str: str = pypandoc.convert_text(
+                    markdown_content,
+                    "html5",
+                    format=input_fmt,
+                    extra_args=[
+                        "--standalone",
+                        "--embed-resources",
+                        f"--css={get_html_css_path()}",
+                        "--syntax-highlighting=pygments",
+                        *meta_args,
+                    ],
+                )
+                return html_str.encode("utf-8")
+
+            # -- EPUB: binary output via tempfile ----------------------------
+            if format == ExportFormat.EPUB:
+                return _pandoc_to_tempfile("epub3", ["--standalone", *meta_args])
+
+            # -- ODT: binary output via tempfile -----------------------------
+            if format == ExportFormat.ODT:
+                return _pandoc_to_tempfile("odt", ["--standalone", *meta_args])
+
+            # -- LaTeX: text output ------------------------------------------
+            if format == ExportFormat.LATEX:
+                tex_str: str = pypandoc.convert_text(
+                    markdown_content,
+                    "latex",
+                    format=input_fmt,
+                    extra_args=["--standalone", *meta_args],
+                )
+                return tex_str.encode("utf-8")
+
+            # -- Plain text: text output -------------------------------------
+            plain_str: str = pypandoc.convert_text(
+                markdown_content,
+                "plain",
+                format=input_fmt,
+                extra_args=["--wrap=auto", "--columns=80"],
+            )
+            return plain_str.encode("utf-8")
+
+        def _pandoc_to_tempfile(output_format: str, extra_args: list[str]) -> bytes:
+            """Convert via pandoc to a binary format using a temp file."""
+            fd, tmp_path = tempfile.mkstemp(suffix=f".{output_format}")
+            os.close(fd)
+            try:
+                pypandoc.convert_text(
+                    markdown_content,
+                    output_format,
+                    format=input_fmt,
+                    extra_args=extra_args,
+                    outputfile=tmp_path,
+                )
+                with open(tmp_path, "rb") as f:
+                    return f.read()
+            finally:
+                os.unlink(tmp_path)

        loop = asyncio.get_running_loop()
        output = await loop.run_in_executor(None, _convert_and_read)

-        # Sanitize filename
        safe_title = (
            "".join(
                c if c.isalnum() or c in " -_" else "_" for c in report.title
            ).strip()[:80]
            or "report"
        )
-
-        media_types = {
-            ExportFormat.PDF: "application/pdf",
-            ExportFormat.DOCX: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
-        }
+        ext = _FILE_EXTENSIONS[format]

        return StreamingResponse(
            io.BytesIO(output),
-            media_type=media_types[format],
+            media_type=_MEDIA_TYPES[format],
            headers={
-                "Content-Disposition": f'attachment; filename="{safe_title}.{format.value}"',
+                "Content-Disposition": f'attachment; filename="{safe_title}.{ext}"',
            },
        )

--- a/surfsense_backend/app/routes/youtube_routes.py
+++ b/surfsense_backend/app/routes/youtube_routes.py
@ -50,9 +50,7 @@ async def get_playlist_videos(
                detail="No videos found in the playlist. It may be private or empty.",
            )

-        video_urls = [
-            f"https://www.youtube.com/watch?v={vid}" for vid in video_ids
-        ]
+        video_urls = [f"https://www.youtube.com/watch?v={vid}" for vid in video_ids]
        return {"video_urls": video_urls, "count": len(video_urls)}

    except HTTPException:
@ -74,12 +72,15 @@ async def _fetch_playlist_via_innertube(playlist_id: str) -> list[str]:
    proxies = get_requests_proxies()

    try:
-        async with aiohttp.ClientSession() as session, session.post(
-            _INNERTUBE_API_URL,
-            json=payload,
-            headers={"Content-Type": "application/json"},
-            proxy=proxies["http"] if proxies else None,
-        ) as response:
+        async with (
+            aiohttp.ClientSession() as session,
+            session.post(
+                _INNERTUBE_API_URL,
+                json=payload,
+                headers={"Content-Type": "application/json"},
+                proxy=proxies["http"] if proxies else None,
+            ) as response,
+        ):
            if response.status != 200:
                logger.warning(
                    "Innertube API returned %d for playlist %s",
@ -118,14 +119,14 @@ async def _fetch_playlist_via_html(playlist_id: str) -> list[str]:
                proxy=proxies["http"] if proxies else None,
            ) as response,
        ):
-                if response.status != 200:
-                    logger.warning(
-                        "HTML fallback returned %d for playlist %s",
-                        response.status,
-                        playlist_id,
-                    )
-                    return []
-                html = await response.text()
+            if response.status != 200:
+                logger.warning(
+                    "HTML fallback returned %d for playlist %s",
+                    response.status,
+                    playlist_id,
+                )
+                return []
+            html = await response.text()

        yt_data = _extract_yt_initial_data(html)
        if not yt_data:
--- a/surfsense_backend/app/templates/init.py
+++ b/surfsense_backend/app/templates/init.py
--- a/surfsense_backend/app/templates/export_helpers.py
+++ b/surfsense_backend/app/templates/export_helpers.py
@ -0,0 +1,323 @@
+"""
+Helpers for report export templates.
+
+* ``get_typst_template_path()``  - path to the custom Pandoc -> Typst template.
+* ``get_html_css_path()``        - path to the CSS stylesheet for HTML exports.
+* ``get_reference_docx_path()``  - path to a styled reference.docx for Pandoc.
+* ``get_reference_pptx_path()``  - path to a styled reference.pptx for Pandoc.
+
+The reference DOCX is generated lazily on first call from Pandoc's built-in
+default, then restyled with *python-docx* and cached on disk so subsequent
+exports are instant.
+"""
+
+from __future__ import annotations
+
+import subprocess
+import threading
+from pathlib import Path
+
+# Directory containing this module; the static templates live alongside it.
+_DIR = Path(__file__).resolve().parent
+# Output directory for the lazily generated reference documents.
+_GENERATED_DIR = _DIR / "_generated"
+_REFERENCE_DOCX = _GENERATED_DIR / "reference.docx"
+_REFERENCE_PPTX = _GENERATED_DIR / "reference.pptx"
+# Static assets referenced by path when invoking pandoc.
+_TYPST_TEMPLATE = _DIR / "report_pdf.typst"
+_HTML_CSS = _DIR / "report_html.css"
+
+# One lock per generated artifact so that only one thread in this process
+# builds a given reference file at a time.
+_docx_lock = threading.Lock()
+_pptx_lock = threading.Lock()
+
+
+def get_typst_template_path() -> Path:
+    """Return the path of the Pandoc -> Typst template (``report_pdf.typst``)."""
+    return _TYPST_TEMPLATE
+
+
+def get_html_css_path() -> Path:
+    """Return the path of the stylesheet for HTML exports (``report_html.css``)."""
+    return _HTML_CSS
+
+
+def get_reference_pptx_path() -> Path:
+    """Return path to the styled reference.pptx, creating it if absent."""
+    if _REFERENCE_PPTX.exists():
+        return _REFERENCE_PPTX
+    with _pptx_lock:
+        if _REFERENCE_PPTX.exists():
+            return _REFERENCE_PPTX
+        _generate_reference_pptx()
+    return _REFERENCE_PPTX
+
+
+def get_reference_docx_path() -> Path:
+    """Return path to the styled reference.docx, creating it if absent."""
+    if _REFERENCE_DOCX.exists():
+        return _REFERENCE_DOCX
+    with _docx_lock:
+        if _REFERENCE_DOCX.exists():
+            return _REFERENCE_DOCX
+        _generate_reference_docx()
+    return _REFERENCE_DOCX
+
+
+# ---------------------------------------------------------------------------
+# Reference DOCX generation
+# ---------------------------------------------------------------------------
+
+_HEADING_COLOR_RGB = (0x1E, 0x29, 0x3B)  # Slate-900
+_ACCENT_RGB = (0x25, 0x63, 0xEB)  # Blue-600
+
+
+def _generate_reference_docx() -> None:
+    """Build a professional reference.docx from Pandoc's default + restyling."""
+    import pypandoc
+    from docx import Document
+    from docx.enum.text import WD_ALIGN_PARAGRAPH
+    from docx.shared import Inches, Pt, RGBColor
+
+    _GENERATED_DIR.mkdir(parents=True, exist_ok=True)
+
+    # Step 1 - extract Pandoc's built-in reference.docx (contains all the
+    # style names that Pandoc maps its output to).
+    pandoc_bin = pypandoc.get_pandoc_path()
+    result = subprocess.run(
+        [pandoc_bin, "--print-default-data-file", "reference.docx"],
+        capture_output=True,
+        check=True,
+    )
+    _REFERENCE_DOCX.write_bytes(result.stdout)
+
+    # Step 2 - open and restyle
+    doc = Document(str(_REFERENCE_DOCX))
+
+    heading_color = RGBColor(*_HEADING_COLOR_RGB)
+    accent_color = RGBColor(*_ACCENT_RGB)
+
+    # -- Page setup ----------------------------------------------------------
+    for section in doc.sections:
+        section.page_width = Inches(8.5)
+        section.page_height = Inches(11)
+        section.left_margin = Inches(1.25)
+        section.right_margin = Inches(1.25)
+        section.top_margin = Inches(1)
+        section.bottom_margin = Inches(1)
+        _add_page_number_footer(section)
+
+    # -- Restyle existing styles ---------------------------------------------
+    heading_sizes = {1: 24, 2: 18, 3: 14, 4: 12, 5: 11, 6: 11}
+
+    for style in doc.styles:
+        name = style.name or ""
+
+        # Normal / body text
+        if name in ("Normal", "Body Text", "First Paragraph"):
+            style.font.name = "Calibri"
+            style.font.size = Pt(11)
+            pf = style.paragraph_format
+            pf.space_after = Pt(6)
+            pf.space_before = Pt(0)
+            pf.line_spacing = 1.15
+            if name == "First Paragraph":
+                pf.space_before = Pt(2)
+
+        # Headings 1-6
+        elif name.startswith("Heading") and name[-1:].isdigit():
+            level = int(name[-1])
+            style.font.name = "Calibri"
+            style.font.bold = True
+            style.font.color.rgb = heading_color
+            style.font.size = Pt(heading_sizes.get(level, 11))
+            pf = style.paragraph_format
+            pf.space_before = Pt(18 if level <= 2 else 12)
+            pf.space_after = Pt(6)
+            pf.keep_with_next = True
+            if level >= 4:
+                style.font.bold = False
+                style.font.italic = True
+
+        # Source Code (code blocks)
+        elif name == "Source Code":
+            style.font.name = "Consolas"
+            style.font.size = Pt(9.5)
+            pf = style.paragraph_format
+            pf.space_before = Pt(4)
+            pf.space_after = Pt(4)
+            pf.line_spacing = 1.0
+            _set_paragraph_shading(pf, "F8FAFC")
+
+        # Verbatim Char (inline code)
+        elif name == "Verbatim Char":
+            style.font.name = "Consolas"
+            style.font.size = Pt(9.5)
+
+        # Block Text (block quotes)
+        elif name == "Block Text":
+            style.font.italic = True
+            style.font.color.rgb = RGBColor(0x64, 0x74, 0x8B)
+            pf = style.paragraph_format
+            pf.left_indent = Inches(0.4)
+            pf.space_before = Pt(6)
+            pf.space_after = Pt(6)
+
+        # Hyperlink
+        elif name == "Hyperlink":
+            style.font.color.rgb = accent_color
+            style.font.underline = True
+
+        # Compact (tight lists)
+        elif name == "Compact":
+            style.font.name = "Calibri"
+            style.font.size = Pt(11)
+            if style.paragraph_format:
+                style.paragraph_format.space_after = Pt(2)
+
+        # Title
+        elif name == "Title":
+            style.font.name = "Calibri"
+            style.font.size = Pt(28)
+            style.font.bold = True
+            style.font.color.rgb = heading_color
+            pf = style.paragraph_format
+            pf.alignment = WD_ALIGN_PARAGRAPH.CENTER
+            pf.space_after = Pt(4)
+
+        # Subtitle
+        elif name == "Subtitle":
+            style.font.name = "Calibri"
+            style.font.size = Pt(14)
+            style.font.color.rgb = RGBColor(0x64, 0x74, 0x8B)
+            pf = style.paragraph_format
+            pf.alignment = WD_ALIGN_PARAGRAPH.CENTER
+            pf.space_after = Pt(12)
+
+        # Date
+        elif name == "Date":
+            style.font.name = "Calibri"
+            style.font.size = Pt(11)
+            style.font.color.rgb = RGBColor(0x64, 0x74, 0x8B)
+            pf = style.paragraph_format
+            pf.alignment = WD_ALIGN_PARAGRAPH.CENTER
+
+    doc.save(str(_REFERENCE_DOCX))
+
+
+# ---------------------------------------------------------------------------
+# Reference PPTX generation
+# ---------------------------------------------------------------------------
+
+
+def _generate_reference_pptx() -> None:
+    """Build a reference.pptx with smaller fonts for report-to-slide conversion."""
+    import pypandoc
+    from lxml import etree
+
+    _GENERATED_DIR.mkdir(parents=True, exist_ok=True)
+
+    pandoc_bin = pypandoc.get_pandoc_path()
+    result = subprocess.run(
+        [pandoc_bin, "--print-default-data-file", "reference.pptx"],
+        capture_output=True,
+        check=True,
+    )
+    _REFERENCE_PPTX.write_bytes(result.stdout)
+
+    from pptx import Presentation
+
+    prs = Presentation(str(_REFERENCE_PPTX))
+    master = prs.slide_masters[0]
+
+    ns = {
+        "a": "http://schemas.openxmlformats.org/drawingml/2006/main",
+        "p": "http://schemas.openxmlformats.org/presentationml/2006/main",
+    }
+
+    # Shrink body text: 24pt -> 16pt base, scaling down per level
+    body_sizes = [1600, 1400, 1300, 1200, 1100, 1100, 1100, 1100, 1100]
+    body_style = master._element.find(".//p:txStyles/p:bodyStyle", ns)
+    if body_style is not None:
+        for lvl_el in body_style:
+            tag = etree.QName(lvl_el).localname
+            if tag.startswith("lvl") and tag.endswith("pPr"):
+                idx = int(tag[3]) - 1
+                def_rpr = lvl_el.find("a:defRPr", ns)
+                if def_rpr is not None and idx < len(body_sizes):
+                    def_rpr.set("sz", str(body_sizes[idx]))
+
+    # Shrink title: 33pt -> 26pt
+    title_style = master._element.find(".//p:txStyles/p:titleStyle", ns)
+    if title_style is not None:
+        for lvl_el in title_style:
+            def_rpr = lvl_el.find("a:defRPr", ns)
+            if def_rpr is not None:
+                def_rpr.set("sz", "2600")
+
+    prs.save(str(_REFERENCE_PPTX))
+
+
+# ---------------------------------------------------------------------------
+# OOXML helpers
+# ---------------------------------------------------------------------------
+
+
+def _set_paragraph_shading(paragraph_format, hex_color: str) -> None:
+    """Apply background shading to a paragraph style via raw OOXML."""
+    from docx.oxml import OxmlElement
+    from docx.oxml.ns import qn
+
+    ppr = paragraph_format._element if hasattr(paragraph_format, "_element") else None
+    if ppr is None:
+        return
+    shd = OxmlElement("w:shd")
+    shd.set(qn("w:val"), "clear")
+    shd.set(qn("w:color"), "auto")
+    shd.set(qn("w:fill"), hex_color)
+    ppr.append(shd)
+
+
+def _add_page_number_footer(section) -> None:
+    """Add a centered page number to the section footer via OOXML."""
+    from docx.oxml import OxmlElement
+    from docx.oxml.ns import qn
+
+    footer = section.footer
+    footer.is_linked_to_previous = False
+    p = footer.paragraphs[0] if footer.paragraphs else footer.add_paragraph()
+    p.alignment = 1  # CENTER
+
+    run = p.add_run()
+    rpr = OxmlElement("w:rPr")
+    rsz = OxmlElement("w:sz")
+    rsz.set(qn("w:val"), "18")  # 9pt
+    rpr.append(rsz)
+    rcolor = OxmlElement("w:color")
+    rcolor.set(qn("w:val"), "64748B")
+    rpr.append(rcolor)
+    run._element.append(rpr)
+
+    run.add_text("Page ")
+
+    fld_char_begin = OxmlElement("w:fldChar")
+    fld_char_begin.set(qn("w:fldCharType"), "begin")
+    run._element.append(fld_char_begin)
+
+    instr = OxmlElement("w:instrText")
+    instr.set(qn("xml:space"), "preserve")
+    instr.text = " PAGE "
+    run._element.append(instr)
+
+    fld_char_end = OxmlElement("w:fldChar")
+    fld_char_end.set(qn("w:fldCharType"), "end")
+    run._element.append(fld_char_end)
+
+    run.add_text(" of ")
+
+    fld_char_begin2 = OxmlElement("w:fldChar")
+    fld_char_begin2.set(qn("w:fldCharType"), "begin")
+    run._element.append(fld_char_begin2)
+
+    instr2 = OxmlElement("w:instrText")
+    instr2.set(qn("xml:space"), "preserve")
+    instr2.text = " NUMPAGES "
+    run._element.append(instr2)
+
+    fld_char_end2 = OxmlElement("w:fldChar")
+    fld_char_end2.set(qn("w:fldCharType"), "end")
+    run._element.append(fld_char_end2)
--- a/surfsense_backend/app/templates/report_html.css
+++ b/surfsense_backend/app/templates/report_html.css
@ -0,0 +1,289 @@
+/* SurfSense - Professional HTML Report Stylesheet */
+
+:root {
+  --accent: #2563eb;
+  --heading: #1e293b;
+  --muted: #64748b;
+  --border: #e2e8f0;
+  --subtle-bg: #f8fafc;
+  --body-text: #334155;
+}
+
+*,
+*::before,
+*::after {
+  box-sizing: border-box;
+}
+
+html {
+  font-size: 16px;
+  -webkit-text-size-adjust: 100%;
+}
+
+body {
+  max-width: 52rem;
+  margin: 2rem auto;
+  padding: 0 1.5rem;
+  font-family: "Segoe UI", system-ui, -apple-system, BlinkMacSystemFont,
+    "Helvetica Neue", Arial, sans-serif;
+  font-size: 1rem;
+  line-height: 1.7;
+  color: var(--body-text);
+  background: #fff;
+}
+
+/* Title block generated by pandoc --standalone with -M title */
+header#title-block-header {
+  text-align: center;
+  margin-bottom: 2.5rem;
+  padding-bottom: 1.5rem;
+  border-bottom: 1px solid var(--border);
+}
+
+header#title-block-header h1.title {
+  font-size: 2.25rem;
+  font-weight: 700;
+  color: var(--heading);
+  margin: 0 0 0.25rem;
+  line-height: 1.2;
+}
+
+header#title-block-header p.subtitle {
+  font-size: 1.15rem;
+  color: var(--muted);
+  margin: 0.5rem 0 0;
+}
+
+header#title-block-header p.date {
+  font-size: 0.95rem;
+  color: var(--muted);
+  margin: 0.75rem 0 0;
+}
+
+header#title-block-header p.author {
+  font-size: 1rem;
+  color: var(--body-text);
+  margin: 0.25rem 0 0;
+}
+
+/* Headings */
+h1, h2, h3, h4, h5, h6 {
+  color: var(--heading);
+  margin-top: 1.8em;
+  margin-bottom: 0.6em;
+  line-height: 1.3;
+}
+
+h1 {
+  font-size: 1.75rem;
+  font-weight: 700;
+  padding-bottom: 0.3em;
+  border-bottom: 2px solid var(--accent);
+}
+
+h2 {
+  font-size: 1.4rem;
+  font-weight: 700;
+  padding-bottom: 0.2em;
+  border-bottom: 1px solid var(--border);
+}
+
+h3 {
+  font-size: 1.15rem;
+  font-weight: 600;
+}
+
+h4 {
+  font-size: 1rem;
+  font-weight: 600;
+  color: var(--muted);
+}
+
+h5, h6 {
+  font-size: 0.95rem;
+  font-weight: 600;
+  color: var(--muted);
+}
+
+/* Paragraphs & text */
+p {
+  margin: 0 0 1em;
+}
+
+a {
+  color: var(--accent);
+  text-decoration: none;
+}
+
+a:hover {
+  text-decoration: underline;
+}
+
+strong {
+  font-weight: 600;
+  color: var(--heading);
+}
+
+/* Lists */
+ul, ol {
+  margin: 0 0 1em;
+  padding-left: 1.75em;
+}
+
+li {
+  margin-bottom: 0.3em;
+}
+
+li > ul, li > ol {
+  margin-top: 0.3em;
+  margin-bottom: 0;
+}
+
+/* Code */
+code {
+  font-family: "Cascadia Code", "Fira Code", "JetBrains Mono", Consolas,
+    "Courier New", monospace;
+  font-size: 0.875em;
+  background: var(--subtle-bg);
+  border: 1px solid var(--border);
+  border-radius: 4px;
+  padding: 0.15em 0.4em;
+}
+
+pre {
+  background: var(--subtle-bg);
+  border: 1px solid var(--border);
+  border-radius: 6px;
+  padding: 1em 1.25em;
+  overflow-x: auto;
+  margin: 0 0 1.25em;
+  line-height: 1.5;
+}
+
+pre code {
+  background: none;
+  border: none;
+  border-radius: 0;
+  padding: 0;
+  font-size: 0.85em;
+}
+
+/* Tables */
+table {
+  width: 100%;
+  border-collapse: collapse;
+  margin: 0 0 1.25em;
+  font-size: 0.95em;
+}
+
+thead th {
+  text-align: left;
+  font-weight: 600;
+  color: var(--heading);
+  padding: 0.6em 0.75em;
+  border-bottom: 2px solid var(--heading);
+}
+
+tbody td {
+  padding: 0.5em 0.75em;
+  border-bottom: 1px solid var(--border);
+}
+
+tbody tr:last-child td {
+  border-bottom: none;
+}
+
+/* Block quotes */
+blockquote {
+  margin: 0 0 1.25em;
+  padding: 0.75em 1em;
+  border-left: 4px solid var(--accent);
+  background: var(--subtle-bg);
+  color: var(--muted);
+  border-radius: 0 4px 4px 0;
+}
+
+blockquote p {
+  margin: 0;
+}
+
+blockquote p + p {
+  margin-top: 0.5em;
+}
+
+/* Horizontal rule */
+hr {
+  border: none;
+  border-top: 1px solid var(--border);
+  margin: 2em 0;
+}
+
+/* Images */
+img {
+  max-width: 100%;
+  height: auto;
+  border-radius: 6px;
+}
+
+figure {
+  margin: 1.5em 0;
+  text-align: center;
+}
+
+figcaption {
+  font-size: 0.875em;
+  color: var(--muted);
+  margin-top: 0.5em;
+}
+
+/* Definition lists */
+dt {
+  font-weight: 600;
+  color: var(--heading);
+  margin-top: 1em;
+}
+
+dd {
+  margin-left: 1.5em;
+  margin-bottom: 0.5em;
+}
+
+/* Footnotes */
+section.footnotes {
+  margin-top: 2em;
+  padding-top: 1em;
+  border-top: 1px solid var(--border);
+  font-size: 0.9em;
+  color: var(--muted);
+}
+
+/* Pandoc syntax highlighting (keep pandoc's generated highlight classes) */
+div.sourceCode {
+  background: var(--subtle-bg);
+  border: 1px solid var(--border);
+  border-radius: 6px;
+  margin: 0 0 1.25em;
+}
+
+div.sourceCode pre {
+  border: none;
+  margin: 0;
+}
+
+/* Print styles */
+@media print {
+  body {
+    max-width: none;
+    margin: 0;
+    padding: 0;
+    font-size: 11pt;
+  }
+
+  pre, blockquote {
+    page-break-inside: avoid;
+  }
+
+  h1, h2, h3, h4 {
+    page-break-after: avoid;
+  }
+}
--- a/surfsense_backend/app/templates/report_pdf.typst
+++ b/surfsense_backend/app/templates/report_pdf.typst
@ -0,0 +1,333 @@
+// ---------------------------------------------------------------------------
+// SurfSense – Professional PDF Report Template (Pandoc → Typst)
+//
+// This file is a *pandoc template* (uses $$variable$$ syntax) that emits Typst
+// source code.  Pandoc substitutes the variables at conversion time; Typst
+// then compiles the result into a PDF.
+// ---------------------------------------------------------------------------
+
+// ── Pandoc helpers ─────────────────────────────────────────────────────────
+
+// Rule drawn wherever the converted body references `horizontalrule`
+// (presumably what pandoc emits for Markdown thematic breaks - confirm).
+#let horizontalrule = line(length: 100%, stroke: 0.5pt + luma(180))
+
+// Term lists: bold term, then the description in a block indented 1.5em and
+// pulled up 0.4em towards the term.
+#show terms: it => {
+  it.children
+    .map(child => [
+      #strong[#child.term]
+      #block(inset: (left: 1.5em, top: -0.4em))[#child.description]
+    ])
+    .join()
+}
+
+// Template-level table cell padding; conf sets its own table inset for the
+// document it wraps.
+#set table(inset: 8pt)
+
+// Caption placement is taken from the pandoc variables table-caption-position
+// and figure-caption-position, falling back to top for tables and bottom for
+// images.
+#show figure.where(
+  kind: table
+): set figure.caption(position: $if(table-caption-position)$$table-caption-position$$else$top$endif$)
+
+#show figure.where(
+  kind: image
+): set figure.caption(position: $if(figure-caption-position)$$figure-caption-position$$else$bottom$endif$)
+
+$if(highlighting-definitions)$
+$highlighting-definitions$
+
+$endif$
+
+// ── Colour palette ─────────────────────────────────────────────────────────
+
+#let accent      = rgb("#2563eb")   // Blue-600
+#let heading-dark = rgb("#1e293b")  // Slate-800
+#let subtle-bg    = rgb("#f8fafc")  // Slate-50
+#let border-color = rgb("#e2e8f0")  // Slate-200
+#let muted        = rgb("#64748b")  // Slate-500
+
+// ── conf – document setup & styling ────────────────────────────────────────
+
+// conf: document setup and styling, applied to the whole document.
+//
+// Parameters read by the body:
+//   title, subtitle, authors, date - title block (drawn only when title is
+//                                    set); title also appears in the header
+//   keywords                       - stored in the document metadata
+//   abstract-title, abstract       - abstract block with an accent bar
+//   margin, paper, cols            - page geometry
+//   lang, region, font, fontsize   - body text settings
+//   mathfont, codefont             - optional fonts for math and raw text
+//   linestretch                    - multiplier on the 0.65em base leading
+//   sectionnumbering               - heading numbering pattern; none hides it
+//   doc                            - the document content
+//
+// Accepted but not read: thanks, linkcolor, citecolor, filecolor,
+// pagenumbering.  The footer always shows "Page X of Y".  The call site at the
+// end of this template passes pagenumbering: none unless the page-numbering
+// variable is set, so honouring it here would remove the footer from the
+// default output.
+#let conf(
+  title: none,
+  subtitle: none,
+  authors: (),
+  keywords: (),
+  date: none,
+  abstract-title: none,
+  abstract: none,
+  thanks: none,
+  cols: 1,
+  // Default matches the layout this template has always produced; a margin
+  // supplied by the caller replaces it.
+  margin: (x: 1.25in, top: 1.2in, bottom: 1in),
+  paper: "us-letter",
+  lang: "en",
+  region: "US",
+  font: (),
+  fontsize: 11pt,
+  mathfont: none,
+  codefont: none,
+  linestretch: 1.5,
+  sectionnumbering: none,
+  linkcolor: none,
+  citecolor: none,
+  filecolor: none,
+  pagenumbering: "1",
+  doc,
+) = {
+  set document(title: title, keywords: keywords)
+
+  // ── Page layout ──────────────────────────────────────────────────────────
+
+  set page(
+    paper: paper,
+    margin: margin,
+    columns: cols,
+    numbering: "1",
+
+    // Running header on every page except the first: title on the left,
+    // "Report" on the right, thin rule underneath.
+    header: context {
+      if counter(page).get().first() > 1 {
+        set text(size: 8.5pt, fill: muted)
+        if title != none { title } else { [] }
+        h(1fr)
+        text[Report]
+        v(4pt)
+        line(length: 100%, stroke: 0.4pt + border-color)
+      }
+    },
+
+    // Footer on every page: thin rule, then a centred "Page X of Y"
+    // (both: true supplies the final page number to the pattern).
+    footer: context {
+      set text(size: 8.5pt, fill: muted)
+      line(length: 100%, stroke: 0.4pt + border-color)
+      v(4pt)
+      h(1fr)
+      [Page #counter(page).display("1 of 1", both: true)]
+      h(1fr)
+    },
+  )
+
+  // ── Typography ───────────────────────────────────────────────────────────
+
+  set text(lang: lang, region: region, font: font, size: fontsize)
+  set par(justify: true, leading: linestretch * 0.65em, first-line-indent: 0pt)
+
+  // ── Headings ─────────────────────────────────────────────────────────────
+
+  set heading(numbering: sectionnumbering)
+
+  // The show rules below replace the default heading rendering, so the
+  // section number has to be drawn explicitly; otherwise sectionnumbering
+  // would have no visible effect.  With numbering unset this is just the body.
+  let heading-label(it) = {
+    if it.numbering != none {
+      counter(heading).display(it.numbering)
+      h(0.4em)
+    }
+    it.body
+  }
+
+  show heading.where(level: 1): it => block(above: 1.6em, below: 0.8em, breakable: false)[
+    #text(size: 1.5em, weight: "bold", fill: heading-dark)[#heading-label(it)]
+    #v(0.2em)
+    #line(length: 100%, stroke: 1.5pt + accent)
+  ]
+
+  show heading.where(level: 2): it => block(above: 1.4em, below: 0.6em, breakable: false)[
+    #text(size: 1.25em, weight: "bold", fill: heading-dark)[#heading-label(it)]
+    #v(0.1em)
+    #line(length: 40%, stroke: 0.75pt + border-color)
+  ]
+
+  show heading.where(level: 3): it => block(above: 1.2em, below: 0.5em, breakable: false)[
+    #text(size: 1.1em, weight: "semibold", fill: heading-dark)[#heading-label(it)]
+  ]
+
+  show heading.where(level: 4): it => block(above: 1em, below: 0.4em, breakable: false)[
+    #text(size: 1em, weight: "semibold", fill: muted, style: "italic")[#heading-label(it)]
+  ]
+
+  // ── Code blocks ──────────────────────────────────────────────────────────
+
+  // Block code: full-width tinted panel that may break across pages.
+  show raw.where(block: true): it => {
+    block(
+      fill: subtle-bg,
+      stroke: 0.5pt + border-color,
+      inset: (x: 12pt, y: 10pt),
+      radius: 4pt,
+      width: 100%,
+      breakable: true,
+    )[#text(size: 9pt)[#it]]
+  }
+
+  // Inline code: small tinted box around the text.
+  show raw.where(block: false): box.with(
+    fill: subtle-bg,
+    stroke: 0.5pt + border-color,
+    inset: (x: 4pt, y: 2pt),
+    radius: 2pt,
+  )
+
+  // ── Tables ───────────────────────────────────────────────────────────────
+
+  // Horizontal rules only; no vertical cell borders.
+  set table(
+    inset: (x: 10pt, y: 6pt),
+    stroke: (x: none, y: 0.5pt + border-color),
+  )
+
+  // ── Links ────────────────────────────────────────────────────────────────
+
+  show link: set text(fill: accent)
+
+  // ── Block quotes ─────────────────────────────────────────────────────────
+
+  show quote: it => block(
+    inset: (left: 14pt, rest: 8pt),
+    stroke: (left: 3pt + accent),
+  )[#text(fill: muted)[#it.body]]
+
+  // ── Math ─────────────────────────────────────────────────────────────────
+
+  // Font overrides apply only when the caller supplied one.
+  show math.equation: set text(font: mathfont) if mathfont != none
+  show raw: set text(font: codefont) if codefont != none
+
+  // ── Title block ──────────────────────────────────────────────────────────
+
+  if title != none {
+    v(2em)
+    align(center)[
+      #block(below: 0.6em)[
+        #text(size: 2em, weight: "bold", fill: heading-dark)[#title]
+      ]
+      #if subtitle != none {
+        block(below: 0.5em)[
+          #text(size: 1.3em, fill: muted)[#subtitle]
+        ]
+      }
+      #if authors != none and authors != () {
+        v(0.2em)
+        // At most three author names per row; further authors wrap.
+        grid(
+          columns: (1fr,) * calc.min(authors.len(), 3),
+          row-gutter: 1.5em,
+          ..authors.map(author => align(center)[
+            #text(weight: "semibold")[#author.name]
+          ]),
+        )
+      }
+      #if date != none {
+        block(above: 0.6em)[
+          #text(size: 0.95em, fill: muted)[#date]
+        ]
+      }
+    ]
+    v(0.6em)
+    line(length: 60%, stroke: 1pt + border-color)
+    v(1.2em)
+  }
+
+  if abstract != none {
+    block(
+      inset: (x: 2em, y: 1em),
+      stroke: (left: 3pt + accent),
+    )[
+      #text(weight: "semibold")[#abstract-title] #h(0.5em) #abstract
+    ]
+    v(1em)
+  }
+
+  doc
+}
+
+// ── Smart quotes ───────────────────────────────────────────────────────────
+
+$if(smart)$
+$else$
+#set smartquote(enabled: false)
+
+$endif$
+// ── Apply conf to document ─────────────────────────────────────────────────
+
+$for(header-includes)$
+$header-includes$
+
+$endfor$
+// Wrap the whole document in conf.  Each argument is emitted only when the
+// matching pandoc variable is set, so unset ones fall back to conf's own
+// defaults; pagenumbering and cols are always passed.  Author entries carry
+// name, affiliation and email, although conf only renders the name.
+#show: doc => conf(
+$if(title)$
+  title: [$title$],
+$endif$
+$if(subtitle)$
+  subtitle: [$subtitle$],
+$endif$
+$if(author)$
+  authors: (
+$for(author)$
+$if(author.name)$
+    ( name: [$author.name$],
+      affiliation: [$author.affiliation$],
+      email: [$author.email$] ),
+$else$
+    ( name: [$author$],
+      affiliation: "",
+      email: "" ),
+$endif$
+$endfor$
+    ),
+$endif$
+$if(date)$
+  date: [$date$],
+$endif$
+$if(lang)$
+  lang: "$lang$",
+$endif$
+$if(region)$
+  region: "$region$",
+$endif$
+$if(abstract-title)$
+  abstract-title: [$abstract-title$],
+$endif$
+$if(abstract)$
+  abstract: [$abstract$],
+$endif$
+$if(margin)$
+  margin: ($for(margin/pairs)$$margin.key$: $margin.value$,$endfor$),
+$endif$
+$if(papersize)$
+  paper: "$papersize$",
+$endif$
+$if(mainfont)$
+  font: ("$mainfont$",),
+$endif$
+$if(fontsize)$
+  fontsize: $fontsize$,
+$endif$
+$if(mathfont)$
+  mathfont: ($for(mathfont)$"$mathfont$",$endfor$),
+$endif$
+$if(codefont)$
+  codefont: ($for(codefont)$"$codefont$",$endfor$),
+$endif$
+$if(linestretch)$
+  linestretch: $linestretch$,
+$endif$
+$if(section-numbering)$
+  sectionnumbering: "$section-numbering$",
+$endif$
+  pagenumbering: $if(page-numbering)$"$page-numbering$"$else$none$endif$,
+  cols: $if(columns)$$columns$$else$1$endif$,
+  doc,
+)
+
+$for(include-before)$
+$include-before$
+
+$endfor$
+$if(toc)$
+#outline(
+  title: auto,
+  depth: $toc-depth$
+);
+$endif$
+
+$body$
+
+$if(citations)$
+$for(nocite-ids)$
+#cite(label("${it}"), form: none)
+$endfor$
+$if(csl)$
+
+#set bibliography(style: "$csl$")
+$elseif(bibliographystyle)$
+
+#set bibliography(style: "$bibliographystyle$")
+$endif$
+$if(bibliography)$
+
+#bibliography($for(bibliography)$"$bibliography$"$sep$,$endfor$$if(full-bibliography)$, full: true$endif$)
+$endif$
+$endif$
+$for(include-after)$
+
+$include-after$
+$endfor$
--- a/surfsense_backend/pyproject.toml
+++ b/surfsense_backend/pyproject.toml
@ -67,6 +67,8 @@ dependencies = [
    "typst>=0.14.0",
    "deepagents>=0.4.3",
    "langchain-daytona>=0.0.2",
+    "pypandoc>=1.16.2",
+    "mmdc>=0.4.0",
 ]

 [dependency-groups]
--- a/surfsense_backend/uv.lock
+++ b/surfsense_backend/uv.lock
--- a/surfsense_web/components/report-panel/report-panel.tsx
+++ b/surfsense_web/components/report-panel/report-panel.tsx
@ -15,6 +15,8 @@ import {
 	DropdownMenu,
 	DropdownMenuContent,
 	DropdownMenuItem,
+	DropdownMenuLabel,
+	DropdownMenuSeparator,
 	DropdownMenuTrigger,
 } from "@/components/ui/dropdown-menu";
 import { useMediaQuery } from "@/hooks/use-media-query";
@ -114,7 +116,7 @@ function ReportPanelContent({
 	const [isLoading, setIsLoading] = useState(true);
 	const [error, setError] = useState<string | null>(null);
 	const [copied, setCopied] = useState(false);
-	const [exporting, setExporting] = useState<"pdf" | "docx" | "md" | null>(null);
+	const [exporting, setExporting] = useState<string | null>(null);
 	const [saving, setSaving] = useState(false);

 	// Editor state — tracks the latest markdown from the Plate editor
@ -196,18 +198,30 @@ function ReportPanelContent({
 		}
 	}, [currentMarkdown]);

+	// Maps export format keys to download file extensions.
+	// Only "latex" -> "tex" and "plain" -> "txt" differ from their key; the
+	// rest are listed so the supported set is explicit.  handleExport falls
+	// back to the format string itself for any key missing here.
+	const FILE_EXTENSIONS: Record<string, string> = {
+		pdf: "pdf",
+		docx: "docx",
+		html: "html",
+		latex: "tex",
+		epub: "epub",
+		odt: "odt",
+		plain: "txt",
+		md: "md",
+	};
+
 	// Export report
 	const handleExport = useCallback(
-		async (format: "pdf" | "docx" | "md") => {
+		async (format: string) => {
 			setExporting(format);
 			const safeTitle =
 				title
 					.replace(/[^a-zA-Z0-9 _-]/g, "_")
 					.trim()
 					.slice(0, 80) || "report";
+			const ext = FILE_EXTENSIONS[format] ?? format;
 			try {
 				if (format === "md") {
-					// Download markdown content directly as a .md file (uses latest editor content)
 					if (!currentMarkdown) return;
 					const blob = new Blob([currentMarkdown], {
 						type: "text/markdown;charset=utf-8",
@ -215,7 +229,7 @@ function ReportPanelContent({
 					const url = URL.createObjectURL(blob);
 					const a = document.createElement("a");
 					a.href = url;
-					a.download = `${safeTitle}.md`;
+					a.download = `${safeTitle}.${ext}`;
 					document.body.appendChild(a);
 					a.click();
 					document.body.removeChild(a);
@ -234,7 +248,7 @@ function ReportPanelContent({
 					const url = URL.createObjectURL(blob);
 					const a = document.createElement("a");
 					a.href = url;
-					a.download = `${safeTitle}.${format}`;
+					a.download = `${safeTitle}.${ext}`;
 					document.body.appendChild(a);
 					a.click();
 					document.body.removeChild(a);
@ -334,29 +348,43 @@ function ReportPanelContent({
 						</DropdownMenuTrigger>
 						<DropdownMenuContent
 							align="start"
-							className={`min-w-[180px] dark:bg-neutral-900 dark:border dark:border-white/5${insideDrawer ? " z-[100]" : ""}`}
+							className={`min-w-[200px] dark:bg-neutral-900 dark:border dark:border-white/5${insideDrawer ? " z-[100]" : ""}`}
 						>
-							<DropdownMenuItem onClick={() => handleExport("md")}>
-								Download Markdown
-							</DropdownMenuItem>
-							{/* PDF/DOCX export requires server-side conversion via authenticated endpoint.
-						    Hide for public viewers who have no auth token. */}
 							{!shareToken && (
 								<>
-									<DropdownMenuItem
-										onClick={() => handleExport("pdf")}
-										disabled={exporting !== null}
-									>
-										Download PDF
+									<DropdownMenuLabel className="text-xs text-muted-foreground">Documents</DropdownMenuLabel>
+									<DropdownMenuItem onClick={() => handleExport("pdf")} disabled={exporting !== null}>
+										PDF (.pdf)
 									</DropdownMenuItem>
-									<DropdownMenuItem
-										onClick={() => handleExport("docx")}
-										disabled={exporting !== null}
-									>
-										Download DOCX
+									<DropdownMenuItem onClick={() => handleExport("docx")} disabled={exporting !== null}>
+										Word (.docx)
+									</DropdownMenuItem>
+									<DropdownMenuItem onClick={() => handleExport("odt")} disabled={exporting !== null}>
+										OpenDocument (.odt)
+									</DropdownMenuItem>
+									<DropdownMenuSeparator />
+									<DropdownMenuLabel className="text-xs text-muted-foreground">Web &amp; E-Book</DropdownMenuLabel>
+									<DropdownMenuItem onClick={() => handleExport("html")} disabled={exporting !== null}>
+										HTML (.html)
+									</DropdownMenuItem>
+									<DropdownMenuItem onClick={() => handleExport("epub")} disabled={exporting !== null}>
+										EPUB (.epub)
+									</DropdownMenuItem>
+									<DropdownMenuSeparator />
+									<DropdownMenuLabel className="text-xs text-muted-foreground">Source &amp; Plain</DropdownMenuLabel>
+									<DropdownMenuItem onClick={() => handleExport("latex")} disabled={exporting !== null}>
+										LaTeX (.tex)
 									</DropdownMenuItem>
 								</>
 							)}
+							<DropdownMenuItem onClick={() => handleExport("md")} disabled={exporting !== null}>
+								Markdown (.md)
+							</DropdownMenuItem>
+							{!shareToken && (
+								<DropdownMenuItem onClick={() => handleExport("plain")} disabled={exporting !== null}>
+									Plain Text (.txt)
+								</DropdownMenuItem>
+							)}
 						</DropdownMenuContent>
 					</DropdownMenu>