feat: expand report export functionality to support multiple formats (PDF, DOCX, HTML, EPUB, etc.) and enhance UI for format selection

This commit is contained in:
DESKTOP-RTLN3BA\$punk 2026-03-09 18:31:14 -07:00
parent ddb070bca8
commit 11b84a3fb4
10 changed files with 5375 additions and 4302 deletions

View file

@ -12,4 +12,5 @@ celerybeat-schedule*
celerybeat-schedule.*
celerybeat-schedule.dir
celerybeat-schedule.bak
global_llm_config.yaml
global_llm_config.yaml
app/templates/_generated/

View file

@ -4,8 +4,9 @@ Report routes for read, update, export (PDF/DOCX), and delete operations.
Reports are generated inline by the agent tool during chat and stored as
Markdown in the database. Users can edit report content via the Plate editor
and save changes through the PUT endpoint.
Export to PDF/DOCX is on-demand PDF uses pypandoc (MarkdownTypst) + typst-py
(TypstPDF); DOCX uses pypandoc directly.
Export is on-demand in multiple formats (PDF, DOCX, HTML, LaTeX, EPUB, ODT,
plain text). PDF uses pypandoc (Markdown->Typst) + typst-py; the rest
use pypandoc directly with format-specific templates and options.
Authorization: lightweight search-space membership checks (no granular RBAC)
since reports are chat-generated artifacts, not standalone managed resources.
@ -36,6 +37,11 @@ from app.db import (
)
from app.schemas import ReportContentRead, ReportContentUpdate, ReportRead
from app.schemas.reports import ReportVersionInfo
from app.templates.export_helpers import (
get_html_css_path,
get_reference_docx_path,
get_typst_template_path,
)
from app.users import current_active_user
from app.utils.rbac import check_search_space_access
@ -49,6 +55,32 @@ MAX_REPORT_LIST_LIMIT = 500
class ExportFormat(StrEnum):
PDF = "pdf"
DOCX = "docx"
HTML = "html"
LATEX = "latex"
EPUB = "epub"
ODT = "odt"
PLAIN = "plain"
_MEDIA_TYPES: dict[ExportFormat, str] = {
ExportFormat.PDF: "application/pdf",
ExportFormat.DOCX: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
ExportFormat.HTML: "text/html; charset=utf-8",
ExportFormat.LATEX: "application/x-tex",
ExportFormat.EPUB: "application/epub+zip",
ExportFormat.ODT: "application/vnd.oasis.opendocument.text",
ExportFormat.PLAIN: "text/plain; charset=utf-8",
}
_FILE_EXTENSIONS: dict[ExportFormat, str] = {
ExportFormat.PDF: "pdf",
ExportFormat.DOCX: "docx",
ExportFormat.HTML: "html",
ExportFormat.LATEX: "tex",
ExportFormat.EPUB: "epub",
ExportFormat.ODT: "odt",
ExportFormat.PLAIN: "txt",
}
# ---------------------------------------------------------------------------
@ -305,13 +337,14 @@ async def update_report_content(
async def export_report(
report_id: int,
format: ExportFormat = Query(
ExportFormat.PDF, description="Export format: pdf or docx"
ExportFormat.PDF,
description="Export format: pdf, docx, html, pptx, latex, epub, odt, or plain",
),
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
"""
Export a report as PDF or DOCX.
Export a report in the requested format.
"""
try:
report = await _get_report_with_access(report_id, session, user)
@ -329,83 +362,124 @@ async def export_report(
# etc.) into $/$$ form that pandoc's tex_math_dollars extension can parse.
markdown_content = _normalize_latex_delimiters(markdown_content)
# Convert Markdown to the requested format.
#
# DOCX: pypandoc (pandoc) handles the full conversion directly.
#
# PDF: two-step pipeline — pypandoc converts Markdown → Typst markup,
# then the `typst` Python library compiles Typst → PDF. This avoids
# requiring the Typst CLI on the system PATH; the typst pip package
# bundles the compiler as a native extension. Typst produces
# professional styling for tables, headings, code blocks, etc.
#
# Use "gfm" as the base input format because LLM output uses GFM-style
# pipe tables that pandoc's stricter default "markdown" may mangle.
# The +tex_math_dollars extension enables $/$$ math recognition.
formatted_date = report.created_at.strftime("%B %d, %Y")
report_title = report.title or "Report"
input_fmt = "gfm+tex_math_dollars"
meta_args = ["-M", f"title:{report_title}", "-M", f"date:{formatted_date}"]
def _convert_and_read() -> bytes:
"""Run all blocking I/O (tempfile, pandoc/typst, file read, cleanup) in a thread."""
"""Run all blocking I/O in a thread."""
# -- PDF: Markdown -> Typst markup -> typst-py -> PDF bytes ------
if format == ExportFormat.PDF:
# Step 1: Markdown → Typst markup via pandoc.
# We must set mainfont / monofont so the generated template's
# `font` parameter is non-empty; without it pandoc emits
# `font: ()` which makes Typst error with
# "font fallback list must not be empty".
# We use fonts that ship embedded inside typst-py so this
# works even on systems with no fonts installed.
typst_template = str(get_typst_template_path())
typst_markup: str = pypandoc.convert_text(
markdown_content,
"typst",
format="gfm+tex_math_dollars",
format=input_fmt,
extra_args=[
"--standalone",
f"--template={typst_template}",
"-V",
"mainfont:Libertinus Serif",
"-V",
"monofont:DejaVu Sans Mono",
"codefont:DejaVu Sans Mono",
*meta_args,
],
)
# Step 2: Typst markup → PDF via typst Python library
pdf_bytes: bytes = typst.compile(typst_markup.encode("utf-8"))
return pdf_bytes
else:
# DOCX: let pandoc handle the full conversion
fd, tmp_path = tempfile.mkstemp(suffix=f".{format.value}")
os.close(fd)
try:
pypandoc.convert_text(
markdown_content,
format.value,
format="gfm+tex_math_dollars",
extra_args=["--standalone"],
outputfile=tmp_path,
)
with open(tmp_path, "rb") as f:
return f.read()
finally:
os.unlink(tmp_path)
return typst.compile(typst_markup.encode("utf-8"))
# -- DOCX: styled reference doc ----------------------------------
if format == ExportFormat.DOCX:
return _pandoc_to_tempfile(
format.value,
[
"--standalone",
f"--reference-doc={get_reference_docx_path()}",
*meta_args,
],
)
# -- HTML: self-contained with custom CSS ------------------------
if format == ExportFormat.HTML:
html_str: str = pypandoc.convert_text(
markdown_content,
"html5",
format=input_fmt,
extra_args=[
"--standalone",
"--embed-resources",
f"--css={get_html_css_path()}",
"--syntax-highlighting=pygments",
*meta_args,
],
)
return html_str.encode("utf-8")
# -- EPUB: binary output via tempfile ----------------------------
if format == ExportFormat.EPUB:
return _pandoc_to_tempfile("epub3", ["--standalone", *meta_args])
# -- ODT: binary output via tempfile -----------------------------
if format == ExportFormat.ODT:
return _pandoc_to_tempfile("odt", ["--standalone", *meta_args])
# -- LaTeX: text output ------------------------------------------
if format == ExportFormat.LATEX:
tex_str: str = pypandoc.convert_text(
markdown_content,
"latex",
format=input_fmt,
extra_args=["--standalone", *meta_args],
)
return tex_str.encode("utf-8")
# -- Plain text: text output -------------------------------------
plain_str: str = pypandoc.convert_text(
markdown_content,
"plain",
format=input_fmt,
extra_args=["--wrap=auto", "--columns=80"],
)
return plain_str.encode("utf-8")
def _pandoc_to_tempfile(output_format: str, extra_args: list[str]) -> bytes:
    """Render the report with pandoc into a scratch file and return its bytes.

    Used for binary output formats: pandoc writes to ``outputfile`` and the
    result is read back from disk. The scratch file is removed whether or not
    the conversion succeeds.
    """
    handle, scratch_path = tempfile.mkstemp(suffix=f".{output_format}")
    # Only the path is needed; the descriptor is released straight away.
    os.close(handle)
    try:
        pypandoc.convert_text(
            markdown_content,
            output_format,
            format=input_fmt,
            extra_args=extra_args,
            outputfile=scratch_path,
        )
        with open(scratch_path, "rb") as stream:
            payload = stream.read()
    finally:
        os.unlink(scratch_path)
    return payload
loop = asyncio.get_running_loop()
output = await loop.run_in_executor(None, _convert_and_read)
# Sanitize filename
safe_title = (
"".join(
c if c.isalnum() or c in " -_" else "_" for c in report.title
).strip()[:80]
or "report"
)
media_types = {
ExportFormat.PDF: "application/pdf",
ExportFormat.DOCX: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
}
ext = _FILE_EXTENSIONS[format]
return StreamingResponse(
io.BytesIO(output),
media_type=media_types[format],
media_type=_MEDIA_TYPES[format],
headers={
"Content-Disposition": f'attachment; filename="{safe_title}.{format.value}"',
"Content-Disposition": f'attachment; filename="{safe_title}.{ext}"',
},
)

View file

@ -50,9 +50,7 @@ async def get_playlist_videos(
detail="No videos found in the playlist. It may be private or empty.",
)
video_urls = [
f"https://www.youtube.com/watch?v={vid}" for vid in video_ids
]
video_urls = [f"https://www.youtube.com/watch?v={vid}" for vid in video_ids]
return {"video_urls": video_urls, "count": len(video_urls)}
except HTTPException:
@ -74,12 +72,15 @@ async def _fetch_playlist_via_innertube(playlist_id: str) -> list[str]:
proxies = get_requests_proxies()
try:
async with aiohttp.ClientSession() as session, session.post(
_INNERTUBE_API_URL,
json=payload,
headers={"Content-Type": "application/json"},
proxy=proxies["http"] if proxies else None,
) as response:
async with (
aiohttp.ClientSession() as session,
session.post(
_INNERTUBE_API_URL,
json=payload,
headers={"Content-Type": "application/json"},
proxy=proxies["http"] if proxies else None,
) as response,
):
if response.status != 200:
logger.warning(
"Innertube API returned %d for playlist %s",
@ -118,14 +119,14 @@ async def _fetch_playlist_via_html(playlist_id: str) -> list[str]:
proxy=proxies["http"] if proxies else None,
) as response,
):
if response.status != 200:
logger.warning(
"HTML fallback returned %d for playlist %s",
response.status,
playlist_id,
)
return []
html = await response.text()
if response.status != 200:
logger.warning(
"HTML fallback returned %d for playlist %s",
response.status,
playlist_id,
)
return []
html = await response.text()
yt_data = _extract_yt_initial_data(html)
if not yt_data:

View file

@ -0,0 +1,323 @@
"""
Helpers for report export templates.
* ``get_typst_template_path()`` - path to the custom Pandoc -> Typst template.
* ``get_html_css_path()`` - path to the CSS stylesheet for HTML exports.
* ``get_reference_docx_path()`` - path to a styled reference.docx for Pandoc.
* ``get_reference_pptx_path()`` - path to a styled reference.pptx for Pandoc.
The reference DOCX and PPTX are generated lazily on first call from Pandoc's
built-in defaults, then restyled with *python-docx* / *python-pptx* and cached
on disk so subsequent exports are instant.
"""
from __future__ import annotations
import subprocess
import threading
from pathlib import Path
_DIR = Path(__file__).resolve().parent
_GENERATED_DIR = _DIR / "_generated"
_REFERENCE_DOCX = _GENERATED_DIR / "reference.docx"
_REFERENCE_PPTX = _GENERATED_DIR / "reference.pptx"
_TYPST_TEMPLATE = _DIR / "report_pdf.typst"
_HTML_CSS = _DIR / "report_html.css"
_docx_lock = threading.Lock()
_pptx_lock = threading.Lock()
def get_typst_template_path() -> Path:
    """Return the path of the custom Pandoc -> Typst template file."""
    template_path = _TYPST_TEMPLATE
    return template_path
def get_html_css_path() -> Path:
    """Return the path of the CSS stylesheet used for HTML exports."""
    stylesheet_path = _HTML_CSS
    return stylesheet_path
def get_reference_pptx_path() -> Path:
    """Return the path of the styled reference.pptx, generating it on first use.

    The existence check is repeated while holding ``_pptx_lock`` so that
    threads which got past the first check together do not each regenerate
    the file.
    """
    if not _REFERENCE_PPTX.exists():
        with _pptx_lock:
            # Another thread may have produced the file while this one waited.
            if not _REFERENCE_PPTX.exists():
                _generate_reference_pptx()
    return _REFERENCE_PPTX
def get_reference_docx_path() -> Path:
    """Return the path of the styled reference.docx, generating it on first use.

    The existence check is repeated while holding ``_docx_lock`` so that
    threads which got past the first check together do not each regenerate
    the file.
    """
    if not _REFERENCE_DOCX.exists():
        with _docx_lock:
            # Another thread may have produced the file while this one waited.
            if not _REFERENCE_DOCX.exists():
                _generate_reference_docx()
    return _REFERENCE_DOCX
# ---------------------------------------------------------------------------
# Reference DOCX generation
# ---------------------------------------------------------------------------
_HEADING_COLOR_RGB = (0x1E, 0x29, 0x3B) # Slate-900
_ACCENT_RGB = (0x25, 0x63, 0xEB) # Blue-600
def _generate_reference_docx() -> None:
    """Build a professional reference.docx from Pandoc's default + restyling.

    Pandoc's built-in ``reference.docx`` is extracted, restyled in memory
    (page setup, footer, fonts, headings, code, quotes, title block) and only
    then published at ``_REFERENCE_DOCX``.

    The file is written under a temporary name and renamed into place, so
    ``_REFERENCE_DOCX`` either does not exist or is the fully styled document.
    This matters because ``get_reference_docx_path`` checks for the file's
    existence without holding the lock: writing the raw default to the final
    path first would let a concurrent export pick up a partial or unstyled
    file, and a failure during restyling would leave that unstyled file
    cached for every later export.

    Raises:
        subprocess.CalledProcessError: if pandoc exits with a non-zero status
            while printing its default reference document.
    """
    import io
    import os

    import pypandoc
    from docx import Document
    from docx.enum.text import WD_ALIGN_PARAGRAPH
    from docx.shared import Inches, Pt, RGBColor

    _GENERATED_DIR.mkdir(parents=True, exist_ok=True)

    # Step 1 - extract Pandoc's built-in reference.docx (contains all the
    # style names that Pandoc maps its output to).
    pandoc_bin = pypandoc.get_pandoc_path()
    result = subprocess.run(
        [pandoc_bin, "--print-default-data-file", "reference.docx"],
        capture_output=True,
        check=True,
    )

    # Step 2 - open and restyle, entirely in memory
    doc = Document(io.BytesIO(result.stdout))

    heading_color = RGBColor(*_HEADING_COLOR_RGB)
    accent_color = RGBColor(*_ACCENT_RGB)

    # -- Page setup ----------------------------------------------------------
    for section in doc.sections:
        section.page_width = Inches(8.5)
        section.page_height = Inches(11)
        section.left_margin = Inches(1.25)
        section.right_margin = Inches(1.25)
        section.top_margin = Inches(1)
        section.bottom_margin = Inches(1)
        _add_page_number_footer(section)

    # -- Restyle existing styles ---------------------------------------------
    heading_sizes = {1: 24, 2: 18, 3: 14, 4: 12, 5: 11, 6: 11}

    for style in doc.styles:
        name = style.name or ""

        # Normal / body text
        if name in ("Normal", "Body Text", "First Paragraph"):
            style.font.name = "Calibri"
            style.font.size = Pt(11)
            pf = style.paragraph_format
            pf.space_after = Pt(6)
            pf.space_before = Pt(0)
            pf.line_spacing = 1.15
            if name == "First Paragraph":
                pf.space_before = Pt(2)

        # Headings 1-6
        elif name.startswith("Heading") and name[-1:].isdigit():
            level = int(name[-1])
            style.font.name = "Calibri"
            style.font.bold = True
            style.font.color.rgb = heading_color
            style.font.size = Pt(heading_sizes.get(level, 11))
            pf = style.paragraph_format
            pf.space_before = Pt(18 if level <= 2 else 12)
            pf.space_after = Pt(6)
            pf.keep_with_next = True
            if level >= 4:
                # Deeper headings are set apart by italics instead of weight.
                style.font.bold = False
                style.font.italic = True

        # Source Code (code blocks)
        elif name == "Source Code":
            style.font.name = "Consolas"
            style.font.size = Pt(9.5)
            pf = style.paragraph_format
            pf.space_before = Pt(4)
            pf.space_after = Pt(4)
            pf.line_spacing = 1.0
            _set_paragraph_shading(pf, "F8FAFC")

        # Verbatim Char (inline code)
        elif name == "Verbatim Char":
            style.font.name = "Consolas"
            style.font.size = Pt(9.5)

        # Block Text (block quotes)
        elif name == "Block Text":
            style.font.italic = True
            style.font.color.rgb = RGBColor(0x64, 0x74, 0x8B)
            pf = style.paragraph_format
            pf.left_indent = Inches(0.4)
            pf.space_before = Pt(6)
            pf.space_after = Pt(6)

        # Hyperlink
        elif name == "Hyperlink":
            style.font.color.rgb = accent_color
            style.font.underline = True

        # Compact (tight lists)
        elif name == "Compact":
            style.font.name = "Calibri"
            style.font.size = Pt(11)
            if style.paragraph_format:
                style.paragraph_format.space_after = Pt(2)

        # Title
        elif name == "Title":
            style.font.name = "Calibri"
            style.font.size = Pt(28)
            style.font.bold = True
            style.font.color.rgb = heading_color
            pf = style.paragraph_format
            pf.alignment = WD_ALIGN_PARAGRAPH.CENTER
            pf.space_after = Pt(4)

        # Subtitle
        elif name == "Subtitle":
            style.font.name = "Calibri"
            style.font.size = Pt(14)
            style.font.color.rgb = RGBColor(0x64, 0x74, 0x8B)
            pf = style.paragraph_format
            pf.alignment = WD_ALIGN_PARAGRAPH.CENTER
            pf.space_after = Pt(12)

        # Date
        elif name == "Date":
            style.font.name = "Calibri"
            style.font.size = Pt(11)
            style.font.color.rgb = RGBColor(0x64, 0x74, 0x8B)
            pf = style.paragraph_format
            pf.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Step 3 - publish atomically. The PID in the temporary name keeps
    # separate processes from writing to the same scratch file.
    tmp_path = _REFERENCE_DOCX.with_name(
        f"{_REFERENCE_DOCX.name}.{os.getpid()}.tmp"
    )
    try:
        doc.save(str(tmp_path))
        tmp_path.replace(_REFERENCE_DOCX)
    finally:
        # No-op after a successful rename; removes the leftover on failure.
        tmp_path.unlink(missing_ok=True)
# ---------------------------------------------------------------------------
# Reference PPTX generation
# ---------------------------------------------------------------------------
def _generate_reference_pptx() -> None:
    """Build a reference.pptx with smaller fonts for report-to-slide conversion.

    Pandoc's built-in ``reference.pptx`` is extracted, the slide master's
    body and title font sizes are reduced in memory, and only then is the
    result published at ``_REFERENCE_PPTX``.

    The file is written under a temporary name and renamed into place, so
    ``_REFERENCE_PPTX`` either does not exist or is the fully restyled deck.
    ``get_reference_pptx_path`` checks for existence without holding the
    lock, so writing the raw default to the final path first would expose a
    partial or unstyled file to concurrent callers and would leave it cached
    if restyling failed.

    Raises:
        subprocess.CalledProcessError: if pandoc exits with a non-zero status
            while printing its default reference presentation.
    """
    import io
    import os

    import pypandoc
    from lxml import etree
    from pptx import Presentation

    _GENERATED_DIR.mkdir(parents=True, exist_ok=True)

    pandoc_bin = pypandoc.get_pandoc_path()
    result = subprocess.run(
        [pandoc_bin, "--print-default-data-file", "reference.pptx"],
        capture_output=True,
        check=True,
    )

    prs = Presentation(io.BytesIO(result.stdout))
    master = prs.slide_masters[0]

    ns = {
        "a": "http://schemas.openxmlformats.org/drawingml/2006/main",
        "p": "http://schemas.openxmlformats.org/presentationml/2006/main",
    }

    # Shrink body text: 24pt -> 16pt base, scaling down per level
    # (sizes are in hundredths of a point, one entry per lvl1pPr..lvl9pPr).
    body_sizes = [1600, 1400, 1300, 1200, 1100, 1100, 1100, 1100, 1100]
    body_style = master._element.find(".//p:txStyles/p:bodyStyle", ns)
    if body_style is not None:
        for lvl_el in body_style:
            tag = etree.QName(lvl_el).localname
            if tag.startswith("lvl") and tag.endswith("pPr"):
                # "lvl3pPr" -> index 2
                idx = int(tag[3]) - 1
                def_rpr = lvl_el.find("a:defRPr", ns)
                if def_rpr is not None and idx < len(body_sizes):
                    def_rpr.set("sz", str(body_sizes[idx]))

    # Shrink title: 33pt -> 26pt
    title_style = master._element.find(".//p:txStyles/p:titleStyle", ns)
    if title_style is not None:
        for lvl_el in title_style:
            def_rpr = lvl_el.find("a:defRPr", ns)
            if def_rpr is not None:
                def_rpr.set("sz", "2600")

    # Publish atomically. The PID in the temporary name keeps separate
    # processes from writing to the same scratch file.
    tmp_path = _REFERENCE_PPTX.with_name(
        f"{_REFERENCE_PPTX.name}.{os.getpid()}.tmp"
    )
    try:
        prs.save(str(tmp_path))
        tmp_path.replace(_REFERENCE_PPTX)
    finally:
        # No-op after a successful rename; removes the leftover on failure.
        tmp_path.unlink(missing_ok=True)
# ---------------------------------------------------------------------------
# OOXML helpers
# ---------------------------------------------------------------------------
def _set_paragraph_shading(paragraph_format, hex_color: str) -> None:
    """Apply background shading to a paragraph style via raw OOXML.

    Args:
        paragraph_format: a python-docx ``ParagraphFormat`` (for example
            ``style.paragraph_format``). Objects without an ``_element``
            attribute are ignored.
        hex_color: fill colour as a six-digit hex string without ``#``,
            e.g. ``"F8FAFC"``.

    python-docx gives ``ParagraphFormat`` the owning ``<w:style>`` / ``<w:p>``
    element, not its ``<w:pPr>`` child, so the shading element has to be
    placed inside ``w:pPr`` explicitly. It is inserted ahead of the elements
    that the schema orders after ``w:shd`` (spacing, indentation, alignment,
    ...) rather than appended to the end.
    """
    from docx.oxml import OxmlElement
    from docx.oxml.ns import qn

    owner = getattr(paragraph_format, "_element", None)
    if owner is None:
        return

    # Resolve the real <w:pPr>; fall back to the element itself if it does
    # not expose get_or_add_pPr (i.e. it already is a properties element).
    get_or_add_ppr = getattr(owner, "get_or_add_pPr", None)
    ppr = get_or_add_ppr() if callable(get_or_add_ppr) else owner

    # Drop any existing shading so the style ends up with a single <w:shd>.
    for existing in ppr.findall(qn("w:shd")):
        ppr.remove(existing)

    shd = OxmlElement("w:shd")
    shd.set(qn("w:val"), "clear")
    shd.set(qn("w:color"), "auto")
    shd.set(qn("w:fill"), hex_color)

    # Children of w:pPr that must come after w:shd in the schema sequence.
    successor_tags = {
        qn(tag)
        for tag in (
            "w:tabs", "w:suppressAutoHyphens", "w:kinsoku", "w:wordWrap",
            "w:overflowPunct", "w:topLinePunct", "w:autoSpaceDE",
            "w:autoSpaceDN", "w:bidi", "w:adjustRightInd", "w:snapToGrid",
            "w:spacing", "w:ind", "w:contextualSpacing", "w:mirrorIndents",
            "w:suppressOverlap", "w:jc", "w:textDirection",
            "w:textAlignment", "w:textboxTightWrap", "w:outlineLvl",
            "w:divId", "w:cnfStyle", "w:rPr", "w:sectPr", "w:pPrChange",
        )
    }
    successor = next(
        (child for child in ppr if child.tag in successor_tags), None
    )
    if successor is None:
        ppr.append(shd)
    else:
        successor.addprevious(shd)
def _add_page_number_footer(section) -> None:
    """Write a centred "Page <PAGE> of <NUMPAGES>" line into the section footer.

    The footer is unlinked from the previous section, and a single run is
    added to its first paragraph (created if the footer has none). The run
    carries its own size/colour properties followed by literal text and two
    field codes built from raw OOXML elements.
    """
    from docx.oxml import OxmlElement
    from docx.oxml.ns import qn

    def _make(tag: str, attrs: dict[str, str]):
        """Create an OOXML element with the given namespaced attributes."""
        node = OxmlElement(tag)
        for attr_name, attr_value in attrs.items():
            node.set(qn(attr_name), attr_value)
        return node

    footer = section.footer
    footer.is_linked_to_previous = False

    paragraph = footer.paragraphs[0] if footer.paragraphs else footer.add_paragraph()
    paragraph.alignment = 1  # CENTER
    run = paragraph.add_run()
    run_element = run._element

    def _append_field(instruction: str) -> None:
        """Append a begin / instruction / end field sequence to the run."""
        run_element.append(_make("w:fldChar", {"w:fldCharType": "begin"}))
        instr = _make("w:instrText", {"xml:space": "preserve"})
        instr.text = instruction
        run_element.append(instr)
        run_element.append(_make("w:fldChar", {"w:fldCharType": "end"}))

    # Run properties go in first, while the run is still empty.
    run_props = OxmlElement("w:rPr")
    run_props.append(_make("w:sz", {"w:val": "18"}))  # 9pt
    run_props.append(_make("w:color", {"w:val": "64748B"}))
    run_element.append(run_props)

    run.add_text("Page ")
    _append_field(" PAGE ")
    run.add_text(" of ")
    _append_field(" NUMPAGES ")

View file

@ -0,0 +1,289 @@
/* SurfSense - Professional HTML Report Stylesheet */
:root {
--accent: #2563eb;
--heading: #1e293b;
--muted: #64748b;
--border: #e2e8f0;
--subtle-bg: #f8fafc;
--body-text: #334155;
}
*,
*::before,
*::after {
box-sizing: border-box;
}
html {
font-size: 16px;
-webkit-text-size-adjust: 100%;
}
body {
max-width: 52rem;
margin: 2rem auto;
padding: 0 1.5rem;
font-family: "Segoe UI", system-ui, -apple-system, BlinkMacSystemFont,
"Helvetica Neue", Arial, sans-serif;
font-size: 1rem;
line-height: 1.7;
color: var(--body-text);
background: #fff;
}
/* Title block generated by pandoc --standalone with -M title */
header#title-block-header {
text-align: center;
margin-bottom: 2.5rem;
padding-bottom: 1.5rem;
border-bottom: 1px solid var(--border);
}
header#title-block-header h1.title {
font-size: 2.25rem;
font-weight: 700;
color: var(--heading);
margin: 0 0 0.25rem;
line-height: 1.2;
}
header#title-block-header p.subtitle {
font-size: 1.15rem;
color: var(--muted);
margin: 0.5rem 0 0;
}
header#title-block-header p.date {
font-size: 0.95rem;
color: var(--muted);
margin: 0.75rem 0 0;
}
header#title-block-header p.author {
font-size: 1rem;
color: var(--body-text);
margin: 0.25rem 0 0;
}
/* Headings */
h1, h2, h3, h4, h5, h6 {
color: var(--heading);
margin-top: 1.8em;
margin-bottom: 0.6em;
line-height: 1.3;
}
h1 {
font-size: 1.75rem;
font-weight: 700;
padding-bottom: 0.3em;
border-bottom: 2px solid var(--accent);
}
h2 {
font-size: 1.4rem;
font-weight: 700;
padding-bottom: 0.2em;
border-bottom: 1px solid var(--border);
}
h3 {
font-size: 1.15rem;
font-weight: 600;
}
h4 {
font-size: 1rem;
font-weight: 600;
color: var(--muted);
}
h5, h6 {
font-size: 0.95rem;
font-weight: 600;
color: var(--muted);
}
/* Paragraphs & text */
p {
margin: 0 0 1em;
}
a {
color: var(--accent);
text-decoration: none;
}
a:hover {
text-decoration: underline;
}
strong {
font-weight: 600;
color: var(--heading);
}
/* Lists */
ul, ol {
margin: 0 0 1em;
padding-left: 1.75em;
}
li {
margin-bottom: 0.3em;
}
li > ul, li > ol {
margin-top: 0.3em;
margin-bottom: 0;
}
/* Code */
code {
font-family: "Cascadia Code", "Fira Code", "JetBrains Mono", Consolas,
"Courier New", monospace;
font-size: 0.875em;
background: var(--subtle-bg);
border: 1px solid var(--border);
border-radius: 4px;
padding: 0.15em 0.4em;
}
pre {
background: var(--subtle-bg);
border: 1px solid var(--border);
border-radius: 6px;
padding: 1em 1.25em;
overflow-x: auto;
margin: 0 0 1.25em;
line-height: 1.5;
}
pre code {
background: none;
border: none;
border-radius: 0;
padding: 0;
font-size: 0.85em;
}
/* Tables */
table {
width: 100%;
border-collapse: collapse;
margin: 0 0 1.25em;
font-size: 0.95em;
}
thead th {
text-align: left;
font-weight: 600;
color: var(--heading);
padding: 0.6em 0.75em;
border-bottom: 2px solid var(--heading);
}
tbody td {
padding: 0.5em 0.75em;
border-bottom: 1px solid var(--border);
}
tbody tr:last-child td {
border-bottom: none;
}
/* Block quotes */
blockquote {
margin: 0 0 1.25em;
padding: 0.75em 1em;
border-left: 4px solid var(--accent);
background: var(--subtle-bg);
color: var(--muted);
border-radius: 0 4px 4px 0;
}
blockquote p {
margin: 0;
}
blockquote p + p {
margin-top: 0.5em;
}
/* Horizontal rule */
hr {
border: none;
border-top: 1px solid var(--border);
margin: 2em 0;
}
/* Images */
img {
max-width: 100%;
height: auto;
border-radius: 6px;
}
figure {
margin: 1.5em 0;
text-align: center;
}
figcaption {
font-size: 0.875em;
color: var(--muted);
margin-top: 0.5em;
}
/* Definition lists */
dt {
font-weight: 600;
color: var(--heading);
margin-top: 1em;
}
dd {
margin-left: 1.5em;
margin-bottom: 0.5em;
}
/* Footnotes */
section.footnotes {
margin-top: 2em;
padding-top: 1em;
border-top: 1px solid var(--border);
font-size: 0.9em;
color: var(--muted);
}
/* Pandoc syntax highlighting (keep pandoc's generated highlight classes) */
div.sourceCode {
background: var(--subtle-bg);
border: 1px solid var(--border);
border-radius: 6px;
margin: 0 0 1.25em;
}
div.sourceCode pre {
border: none;
margin: 0;
}
/* Print styles */
@media print {
body {
max-width: none;
margin: 0;
padding: 0;
font-size: 11pt;
}
pre, blockquote {
page-break-inside: avoid;
}
h1, h2, h3, h4 {
page-break-after: avoid;
}
}

View file

@ -0,0 +1,333 @@
// ---------------------------------------------------------------------------
// SurfSense Professional PDF Report Template (Pandoc → Typst)
//
// This file is a *pandoc template* (uses $$variable$$ syntax) that emits Typst
// source code. Pandoc substitutes the variables at conversion time; Typst
// then compiles the result into a PDF.
// ---------------------------------------------------------------------------
// ── Pandoc helpers ─────────────────────────────────────────────────────────
#let horizontalrule = line(length: 100%, stroke: 0.5pt + luma(180))
#show terms: it => {
it.children
.map(child => [
#strong[#child.term]
#block(inset: (left: 1.5em, top: -0.4em))[#child.description]
])
.join()
}
#set table(inset: 8pt)
#show figure.where(
kind: table
): set figure.caption(position: $if(table-caption-position)$$table-caption-position$$else$top$endif$)
#show figure.where(
kind: image
): set figure.caption(position: $if(figure-caption-position)$$figure-caption-position$$else$bottom$endif$)
$if(highlighting-definitions)$
$highlighting-definitions$
$endif$
// ── Colour palette ─────────────────────────────────────────────────────────
#let accent = rgb("#2563eb") // Blue-600
#let heading-dark = rgb("#1e293b") // Slate-900
#let subtle-bg = rgb("#f8fafc") // Slate-50
#let border-color = rgb("#e2e8f0") // Slate-200
#let muted = rgb("#64748b") // Slate-500
// ── conf document setup & styling ────────────────────────────────────────
#let conf(
title: none,
subtitle: none,
authors: (),
keywords: (),
date: none,
abstract-title: none,
abstract: none,
thanks: none,
cols: 1,
margin: (x: 1.25in, y: 1.25in),
paper: "us-letter",
lang: "en",
region: "US",
font: (),
fontsize: 11pt,
mathfont: none,
codefont: none,
linestretch: 1.5,
sectionnumbering: none,
linkcolor: none,
citecolor: none,
filecolor: none,
pagenumbering: "1",
doc,
) = {
set document(title: title, keywords: keywords)
// ── Page layout ──────────────────────────────────────────────────────────
set page(
paper: paper,
margin: (x: 1.25in, top: 1.2in, bottom: 1in),
numbering: "1",
header: context {
if counter(page).get().first() > 1 {
set text(size: 8.5pt, fill: muted)
if title != none { title } else { [] }
h(1fr)
text[Report]
v(4pt)
line(length: 100%, stroke: 0.4pt + border-color)
}
},
footer: context {
set text(size: 8.5pt, fill: muted)
line(length: 100%, stroke: 0.4pt + border-color)
v(4pt)
h(1fr)
[Page #counter(page).display("1 of 1", both: true)]
h(1fr)
},
)
// ── Typography ───────────────────────────────────────────────────────────
set text(lang: lang, region: region, font: font, size: fontsize)
set par(justify: true, leading: linestretch * 0.65em, first-line-indent: 0pt)
// ── Headings ─────────────────────────────────────────────────────────────
set heading(numbering: sectionnumbering)
show heading.where(level: 1): it => block(above: 1.6em, below: 0.8em, breakable: false)[
#text(size: 1.5em, weight: "bold", fill: heading-dark)[#it.body]
#v(0.2em)
#line(length: 100%, stroke: 1.5pt + accent)
]
show heading.where(level: 2): it => block(above: 1.4em, below: 0.6em, breakable: false)[
#text(size: 1.25em, weight: "bold", fill: heading-dark)[#it.body]
#v(0.1em)
#line(length: 40%, stroke: 0.75pt + border-color)
]
show heading.where(level: 3): it => block(above: 1.2em, below: 0.5em, breakable: false)[
#text(size: 1.1em, weight: "semibold", fill: heading-dark)[#it.body]
]
show heading.where(level: 4): it => block(above: 1em, below: 0.4em, breakable: false)[
#text(size: 1em, weight: "semibold", fill: muted, style: "italic")[#it.body]
]
// ── Code blocks ──────────────────────────────────────────────────────────
show raw.where(block: true): it => {
block(
fill: subtle-bg,
stroke: 0.5pt + border-color,
inset: (x: 12pt, y: 10pt),
radius: 4pt,
width: 100%,
breakable: true,
)[#text(size: 9pt)[#it]]
}
show raw.where(block: false): box.with(
fill: subtle-bg,
stroke: 0.5pt + border-color,
inset: (x: 4pt, y: 2pt),
radius: 2pt,
)
// ── Tables ───────────────────────────────────────────────────────────────
set table(
inset: (x: 10pt, y: 6pt),
stroke: (x: none, y: 0.5pt + border-color),
)
// ── Links ────────────────────────────────────────────────────────────────
show link: set text(fill: accent)
// ── Block quotes ─────────────────────────────────────────────────────────
show quote: it => block(
inset: (left: 14pt, rest: 8pt),
stroke: (left: 3pt + accent),
)[#text(fill: muted)[#it.body]]
// ── Math ─────────────────────────────────────────────────────────────────
show math.equation: set text(font: mathfont) if mathfont != none
show raw: set text(font: codefont) if codefont != none
// ── Title block ──────────────────────────────────────────────────────────
if title != none {
v(2em)
align(center)[
#block(below: 0.6em)[
#text(size: 2em, weight: "bold", fill: heading-dark)[#title]
]
#if subtitle != none {
block(below: 0.5em)[
#text(size: 1.3em, fill: muted)[#subtitle]
]
}
#if authors != none and authors != () {
v(0.2em)
grid(
columns: (1fr,) * calc.min(authors.len(), 3),
row-gutter: 1.5em,
..authors.map(author => align(center)[
#text(weight: "semibold")[#author.name]
]),
)
}
#if date != none {
block(above: 0.6em)[
#text(size: 0.95em, fill: muted)[#date]
]
}
]
v(0.6em)
line(length: 60%, stroke: 1pt + border-color)
v(1.2em)
}
if abstract != none {
block(
inset: (x: 2em, y: 1em),
stroke: (left: 3pt + accent),
)[
#text(weight: "semibold")[#abstract-title] #h(0.5em) #abstract
]
v(1em)
}
doc
}
// ── Smart quotes ───────────────────────────────────────────────────────────
$if(smart)$
$else$
#set smartquote(enabled: false)
$endif$
// ── Apply conf to document ─────────────────────────────────────────────────
$for(header-includes)$
$header-includes$
$endfor$
#show: doc => conf(
$if(title)$
title: [$title$],
$endif$
$if(subtitle)$
subtitle: [$subtitle$],
$endif$
$if(author)$
authors: (
$for(author)$
$if(author.name)$
( name: [$author.name$],
affiliation: [$author.affiliation$],
email: [$author.email$] ),
$else$
( name: [$author$],
affiliation: "",
email: "" ),
$endif$
$endfor$
),
$endif$
$if(date)$
date: [$date$],
$endif$
$if(lang)$
lang: "$lang$",
$endif$
$if(region)$
region: "$region$",
$endif$
$if(abstract-title)$
abstract-title: [$abstract-title$],
$endif$
$if(abstract)$
abstract: [$abstract$],
$endif$
$if(margin)$
margin: ($for(margin/pairs)$$margin.key$: $margin.value$,$endfor$),
$endif$
$if(papersize)$
paper: "$papersize$",
$endif$
$if(mainfont)$
font: ("$mainfont$",),
$endif$
$if(fontsize)$
fontsize: $fontsize$,
$endif$
$if(mathfont)$
mathfont: ($for(mathfont)$"$mathfont$",$endfor$),
$endif$
$if(codefont)$
codefont: ($for(codefont)$"$codefont$",$endfor$),
$endif$
$if(linestretch)$
linestretch: $linestretch$,
$endif$
$if(section-numbering)$
sectionnumbering: "$section-numbering$",
$endif$
pagenumbering: $if(page-numbering)$"$page-numbering$"$else$none$endif$,
cols: $if(columns)$$columns$$else$1$endif$,
doc,
)
$for(include-before)$
$include-before$
$endfor$
$if(toc)$
#outline(
title: auto,
depth: $toc-depth$
);
$endif$
$body$
$if(citations)$
$for(nocite-ids)$
#cite(label("${it}"), form: none)
$endfor$
$if(csl)$
#set bibliography(style: "$csl$")
$elseif(bibliographystyle)$
#set bibliography(style: "$bibliographystyle$")
$endif$
$if(bibliography)$
#bibliography($for(bibliography)$"$bibliography$"$sep$,$endfor$$if(full-bibliography)$, full: true$endif$)
$endif$
$endif$
$for(include-after)$
$include-after$
$endfor$

View file

@ -67,6 +67,8 @@ dependencies = [
"typst>=0.14.0",
"deepagents>=0.4.3",
"langchain-daytona>=0.0.2",
"pypandoc>=1.16.2",
"mmdc>=0.4.0",
]
[dependency-groups]

8446
surfsense_backend/uv.lock generated

File diff suppressed because it is too large Load diff

View file

@ -15,6 +15,8 @@ import {
DropdownMenu,
DropdownMenuContent,
DropdownMenuItem,
DropdownMenuLabel,
DropdownMenuSeparator,
DropdownMenuTrigger,
} from "@/components/ui/dropdown-menu";
import { useMediaQuery } from "@/hooks/use-media-query";
@ -114,7 +116,7 @@ function ReportPanelContent({
const [isLoading, setIsLoading] = useState(true);
const [error, setError] = useState<string | null>(null);
const [copied, setCopied] = useState(false);
const [exporting, setExporting] = useState<"pdf" | "docx" | "md" | null>(null);
const [exporting, setExporting] = useState<string | null>(null);
const [saving, setSaving] = useState(false);
// Editor state — tracks the latest markdown from the Plate editor
@ -196,18 +198,30 @@ function ReportPanelContent({
}
}, [currentMarkdown]);
// Maps backend format values to download file extensions
const FILE_EXTENSIONS: Record<string, string> = {
pdf: "pdf",
docx: "docx",
html: "html",
latex: "tex",
epub: "epub",
odt: "odt",
plain: "txt",
md: "md",
};
// Export report
const handleExport = useCallback(
async (format: "pdf" | "docx" | "md") => {
async (format: string) => {
setExporting(format);
const safeTitle =
title
.replace(/[^a-zA-Z0-9 _-]/g, "_")
.trim()
.slice(0, 80) || "report";
const ext = FILE_EXTENSIONS[format] ?? format;
try {
if (format === "md") {
// Download markdown content directly as a .md file (uses latest editor content)
if (!currentMarkdown) return;
const blob = new Blob([currentMarkdown], {
type: "text/markdown;charset=utf-8",
@ -215,7 +229,7 @@ function ReportPanelContent({
const url = URL.createObjectURL(blob);
const a = document.createElement("a");
a.href = url;
a.download = `${safeTitle}.md`;
a.download = `${safeTitle}.${ext}`;
document.body.appendChild(a);
a.click();
document.body.removeChild(a);
@ -234,7 +248,7 @@ function ReportPanelContent({
const url = URL.createObjectURL(blob);
const a = document.createElement("a");
a.href = url;
a.download = `${safeTitle}.${format}`;
a.download = `${safeTitle}.${ext}`;
document.body.appendChild(a);
a.click();
document.body.removeChild(a);
@ -334,29 +348,43 @@ function ReportPanelContent({
</DropdownMenuTrigger>
<DropdownMenuContent
align="start"
className={`min-w-[180px] dark:bg-neutral-900 dark:border dark:border-white/5${insideDrawer ? " z-[100]" : ""}`}
className={`min-w-[200px] dark:bg-neutral-900 dark:border dark:border-white/5${insideDrawer ? " z-[100]" : ""}`}
>
<DropdownMenuItem onClick={() => handleExport("md")}>
Download Markdown
</DropdownMenuItem>
{/* Server-converted formats (everything except Markdown) go through the authenticated export endpoint.
Hide them for public viewers who have no auth token. */}
{!shareToken && (
<>
<DropdownMenuItem
onClick={() => handleExport("pdf")}
disabled={exporting !== null}
>
Download PDF
<DropdownMenuLabel className="text-xs text-muted-foreground">Documents</DropdownMenuLabel>
<DropdownMenuItem onClick={() => handleExport("pdf")} disabled={exporting !== null}>
PDF (.pdf)
</DropdownMenuItem>
<DropdownMenuItem
onClick={() => handleExport("docx")}
disabled={exporting !== null}
>
Download DOCX
<DropdownMenuItem onClick={() => handleExport("docx")} disabled={exporting !== null}>
Word (.docx)
</DropdownMenuItem>
<DropdownMenuItem onClick={() => handleExport("odt")} disabled={exporting !== null}>
OpenDocument (.odt)
</DropdownMenuItem>
<DropdownMenuSeparator />
<DropdownMenuLabel className="text-xs text-muted-foreground">Web &amp; E-Book</DropdownMenuLabel>
<DropdownMenuItem onClick={() => handleExport("html")} disabled={exporting !== null}>
HTML (.html)
</DropdownMenuItem>
<DropdownMenuItem onClick={() => handleExport("epub")} disabled={exporting !== null}>
EPUB (.epub)
</DropdownMenuItem>
<DropdownMenuSeparator />
<DropdownMenuLabel className="text-xs text-muted-foreground">Source &amp; Plain</DropdownMenuLabel>
<DropdownMenuItem onClick={() => handleExport("latex")} disabled={exporting !== null}>
LaTeX (.tex)
</DropdownMenuItem>
</>
)}
<DropdownMenuItem onClick={() => handleExport("md")} disabled={exporting !== null}>
Markdown (.md)
</DropdownMenuItem>
{!shareToken && (
<DropdownMenuItem onClick={() => handleExport("plain")} disabled={exporting !== null}>
Plain Text (.txt)
</DropdownMenuItem>
)}
</DropdownMenuContent>
</DropdownMenu>