diff --git a/Dockerfile.allinone b/Dockerfile.allinone index 51f1d8619..3196ae0c2 100644 --- a/Dockerfile.allinone +++ b/Dockerfile.allinone @@ -121,14 +121,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libgbm1 \ libcairo2 \ libpango-1.0-0 \ - # ============================ - # Report export (WeasyPrint runtime libs — Pandoc installed separately below) - # ============================ - libpangoft2-1.0-0 \ - libharfbuzz0b \ - # Try to install libharfbuzz-subset0 (WeasyPrint font subsetting for smaller PDFs). - # Available on Debian Bookworm+ / Ubuntu 24.04+; gracefully skipped on older distros. - && (apt-get install -y --no-install-recommends libharfbuzz-subset0 || true) \ && rm -rf /var/lib/apt/lists/* # Install Pandoc 3.x from GitHub (apt ships 2.9 which has broken table rendering). @@ -137,6 +129,17 @@ RUN ARCH=$(dpkg --print-architecture) && \ dpkg -i /tmp/pandoc.deb && \ rm /tmp/pandoc.deb +# Install Typst for PDF rendering (Typst has built-in professional styling +# for tables, headings, code blocks, etc., no CSS needed). +RUN ARCH=$(dpkg --print-architecture) && \ + if [ "$ARCH" = "amd64" ]; then TYPST_ARCH="x86_64-unknown-linux-musl"; \ + else TYPST_ARCH="aarch64-unknown-linux-musl"; fi && \ + wget -qO /tmp/typst.tar.xz "https://github.com/typst/typst/releases/download/v0.13.1/typst-${TYPST_ARCH}.tar.xz" && \ + tar -xf /tmp/typst.tar.xz -C /tmp && \ + cp /tmp/typst-*/typst /usr/local/bin/typst && \ + rm -rf /tmp/typst* && \ + typst --version + # Install Node.js 20.x (for running frontend) RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \ && apt-get install -y nodejs \ diff --git a/surfsense_backend/Dockerfile b/surfsense_backend/Dockerfile index 6efdcc47b..044eca9eb 100644 --- a/surfsense_backend/Dockerfile +++ b/surfsense_backend/Dockerfile @@ -20,11 +20,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libxrender1 \ dos2unix \ git \ - libpango-1.0-0 \ - libpangoft2-1.0-0 \ - # Try to install libharfbuzz-subset0 (WeasyPrint font subsetting for smaller PDFs). - # Available on Debian Bookworm+ / Ubuntu 24.04+; gracefully skipped on older distros. - && (apt-get install -y --no-install-recommends libharfbuzz-subset0 || true) \ && rm -rf /var/lib/apt/lists/* # Install Pandoc 3.x from GitHub (apt ships 2.17 which has broken table rendering). @@ -33,6 +28,17 @@ RUN ARCH=$(dpkg --print-architecture) && \ dpkg -i /tmp/pandoc.deb && \ rm /tmp/pandoc.deb +# Install Typst for PDF rendering (Typst has built-in professional styling +# for tables, headings, code blocks, etc., no CSS needed). +RUN ARCH=$(dpkg --print-architecture) && \ + if [ "$ARCH" = "amd64" ]; then TYPST_ARCH="x86_64-unknown-linux-musl"; \ + else TYPST_ARCH="aarch64-unknown-linux-musl"; fi && \ + wget -qO /tmp/typst.tar.xz "https://github.com/typst/typst/releases/download/v0.13.1/typst-${TYPST_ARCH}.tar.xz" && \ + tar -xf /tmp/typst.tar.xz -C /tmp && \ + cp /tmp/typst-*/typst /usr/local/bin/typst && \ + rm -rf /tmp/typst* && \ + typst --version + # Update certificates and install SSL tools RUN update-ca-certificates RUN pip install --upgrade certifi pip-system-certs diff --git a/surfsense_backend/app/routes/reports_routes.py b/surfsense_backend/app/routes/reports_routes.py index 9a250ce29..39ff320f5 100644 --- a/surfsense_backend/app/routes/reports_routes.py +++ b/surfsense_backend/app/routes/reports_routes.py @@ -3,7 +3,7 @@ Report routes for read, export (PDF/DOCX), and delete operations. No create or update endpoints here — reports are generated inline by the agent tool during chat and stored as Markdown in the database. -Export to PDF/DOCX is on-demand via pypandoc. +Export to PDF/DOCX is on-demand via pypandoc (PDF uses Typst as the engine). Authorization: lightweight search-space membership checks (no granular RBAC) since reports are chat-generated artifacts, not standalone managed resources. @@ -210,9 +210,16 @@ async def export_report( # pypandoc spawns a pandoc subprocess (blocking), so we run the # entire convert → read → cleanup pipeline in a thread executor # to avoid blocking the async event loop on any file I/O. + # + # PDF uses Typst as the rendering engine — Typst has built-in + # professional styling for tables, headings, code blocks, etc., + # so no CSS injection is needed. + # + # Use "gfm" because LLM output uses GFM-style pipe tables that + # pandoc's stricter default "markdown" format may fail to parse. extra_args = ["--standalone"] if format == ExportFormat.PDF: - extra_args.append("--pdf-engine=weasyprint") + extra_args.append("--pdf-engine=typst") def _convert_and_read() -> bytes: """Run all blocking I/O (tempfile, pandoc, file read, cleanup) in a thread.""" @@ -222,7 +229,7 @@ async def export_report( pypandoc.convert_text( report.content, format.value, - format="md", + format="gfm", extra_args=extra_args, outputfile=tmp_path, ) diff --git a/surfsense_backend/pyproject.toml b/surfsense_backend/pyproject.toml index 38da661b3..2abc07b5e 100644 --- a/surfsense_backend/pyproject.toml +++ b/surfsense_backend/pyproject.toml @@ -64,7 +64,6 @@ dependencies = [ "langchain-unstructured>=1.0.1", "slowapi>=0.1.9", "pypandoc>=1.16.2", - "weasyprint>=68.1", ] [dependency-groups]