chore: update Dockerfiles to install Typst for PDF rendering and remove WeasyPrint dependencies

This commit is contained in:
Anish Sarkar 2026-02-12 15:57:35 +05:30
parent 828e750320
commit 3336626336
4 changed files with 32 additions and 17 deletions

View file

@ -121,14 +121,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
libgbm1 \
libcairo2 \
libpango-1.0-0 \
# ============================
# Report export (WeasyPrint runtime libs — Pandoc installed separately below)
# ============================
libpangoft2-1.0-0 \
libharfbuzz0b \
# Try to install libharfbuzz-subset0 (WeasyPrint font subsetting for smaller PDFs).
# Available on Debian Bookworm+ / Ubuntu 24.04+; gracefully skipped on older distros.
&& (apt-get install -y --no-install-recommends libharfbuzz-subset0 || true) \
&& rm -rf /var/lib/apt/lists/*
# Install Pandoc 3.x from GitHub (apt ships 2.9 which has broken table rendering).
@ -137,6 +129,17 @@ RUN ARCH=$(dpkg --print-architecture) && \
dpkg -i /tmp/pandoc.deb && \
rm /tmp/pandoc.deb
# Install Typst for PDF rendering (Typst has built-in professional styling
# for tables, headings, code blocks, etc., no CSS needed).
# The release asset name depends on the CPU architecture reported by dpkg.
# Architectures not listed below fail the build immediately with a clear
# message instead of silently downloading the aarch64 binary.
RUN ARCH=$(dpkg --print-architecture) && \
    TYPST_VERSION="v0.13.1" && \
    case "$ARCH" in \
        amd64) TYPST_ARCH="x86_64-unknown-linux-musl" ;; \
        arm64) TYPST_ARCH="aarch64-unknown-linux-musl" ;; \
        *) echo "Unsupported architecture for Typst install: $ARCH" >&2; exit 1 ;; \
    esac && \
    wget -qO /tmp/typst.tar.xz "https://github.com/typst/typst/releases/download/${TYPST_VERSION}/typst-${TYPST_ARCH}.tar.xz" && \
    tar -xf /tmp/typst.tar.xz -C /tmp && \
    cp /tmp/typst-*/typst /usr/local/bin/typst && \
    rm -rf /tmp/typst* && \
    typst --version
# Install Node.js 20.x (for running frontend)
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
&& apt-get install -y nodejs \

View file

@ -20,11 +20,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
libxrender1 \
dos2unix \
git \
libpango-1.0-0 \
libpangoft2-1.0-0 \
# Try to install libharfbuzz-subset0 (WeasyPrint font subsetting for smaller PDFs).
# Available on Debian Bookworm+ / Ubuntu 24.04+; gracefully skipped on older distros.
&& (apt-get install -y --no-install-recommends libharfbuzz-subset0 || true) \
&& rm -rf /var/lib/apt/lists/*
# Install Pandoc 3.x from GitHub (apt ships 2.17 which has broken table rendering).
@ -33,6 +28,17 @@ RUN ARCH=$(dpkg --print-architecture) && \
dpkg -i /tmp/pandoc.deb && \
rm /tmp/pandoc.deb
# Install Typst for PDF rendering (Typst has built-in professional styling
# for tables, headings, code blocks, etc., no CSS needed).
# The release asset name depends on the CPU architecture reported by dpkg.
# Architectures not listed below fail the build immediately with a clear
# message instead of silently downloading the aarch64 binary.
RUN ARCH=$(dpkg --print-architecture) && \
    TYPST_VERSION="v0.13.1" && \
    case "$ARCH" in \
        amd64) TYPST_ARCH="x86_64-unknown-linux-musl" ;; \
        arm64) TYPST_ARCH="aarch64-unknown-linux-musl" ;; \
        *) echo "Unsupported architecture for Typst install: $ARCH" >&2; exit 1 ;; \
    esac && \
    wget -qO /tmp/typst.tar.xz "https://github.com/typst/typst/releases/download/${TYPST_VERSION}/typst-${TYPST_ARCH}.tar.xz" && \
    tar -xf /tmp/typst.tar.xz -C /tmp && \
    cp /tmp/typst-*/typst /usr/local/bin/typst && \
    rm -rf /tmp/typst* && \
    typst --version
# Update certificates and install SSL tools
RUN update-ca-certificates
RUN pip install --upgrade certifi pip-system-certs

View file

@ -3,7 +3,7 @@ Report routes for read, export (PDF/DOCX), and delete operations.
No create or update endpoints here — reports are generated inline by the
agent tool during chat and stored as Markdown in the database.
Export to PDF/DOCX is on-demand via pypandoc.
Export to PDF/DOCX is on-demand via pypandoc (PDF uses Typst as the engine).
Authorization: lightweight search-space membership checks (no granular RBAC)
since reports are chat-generated artifacts, not standalone managed resources.
@ -210,9 +210,16 @@ async def export_report(
# pypandoc spawns a pandoc subprocess (blocking), so we run the
# entire convert → read → cleanup pipeline in a thread executor
# to avoid blocking the async event loop on any file I/O.
#
# PDF uses Typst as the rendering engine — Typst has built-in
# professional styling for tables, headings, code blocks, etc.,
# so no CSS injection is needed.
#
# Use "gfm" because LLM output uses GFM-style pipe tables that
# pandoc's stricter default "markdown" format may fail to parse.
extra_args = ["--standalone"]
if format == ExportFormat.PDF:
extra_args.append("--pdf-engine=weasyprint")
extra_args.append("--pdf-engine=typst")
def _convert_and_read() -> bytes:
"""Run all blocking I/O (tempfile, pandoc, file read, cleanup) in a thread."""
@ -222,7 +229,7 @@ async def export_report(
pypandoc.convert_text(
report.content,
format.value,
format="md",
format="gfm",
extra_args=extra_args,
outputfile=tmp_path,
)

View file

@ -64,7 +64,6 @@ dependencies = [
"langchain-unstructured>=1.0.1",
"slowapi>=0.1.9",
"pypandoc>=1.16.2",
"weasyprint>=68.1",
]
[dependency-groups]