chore: update Dockerfiles to install Typst for PDF rendering and remove WeasyPrint dependencies

This commit is contained in:
Anish Sarkar 2026-02-12 15:57:35 +05:30
parent 828e750320
commit 3336626336
4 changed files with 32 additions and 17 deletions

View file

@ -121,14 +121,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
libgbm1 \ libgbm1 \
libcairo2 \ libcairo2 \
libpango-1.0-0 \ libpango-1.0-0 \
# ============================
# Report export (WeasyPrint runtime libs — Pandoc installed separately below)
# ============================
libpangoft2-1.0-0 \
libharfbuzz0b \
# Try to install libharfbuzz-subset0 (WeasyPrint font subsetting for smaller PDFs).
# Available on Debian Bookworm+ / Ubuntu 24.04+; gracefully skipped on older distros.
&& (apt-get install -y --no-install-recommends libharfbuzz-subset0 || true) \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
# Install Pandoc 3.x from GitHub (apt ships 2.9 which has broken table rendering). # Install Pandoc 3.x from GitHub (apt ships 2.9 which has broken table rendering).
@ -137,6 +129,17 @@ RUN ARCH=$(dpkg --print-architecture) && \
dpkg -i /tmp/pandoc.deb && \ dpkg -i /tmp/pandoc.deb && \
rm /tmp/pandoc.deb rm /tmp/pandoc.deb
# Install Typst for PDF rendering (Typst has built-in professional styling
# for tables, headings, code blocks, etc., no CSS needed).
# Upstream release assets are selected by target triple; only amd64 and arm64
# are mapped here, so fail fast on any other architecture instead of silently
# downloading an aarch64 binary that cannot execute.
RUN ARCH=$(dpkg --print-architecture) && \
case "$ARCH" in \
amd64) TYPST_ARCH="x86_64-unknown-linux-musl" ;; \
arm64) TYPST_ARCH="aarch64-unknown-linux-musl" ;; \
*) echo "Unsupported architecture for Typst install: $ARCH" >&2; exit 1 ;; \
esac && \
wget -qO /tmp/typst.tar.xz "https://github.com/typst/typst/releases/download/v0.13.1/typst-${TYPST_ARCH}.tar.xz" && \
tar -xf /tmp/typst.tar.xz -C /tmp && \
cp /tmp/typst-*/typst /usr/local/bin/typst && \
rm -rf /tmp/typst* && \
typst --version
# Install Node.js 20.x (for running frontend) # Install Node.js 20.x (for running frontend)
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \ RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
&& apt-get install -y nodejs \ && apt-get install -y nodejs \

View file

@ -20,11 +20,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
libxrender1 \ libxrender1 \
dos2unix \ dos2unix \
git \ git \
libpango-1.0-0 \
libpangoft2-1.0-0 \
# Try to install libharfbuzz-subset0 (WeasyPrint font subsetting for smaller PDFs).
# Available on Debian Bookworm+ / Ubuntu 24.04+; gracefully skipped on older distros.
&& (apt-get install -y --no-install-recommends libharfbuzz-subset0 || true) \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
# Install Pandoc 3.x from GitHub (apt ships 2.17 which has broken table rendering). # Install Pandoc 3.x from GitHub (apt ships 2.17 which has broken table rendering).
@ -33,6 +28,17 @@ RUN ARCH=$(dpkg --print-architecture) && \
dpkg -i /tmp/pandoc.deb && \ dpkg -i /tmp/pandoc.deb && \
rm /tmp/pandoc.deb rm /tmp/pandoc.deb
# Install Typst for PDF rendering (Typst has built-in professional styling
# for tables, headings, code blocks, etc., no CSS needed).
# Upstream release assets are selected by target triple; only amd64 and arm64
# are mapped here, so fail fast on any other architecture instead of silently
# downloading an aarch64 binary that cannot execute.
RUN ARCH=$(dpkg --print-architecture) && \
case "$ARCH" in \
amd64) TYPST_ARCH="x86_64-unknown-linux-musl" ;; \
arm64) TYPST_ARCH="aarch64-unknown-linux-musl" ;; \
*) echo "Unsupported architecture for Typst install: $ARCH" >&2; exit 1 ;; \
esac && \
wget -qO /tmp/typst.tar.xz "https://github.com/typst/typst/releases/download/v0.13.1/typst-${TYPST_ARCH}.tar.xz" && \
tar -xf /tmp/typst.tar.xz -C /tmp && \
cp /tmp/typst-*/typst /usr/local/bin/typst && \
rm -rf /tmp/typst* && \
typst --version
# Update certificates and install SSL tools # Update certificates and install SSL tools
RUN update-ca-certificates RUN update-ca-certificates
RUN pip install --upgrade certifi pip-system-certs RUN pip install --upgrade certifi pip-system-certs

View file

@ -3,7 +3,7 @@ Report routes for read, export (PDF/DOCX), and delete operations.
No create or update endpoints here — reports are generated inline by the No create or update endpoints here — reports are generated inline by the
agent tool during chat and stored as Markdown in the database. agent tool during chat and stored as Markdown in the database.
Export to PDF/DOCX is on-demand via pypandoc. Export to PDF/DOCX is on-demand via pypandoc (PDF uses Typst as the engine).
Authorization: lightweight search-space membership checks (no granular RBAC) Authorization: lightweight search-space membership checks (no granular RBAC)
since reports are chat-generated artifacts, not standalone managed resources. since reports are chat-generated artifacts, not standalone managed resources.
@ -210,9 +210,16 @@ async def export_report(
# pypandoc spawns a pandoc subprocess (blocking), so we run the # pypandoc spawns a pandoc subprocess (blocking), so we run the
# entire convert → read → cleanup pipeline in a thread executor # entire convert → read → cleanup pipeline in a thread executor
# to avoid blocking the async event loop on any file I/O. # to avoid blocking the async event loop on any file I/O.
#
# PDF uses Typst as the rendering engine — Typst has built-in
# professional styling for tables, headings, code blocks, etc.,
# so no CSS injection is needed.
#
# Use "gfm" because LLM output uses GFM-style pipe tables that
# pandoc's stricter default "markdown" format may fail to parse.
extra_args = ["--standalone"] extra_args = ["--standalone"]
if format == ExportFormat.PDF: if format == ExportFormat.PDF:
extra_args.append("--pdf-engine=weasyprint") extra_args.append("--pdf-engine=typst")
def _convert_and_read() -> bytes: def _convert_and_read() -> bytes:
"""Run all blocking I/O (tempfile, pandoc, file read, cleanup) in a thread.""" """Run all blocking I/O (tempfile, pandoc, file read, cleanup) in a thread."""
@ -222,7 +229,7 @@ async def export_report(
pypandoc.convert_text( pypandoc.convert_text(
report.content, report.content,
format.value, format.value,
format="md", format="gfm",
extra_args=extra_args, extra_args=extra_args,
outputfile=tmp_path, outputfile=tmp_path,
) )

View file

@ -64,7 +64,6 @@ dependencies = [
"langchain-unstructured>=1.0.1", "langchain-unstructured>=1.0.1",
"slowapi>=0.1.9", "slowapi>=0.1.9",
"pypandoc>=1.16.2", "pypandoc>=1.16.2",
"weasyprint>=68.1",
] ]
[dependency-groups] [dependency-groups]