mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-25 00:36:31 +02:00
chore: update Dockerfiles to install Typst for PDF rendering and remove WeasyPrint dependencies
This commit is contained in:
parent
828e750320
commit
3336626336
4 changed files with 32 additions and 17 deletions
|
|
@ -121,14 +121,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||
libgbm1 \
|
||||
libcairo2 \
|
||||
libpango-1.0-0 \
|
||||
# ============================
|
||||
# Report export (WeasyPrint runtime libs — Pandoc installed separately below)
|
||||
# ============================
|
||||
libpangoft2-1.0-0 \
|
||||
libharfbuzz0b \
|
||||
# Try to install libharfbuzz-subset0 (WeasyPrint font subsetting for smaller PDFs).
|
||||
# Available on Debian Bookworm+ / Ubuntu 24.04+; gracefully skipped on older distros.
|
||||
&& (apt-get install -y --no-install-recommends libharfbuzz-subset0 || true) \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install Pandoc 3.x from GitHub (apt ships 2.9 which has broken table rendering).
|
||||
|
|
@ -137,6 +129,17 @@ RUN ARCH=$(dpkg --print-architecture) && \
|
|||
dpkg -i /tmp/pandoc.deb && \
|
||||
rm /tmp/pandoc.deb
|
||||
|
||||
# Install Typst for PDF rendering (Typst has built-in professional styling
|
||||
# for tables, headings, code blocks, etc., no CSS needed).
|
||||
RUN ARCH=$(dpkg --print-architecture) && \
|
||||
if [ "$ARCH" = "amd64" ]; then TYPST_ARCH="x86_64-unknown-linux-musl"; \
|
||||
else TYPST_ARCH="aarch64-unknown-linux-musl"; fi && \
|
||||
wget -qO /tmp/typst.tar.xz "https://github.com/typst/typst/releases/download/v0.13.1/typst-${TYPST_ARCH}.tar.xz" && \
|
||||
tar -xf /tmp/typst.tar.xz -C /tmp && \
|
||||
cp /tmp/typst-*/typst /usr/local/bin/typst && \
|
||||
rm -rf /tmp/typst* && \
|
||||
typst --version
|
||||
|
||||
# Install Node.js 20.x (for running frontend)
|
||||
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
|
||||
&& apt-get install -y nodejs \
|
||||
|
|
|
|||
|
|
@ -20,11 +20,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||
libxrender1 \
|
||||
dos2unix \
|
||||
git \
|
||||
libpango-1.0-0 \
|
||||
libpangoft2-1.0-0 \
|
||||
# Try to install libharfbuzz-subset0 (WeasyPrint font subsetting for smaller PDFs).
|
||||
# Available on Debian Bookworm+ / Ubuntu 24.04+; gracefully skipped on older distros.
|
||||
&& (apt-get install -y --no-install-recommends libharfbuzz-subset0 || true) \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install Pandoc 3.x from GitHub (apt ships 2.17 which has broken table rendering).
|
||||
|
|
@ -33,6 +28,17 @@ RUN ARCH=$(dpkg --print-architecture) && \
|
|||
dpkg -i /tmp/pandoc.deb && \
|
||||
rm /tmp/pandoc.deb
|
||||
|
||||
# Install Typst for PDF rendering (Typst has built-in professional styling
|
||||
# for tables, headings, code blocks, etc., no CSS needed).
|
||||
RUN ARCH=$(dpkg --print-architecture) && \
|
||||
if [ "$ARCH" = "amd64" ]; then TYPST_ARCH="x86_64-unknown-linux-musl"; \
|
||||
else TYPST_ARCH="aarch64-unknown-linux-musl"; fi && \
|
||||
wget -qO /tmp/typst.tar.xz "https://github.com/typst/typst/releases/download/v0.13.1/typst-${TYPST_ARCH}.tar.xz" && \
|
||||
tar -xf /tmp/typst.tar.xz -C /tmp && \
|
||||
cp /tmp/typst-*/typst /usr/local/bin/typst && \
|
||||
rm -rf /tmp/typst* && \
|
||||
typst --version
|
||||
|
||||
# Update certificates and install SSL tools
|
||||
RUN update-ca-certificates
|
||||
RUN pip install --upgrade certifi pip-system-certs
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ Report routes for read, export (PDF/DOCX), and delete operations.
|
|||
|
||||
No create or update endpoints here — reports are generated inline by the
|
||||
agent tool during chat and stored as Markdown in the database.
|
||||
Export to PDF/DOCX is on-demand via pypandoc.
|
||||
Export to PDF/DOCX is on-demand via pypandoc (PDF uses Typst as the engine).
|
||||
|
||||
Authorization: lightweight search-space membership checks (no granular RBAC)
|
||||
since reports are chat-generated artifacts, not standalone managed resources.
|
||||
|
|
@ -210,9 +210,16 @@ async def export_report(
|
|||
# pypandoc spawns a pandoc subprocess (blocking), so we run the
|
||||
# entire convert → read → cleanup pipeline in a thread executor
|
||||
# to avoid blocking the async event loop on any file I/O.
|
||||
#
|
||||
# PDF uses Typst as the rendering engine — Typst has built-in
|
||||
# professional styling for tables, headings, code blocks, etc.,
|
||||
# so no CSS injection is needed.
|
||||
#
|
||||
# Use "gfm" because LLM output uses GFM-style pipe tables that
|
||||
# pandoc's stricter default "markdown" format may fail to parse.
|
||||
extra_args = ["--standalone"]
|
||||
if format == ExportFormat.PDF:
|
||||
extra_args.append("--pdf-engine=weasyprint")
|
||||
extra_args.append("--pdf-engine=typst")
|
||||
|
||||
def _convert_and_read() -> bytes:
|
||||
"""Run all blocking I/O (tempfile, pandoc, file read, cleanup) in a thread."""
|
||||
|
|
@ -222,7 +229,7 @@ async def export_report(
|
|||
pypandoc.convert_text(
|
||||
report.content,
|
||||
format.value,
|
||||
format="md",
|
||||
format="gfm",
|
||||
extra_args=extra_args,
|
||||
outputfile=tmp_path,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -64,7 +64,6 @@ dependencies = [
|
|||
"langchain-unstructured>=1.0.1",
|
||||
"slowapi>=0.1.9",
|
||||
"pypandoc>=1.16.2",
|
||||
"weasyprint>=68.1",
|
||||
]
|
||||
|
||||
[dependency-groups]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue