mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-26 21:39:43 +02:00
chore: update Dockerfiles to install Typst for PDF rendering and remove WeasyPrint dependencies
This commit is contained in:
parent
828e750320
commit
3336626336
4 changed files with 32 additions and 17 deletions
|
|
@ -121,14 +121,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
libgbm1 \
|
libgbm1 \
|
||||||
libcairo2 \
|
libcairo2 \
|
||||||
libpango-1.0-0 \
|
libpango-1.0-0 \
|
||||||
# ============================
|
|
||||||
# Report export (WeasyPrint runtime libs — Pandoc installed separately below)
|
|
||||||
# ============================
|
|
||||||
libpangoft2-1.0-0 \
|
|
||||||
libharfbuzz0b \
|
|
||||||
# Try to install libharfbuzz-subset0 (WeasyPrint font subsetting for smaller PDFs).
|
|
||||||
# Available on Debian Bookworm+ / Ubuntu 24.04+; gracefully skipped on older distros.
|
|
||||||
&& (apt-get install -y --no-install-recommends libharfbuzz-subset0 || true) \
|
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
# Install Pandoc 3.x from GitHub (apt ships 2.9 which has broken table rendering).
|
# Install Pandoc 3.x from GitHub (apt ships 2.9 which has broken table rendering).
|
||||||
|
|
@ -137,6 +129,17 @@ RUN ARCH=$(dpkg --print-architecture) && \
|
||||||
dpkg -i /tmp/pandoc.deb && \
|
dpkg -i /tmp/pandoc.deb && \
|
||||||
rm /tmp/pandoc.deb
|
rm /tmp/pandoc.deb
|
||||||
|
|
||||||
|
# Install Typst for PDF rendering (Typst has built-in professional styling
|
||||||
|
# for tables, headings, code blocks, etc., no CSS needed).
|
||||||
|
RUN ARCH=$(dpkg --print-architecture) && \
|
||||||
|
if [ "$ARCH" = "amd64" ]; then TYPST_ARCH="x86_64-unknown-linux-musl"; \
|
||||||
|
else TYPST_ARCH="aarch64-unknown-linux-musl"; fi && \
|
||||||
|
wget -qO /tmp/typst.tar.xz "https://github.com/typst/typst/releases/download/v0.13.1/typst-${TYPST_ARCH}.tar.xz" && \
|
||||||
|
tar -xf /tmp/typst.tar.xz -C /tmp && \
|
||||||
|
cp /tmp/typst-*/typst /usr/local/bin/typst && \
|
||||||
|
rm -rf /tmp/typst* && \
|
||||||
|
typst --version
|
||||||
|
|
||||||
# Install Node.js 20.x (for running frontend)
|
# Install Node.js 20.x (for running frontend)
|
||||||
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
|
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
|
||||||
&& apt-get install -y nodejs \
|
&& apt-get install -y nodejs \
|
||||||
|
|
|
||||||
|
|
@ -20,11 +20,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
libxrender1 \
|
libxrender1 \
|
||||||
dos2unix \
|
dos2unix \
|
||||||
git \
|
git \
|
||||||
libpango-1.0-0 \
|
|
||||||
libpangoft2-1.0-0 \
|
|
||||||
# Try to install libharfbuzz-subset0 (WeasyPrint font subsetting for smaller PDFs).
|
|
||||||
# Available on Debian Bookworm+ / Ubuntu 24.04+; gracefully skipped on older distros.
|
|
||||||
&& (apt-get install -y --no-install-recommends libharfbuzz-subset0 || true) \
|
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
# Install Pandoc 3.x from GitHub (apt ships 2.17 which has broken table rendering).
|
# Install Pandoc 3.x from GitHub (apt ships 2.17 which has broken table rendering).
|
||||||
|
|
@ -33,6 +28,17 @@ RUN ARCH=$(dpkg --print-architecture) && \
|
||||||
dpkg -i /tmp/pandoc.deb && \
|
dpkg -i /tmp/pandoc.deb && \
|
||||||
rm /tmp/pandoc.deb
|
rm /tmp/pandoc.deb
|
||||||
|
|
||||||
|
# Install Typst for PDF rendering (Typst has built-in professional styling
|
||||||
|
# for tables, headings, code blocks, etc., no CSS needed).
|
||||||
|
RUN ARCH=$(dpkg --print-architecture) && \
|
||||||
|
if [ "$ARCH" = "amd64" ]; then TYPST_ARCH="x86_64-unknown-linux-musl"; \
|
||||||
|
else TYPST_ARCH="aarch64-unknown-linux-musl"; fi && \
|
||||||
|
wget -qO /tmp/typst.tar.xz "https://github.com/typst/typst/releases/download/v0.13.1/typst-${TYPST_ARCH}.tar.xz" && \
|
||||||
|
tar -xf /tmp/typst.tar.xz -C /tmp && \
|
||||||
|
cp /tmp/typst-*/typst /usr/local/bin/typst && \
|
||||||
|
rm -rf /tmp/typst* && \
|
||||||
|
typst --version
|
||||||
|
|
||||||
# Update certificates and install SSL tools
|
# Update certificates and install SSL tools
|
||||||
RUN update-ca-certificates
|
RUN update-ca-certificates
|
||||||
RUN pip install --upgrade certifi pip-system-certs
|
RUN pip install --upgrade certifi pip-system-certs
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@ Report routes for read, export (PDF/DOCX), and delete operations.
|
||||||
|
|
||||||
No create or update endpoints here — reports are generated inline by the
|
No create or update endpoints here — reports are generated inline by the
|
||||||
agent tool during chat and stored as Markdown in the database.
|
agent tool during chat and stored as Markdown in the database.
|
||||||
Export to PDF/DOCX is on-demand via pypandoc.
|
Export to PDF/DOCX is on-demand via pypandoc (PDF uses Typst as the engine).
|
||||||
|
|
||||||
Authorization: lightweight search-space membership checks (no granular RBAC)
|
Authorization: lightweight search-space membership checks (no granular RBAC)
|
||||||
since reports are chat-generated artifacts, not standalone managed resources.
|
since reports are chat-generated artifacts, not standalone managed resources.
|
||||||
|
|
@ -210,9 +210,16 @@ async def export_report(
|
||||||
# pypandoc spawns a pandoc subprocess (blocking), so we run the
|
# pypandoc spawns a pandoc subprocess (blocking), so we run the
|
||||||
# entire convert → read → cleanup pipeline in a thread executor
|
# entire convert → read → cleanup pipeline in a thread executor
|
||||||
# to avoid blocking the async event loop on any file I/O.
|
# to avoid blocking the async event loop on any file I/O.
|
||||||
|
#
|
||||||
|
# PDF uses Typst as the rendering engine — Typst has built-in
|
||||||
|
# professional styling for tables, headings, code blocks, etc.,
|
||||||
|
# so no CSS injection is needed.
|
||||||
|
#
|
||||||
|
# Use "gfm" because LLM output uses GFM-style pipe tables that
|
||||||
|
# pandoc's stricter default "markdown" format may fail to parse.
|
||||||
extra_args = ["--standalone"]
|
extra_args = ["--standalone"]
|
||||||
if format == ExportFormat.PDF:
|
if format == ExportFormat.PDF:
|
||||||
extra_args.append("--pdf-engine=weasyprint")
|
extra_args.append("--pdf-engine=typst")
|
||||||
|
|
||||||
def _convert_and_read() -> bytes:
|
def _convert_and_read() -> bytes:
|
||||||
"""Run all blocking I/O (tempfile, pandoc, file read, cleanup) in a thread."""
|
"""Run all blocking I/O (tempfile, pandoc, file read, cleanup) in a thread."""
|
||||||
|
|
@ -222,7 +229,7 @@ async def export_report(
|
||||||
pypandoc.convert_text(
|
pypandoc.convert_text(
|
||||||
report.content,
|
report.content,
|
||||||
format.value,
|
format.value,
|
||||||
format="md",
|
format="gfm",
|
||||||
extra_args=extra_args,
|
extra_args=extra_args,
|
||||||
outputfile=tmp_path,
|
outputfile=tmp_path,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -64,7 +64,6 @@ dependencies = [
|
||||||
"langchain-unstructured>=1.0.1",
|
"langchain-unstructured>=1.0.1",
|
||||||
"slowapi>=0.1.9",
|
"slowapi>=0.1.9",
|
||||||
"pypandoc>=1.16.2",
|
"pypandoc>=1.16.2",
|
||||||
"weasyprint>=68.1",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[dependency-groups]
|
[dependency-groups]
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue