Add missing pdf extra to unstructured dependency (#728)

* Fix PDF processing deps so that PDF processing works
This commit is contained in:
cybermaggedon 2026-03-29 20:22:06 +01:00 committed by Cyber MacGeddon
parent 20204d87c3
commit 413f917676
2 changed files with 7 additions and 2 deletions

View file

@@ -7,7 +7,7 @@ FROM docker.io/fedora:42 AS base
# Let pip install into the distro-managed system Python; this pip setting
# overrides the "externally managed environment" (PEP 668) guard.
ENV PIP_BREAK_SYSTEM_PACKAGES=1
RUN dnf install -y python3.13 && \
RUN dnf install -y python3.13 libxcb mesa-libGL && \
alternatives --install /usr/bin/python python /usr/bin/python3.13 1 && \
python -m ensurepip --upgrade && \
pip3 install --no-cache-dir build wheel aiohttp && \
@@ -38,6 +38,11 @@ RUN ls /root/wheels
FROM base
# Install the CPU-only PyTorch build up front: unstructured[pdf] depends on
# torch, and satisfying that dependency here keeps the CUDA-enabled wheels
# out of the image (~190MB vs ~2GB+).
RUN pip3 install \
        --no-cache-dir \
        --index-url https://download.pytorch.org/whl/cpu \
        torch==2.11.0+cpu
# Bring in the wheels from the build stage.
COPY --from=build /root/wheels /root/wheels
RUN \