Use unstructured-inference, CPU only

This commit is contained in:
Cyber MacGeddon 2026-03-28 14:04:43 +00:00
parent 8326795495
commit 54968e3e7d
2 changed files with 6 additions and 7 deletions

View file

@@ -38,6 +38,11 @@ RUN ls /root/wheels
FROM base
# Install the CPU-only PyTorch wheel up front. The torch requirement of
# unstructured[pdf] is then already satisfied, so CUDA is not pulled in
# (~190MB vs ~2GB+).
RUN pip3 install --no-cache-dir \
    --index-url https://download.pytorch.org/whl/cpu \
    torch==2.11.0+cpu
COPY --from=build /root/wheels /root/wheels
RUN \

View file

@@ -14,13 +14,7 @@ dependencies = [
"pulsar-client",
"prometheus-client",
"python-magic",
"unstructured[csv,docx,epub,md,odt,pptx,rst,rtf,tsv,xlsx]",
"pdfminer.six",
"pdf2image",
"pikepdf",
"pi_heif",
"pypdfium2",
"unstructured.pytesseract",
"unstructured[csv,docx,epub,md,odt,pdf,pptx,rst,rtf,tsv,xlsx]",
]
classifiers = [
"Programming Language :: Python :: 3",