Added module which does OCR for PDF, pdf-ocr in a separate package (#324)

The pdf-ocr package has a lot of dependencies. It uses Tesseract.
This commit is contained in:
cybermaggedon 2025-03-20 09:29:40 +00:00 committed by GitHub
parent cbfe37fec7
commit c759d55734
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 208 additions and 0 deletions

View file

@ -45,6 +45,7 @@ COPY trustgraph-vertexai/ /root/build/trustgraph-vertexai/
# Copy each package's source directory from the build context into its own
# subdirectory of /root/build/, then make /root/build/ the working directory
# for the instructions that follow.
COPY trustgraph-bedrock/ /root/build/trustgraph-bedrock/
COPY trustgraph-embeddings-hf/ /root/build/trustgraph-embeddings-hf/
COPY trustgraph-cli/ /root/build/trustgraph-cli/
COPY trustgraph-ocr/ /root/build/trustgraph-ocr/
WORKDIR /root/build/
@ -54,6 +55,7 @@ RUN pip3 wheel -w /root/wheels/ --no-deps ./trustgraph-vertexai/
# Build a wheel for each package directory and write it to /root/wheels/.
# --no-deps limits each invocation to the named package, so wheels for its
# dependencies are not built here.
RUN pip3 wheel -w /root/wheels/ --no-deps ./trustgraph-bedrock/
RUN pip3 wheel -w /root/wheels/ --no-deps ./trustgraph-embeddings-hf/
RUN pip3 wheel -w /root/wheels/ --no-deps ./trustgraph-cli/
RUN pip3 wheel -w /root/wheels/ --no-deps ./trustgraph-ocr/
# List the contents of the wheel directory; the listing appears in the build output.
RUN ls /root/wheels
@ -72,6 +74,7 @@ RUN \
pip3 install /root/wheels/trustgraph_bedrock-* && \
pip3 install /root/wheels/trustgraph_embeddings_hf-* && \
pip3 install /root/wheels/trustgraph_cli-* && \
pip3 install /root/wheels/trustgraph_ocr-* && \
pip3 cache purge && \
rm -rf /root/wheels