fix: repair deferred imports to preserve module-level names for test patching (#831)

A previous commit moved SDK imports into __init__/methods and stashed the imported names on self, which broke the @patch targets used by 24 unit tests. This commit corrects the approach: chunker and pdf_decoder now use module-level sentinels (initialized to None) with global/if-None guards, so the imports are still deferred but the names remain patchable at module level. Google AI Studio reverts to standard module-level imports, since that module is only loaded when communicating with Gemini. Lazy loading is kept for the other imports.
2026-06-14 01:05:14 +02:00 · 2026-04-18 11:43:21 +01:00 · 2026-04-18 11:43:21 +01:00 · cce3acd84f
commit cce3acd84f
parent d7745baab4
4 changed files with 48 additions and 36 deletions
--- a/trustgraph-flow/trustgraph/decoding/pdf/pdf_decoder.py
+++ b/trustgraph-flow/trustgraph/decoding/pdf/pdf_decoder.py
@@ -15,6 +15,9 @@ from ... schema import Document, TextDocument, Metadata
 from ... schema import librarian_request_queue, librarian_response_queue
 from ... schema import Triples
 from ... base import FlowProcessor, ConsumerSpec, ProducerSpec, LibrarianClient
+
+PyPDFLoader = None
+
 from ... provenance import (
    document_uri, page_uri as make_page_uri, derived_entity_triples,
    set_graph, GRAPH_SOURCE,
@@ -128,7 +131,12 @@ class Processor(FlowProcessor):
                fp.write(base64.b64decode(v.data))
                fp.close()

-            from langchain_community.document_loaders import PyPDFLoader
+            global PyPDFLoader
+            if PyPDFLoader is None:
+                from langchain_community.document_loaders import (
+                    PyPDFLoader as _cls,
+                )
+                PyPDFLoader = _cls
            loader = PyPDFLoader(temp_path)
            pages = loader.load()