fix: repair deferred imports to preserve module-level names for test patching (#831)

A previous commit moved SDK imports into __init__/methods and
stashed them on self, which broke @patch targets in 24 unit tests.

This commit corrects that approach: the chunker and pdf_decoder modules
now use module-level None sentinels with global / "is None" guards, so
the imports are still deferred but the names remain patchable at module
level. Google AI Studio reverts to standard module-level imports, since
that module is only loaded when communicating with Gemini.
Lazy loading is kept for the other imports.
This commit is contained in:
cybermaggedon 2026-04-18 11:43:21 +01:00 committed by GitHub
parent d7745baab4
commit cce3acd84f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 48 additions and 36 deletions

View file

@ -10,6 +10,8 @@ from prometheus_client import Histogram
from ... schema import TextDocument, Chunk, Metadata, Triples
from ... base import ChunkingService, ConsumerSpec, ProducerSpec
RecursiveCharacterTextSplitter = None
from ... provenance import (
chunk_uri as make_chunk_uri, derived_entity_triples,
set_graph, GRAPH_SOURCE,
@ -41,8 +43,12 @@ class Processor(ChunkingService):
self.default_chunk_size = chunk_size
self.default_chunk_overlap = chunk_overlap
from langchain_text_splitters import RecursiveCharacterTextSplitter
self.RecursiveCharacterTextSplitter = RecursiveCharacterTextSplitter
global RecursiveCharacterTextSplitter
if RecursiveCharacterTextSplitter is None:
from langchain_text_splitters import (
RecursiveCharacterTextSplitter as _cls,
)
RecursiveCharacterTextSplitter = _cls
if not hasattr(__class__, "chunk_metric"):
__class__.chunk_metric = Histogram(
@ -52,7 +58,7 @@ class Processor(ChunkingService):
2500, 4000, 6400, 10000, 16000]
)
self.text_splitter = self.RecursiveCharacterTextSplitter(
self.text_splitter = RecursiveCharacterTextSplitter(
chunk_size=chunk_size,
chunk_overlap=chunk_overlap,
length_function=len,
@ -105,7 +111,7 @@ class Processor(ChunkingService):
chunk_overlap = int(chunk_overlap)
# Create text splitter with effective parameters
text_splitter = self.RecursiveCharacterTextSplitter(
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=chunk_size,
chunk_overlap=chunk_overlap,
length_function=len,

View file

@ -10,6 +10,8 @@ from prometheus_client import Histogram
from ... schema import TextDocument, Chunk, Metadata, Triples
from ... base import ChunkingService, ConsumerSpec, ProducerSpec
TokenTextSplitter = None
from ... provenance import (
chunk_uri as make_chunk_uri, derived_entity_triples,
set_graph, GRAPH_SOURCE,
@ -41,8 +43,10 @@ class Processor(ChunkingService):
self.default_chunk_size = chunk_size
self.default_chunk_overlap = chunk_overlap
from langchain_text_splitters import TokenTextSplitter
self.TokenTextSplitter = TokenTextSplitter
global TokenTextSplitter
if TokenTextSplitter is None:
from langchain_text_splitters import TokenTextSplitter as _cls
TokenTextSplitter = _cls
if not hasattr(__class__, "chunk_metric"):
__class__.chunk_metric = Histogram(
@ -52,7 +56,7 @@ class Processor(ChunkingService):
2500, 4000, 6400, 10000, 16000]
)
self.text_splitter = self.TokenTextSplitter(
self.text_splitter = TokenTextSplitter(
encoding_name="cl100k_base",
chunk_size=chunk_size,
chunk_overlap=chunk_overlap,
@ -104,7 +108,7 @@ class Processor(ChunkingService):
chunk_overlap = int(chunk_overlap)
# Create text splitter with effective parameters
text_splitter = self.TokenTextSplitter(
text_splitter = TokenTextSplitter(
encoding_name="cl100k_base",
chunk_size=chunk_size,
chunk_overlap=chunk_overlap,

View file

@ -15,6 +15,9 @@ from ... schema import Document, TextDocument, Metadata
from ... schema import librarian_request_queue, librarian_response_queue
from ... schema import Triples
from ... base import FlowProcessor, ConsumerSpec, ProducerSpec, LibrarianClient
PyPDFLoader = None
from ... provenance import (
document_uri, page_uri as make_page_uri, derived_entity_triples,
set_graph, GRAPH_SOURCE,
@ -128,7 +131,12 @@ class Processor(FlowProcessor):
fp.write(base64.b64decode(v.data))
fp.close()
from langchain_community.document_loaders import PyPDFLoader
global PyPDFLoader
if PyPDFLoader is None:
from langchain_community.document_loaders import (
PyPDFLoader as _cls,
)
PyPDFLoader = _cls
loader = PyPDFLoader(temp_path)
pages = loader.load()