mirror of
https://github.com/VectifyAI/PageIndex.git
synced 2026-06-12 19:55:17 +02:00
fix(filesystem): lazy-load pifs import surface
Avoid eager optional dependency imports when importing PageIndexFileSystem or filesystem semantic exports.
This commit is contained in:
parent
70eece52e5
commit
e9453725b6
4 changed files with 78 additions and 13 deletions
|
|
@ -2,13 +2,21 @@ import os
|
|||
|
||||
os.environ.setdefault("LITELLM_LOCAL_MODEL_COST_MAP", "true")
|
||||
|
||||
_OPTIONAL_CORE_IMPORTS = {"litellm", "openai", "PyPDF2", "pymupdf"}
|
||||
|
||||
try:
|
||||
from .page_index import *
|
||||
from .page_index_md import md_to_tree
|
||||
from .retrieve import get_document, get_document_structure, get_page_content
|
||||
from .client import PageIndexClient
|
||||
except ModuleNotFoundError as exc:
|
||||
if exc.name != "litellm":
|
||||
if exc.name not in _OPTIONAL_CORE_IMPORTS:
|
||||
raise
|
||||
|
||||
from .filesystem import PageIndexFileSystem
|
||||
|
||||
def __getattr__(name: str):
    """Resolve ``PageIndexFileSystem`` lazily on first attribute access.

    A module-level ``__getattr__`` is only consulted when normal attribute
    lookup on the module fails, so the filesystem subpackage is imported on
    demand rather than when this package is imported.

    Args:
        name: Attribute being looked up on this module.

    Returns:
        The ``PageIndexFileSystem`` class when that name is requested.

    Raises:
        AttributeError: For any other name.
    """
    if name == "PageIndexFileSystem":
        from .filesystem import PageIndexFileSystem

        # Cache on the module so later lookups bypass this hook entirely.
        globals()[name] = PageIndexFileSystem
        return PageIndexFileSystem
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
from importlib import import_module
|
||||
|
||||
from .commands import PIFSCommandExecutor
|
||||
from .core import PageIndexFileSystem
|
||||
from .hybrid_projection import HybridProjectionSearchBackend
|
||||
from .metadata_generation import (
|
||||
MetadataGenerationBackend,
|
||||
MetadataGenerationError,
|
||||
|
|
@ -8,15 +9,17 @@ from .metadata_generation import (
|
|||
MetadataGenerationResult,
|
||||
MetadataGenerator,
|
||||
)
|
||||
from .projection_indexing import SummaryProjectionIndexer
|
||||
from .semantic_index import (
|
||||
RebuildableSemanticIndex,
|
||||
SemanticIndexRecord,
|
||||
SemanticSearchResult,
|
||||
SQLiteVecSemanticIndex,
|
||||
)
|
||||
from .types import OpenResult, SearchResult
|
||||
|
||||
_LAZY_EXPORTS = {
|
||||
"HybridProjectionSearchBackend": (".hybrid_projection", "HybridProjectionSearchBackend"),
|
||||
"RebuildableSemanticIndex": (".semantic_index", "RebuildableSemanticIndex"),
|
||||
"SemanticIndexRecord": (".semantic_index", "SemanticIndexRecord"),
|
||||
"SemanticSearchResult": (".semantic_index", "SemanticSearchResult"),
|
||||
"SQLiteVecSemanticIndex": (".semantic_index", "SQLiteVecSemanticIndex"),
|
||||
"SummaryProjectionIndexer": (".projection_indexing", "SummaryProjectionIndexer"),
|
||||
}
|
||||
|
||||
__all__ = [
|
||||
"OpenResult",
|
||||
"HybridProjectionSearchBackend",
|
||||
|
|
@ -34,3 +37,16 @@ __all__ = [
|
|||
"SummaryProjectionIndexer",
|
||||
"SQLiteVecSemanticIndex",
|
||||
]
|
||||
|
||||
|
||||
def __getattr__(name: str):
    """Import a lazily exported attribute on first access and cache it.

    Each key of ``_LAZY_EXPORTS`` maps to a ``(relative module, attribute)``
    pair. The module is imported only when the name is first requested.

    Args:
        name: Attribute being looked up on this module.

    Returns:
        The attribute fetched from the lazily imported submodule.

    Raises:
        AttributeError: If ``name`` is not listed in ``_LAZY_EXPORTS``.
    """
    if name not in _LAZY_EXPORTS:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    module_name, attribute_name = _LAZY_EXPORTS[name]
    # Relative module names are resolved against this package's own name.
    value = getattr(import_module(module_name, __name__), attribute_name)
    # Store the resolved object so this hook is not consulted again for it.
    globals()[name] = value
    return value
|
||||
|
||||
|
||||
def __dir__() -> list[str]:
    """Return the sorted names for ``dir()`` on this module.

    Combines the names currently in the module namespace with everything in
    ``__all__``, so names declared in ``__all__`` are listed even before they
    have been loaded into the namespace.
    """
    return sorted(set(globals()) | set(__all__))
|
||||
|
|
|
|||
|
|
@ -3,10 +3,9 @@ from __future__ import annotations
|
|||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional, Union
|
||||
from typing import TYPE_CHECKING, Any, Optional, Union
|
||||
from urllib.parse import unquote, urlparse
|
||||
|
||||
from ..client import PageIndexClient
|
||||
from .metadata import MetadataQueryEngine
|
||||
from .metadata_generation import (
|
||||
MetadataGenerationBackend,
|
||||
|
|
@ -15,7 +14,6 @@ from .metadata_generation import (
|
|||
MetadataGenerationResult,
|
||||
MetadataGenerator,
|
||||
)
|
||||
from .projection_indexing import SummaryProjectionIndexer
|
||||
from .semantic_folder_policy import (
|
||||
SEMANTIC_FOLDER_BASE_FIELDS,
|
||||
SEMANTIC_FOLDER_ROOT,
|
||||
|
|
@ -39,6 +37,10 @@ from .structural_read import (
|
|||
)
|
||||
from .types import OpenResult, SearchResult
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from ..client import PageIndexClient
|
||||
from .projection_indexing import SummaryProjectionIndexer
|
||||
|
||||
DEFAULT_METADATA_GENERATION_FIELDS = {
|
||||
"summary": True,
|
||||
"doc_type": True,
|
||||
|
|
@ -215,6 +217,8 @@ class PageIndexFileSystem:
|
|||
max_text_chars=self.metadata_max_text_chars,
|
||||
)
|
||||
if self.summary_projection_index and self.summary_projection_indexer is None:
|
||||
from .projection_indexing import SummaryProjectionIndexer
|
||||
|
||||
self.summary_projection_indexer = SummaryProjectionIndexer.from_provider(
|
||||
self.summary_projection_index_dir,
|
||||
embedding_provider=self.summary_projection_embedding_provider,
|
||||
|
|
@ -836,6 +840,8 @@ class PageIndexFileSystem:
|
|||
return self.workspace / "artifacts" / "pageindex_client"
|
||||
|
||||
def _pageindex_client(self) -> PageIndexClient:
|
||||
from ..client import PageIndexClient
|
||||
|
||||
return PageIndexClient(workspace=str(self.pageindex_client_workspace))
|
||||
|
||||
def _pageindex_client_doc_for_entry(self, entry: Any) -> tuple[PageIndexClient, str | None]:
|
||||
|
|
|
|||
35
tests/test_import_surface.py
Normal file
35
tests/test_import_surface.py
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import builtins
|
||||
import importlib
|
||||
import sys
|
||||
|
||||
|
||||
def test_filesystem_import_works_without_eager_optional_dependencies(monkeypatch):
    """Check that PageIndexFileSystem imports while optional deps are blocked.

    ``builtins.__import__`` is patched so that any import rooted at one of the
    blocked packages raises ``ModuleNotFoundError``. Both the top-level
    package and the filesystem subpackage must then still expose the same
    ``PageIndexFileSystem`` object.
    """
    blocked_roots = {"litellm", "openai", "PyPDF2", "pymupdf", "sqlite_vec"}
    real_import = builtins.__import__

    def clear_pageindex_modules() -> None:
        # Drop cached pageindex modules so each import runs module code afresh.
        for name in list(sys.modules):
            if name == "pageindex" or name.startswith("pageindex."):
                sys.modules.pop(name, None)

    def import_without_optional_deps(name, globals=None, locals=None, fromlist=(), level=0):
        # Parameter names mirror the builtins.__import__ signature.
        root = name.split(".", 1)[0]
        if root in blocked_roots:
            raise ModuleNotFoundError(f"No module named '{root}'", name=root)
        return real_import(name, globals, locals, fromlist, level)

    clear_pageindex_modules()
    try:
        with monkeypatch.context() as patch:
            patch.setattr(builtins, "__import__", import_without_optional_deps)

            # These imports must run while the patched __import__ is active.
            filesystem_module = importlib.import_module("pageindex.filesystem")
            from pageindex import PageIndexFileSystem as TopLevelPageIndexFileSystem
            from pageindex.filesystem import PageIndexFileSystem

            assert filesystem_module.PageIndexFileSystem is PageIndexFileSystem
            assert TopLevelPageIndexFileSystem is PageIndexFileSystem
    finally:
        # Remove modules imported under the patch so they are not reused later.
        clear_pageindex_modules()
|
||||
Loading…
Add table
Add a link
Reference in a new issue