diff --git a/pageindex/filesystem/__init__.py b/pageindex/filesystem/__init__.py index 2c8fd1b..7908393 100644 --- a/pageindex/filesystem/__init__.py +++ b/pageindex/filesystem/__init__.py @@ -1,4 +1,5 @@ from importlib import import_module +from typing import TYPE_CHECKING from .commands import PIFSCommandExecutor from .core import PageIndexFileSystem @@ -11,6 +12,16 @@ from .metadata_generation import ( ) from .types import OpenResult, SearchResult +if TYPE_CHECKING: + from .hybrid_projection import HybridProjectionSearchBackend + from .projection_indexing import SummaryProjectionIndexer + from .semantic_index import ( + RebuildableSemanticIndex, + SemanticIndexRecord, + SemanticSearchResult, + SQLiteVecSemanticIndex, + ) + _LAZY_EXPORTS = { "HybridProjectionSearchBackend": (".hybrid_projection", "HybridProjectionSearchBackend"), "RebuildableSemanticIndex": (".semantic_index", "RebuildableSemanticIndex"), @@ -49,4 +60,4 @@ def __getattr__(name: str): def __dir__() -> list[str]: - return sorted(set(globals()) | set(__all__)) + return sorted(set(globals()) | set(__all__) | set(_LAZY_EXPORTS)) diff --git a/pageindex/filesystem/cli.py b/pageindex/filesystem/cli.py index 24a78f4..8af12e6 100644 --- a/pageindex/filesystem/cli.py +++ b/pageindex/filesystem/cli.py @@ -284,7 +284,6 @@ def main(argv: list[str] | None = None) -> int: except Exception as exc: print(f"ERROR: {exc}", file=sys.stderr) return 1 - return 0 if __name__ == "__main__": diff --git a/pageindex/filesystem/commands.py b/pageindex/filesystem/commands.py index 6341b8b..73470a6 100644 --- a/pageindex/filesystem/commands.py +++ b/pageindex/filesystem/commands.py @@ -1050,7 +1050,6 @@ class PIFSCommandExecutor: row["folder_paths"] = folder_paths metadata = info.get("metadata") or {} raw_value = metadata.get(field) - value_text = "" if raw_value is None else str(raw_value) row.update( { "field": field, @@ -1652,7 +1651,7 @@ class PIFSCommandExecutor: storage_path = Path(row["storage_uri"]) source_path = Path(row["source_path"]) root = storage_path - for _part in source_path.parts: + for _ in range(len(source_path.parts)): root = root.parent return root diff --git a/tests/test_pageindex_filesystem_scope.py b/tests/test_pageindex_filesystem_scope.py index 5bd730c..7c9e31b 100644 --- a/tests/test_pageindex_filesystem_scope.py +++ b/tests/test_pageindex_filesystem_scope.py @@ -4,6 +4,36 @@ from types import SimpleNamespace import pytest +def test_filesystem_lazy_exports_remain_public(): + import pageindex.filesystem as filesystem + from pageindex.filesystem import ( + HybridProjectionSearchBackend, + RebuildableSemanticIndex, + SemanticIndexRecord, + SemanticSearchResult, + SQLiteVecSemanticIndex, + SummaryProjectionIndexer, + ) + + for name in ( + "HybridProjectionSearchBackend", + "RebuildableSemanticIndex", + "SemanticIndexRecord", + "SemanticSearchResult", + "SQLiteVecSemanticIndex", + "SummaryProjectionIndexer", + ): + assert name in filesystem.__all__ + assert name in dir(filesystem) + + assert HybridProjectionSearchBackend.__name__ == "HybridProjectionSearchBackend" + assert RebuildableSemanticIndex.__name__ == "RebuildableSemanticIndex" + assert SemanticIndexRecord.__name__ == "SemanticIndexRecord" + assert SemanticSearchResult.__name__ == "SemanticSearchResult" + assert SQLiteVecSemanticIndex.__name__ == "SQLiteVecSemanticIndex" + assert SummaryProjectionIndexer.__name__ == "SummaryProjectionIndexer" + + class SummaryBackend: def __init__(self, document_id): self.document_id = document_id diff --git a/tests/test_pageindex_structural_read.py b/tests/test_pageindex_structural_read.py index f5b6dea..a78f1d1 100644 --- a/tests/test_pageindex_structural_read.py +++ b/tests/test_pageindex_structural_read.py @@ -640,7 +640,8 @@ def test_cat_all_is_limited_to_text_files(): filesystem.open("dsid_md_file") with pytest.raises(PIFSCommandError, match="only supported for txt/text files"): executor.execute("cat dsid_json_file --all") - assert filesystem.open("dsid_json_file").text == '{"body":"json"}' + opened_json = filesystem.open("dsid_json_file") + assert opened_json.text == '{"body":"json"}' for command in ( "head dsid_pdf_file", "tail dsid_pdf_file",