import json from types import SimpleNamespace class SummaryBackend: def __init__(self, document_id): self.document_id = document_id self.calls = [] def available_channels(self): return ("summary",) def search_channel(self, channel, query, *, limit=10, filters=None): self.calls.append((channel, query, filters)) return [ SimpleNamespace( document_id=self.document_id, snippet=f"summary candidate: {query}", ) ] def test_semantic_search_scope_keeps_ordinary_folders_out_of_source_type_filters(tmp_path): from pageindex.filesystem import PIFSCommandExecutor, PageIndexFileSystem filesystem = PageIndexFileSystem(workspace=tmp_path / "workspace") filesystem.register_file( storage_uri="file:///tmp/report.pdf", source_path="examples/documents/report.pdf", folder_path="/documents", external_id="dsid_report", title="Annual report", metadata={"source_type": "examples-documents"}, content="Federal Reserve supervision and regulation annual report.", ) backend = SummaryBackend("dsid_report") filesystem.semantic_retrieval_backend = backend executor = PIFSCommandExecutor(filesystem, json_output=True) result = json.loads( executor.execute('search-summary "Federal Reserve annual report" /documents') ) assert backend.calls[0][2] == {} assert result["data"]["data"][0]["external_id"] == "dsid_report" def test_semantic_search_scope_filters_explicit_source_type_facets(): from pageindex.filesystem import PageIndexFileSystem assert PageIndexFileSystem._semantic_filters_for_scope( {"folder_path": "/source_type=google-drive"} ) == {"source_type": "google_drive"} assert PageIndexFileSystem._semantic_filters_for_scope( {"folder_path": "/semantic/source_type=google-drive"} ) == {"source_type": "google_drive"} assert PageIndexFileSystem._semantic_filters_for_scope( {"folder_path": "/documents"} ) == {}