PageIndex/tests/test_pageindex_filesystem_scope.py

60 lines
2 KiB
Python

import json
from types import SimpleNamespace
class SummaryBackend:
    """Stub semantic retrieval backend that exposes a single "summary" channel.

    Every search is recorded in ``calls`` as a ``(channel, query, filters)``
    tuple so a test can inspect what the caller passed in, and each search
    answers with one canned hit pointing at ``document_id``.
    """

    def __init__(self, document_id):
        """Remember the document id to report and start with an empty call log."""
        self.calls = []
        self.document_id = document_id

    def available_channels(self):
        """Return the channel names this stub advertises."""
        return ("summary",)

    def search_channel(self, channel, query, *, limit=10, filters=None):
        """Log the request and return a one-element list holding a canned hit.

        ``limit`` is accepted for signature compatibility but is not used;
        exactly one hit is returned regardless of its value.
        """
        invocation = (channel, query, filters)
        self.calls.append(invocation)

        canned_hit = SimpleNamespace(
            document_id=self.document_id,
            snippet=f"summary candidate: {query}",
        )
        return [canned_hit]
def test_semantic_search_scope_keeps_ordinary_folders_out_of_source_type_filters(tmp_path):
    """Scoping ``search-summary`` to a plain folder must not add backend filters.

    A document is registered under ``/documents`` with a ``source_type``
    metadata value, then ``search-summary`` is run against that folder. The
    stub backend must have been called with empty filters, and the first
    entry of the JSON result must be the registered document.
    """
    from pageindex.filesystem import PIFSCommandExecutor, PageIndexFileSystem

    stub_backend = SummaryBackend("dsid_report")
    fs = PageIndexFileSystem(workspace=tmp_path / "workspace")

    registration = {
        "storage_uri": "file:///tmp/report.pdf",
        "source_path": "examples/documents/report.pdf",
        "folder_path": "/documents",
        "external_id": "dsid_report",
        "title": "Annual report",
        "metadata": {"source_type": "examples-documents"},
        "content": "Federal Reserve supervision and regulation annual report.",
    }
    fs.register_file(**registration)

    # The backend is attached only after registration, same as the original flow.
    fs.semantic_retrieval_backend = stub_backend

    runner = PIFSCommandExecutor(fs, json_output=True)
    raw_output = runner.execute('search-summary "Federal Reserve annual report" /documents')
    payload = json.loads(raw_output)

    # Index 2 of a recorded call is the ``filters`` argument the backend received.
    first_call = stub_backend.calls[0]
    assert first_call[2] == {}

    first_entry = payload["data"]["data"][0]
    assert first_entry["external_id"] == "dsid_report"
def test_semantic_search_scope_filters_explicit_source_type_facets():
    """Check which folder paths ``_semantic_filters_for_scope`` turns into filters.

    A ``source_type=google-drive`` path segment, with or without a leading
    ``/semantic`` segment, is expected to yield ``{"source_type": "google_drive"}``;
    an ordinary folder such as ``/documents`` is expected to yield no filters.
    """
    from pageindex.filesystem import PageIndexFileSystem

    # (folder_path, expected filters) pairs, checked in order.
    expectations = (
        ("/source_type=google-drive", {"source_type": "google_drive"}),
        ("/semantic/source_type=google-drive", {"source_type": "google_drive"}),
        ("/documents", {}),
    )

    for folder_path, expected_filters in expectations:
        scope = {"folder_path": folder_path}
        assert PageIndexFileSystem._semantic_filters_for_scope(scope) == expected_filters