mirror of
https://github.com/VectifyAI/PageIndex.git
synced 2026-06-12 19:55:17 +02:00
60 lines
2 KiB
Python
60 lines
2 KiB
Python
import json
|
|
from types import SimpleNamespace
|
|
|
|
|
|
class SummaryBackend:
    """Test double for a semantic retrieval backend exposing one channel.

    Every ``search_channel`` invocation is logged in ``self.calls`` as a
    ``(channel, query, filters)`` tuple so a test can inspect what was
    requested. Each search returns a single canned hit that points at the
    ``document_id`` given at construction time.
    """

    def __init__(self, document_id):
        """Remember the document id to report and start an empty call log."""
        self.calls = []
        self.document_id = document_id

    def available_channels(self):
        """Return the tuple of channel names this stub offers."""
        channels = ("summary",)
        return channels

    def search_channel(self, channel, query, *, limit=10, filters=None):
        """Log the request and return one canned hit for ``query``.

        ``limit`` is accepted but not used: exactly one hit is returned
        regardless of its value.
        """
        request = (channel, query, filters)
        self.calls.append(request)

        hit = SimpleNamespace(
            document_id=self.document_id,
            snippet=f"summary candidate: {query}",
        )
        return [hit]
|
|
|
|
|
|
def test_semantic_search_scope_keeps_ordinary_folders_out_of_source_type_filters(tmp_path):
    """A ``search-summary`` scoped to ``/documents`` must send empty filters.

    One file is registered under the ``/documents`` folder with a
    ``source_type`` entry in its metadata. The search is then run through the
    command executor against a recording stub backend. The test checks that
    the filters the backend received are an empty dict and that the first
    result row carries the registered file's external id.
    """
    from pageindex.filesystem import PIFSCommandExecutor, PageIndexFileSystem

    fs = PageIndexFileSystem(workspace=tmp_path / "workspace")
    report = {
        "storage_uri": "file:///tmp/report.pdf",
        "source_path": "examples/documents/report.pdf",
        "folder_path": "/documents",
        "external_id": "dsid_report",
        "title": "Annual report",
        "metadata": {"source_type": "examples-documents"},
        "content": "Federal Reserve supervision and regulation annual report.",
    }
    fs.register_file(**report)

    # Swap in the recording stub so the filters passed down can be inspected.
    stub = SummaryBackend("dsid_report")
    fs.semantic_retrieval_backend = stub
    runner = PIFSCommandExecutor(fs, json_output=True)

    command = 'search-summary "Federal Reserve annual report" /documents'
    raw_output = runner.execute(command)
    payload = json.loads(raw_output)

    # Each logged call is (channel, query, filters); index 2 is the filters.
    first_call = stub.calls[0]
    assert first_call[2] == {}

    rows = payload["data"]["data"]
    assert rows[0]["external_id"] == "dsid_report"
|
|
|
|
|
|
def test_semantic_search_scope_filters_explicit_source_type_facets():
    """Check the scope-to-filters mapping for facet and non-facet folders.

    Folder paths containing a ``source_type=google-drive`` segment are
    expected to produce ``{"source_type": "google_drive"}``, with or without
    the ``/semantic`` prefix. A plain folder such as ``/documents`` is
    expected to produce an empty filter dict.
    """
    from pageindex.filesystem import PageIndexFileSystem

    # (folder_path, expected filters), checked in this order.
    cases = (
        ("/source_type=google-drive", {"source_type": "google_drive"}),
        ("/semantic/source_type=google-drive", {"source_type": "google_drive"}),
        ("/documents", {}),
    )
    for folder_path, expected in cases:
        scope = {"folder_path": folder_path}
        assert PageIndexFileSystem._semantic_filters_for_scope(scope) == expected
|
|
|