mirror of
https://github.com/VectifyAI/PageIndex.git
synced 2026-05-01 02:56:21 +02:00
feat: add PageIndex SDK with local/cloud dual-mode support (#207)
This commit is contained in:
parent
f2dcffc0b7
commit
c7fe93bb56
45 changed files with 4225 additions and 274 deletions
14
tests/test_agent.py
Normal file
14
tests/test_agent.py
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
"""Tests for the agent runner and its system prompt."""
from pageindex.agent import AgentRunner, SYSTEM_PROMPT
from pageindex.backend.protocol import AgentTools


def test_agent_runner_init():
    # The runner should retain the model name it was constructed with.
    agent_tools = AgentTools(function_tools=["mock_tool"])
    agent = AgentRunner(tools=agent_tools, model="gpt-4o")
    assert agent._model == "gpt-4o"


def test_system_prompt_has_tool_instructions():
    # Every built-in retrieval tool must be mentioned in the prompt text.
    for tool_name in ("list_documents", "get_document_structure", "get_page_content"):
        assert tool_name in SYSTEM_PROMPT
|
||||
51
tests/test_client.py
Normal file
51
tests/test_client.py
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
# tests/sdk/test_client.py
"""Tests for the public client entry points (local and cloud)."""
import pytest
from pageindex.client import PageIndexClient, LocalClient, CloudClient


def _local_client(tmp_path):
    # Helper: build a LocalClient rooted in a throwaway directory.
    return LocalClient(model="gpt-4o", storage_path=str(tmp_path / "pi"))


def test_local_client_is_pageindex_client(tmp_path):
    assert isinstance(_local_client(tmp_path), PageIndexClient)


def test_cloud_client_is_pageindex_client():
    assert isinstance(CloudClient(api_key="pi-test"), PageIndexClient)


def test_collection_default_name(tmp_path):
    # Calling collection() with no argument yields the "default" collection.
    assert _local_client(tmp_path).collection().name == "default"


def test_collection_custom_name(tmp_path):
    assert _local_client(tmp_path).collection("papers").name == "papers"


def test_list_collections_empty(tmp_path):
    assert _local_client(tmp_path).list_collections() == []


def test_list_collections_after_create(tmp_path):
    cli = _local_client(tmp_path)
    cli.collection("papers")
    assert "papers" in cli.list_collections()


def test_delete_collection(tmp_path):
    cli = _local_client(tmp_path)
    cli.collection("papers")
    cli.delete_collection("papers")
    assert "papers" not in cli.list_collections()


def test_register_parser(tmp_path):
    # A structural (duck-typed) parser should be accepted without error.
    class FakeParser:
        def supported_extensions(self):
            return [".txt"]

        def parse(self, file_path, **kwargs):
            pass

    _local_client(tmp_path).register_parser(FakeParser())
|
||||
16
tests/test_cloud_backend.py
Normal file
16
tests/test_cloud_backend.py
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
"""Tests for the cloud backend configuration."""
from pageindex.backend.cloud import CloudBackend, API_BASE


def test_cloud_backend_init():
    cloud = CloudBackend(api_key="pi-test")
    # The key is stored and also forwarded verbatim as a request header.
    assert cloud._api_key == "pi-test"
    assert cloud._headers["api_key"] == "pi-test"


def test_api_base_url():
    assert "pageindex.ai" in API_BASE


def test_get_retrieve_model_is_none():
    # NOTE(review): the name looks stale — the body actually checks that the
    # cloud backend exposes no local function tools; confirm intent upstream.
    cloud = CloudBackend(api_key="pi-test")
    assert cloud.get_agent_tools("col").function_tools == []
|
||||
41
tests/test_collection.py
Normal file
41
tests/test_collection.py
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
# tests/sdk/test_collection.py
"""Tests for the Collection facade over a backend."""
import pytest
from unittest.mock import MagicMock
from pageindex.collection import Collection


@pytest.fixture
def col():
    # Backend stub with canned answers for a single document "d1".
    stub = MagicMock()
    stub.list_documents.return_value = [
        {"doc_id": "d1", "doc_name": "paper.pdf", "doc_type": "pdf"}
    ]
    stub.get_document.return_value = {"doc_id": "d1", "doc_name": "paper.pdf"}
    stub.add_document.return_value = "d1"
    return Collection(name="papers", backend=stub)


def test_add(col):
    # The returned id comes from the backend; the call is forwarded once.
    assert col.add("paper.pdf") == "d1"
    col._backend.add_document.assert_called_once_with("papers", "paper.pdf")


def test_list_documents(col):
    listed = col.list_documents()
    assert len(listed) == 1
    assert listed[0]["doc_id"] == "d1"


def test_get_document(col):
    assert col.get_document("d1")["doc_name"] == "paper.pdf"


def test_delete_document(col):
    col.delete_document("d1")
    col._backend.delete_document.assert_called_once_with("papers", "d1")


def test_name_property(col):
    assert col.name == "papers"
|
||||
28
tests/test_config.py
Normal file
28
tests/test_config.py
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
# tests/test_config.py
"""Tests for IndexConfig defaults, overrides, and copying."""
import pytest
from pageindex.config import IndexConfig


def test_defaults():
    cfg = IndexConfig()
    assert cfg.model == "gpt-4o-2024-11-20"
    assert cfg.retrieve_model is None
    assert cfg.toc_check_page_num == 20


def test_overrides():
    cfg = IndexConfig(model="gpt-5.4", retrieve_model="claude-sonnet")
    assert cfg.model == "gpt-5.4"
    assert cfg.retrieve_model == "claude-sonnet"


def test_unknown_key_raises():
    # Unknown keyword arguments must be rejected at construction time.
    with pytest.raises(Exception):
        IndexConfig(nonexistent_key="value")


def test_model_copy_with_update():
    cfg = IndexConfig(toc_check_page_num=30)
    copied = cfg.model_copy(update={"model": "gpt-5.4"})
    # Updated field changes; untouched fields carry over to the copy.
    assert copied.model == "gpt-5.4"
    assert copied.toc_check_page_num == 30
|
||||
45
tests/test_content_node.py
Normal file
45
tests/test_content_node.py
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
"""Tests for the parser protocol data types."""
from pageindex.parser.protocol import ContentNode, ParsedDocument, DocumentParser


def test_content_node_required_fields():
    node = ContentNode(content="hello", tokens=5)
    assert node.content == "hello"
    assert node.tokens == 5
    # Optional structure fields default to None.
    assert node.title is None
    assert node.index is None
    assert node.level is None


def test_content_node_all_fields():
    node = ContentNode(content="# Intro", tokens=10, title="Intro", index=1, level=1)
    assert (node.title, node.index, node.level) == ("Intro", 1, 1)


def test_parsed_document():
    doc = ParsedDocument(
        doc_name="test.pdf",
        nodes=[ContentNode(content="page1", tokens=100, index=1)],
    )
    assert doc.doc_name == "test.pdf"
    assert len(doc.nodes) == 1
    assert doc.metadata is None


def test_parsed_document_with_metadata():
    doc = ParsedDocument(
        doc_name="test.pdf",
        nodes=[ContentNode(content="page1", tokens=100)],
        metadata={"author": "John"},
    )
    assert doc.metadata["author"] == "John"


def test_document_parser_protocol():
    """Verify a class implementing DocumentParser is structurally compatible."""
    class MyParser:
        def supported_extensions(self) -> list[str]:
            return [".txt"]

        def parse(self, file_path: str, **kwargs) -> ParsedDocument:
            return ParsedDocument(doc_name="test", nodes=[])

    parser = MyParser()
    assert parser.supported_extensions() == [".txt"]
    assert isinstance(parser.parse("test.txt"), ParsedDocument)
|
||||
27
tests/test_errors.py
Normal file
27
tests/test_errors.py
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
"""Tests for the package exception hierarchy."""
from pageindex.errors import (
    PageIndexError,
    CollectionNotFoundError,
    DocumentNotFoundError,
    IndexingError,
    CloudAPIError,
    FileTypeError,
)

# Every concrete error class expected to derive from PageIndexError.
_CONCRETE_ERRORS = [
    CollectionNotFoundError,
    DocumentNotFoundError,
    IndexingError,
    CloudAPIError,
    FileTypeError,
]


def test_all_errors_inherit_from_base():
    for exc_cls in _CONCRETE_ERRORS:
        assert issubclass(exc_cls, PageIndexError)
        assert issubclass(exc_cls, Exception)


def test_error_message():
    assert str(FileTypeError("Unsupported: .docx")) == "Unsupported: .docx"


def test_catch_base_catches_all():
    # Catching the base class must intercept every concrete error.
    for exc_cls in _CONCRETE_ERRORS:
        try:
            raise exc_cls("test")
        except PageIndexError:
            pass  # expected
|
||||
26
tests/test_events.py
Normal file
26
tests/test_events.py
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
"""Tests for query events and agent tool containers."""
from pageindex.events import QueryEvent
from pageindex.backend.protocol import AgentTools


def test_query_event():
    ev = QueryEvent(type="answer_delta", data="hello")
    assert ev.type == "answer_delta"
    assert ev.data == "hello"


def test_query_event_types():
    # Every known event type round-trips through the constructor.
    for event_type in ("reasoning", "tool_call", "tool_result", "answer_delta", "answer_done"):
        assert QueryEvent(type=event_type, data="test").type == event_type


def test_agent_tools_default_empty():
    empty = AgentTools()
    assert empty.function_tools == []
    assert empty.mcp_servers == []


def test_agent_tools_with_values():
    populated = AgentTools(function_tools=["tool1"], mcp_servers=["server1"])
    assert len(populated.function_tools) == 1
    assert len(populated.mcp_servers) == 1
|
||||
50
tests/test_local_backend.py
Normal file
50
tests/test_local_backend.py
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
# tests/sdk/test_local_backend.py
"""Tests for the local backend: collections, documents, and parsers."""
import pytest
from pathlib import Path
from pageindex.backend.local import LocalBackend
from pageindex.storage.sqlite import SQLiteStorage
from pageindex.errors import FileTypeError


@pytest.fixture
def backend(tmp_path):
    # A fully local backend over a throwaway SQLite database.
    db = SQLiteStorage(str(tmp_path / "test.db"))
    return LocalBackend(storage=db, files_dir=str(tmp_path / "files"), model="gpt-4o")


def test_collection_lifecycle(backend):
    backend.get_or_create_collection("papers")
    assert "papers" in backend.list_collections()
    backend.delete_collection("papers")
    assert "papers" not in backend.list_collections()


def test_list_documents_empty(backend):
    backend.get_or_create_collection("papers")
    assert backend.list_documents("papers") == []


def test_unsupported_file_type_raises(backend, tmp_path):
    backend.get_or_create_collection("papers")
    unknown = tmp_path / "test.xyz"
    unknown.write_text("hello")
    with pytest.raises(FileTypeError):
        backend.add_document("papers", str(unknown))


def test_register_custom_parser(backend):
    from pageindex.parser.protocol import ParsedDocument, ContentNode

    class TxtParser:
        def supported_extensions(self):
            return [".txt"]

        def parse(self, file_path, **kwargs):
            body = Path(file_path).read_text()
            node = ContentNode(
                content=body, tokens=len(body.split()), title="Content", index=1, level=1
            )
            return ParsedDocument(doc_name="test", nodes=[node])

    backend.register_parser(TxtParser())
    # Now .txt should be supported (won't raise FileTypeError)
    assert backend._resolve_parser("test.txt") is not None
|
||||
55
tests/test_markdown_parser.py
Normal file
55
tests/test_markdown_parser.py
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
"""Tests for the markdown parser."""
import pytest
from pathlib import Path
from pageindex.parser.markdown import MarkdownParser
from pageindex.parser.protocol import ContentNode, ParsedDocument


@pytest.fixture
def sample_md(tmp_path):
    # Two chapters; the first one carries two subsections.
    path = tmp_path / "test.md"
    path.write_text("""# Chapter 1
Some intro text.

## Section 1.1
Details here.

## Section 1.2
More details.

# Chapter 2
Another chapter.
""")
    return str(path)


def test_supported_extensions():
    extensions = MarkdownParser().supported_extensions()
    assert ".md" in extensions
    assert ".markdown" in extensions


def test_parse_returns_parsed_document(sample_md):
    parsed = MarkdownParser().parse(sample_md)
    assert isinstance(parsed, ParsedDocument)
    assert parsed.doc_name == "test"


def test_parse_nodes_have_level(sample_md):
    parsed = MarkdownParser().parse(sample_md)
    # One node per heading, with levels mirroring the heading depth.
    assert len(parsed.nodes) == 4
    assert parsed.nodes[0].level == 1
    assert parsed.nodes[0].title == "Chapter 1"
    assert parsed.nodes[1].level == 2
    assert parsed.nodes[1].title == "Section 1.1"
    assert parsed.nodes[3].level == 1


def test_parse_nodes_have_content(sample_md):
    parsed = MarkdownParser().parse(sample_md)
    assert "Some intro text" in parsed.nodes[0].content
    assert "Details here" in parsed.nodes[1].content


def test_parse_nodes_have_index(sample_md):
    for node in MarkdownParser().parse(sample_md).nodes:
        assert node.index is not None
|
||||
29
tests/test_pdf_parser.py
Normal file
29
tests/test_pdf_parser.py
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
"""Tests for the PDF parser (skipped when the fixture PDF is absent)."""
import pytest
from pathlib import Path
from pageindex.parser.pdf import PdfParser
from pageindex.parser.protocol import ContentNode, ParsedDocument

TEST_PDF = Path("tests/pdfs/deepseek-r1.pdf")


def test_supported_extensions():
    assert ".pdf" in PdfParser().supported_extensions()


@pytest.mark.skipif(not TEST_PDF.exists(), reason="Test PDF not available")
def test_parse_returns_parsed_document():
    parsed = PdfParser().parse(str(TEST_PDF))
    assert isinstance(parsed, ParsedDocument)
    assert len(parsed.nodes) > 0
    assert parsed.doc_name != ""


@pytest.mark.skipif(not TEST_PDF.exists(), reason="Test PDF not available")
def test_parse_nodes_are_flat_without_level():
    # Pages come back as a flat sequence: indexed, but with no heading level.
    for node in PdfParser().parse(str(TEST_PDF)).nodes:
        assert isinstance(node, ContentNode)
        assert node.content is not None
        assert node.tokens >= 0
        assert node.index is not None
        assert node.level is None
|
||||
95
tests/test_pipeline.py
Normal file
95
tests/test_pipeline.py
Normal file
|
|
@ -0,0 +1,95 @@
|
|||
# tests/sdk/test_pipeline.py
"""Tests for strategy detection and tree building in the index pipeline."""
import asyncio
from unittest.mock import patch, AsyncMock

from pageindex.parser.protocol import ContentNode, ParsedDocument
from pageindex.index.pipeline import (
    detect_strategy, build_tree_from_levels, build_index,
    _content_based_pipeline, _NullLogger,
)


def _node(content, tokens, index, title=None, level=None):
    # Shorthand constructor for test nodes.
    return ContentNode(content=content, tokens=tokens, title=title, index=index, level=level)


def test_detect_strategy_with_level():
    structured = [
        _node("# Intro", 10, 1, title="Intro", level=1),
        _node("## Details", 10, 5, title="Details", level=2),
    ]
    assert detect_strategy(structured) == "level_based"


def test_detect_strategy_without_level():
    flat = [
        _node("Page 1 text", 100, 1),
        _node("Page 2 text", 100, 2),
    ]
    assert detect_strategy(flat) == "content_based"


def test_build_tree_from_levels():
    tree = build_tree_from_levels([
        _node("ch1 text", 10, 1, title="Chapter 1", level=1),
        _node("s1.1 text", 10, 5, title="Section 1.1", level=2),
        _node("s1.2 text", 10, 10, title="Section 1.2", level=2),
        _node("ch2 text", 10, 20, title="Chapter 2", level=1),
    ])
    assert len(tree) == 2  # 2 root nodes (chapters)
    assert tree[0]["title"] == "Chapter 1"
    assert len(tree[0]["nodes"]) == 2  # 2 sections under chapter 1
    assert tree[0]["nodes"][0]["title"] == "Section 1.1"
    assert tree[0]["nodes"][1]["title"] == "Section 1.2"
    assert tree[1]["title"] == "Chapter 2"
    assert len(tree[1]["nodes"]) == 0


def test_build_tree_from_levels_single_level():
    tree = build_tree_from_levels([
        _node("a", 5, 1, title="A", level=1),
        _node("b", 5, 2, title="B", level=1),
    ])
    assert len(tree) == 2
    assert tree[0]["title"] == "A"
    assert tree[1]["title"] == "B"


def test_build_tree_from_levels_deep_nesting():
    tree = build_tree_from_levels([
        _node("h1", 5, 1, title="H1", level=1),
        _node("h2", 5, 2, title="H2", level=2),
        _node("h3", 5, 3, title="H3", level=3),
    ])
    assert len(tree) == 1
    assert tree[0]["title"] == "H1"
    assert len(tree[0]["nodes"]) == 1
    assert tree[0]["nodes"][0]["title"] == "H2"
    assert len(tree[0]["nodes"][0]["nodes"]) == 1
    assert tree[0]["nodes"][0]["nodes"][0]["title"] == "H3"


def test_content_based_pipeline_does_not_raise():
    """_content_based_pipeline should delegate to tree_parser, not raise NotImplementedError."""
    from types import SimpleNamespace

    fake_tree = [{"title": "Intro", "start_index": 1, "end_index": 2, "nodes": []}]

    async def fake_tree_parser(page_list, opt, doc=None, logger=None):
        return fake_tree

    pages = [("Page 1 text", 50), ("Page 2 text", 60)]
    opt = SimpleNamespace(model="test-model")

    # The pipeline calls tree_parser via the page_index module, so patch there.
    with patch("pageindex.index.page_index.tree_parser", new=fake_tree_parser):
        assert asyncio.run(_content_based_pipeline(pages, opt)) == fake_tree


def test_null_logger_methods():
    """NullLogger should have info/error/debug and not raise."""
    quiet = _NullLogger()
    quiet.info("test message")
    quiet.error("test error")
    quiet.debug("test debug")
    quiet.info({"key": "value"})
|
||||
61
tests/test_sqlite_storage.py
Normal file
61
tests/test_sqlite_storage.py
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
"""Tests for the SQLite storage engine."""
import pytest
from pageindex.storage.sqlite import SQLiteStorage


@pytest.fixture
def storage(tmp_path):
    # A fresh database file per test.
    return SQLiteStorage(str(tmp_path / "test.db"))


def test_create_and_list_collections(storage):
    storage.create_collection("papers")
    assert "papers" in storage.list_collections()


def test_get_or_create_collection_idempotent(storage):
    # Creating twice must not duplicate the collection.
    storage.get_or_create_collection("papers")
    storage.get_or_create_collection("papers")
    assert storage.list_collections().count("papers") == 1


def test_delete_collection(storage):
    storage.create_collection("papers")
    storage.delete_collection("papers")
    assert "papers" not in storage.list_collections()


def test_save_and_get_document(storage):
    storage.create_collection("papers")
    record = {
        "doc_name": "test.pdf", "doc_description": "A test",
        "file_path": "/tmp/test.pdf", "doc_type": "pdf",
        "structure": [{"title": "Intro", "node_id": "0001"}],
    }
    storage.save_document("papers", "doc-1", record)
    fetched = storage.get_document("papers", "doc-1")
    assert fetched["doc_name"] == "test.pdf"
    assert fetched["doc_type"] == "pdf"


def test_get_document_structure(storage):
    storage.create_collection("papers")
    tree = [{"title": "Ch1", "node_id": "0001", "nodes": []}]
    storage.save_document("papers", "doc-1", {
        "doc_name": "test.pdf", "doc_type": "pdf",
        "file_path": "/tmp/test.pdf", "structure": tree,
    })
    assert storage.get_document_structure("papers", "doc-1")[0]["title"] == "Ch1"


def test_list_documents(storage):
    storage.create_collection("papers")
    for doc_id, doc_name in (("doc-1", "p1.pdf"), ("doc-2", "p2.pdf")):
        storage.save_document("papers", doc_id, {
            "doc_name": doc_name, "doc_type": "pdf",
            "file_path": f"/tmp/{doc_name}", "structure": [],
        })
    assert len(storage.list_documents("papers")) == 2


def test_delete_document(storage):
    storage.create_collection("papers")
    storage.save_document("papers", "doc-1", {
        "doc_name": "test.pdf", "doc_type": "pdf",
        "file_path": "/tmp/test.pdf", "structure": [],
    })
    storage.delete_document("papers", "doc-1")
    assert len(storage.list_documents("papers")) == 0


def test_delete_collection_cascades_documents(storage):
    storage.create_collection("papers")
    storage.save_document("papers", "doc-1", {
        "doc_name": "test.pdf", "doc_type": "pdf",
        "file_path": "/tmp/test.pdf", "structure": [],
    })
    storage.delete_collection("papers")
    assert "papers" not in storage.list_collections()
|
||||
19
tests/test_storage_protocol.py
Normal file
19
tests/test_storage_protocol.py
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
"""Structural-typing test for the StorageEngine protocol."""
from pageindex.storage.protocol import StorageEngine


def test_storage_engine_is_protocol():
    # A class with the full method surface should satisfy the protocol
    # without inheriting from it (structural / duck typing).
    class FakeStorage:
        def create_collection(self, name: str) -> None: pass
        def get_or_create_collection(self, name: str) -> None: pass
        def list_collections(self) -> list[str]: return []
        def delete_collection(self, name: str) -> None: pass
        def save_document(self, collection: str, doc_id: str, doc: dict) -> None: pass
        def find_document_by_hash(self, collection: str, file_hash: str) -> str | None: return None
        def get_document(self, collection: str, doc_id: str) -> dict: return {}
        def get_document_structure(self, collection: str, doc_id: str) -> dict: return {}
        def get_pages(self, collection: str, doc_id: str) -> list | None: return None
        def list_documents(self, collection: str) -> list[dict]: return []
        def delete_document(self, collection: str, doc_id: str) -> None: pass
        def close(self) -> None: pass

    assert isinstance(FakeStorage(), StorageEngine)
|
||||
Loading…
Add table
Add a link
Reference in a new issue