import json import tempfile from pathlib import Path import pytest def write_pageindex_client_doc(workspace: Path, doc_id: str, doc: dict) -> None: workspace.mkdir(parents=True, exist_ok=True) (workspace / f"{doc_id}.json").write_text( json.dumps(doc, ensure_ascii=False, indent=2), encoding="utf-8", ) meta = { doc_id: { "type": doc.get("type", ""), "doc_name": doc.get("doc_name", ""), "doc_description": doc.get("doc_description", ""), "path": doc.get("path", ""), } } if doc.get("type") == "pdf": meta[doc_id]["page_count"] = doc.get("page_count") elif doc.get("type") == "md": meta[doc_id]["line_count"] = doc.get("line_count") (workspace / "_meta.json").write_text( json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8", ) class RecordingMetadataGenerator: values = { "summary": "Generated retrieval summary.", "doc_type": "technical_note", "domain": "documentation", "topic": "pageindex extraction", } def __init__(self): self.calls = [] def generate(self, request, *, fields): self.calls.append((request, list(fields))) return {field: self.values[field] for field in fields if field in self.values} def test_pageindex_structure_options_report_failed_register_build(monkeypatch): from pageindex import PageIndexClient from pageindex.filesystem import PIFSCommandExecutor, PageIndexFileSystem with tempfile.TemporaryDirectory() as tmp: source = Path(tmp) / "report.md" source.write_text("# Report\n\nCached structure is not built yet.", encoding="utf-8") filesystem = PageIndexFileSystem(workspace=Path(tmp) / "workspace") def fail_index(*args, **kwargs): raise RuntimeError("index failed: extractor unavailable") monkeypatch.setattr(PageIndexClient, "index", fail_index) filesystem.register_file( storage_uri=source.as_uri(), source_path="docs/report.md", external_id="dsid_structural_missing", title="Structural report", content=source.read_text(encoding="utf-8"), ) executor = PIFSCommandExecutor(filesystem, json_output=True) structure = json.loads(executor.execute("cat 
dsid_structural_missing --structure")) node = json.loads(executor.execute("cat dsid_structural_missing --node 0001")) pages = json.loads(executor.execute("cat dsid_structural_missing --page 1-2")) stat = json.loads(executor.execute("stat dsid_structural_missing")) assert structure["data"]["mode"] == "structure" assert structure["data"]["available"] is False assert structure["data"]["status"] == "failed" assert "RuntimeError: index failed: extractor unavailable" in structure["data"]["message"] assert stat["data"]["pageindex_tree_status"] == "failed" assert stat["data"]["metadata_status"]["pageindex_tree"] == { "status": "failed", "owner": "pageindex", "source": "PageIndexClient.index", "error_type": "RuntimeError", "message": "index failed: extractor unavailable", } assert node["data"]["mode"] == "node" assert node["data"]["available"] is False assert node["data"]["node_id"] == "0001" assert pages["data"]["mode"] == "page" assert pages["data"]["available"] is False assert pages["data"]["pages"] == "1-2" assert "cp" not in executor.allowed_commands() assert "mkdir" not in executor.allowed_commands() def test_register_pdf_markdown_uses_pageindex_extracted_text_for_metadata_and_fts(monkeypatch): from pageindex import PageIndexClient from pageindex.filesystem import PageIndexFileSystem def fake_index(self, file_path, mode="auto"): suffix = Path(file_path).suffix.lower() doc_id = f"doc_{suffix.lstrip('.')}" if suffix == ".pdf": doc = { "id": doc_id, "type": "pdf", "path": str(Path(file_path).resolve()), "doc_name": "report.pdf", "doc_description": "", "page_count": 2, "structure": [{"title": "Report", "node_id": "0001", "nodes": []}], "pages": [ {"page": 1, "content": "PageIndex PDF extracted alpha text."}, {"page": 2, "content": "Second PageIndex PDF extracted beta text."}, ], } else: doc = { "id": doc_id, "type": "md", "path": str(Path(file_path).resolve()), "doc_name": "notes", "doc_description": "", "line_count": 3, "structure": [ { "title": "Notes", "node_id": 
"0001", "line_num": 1, "text": "# Notes\n\nPageIndex Markdown extracted gamma text.", "nodes": [], } ], } write_pageindex_client_doc(self.workspace, doc_id, doc) self.documents[doc_id] = doc return doc_id monkeypatch.setattr(PageIndexClient, "index", fake_index) with tempfile.TemporaryDirectory() as tmp: source_pdf = Path(tmp) / "report.pdf" source_md = Path(tmp) / "notes.md" source_pdf.write_bytes(b"%PDF-1.4\n% test fixture\n") source_md.write_text("# Notes\n\nCaller markdown content", encoding="utf-8") generator = RecordingMetadataGenerator() filesystem = PageIndexFileSystem( workspace=Path(tmp) / "workspace", metadata_generator=generator, ) filesystem.register_file( storage_uri=source_pdf.as_uri(), source_path="docs/report.pdf", external_id="dsid_pdf_extracted", title="PDF extracted", content="CALLER PDF CONTENT MUST NOT REACH GENERATOR", ) filesystem.register_file( storage_uri=source_md.as_uri(), source_path="docs/notes.md", external_id="dsid_md_extracted", title="Markdown extracted", content="CALLER MD CONTENT MUST NOT REACH GENERATOR", ) pdf_request = generator.calls[0][0] md_request = generator.calls[1][0] pdf_stat = filesystem.store.file_info("dsid_pdf_extracted") md_stat = filesystem.store.file_info("dsid_md_extracted") assert "PageIndex PDF extracted alpha text" in pdf_request.text assert "Second PageIndex PDF extracted beta text" in pdf_request.text assert "CALLER PDF CONTENT" not in pdf_request.text assert "PageIndex Markdown extracted gamma text" in md_request.text assert "CALLER MD CONTENT" not in md_request.text assert "PageIndex PDF extracted alpha text" in Path( pdf_stat["text_artifact_path"] ).read_text(encoding="utf-8") assert "PageIndex Markdown extracted gamma text" in Path( md_stat["text_artifact_path"] ).read_text(encoding="utf-8") assert [r.external_id for r in filesystem.search("alpha beta", limit=5)] == [ "dsid_pdf_extracted" ] assert [r.external_id for r in filesystem.search("gamma", limit=5)] == [ "dsid_md_extracted" ] assert 
filesystem.search("CALLER", limit=5) == [] def test_register_text_metadata_generation_keeps_caller_content_without_pageindex(monkeypatch): from pageindex import PageIndexClient from pageindex.filesystem import PageIndexFileSystem def fail_index(*args, **kwargs): raise AssertionError("PageIndexClient.index should not be called for text files") monkeypatch.setattr(PageIndexClient, "index", fail_index) with tempfile.TemporaryDirectory() as tmp: generator = RecordingMetadataGenerator() filesystem = PageIndexFileSystem( workspace=Path(tmp) / "workspace", metadata_generator=generator, ) filesystem.register_file( storage_uri="file:///tmp/readme.txt", source_path="docs/readme.txt", external_id="dsid_text_generation", title="Text generation", content="Plain text caller content stays authoritative.", content_type="text/plain", ) stat = filesystem.store.file_info("dsid_text_generation") assert generator.calls[0][0].text == "Plain text caller content stays authoritative." assert stat["pageindex_doc_id"] is None assert stat["pageindex_tree_status"] == "not_built" assert Path(stat["text_artifact_path"]).read_text( encoding="utf-8" ) == "Plain text caller content stays authoritative." 
def test_register_pdf_markdown_cache_miss_invokes_pageindex_client_index(monkeypatch):
    """Registering an uncached PDF and Markdown file calls ``PageIndexClient.index`` for each.

    The patched ``index`` records the path it receives so the call order can be
    checked alongside the resulting ``built`` pointers.
    """
    from pageindex import PageIndexClient
    from pageindex.filesystem import PageIndexFileSystem

    indexed_paths: list[str] = []

    def fake_index(self, file_path, mode="auto"):
        indexed_paths.append(str(file_path))
        path = Path(file_path)
        is_pdf = path.suffix == ".pdf"
        doc_id = f"doc_{path.suffix.lstrip('.')}"
        doc = {
            "id": doc_id,
            "type": "pdf" if is_pdf else "md",
            "path": str(path.resolve()),
            "doc_name": path.name,
            "doc_description": "",
            "structure": [{"title": path.stem, "node_id": "0001", "nodes": []}],
        }
        if is_pdf:
            doc.update(page_count=1, pages=[{"page": 1, "content": "Page one text"}])
        else:
            doc["line_count"] = 1
        write_pageindex_client_doc(self.workspace, doc_id, doc)
        self.documents[doc_id] = doc
        return doc_id

    monkeypatch.setattr(PageIndexClient, "index", fake_index)

    with tempfile.TemporaryDirectory() as tmp_dir:
        root = Path(tmp_dir)
        pdf_path = root / "report.pdf"
        md_path = root / "notes.md"
        pdf_path.write_bytes(b"%PDF-1.4\n% test fixture\n")
        md_path.write_text("# Notes", encoding="utf-8")

        fs = PageIndexFileSystem(workspace=root / "workspace")
        fs.register_file(
            storage_uri=str(pdf_path),
            source_path="docs/report.pdf",
            external_id="dsid_pdf_build",
            title="PDF build",
            content="pdf text",
        )
        fs.register_file(
            storage_uri=md_path.as_uri(),
            source_path="docs/notes.md",
            external_id="dsid_md_build",
            title="Markdown build",
            content=md_path.read_text(encoding="utf-8"),
        )

        pdf_info = fs.store.file_info("dsid_pdf_build")
        md_info = fs.store.file_info("dsid_md_build")

        assert indexed_paths == [str(pdf_path.resolve()), str(md_path.resolve())]
        assert pdf_info["pageindex_doc_id"] == "doc_pdf"
        assert pdf_info["pageindex_tree_status"] == "built"
        assert md_info["pageindex_doc_id"] == "doc_md"
        assert md_info["pageindex_tree_status"] == "built"


def test_cat_structure_page_reuses_pageindex_client_cache_without_indexing(monkeypatch):
    """A pre-written client cache document serves ``cat --structure``/``--page`` without indexing."""
    from pageindex import PageIndexClient
    from pageindex.filesystem import PIFSCommandExecutor, PageIndexFileSystem
    from pageindex.filesystem.commands import PIFSCommandError

    with tempfile.TemporaryDirectory() as tmp_dir:
        root = Path(tmp_dir)
        pdf_path = root / "report.pdf"
        pdf_path.write_bytes(b"%PDF-1.4\n% test fixture\n")
        fs = PageIndexFileSystem(workspace=root / "workspace")

        cache_dir = fs.pageindex_client_workspace
        findings_node = {
            "title": "Findings",
            "node_id": "0002",
            "physical_index": 2,
            "nodes": [],
        }
        introduction_node = {
            "title": "Introduction",
            "node_id": "0001",
            "text": "Intro section text",
            "nodes": [findings_node],
        }
        cached_doc = {
            "id": "doc_cached_pdf",
            "type": "pdf",
            "path": str(pdf_path.resolve()),
            "doc_name": "report.pdf",
            "doc_description": "",
            "page_count": 2,
            "structure": [introduction_node],
            "pages": [
                {"page": 1, "content": "Page one text"},
                {"page": 2, "content": "Page two text"},
            ],
        }
        write_pageindex_client_doc(cache_dir, "doc_cached_pdf", cached_doc)

        def fail_index(*args, **kwargs):
            raise AssertionError("PageIndexClient.index should not be called on cache hit")

        monkeypatch.setattr(PageIndexClient, "index", fail_index)
        fs.register_file(
            storage_uri=pdf_path.as_uri(),
            source_path="docs/report.pdf",
            external_id="dsid_structural_cached",
            title="Cached structural report",
            content="text artifact remains available for grep, not cat --all",
        )
        shell = PIFSCommandExecutor(fs, json_output=True)

        def run(command):
            # Execute one command and decode its JSON output.
            return json.loads(shell.execute(command))

        structure_reply = run("cat dsid_structural_cached --structure")
        pages_reply = run("cat dsid_structural_cached --page 1-2")
        stat_reply = run("stat dsid_structural_cached")

        structure_data = structure_reply["data"]
        assert structure_data["available"] is True
        assert structure_data["pageindex_doc_id"] == "doc_cached_pdf"
        assert structure_data["structure"][0]["title"] == "Introduction"
        assert structure_data["structure"][1]["title"] == "Findings"
        assert structure_data["structure_pagination"]["limit"] == 25
        assert "text" not in structure_data["structure"][0]
        assert "text" not in structure_data["structure"][1]

        pages_data = pages_reply["data"]
        assert pages_data["available"] is True
        assert pages_data["text"] == "Page one text\n\nPage two text"

        with pytest.raises(PIFSCommandError, match="target-first"):
            shell.execute("cat --page 1-2 dsid_structural_cached")
        with pytest.raises(PIFSCommandError, match="one file target"):
            shell.execute("cat dsid_structural_cached --page 1 2")

        stat_data = stat_reply["data"]
        assert stat_data["pageindex_doc_id"] == "doc_cached_pdf"
        assert stat_data["pageindex_tree_status"] == "built"


def test_cat_node_reads_pageindex_client_structure_without_custom_pifs_artifact():
    """``cat --node`` returns node text taken from the cached client document."""
    from pageindex.filesystem import PIFSCommandExecutor, PageIndexFileSystem

    with tempfile.TemporaryDirectory() as tmp_dir:
        root = Path(tmp_dir)
        md_path = root / "notes.md"
        md_path.write_text("# Notes\n\nBody", encoding="utf-8")
        fs = PageIndexFileSystem(workspace=root / "workspace")

        cache_dir = fs.pageindex_client_workspace
        cached_doc = {
            "id": "doc_cached_md",
            "type": "md",
            "path": str(md_path.resolve()),
            "doc_name": "notes",
            "doc_description": "",
            "line_count": 3,
            "structure": [
                {
                    "title": "Notes",
                    "node_id": "0001",
                    "line_num": 1,
                    "text": "# Notes\n\nBody",
                    "nodes": [],
                }
            ],
        }
        write_pageindex_client_doc(cache_dir, "doc_cached_md", cached_doc)

        fs.register_file(
            storage_uri=md_path.as_uri(),
            source_path="docs/notes.md",
            external_id="dsid_md_cached",
            title="Cached markdown notes",
            content=md_path.read_text(encoding="utf-8"),
        )
        shell = PIFSCommandExecutor(fs, json_output=True)

        reply = json.loads(shell.execute("cat dsid_md_cached --node 0001"))
        node_data = reply["data"]

        assert node_data["available"] is True
        assert node_data["pageindex_doc_id"] == "doc_cached_md"
        assert node_data["node"]["title"] == "Notes"
        assert node_data["text"] == "# Notes\n\nBody"
        assert "text" not in node_data["node"]


def test_cat_structure_page_node_and_text_outputs_are_hard_limited():
    """``cat`` caps structure entries, page ranges, node ids and text lines per call."""
    from pageindex.filesystem import PIFSCommandExecutor, PageIndexFileSystem
    from pageindex.filesystem.commands import PIFSCommandError

    def make_section(number):
        # One flat structure node spanning a single page.
        return {
            "title": f"Section {number}",
            "node_id": f"{number:04d}",
            "start_index": number,
            "end_index": number,
            "text": f"node {number} text",
            "nodes": [],
        }

    with tempfile.TemporaryDirectory() as tmp_dir:
        root = Path(tmp_dir)
        pdf_path = root / "report.pdf"
        pdf_path.write_bytes(b"%PDF-1.4\n% test fixture\n")
        fs = PageIndexFileSystem(workspace=root / "workspace")

        sections = [make_section(number) for number in range(1, 31)]
        cache_dir = fs.pageindex_client_workspace
        cached_doc = {
            "id": "doc_limited_pdf",
            "type": "pdf",
            "path": str(pdf_path.resolve()),
            "doc_name": "report.pdf",
            "doc_description": "",
            "page_count": 10,
            "structure": sections,
            "pages": [
                {"page": number, "content": f"Page {number} text"}
                for number in range(1, 11)
            ],
        }
        write_pageindex_client_doc(cache_dir, "doc_limited_pdf", cached_doc)

        fs.register_file(
            storage_uri=pdf_path.as_uri(),
            source_path="docs/report.pdf",
            external_id="dsid_limited_pdf",
            title="Limited structural report",
            content="text artifact remains available for grep",
        )
        long_text = "\n".join(f"line {number}" for number in range(1, 106))
        fs.register_file(
            storage_uri="file:///tmp/long.txt",
            source_path="docs/long.txt",
            external_id="dsid_long_text",
            title="Long text",
            content=long_text,
        )
        shell = PIFSCommandExecutor(fs, json_output=True)

        def run(command):
            # Execute one command and decode its JSON output.
            return json.loads(shell.execute(command))

        # Structure listing: first 25 entries, then the rest via --offset.
        first_page = run("cat dsid_limited_pdf --structure")["data"]
        assert len(first_page["structure"]) == 25
        assert first_page["structure_pagination"]["has_more"] is True
        assert first_page["structure_pagination"]["next_offset"] == 25

        second_page = run("cat dsid_limited_pdf --structure --offset 25")["data"]
        assert len(second_page["structure"]) == 5
        assert second_page["structure"][0]["node_id"] == "0026"

        # Page reads: five pages are accepted, six are rejected.
        page_data = run("cat dsid_limited_pdf --page 1-5")["data"]
        assert page_data["text"] == (
            "Page 1 text\n\nPage 2 text\n\nPage 3 text\n\nPage 4 text\n\nPage 5 text"
        )
        assert page_data["page_pagination"]["limit"] == 5
        six_pages_cmd = "cat dsid_limited_pdf --page 1-6"
        with pytest.raises(PIFSCommandError, match="at most 5"):
            shell.execute(six_pages_cmd)
        with pytest.raises(PIFSCommandError, match="evidence is sufficient"):
            shell.execute(six_pages_cmd)

        # Node reads: ten ids are accepted, eleven are rejected.
        ten_nodes_cmd = (
            "cat dsid_limited_pdf --node 0001 0002 0003 0004 0005 "
            "0006 0007 0008 0009 0010"
        )
        ten_nodes = run(ten_nodes_cmd)
        assert ten_nodes["data"]["node_ids"] == [
            "0001",
            "0002",
            "0003",
            "0004",
            "0005",
            "0006",
            "0007",
            "0008",
            "0009",
            "0010",
        ]
        comma_nodes = run("cat dsid_limited_pdf --node 0001,0002")
        assert comma_nodes["data"]["node_ids"] == ["0001", "0002"]

        eleven_nodes_cmd = (
            "cat dsid_limited_pdf --node 0001 0002 0003 0004 0005 "
            "0006 0007 0008 0009 0010 0011"
        )
        with pytest.raises(PIFSCommandError, match="at most 10"):
            shell.execute(eleven_nodes_cmd)
        with pytest.raises(PIFSCommandError, match="continue with additional chunks"):
            shell.execute(eleven_nodes_cmd)
        with pytest.raises(PIFSCommandError, match="quote the whole target"):
            shell.execute("cat dsid_limited_pdf 0001")

        # Plain text reads: 100 lines per call.
        text_data = run("cat dsid_long_text --all")["data"]
        assert "line 100" in text_data["text"]
        assert "line 101" not in text_data["text"]
        assert text_data["pagination"]["has_more"] is True
        assert text_data["pagination"]["next_range"] == "101-105"
        with pytest.raises(PIFSCommandError, match="at most 100"):
            shell.execute("cat dsid_long_text --range 1-101")


def test_tree_folder_behavior_is_preserved():
    """``tree`` on a folder path lists the nested folder of a registered file."""
    from pageindex.filesystem import PIFSCommandExecutor, PageIndexFileSystem

    with tempfile.TemporaryDirectory() as tmp_dir:
        fs = PageIndexFileSystem(workspace=Path(tmp_dir) / "workspace")
        fs.register_file(
            storage_uri="file:///tmp/report.txt",
            source_path="docs/report.txt",
            folder_path="/docs/reports",
            external_id="dsid_folder_tree",
            title="Folder report",
            content="folder tree behavior remains intact",
        )
        shell = PIFSCommandExecutor(fs, json_output=True)

        reply = json.loads(shell.execute("tree /docs --depth 2"))
        tree_data = reply["data"]

        assert tree_data["path"] == "/docs"
        assert tree_data["folders"][0]["path"] == "/docs/reports"


def test_tree_does_not_read_file_internal_pageindex_structure():
    """``tree`` rejects a file target while ``cat --structure`` still serves it."""
    from pageindex.filesystem import PIFSCommandExecutor, PageIndexFileSystem
    from pageindex.filesystem.commands import PIFSCommandError

    with tempfile.TemporaryDirectory() as tmp_dir:
        root = Path(tmp_dir)
        pdf_path = root / "report.pdf"
        pdf_path.write_bytes(b"%PDF-1.4\n% test fixture\n")
        fs = PageIndexFileSystem(workspace=root / "workspace")

        cache_dir = fs.pageindex_client_workspace
        cached_doc = {
            "id": "doc_tree_is_folder_only",
            "type": "pdf",
            "path": str(pdf_path.resolve()),
            "doc_name": "report.pdf",
            "doc_description": "",
            "page_count": 1,
            "structure": [
                {"title": "Introduction", "node_id": "0001", "nodes": []}
            ],
            "pages": [{"page": 1, "content": "Page one text"}],
        }
        write_pageindex_client_doc(cache_dir, "doc_tree_is_folder_only", cached_doc)

        fs.register_file(
            storage_uri=pdf_path.as_uri(),
            source_path="docs/report.pdf",
            external_id="dsid_tree_is_folder_only",
            title="Cached structural report",
            content="text artifact remains available",
        )
        shell = PIFSCommandExecutor(fs, json_output=True)

        with pytest.raises(PIFSCommandError):
            shell.execute("tree dsid_tree_is_folder_only")

        reply = json.loads(shell.execute("cat dsid_tree_is_folder_only --structure"))
        assert reply["data"]["structure"][0]["title"] == "Introduction"


def test_cat_all_is_limited_to_text_files():
    """``cat --all``, ``head``, ``tail`` and ``sed`` are refused for non-text targets.

    ``filesystem.open`` is refused for the PDF and Markdown files but succeeds
    for the JSON file.
    """
    from pageindex.filesystem import PIFSCommandExecutor, PageIndexFileSystem
    from pageindex.filesystem.commands import PIFSCommandError

    text_only = "only supported for txt/text files"
    no_raw_open = "not supported for PDF/Markdown"

    with tempfile.TemporaryDirectory() as tmp_dir:
        fs = PageIndexFileSystem(workspace=Path(tmp_dir) / "workspace")
        fs.register_file(
            storage_uri="file:///tmp/readme.txt",
            source_path="docs/readme.txt",
            external_id="dsid_text_file",
            title="Text readme",
            content="plain text body",
        )
        fs.register_file(
            storage_uri="file:///tmp/report.pdf",
            source_path="docs/report.pdf",
            external_id="dsid_pdf_file",
            title="PDF report",
            content="extracted text should not be served through cat --all",
        )
        fs.register_file(
            storage_uri="file:///tmp/notes.md",
            source_path="docs/notes.md",
            external_id="dsid_md_file",
            title="Markdown notes",
            content="markdown text should use PageIndex structure reads",
        )
        fs.register_file(
            storage_uri="file:///tmp/data.json",
            source_path="docs/data.json",
            external_id="dsid_json_file",
            title="JSON record",
            content='{"body":"json"}',
            content_type="application/json",
        )
        shell = PIFSCommandExecutor(fs, json_output=True)

        text_reply = json.loads(shell.execute("cat dsid_text_file --all"))
        assert text_reply["data"]["text"] == "plain text body"

        with pytest.raises(PIFSCommandError, match=text_only):
            shell.execute("cat dsid_pdf_file --all")
        with pytest.raises(ValueError, match=no_raw_open):
            fs.open("dsid_pdf_file")

        with pytest.raises(PIFSCommandError, match=text_only):
            shell.execute("cat dsid_md_file --all")
        with pytest.raises(ValueError, match=no_raw_open):
            fs.open("dsid_md_file")

        with pytest.raises(PIFSCommandError, match=text_only):
            shell.execute("cat dsid_json_file --all")
        json_handle = fs.open("dsid_json_file")
        assert json_handle.text == '{"body":"json"}'

        line_commands = (
            "head dsid_pdf_file",
            "tail dsid_pdf_file",
            "sed -n 1,1p dsid_pdf_file",
            "head dsid_md_file",
            "tail dsid_md_file",
            "sed -n 1,1p dsid_md_file",
        )
        for line_command in line_commands:
            with pytest.raises(PIFSCommandError, match=text_only):
                shell.execute(line_command)


def test_pageindex_structure_commands_are_limited_to_pdf_and_markdown():
    """``cat --structure``/``--page``/``--node`` are refused for a plain text file."""
    from pageindex.filesystem import PIFSCommandExecutor, PageIndexFileSystem
    from pageindex.filesystem.commands import PIFSCommandError

    with tempfile.TemporaryDirectory() as tmp_dir:
        fs = PageIndexFileSystem(workspace=Path(tmp_dir) / "workspace")
        fs.register_file(
            storage_uri="file:///tmp/readme.txt",
            source_path="docs/readme.txt",
            external_id="dsid_text_only",
            title="Text readme",
            content="plain text body",
        )
        shell = PIFSCommandExecutor(fs, json_output=True)

        structural_commands = (
            "cat dsid_text_only --structure",
            "cat dsid_text_only --page 1",
            "cat dsid_text_only --node 0001",
        )
        for structural_command in structural_commands:
            with pytest.raises(PIFSCommandError, match="only supported for PDF/Markdown"):
                shell.execute(structural_command)


def test_existing_pageindex_status_allows_legacy_record_without_format_suffix():
    """A suffix-less record with a ``built`` pointer is served by ``cat --structure``.

    ``cat --all`` is still refused for that record.
    """
    from pageindex.filesystem import PIFSCommandExecutor, PageIndexFileSystem
    from pageindex.filesystem.commands import PIFSCommandError

    with tempfile.TemporaryDirectory() as tmp_dir:
        root = Path(tmp_dir)
        upload_path = root / "uploaded"
        upload_path.write_text("# Uploaded\n\nBody", encoding="utf-8")
        fs = PageIndexFileSystem(workspace=root / "workspace")

        record_ref = fs.register_file(
            storage_uri=upload_path.as_uri(),
            source_path="uploads/uploaded",
            external_id="dsid_legacy_pageindex",
            title="Legacy PageIndex record",
            content="text/plain is only a weak default here",
        )

        cache_dir = fs.pageindex_client_workspace
        cached_doc = {
            "id": "doc_legacy_pageindex",
            "type": "md",
            "path": str(upload_path.resolve()),
            "doc_name": "uploaded",
            "doc_description": "",
            "line_count": 3,
            "structure": [
                {"title": "Uploaded", "node_id": "0001", "text": "Body", "nodes": []}
            ],
        }
        write_pageindex_client_doc(cache_dir, "doc_legacy_pageindex", cached_doc)

        # Point the already-registered record at the cached document by hand.
        fs.store.update_pageindex_pointer(
            record_ref,
            pageindex_doc_id="doc_legacy_pageindex",
            pageindex_tree_status="built",
        )
        shell = PIFSCommandExecutor(fs, json_output=True)

        reply = json.loads(shell.execute("cat dsid_legacy_pageindex --structure"))
        assert reply["data"]["structure"][0]["title"] == "Uploaded"

        with pytest.raises(PIFSCommandError, match="only supported for txt/text files"):
            shell.execute("cat dsid_legacy_pageindex --all")


def test_read_commands_do_not_link_pageindex_cache_when_pointer_is_missing(monkeypatch):
    """A cache document written after a failed registration is not picked up by reads."""
    from pageindex import PageIndexClient
    from pageindex.filesystem import PIFSCommandExecutor, PageIndexFileSystem

    with tempfile.TemporaryDirectory() as tmp_dir:
        root = Path(tmp_dir)
        md_path = root / "late.md"
        md_path.write_text("# Late\n\nBody", encoding="utf-8")
        fs = PageIndexFileSystem(workspace=root / "workspace")

        def fail_index(*args, **kwargs):
            raise RuntimeError("index failed")

        monkeypatch.setattr(PageIndexClient, "index", fail_index)
        fs.register_file(
            storage_uri=md_path.as_uri(),
            source_path="docs/late.md",
            external_id="dsid_late_cache",
            title="Late cache",
            content=md_path.read_text(encoding="utf-8"),
        )

        # The cache entry appears only after registration has already failed.
        cache_dir = fs.pageindex_client_workspace
        late_doc = {
            "id": "doc_late_cache",
            "type": "md",
            "path": str(md_path.resolve()),
            "doc_name": "late",
            "doc_description": "",
            "line_count": 3,
            "structure": [
                {"title": "Late", "node_id": "0001", "text": "Body", "nodes": []}
            ],
        }
        write_pageindex_client_doc(cache_dir, "doc_late_cache", late_doc)

        shell = PIFSCommandExecutor(fs, json_output=True)
        structure_reply = json.loads(shell.execute("cat dsid_late_cache --structure"))
        stat_reply = json.loads(shell.execute("stat dsid_late_cache"))

        assert structure_reply["data"]["available"] is False
        assert stat_reply["data"]["pageindex_doc_id"] is None
        assert stat_reply["data"]["pageindex_tree_status"] == "failed"