From 2d55fd2f5a59149865cfc6fb14d202e797840eee Mon Sep 17 00:00:00 2001 From: BukeLy Date: Sun, 31 May 2026 21:11:26 +0800 Subject: [PATCH] fix(pifs): clean partial add pageindex cache --- pageindex/filesystem/core.py | 3 ++- tests/test_pifs_add_command.py | 42 ++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/pageindex/filesystem/core.py b/pageindex/filesystem/core.py index ce2c336..d8f6310 100644 --- a/pageindex/filesystem/core.py +++ b/pageindex/filesystem/core.py @@ -1756,11 +1756,12 @@ class PageIndexFileSystem: records: list[dict[str, Any]], preexisting_doc_ids: set[str], ) -> None: - doc_ids: list[str] = [] + doc_ids = sorted(self._pageindex_cache_doc_ids() - preexisting_doc_ids) for record in records: doc_id = str(record.get("pageindex_doc_id") or "").strip() if doc_id and doc_id not in preexisting_doc_ids: doc_ids.append(doc_id) + doc_ids = sorted(set(doc_ids)) if not doc_ids: return workspace = self.pageindex_client_workspace diff --git a/tests/test_pifs_add_command.py b/tests/test_pifs_add_command.py index b221ae0..d2b8f9c 100644 --- a/tests/test_pifs_add_command.py +++ b/tests/test_pifs_add_command.py @@ -288,6 +288,48 @@ def test_add_markdown_insert_failure_removes_pageindex_cache(tmp_path, monkeypat assert not list((workspace / "artifacts" / "raw").glob("*.json")) +def test_add_markdown_index_failure_removes_pageindex_cache_delta(tmp_path, monkeypatch): + from pageindex import PageIndexClient + + def fake_index(self, file_path, mode="auto"): + doc_id = "doc_partial_before_raise" + doc = { + "id": doc_id, + "type": "md", + "path": str(Path(file_path).resolve()), + "doc_name": "partial.md", + "doc_description": "", + "line_count": 3, + "structure": [{"title": "Partial", "node_id": "0001", "nodes": []}], + } + self.documents[doc_id] = doc + self._save_doc(doc_id) + raise RuntimeError("index failed after cache write") + + monkeypatch.setattr(PageIndexClient, "index", fake_index) + source = tmp_path / "partial.md" + source.write_text("# Partial\n\nbody", encoding="utf-8") + workspace = tmp_path / "workspace" + filesystem = make_filesystem(workspace) + pageindex_workspace = workspace / "artifacts" / "pageindex_client" + + with pytest.raises(RuntimeError, match="failed to build PageIndex tree"): + filesystem.add_file(source, "/documents/reports") + + assert not (pageindex_workspace / "doc_partial_before_raise.json").exists() + meta_path = pageindex_workspace / "_meta.json" + if meta_path.exists(): + meta = json.loads(meta_path.read_text(encoding="utf-8")) + assert "doc_partial_before_raise" not in meta + listing = filesystem.browse("/", recursive=True) + assert listing["files"] == [] + assert listing["folders"] == [] + assert filesystem.summary_projection_indexer.index.info()["document_count"] == 0 + assert not list((workspace / "artifacts" / "uploads").glob("**/*")) + assert not list((workspace / "artifacts" / "text").glob("*.txt")) + assert not list((workspace / "artifacts" / "raw").glob("*.json")) + + def test_add_markdown_failure_preserves_unrelated_pageindex_cache(tmp_path, monkeypatch): from pageindex import PageIndexClient