diff --git a/pageindex/filesystem/core.py b/pageindex/filesystem/core.py index 5c963b2..c38e137 100644 --- a/pageindex/filesystem/core.py +++ b/pageindex/filesystem/core.py @@ -207,6 +207,7 @@ class PageIndexFileSystem: raise RuntimeError("pifs add requires the summary projection index") self._ensure_add_completion_defaults() + add_created_folder_paths = self._add_created_folder_paths(folder_path) file_ref = make_file_ref(virtual_path.strip("/")) uploads_dir = self.workspace / "artifacts" / "uploads" final_dir = uploads_dir / file_ref @@ -262,6 +263,7 @@ class PageIndexFileSystem: self._cleanup_add_catalog_record(file_ref) self._cleanup_add_summary_projection(records) self._cleanup_failed_register_artifacts(records) + self._cleanup_add_created_folders(add_created_folder_paths) if final_dir_created: shutil.rmtree(final_dir, ignore_errors=True) raise @@ -425,6 +427,21 @@ class PageIndexFileSystem: if "summary" not in self.semantic_retrieval_channels(): raise RuntimeError("pifs add failed to configure summary semantic retrieval") + def _add_created_folder_paths(self, folder_path: str) -> list[str]: + paths = self._folder_ancestor_paths(folder_path) + return [path for path in paths if not self.store.folder_exists(path)] + + @staticmethod + def _folder_ancestor_paths(folder_path: str) -> list[str]: + normalized = normalize_path(folder_path) + if normalized == "/": + return [] + segments = [segment for segment in normalized.strip("/").split("/") if segment] + paths: list[str] = [] + for index in range(1, len(segments) + 1): + paths.append("/" + "/".join(segments[:index])) + return paths + def configure_existing_projection_retrieval(self) -> bool: """Attach semantic retrieval to already-built projection indexes. @@ -1711,6 +1728,13 @@ class PageIndexFileSystem: except Exception: continue + def _cleanup_add_created_folders(self, folder_paths: list[str]) -> None: + for folder_path in reversed(folder_paths): + try: + self.store.delete_empty_folder(folder_path) + except Exception: + continue + @staticmethod def _metadata_policy_is_batch(policy: dict[str, Any]) -> bool: return bool(policy.get("batch")) or policy.get("mode") == "batch" diff --git a/pageindex/filesystem/store.py b/pageindex/filesystem/store.py index 347c181..10e1e7a 100644 --- a/pageindex/filesystem/store.py +++ b/pageindex/filesystem/store.py @@ -1063,6 +1063,48 @@ class SQLiteFileSystemStore: conn.execute("DELETE FROM metadata_values WHERE file_ref = ?", (file_ref,)) conn.execute("DELETE FROM files WHERE file_ref = ?", (file_ref,)) + def folder_exists(self, path: str) -> bool: + path = normalize_path(path) + with self.connect() as conn: + row = conn.execute( + "SELECT 1 FROM folders WHERE path = ?", + (path,), + ).fetchone() + return row is not None + + def delete_empty_folder(self, path: str) -> bool: + path = normalize_path(path) + if path == "/": + return False + with self.connect() as conn: + folder = self._folder_by_path(conn, path) + if folder is None: + return False + has_files = conn.execute( + """ + SELECT 1 + FROM file_folders + WHERE folder_id = ? + LIMIT 1 + """, + (folder["folder_id"],), + ).fetchone() + if has_files is not None: + return False + has_children = conn.execute( + """ + SELECT 1 + FROM folders + WHERE parent_id = ? + LIMIT 1 + """, + (folder["folder_id"],), + ).fetchone() + if has_children is not None: + return False + conn.execute("DELETE FROM folders WHERE folder_id = ?", (folder["folder_id"],)) + return True + def resolve_file_ref(self, target: str) -> str: with self.connect() as conn: return self._resolve_file_ref(conn, target) diff --git a/tests/test_pifs_add_command.py b/tests/test_pifs_add_command.py index ac73c96..f608c89 100644 --- a/tests/test_pifs_add_command.py +++ b/tests/test_pifs_add_command.py @@ -256,6 +256,50 @@ def test_add_failure_after_summary_vector_rolls_back_catalog_and_vector( assert not list((workspace / "artifacts" / "raw").glob("*.json")) +def test_add_failure_removes_nested_folders_created_only_for_add(tmp_path, monkeypatch): + source = tmp_path / "nested.txt" + source.write_text("nested rollback body", encoding="utf-8") + workspace = tmp_path / "workspace" + filesystem = make_filesystem(workspace) + + def fail_status_update(*args, **kwargs): + raise RuntimeError("metadata status update failed") + + monkeypatch.setattr(filesystem.store, "update_file_metadata_status", fail_status_update) + + with pytest.raises(RuntimeError, match="metadata status update failed"): + filesystem.add_file(source, "/documents/reports") + + listing = filesystem.browse("/", recursive=True) + assert listing["files"] == [] + assert listing["folders"] == [] + assert filesystem.summary_projection_indexer.index.info()["document_count"] == 0 + assert not list((workspace / "artifacts" / "uploads").glob("**/*")) + assert not list((workspace / "artifacts" / "text").glob("*.txt")) + assert not list((workspace / "artifacts" / "raw").glob("*.json")) + + +def test_add_failure_preserves_preexisting_parent_folder(tmp_path, monkeypatch): + source = tmp_path / "nested.txt" + source.write_text("nested rollback body", encoding="utf-8") + workspace = tmp_path / "workspace" + filesystem = make_filesystem(workspace) + filesystem.create_folder("/documents") + + def fail_status_update(*args, **kwargs): + raise RuntimeError("metadata status update failed") + + monkeypatch.setattr(filesystem.store, "update_file_metadata_status", fail_status_update) + + with pytest.raises(RuntimeError, match="metadata status update failed"): + filesystem.add_file(source, "/documents/reports") + + listing = filesystem.browse("/", recursive=True) + assert listing["files"] == [] + assert [folder["path"] for folder in listing["folders"]] == ["/documents"] + assert filesystem.summary_projection_indexer.index.info()["document_count"] == 0 + + def test_cli_add_uses_workspace_and_prints_added_file(monkeypatch, capsys, tmp_path): from pageindex.filesystem import cli