fix(pifs): roll back add-created folders

This commit is contained in:
BukeLy 2026-05-31 17:43:18 +08:00
parent 1c5ed63ef8
commit 8cdddb5e5b
3 changed files with 110 additions and 0 deletions

View file

@ -207,6 +207,7 @@ class PageIndexFileSystem:
raise RuntimeError("pifs add requires the summary projection index")
self._ensure_add_completion_defaults()
add_created_folder_paths = self._add_created_folder_paths(folder_path)
file_ref = make_file_ref(virtual_path.strip("/"))
uploads_dir = self.workspace / "artifacts" / "uploads"
final_dir = uploads_dir / file_ref
@ -262,6 +263,7 @@ class PageIndexFileSystem:
self._cleanup_add_catalog_record(file_ref)
self._cleanup_add_summary_projection(records)
self._cleanup_failed_register_artifacts(records)
self._cleanup_add_created_folders(add_created_folder_paths)
if final_dir_created:
shutil.rmtree(final_dir, ignore_errors=True)
raise
@ -425,6 +427,21 @@ class PageIndexFileSystem:
if "summary" not in self.semantic_retrieval_channels():
raise RuntimeError("pifs add failed to configure summary semantic retrieval")
def _add_created_folder_paths(self, folder_path: str) -> list[str]:
paths = self._folder_ancestor_paths(folder_path)
return [path for path in paths if not self.store.folder_exists(path)]
@staticmethod
def _folder_ancestor_paths(folder_path: str) -> list[str]:
normalized = normalize_path(folder_path)
if normalized == "/":
return []
segments = [segment for segment in normalized.strip("/").split("/") if segment]
paths: list[str] = []
for index in range(1, len(segments) + 1):
paths.append("/" + "/".join(segments[:index]))
return paths
def configure_existing_projection_retrieval(self) -> bool:
"""Attach semantic retrieval to already-built projection indexes.
@ -1711,6 +1728,13 @@ class PageIndexFileSystem:
except Exception:
continue
def _cleanup_add_created_folders(self, folder_paths: list[str]) -> None:
for folder_path in reversed(folder_paths):
try:
self.store.delete_empty_folder(folder_path)
except Exception:
continue
@staticmethod
def _metadata_policy_is_batch(policy: dict[str, Any]) -> bool:
return bool(policy.get("batch")) or policy.get("mode") == "batch"

View file

@ -1063,6 +1063,48 @@ class SQLiteFileSystemStore:
conn.execute("DELETE FROM metadata_values WHERE file_ref = ?", (file_ref,))
conn.execute("DELETE FROM files WHERE file_ref = ?", (file_ref,))
def folder_exists(self, path: str) -> bool:
path = normalize_path(path)
with self.connect() as conn:
row = conn.execute(
"SELECT 1 FROM folders WHERE path = ?",
(path,),
).fetchone()
return row is not None
def delete_empty_folder(self, path: str) -> bool:
path = normalize_path(path)
if path == "/":
return False
with self.connect() as conn:
folder = self._folder_by_path(conn, path)
if folder is None:
return False
has_files = conn.execute(
"""
SELECT 1
FROM file_folders
WHERE folder_id = ?
LIMIT 1
""",
(folder["folder_id"],),
).fetchone()
if has_files is not None:
return False
has_children = conn.execute(
"""
SELECT 1
FROM folders
WHERE parent_id = ?
LIMIT 1
""",
(folder["folder_id"],),
).fetchone()
if has_children is not None:
return False
conn.execute("DELETE FROM folders WHERE folder_id = ?", (folder["folder_id"],))
return True
def resolve_file_ref(self, target: str) -> str:
with self.connect() as conn:
return self._resolve_file_ref(conn, target)

View file

@ -256,6 +256,50 @@ def test_add_failure_after_summary_vector_rolls_back_catalog_and_vector(
assert not list((workspace / "artifacts" / "raw").glob("*.json"))
def test_add_failure_removes_nested_folders_created_only_for_add(tmp_path, monkeypatch):
source = tmp_path / "nested.txt"
source.write_text("nested rollback body", encoding="utf-8")
workspace = tmp_path / "workspace"
filesystem = make_filesystem(workspace)
def fail_status_update(*args, **kwargs):
raise RuntimeError("metadata status update failed")
monkeypatch.setattr(filesystem.store, "update_file_metadata_status", fail_status_update)
with pytest.raises(RuntimeError, match="metadata status update failed"):
filesystem.add_file(source, "/documents/reports")
listing = filesystem.browse("/", recursive=True)
assert listing["files"] == []
assert listing["folders"] == []
assert filesystem.summary_projection_indexer.index.info()["document_count"] == 0
assert not list((workspace / "artifacts" / "uploads").glob("**/*"))
assert not list((workspace / "artifacts" / "text").glob("*.txt"))
assert not list((workspace / "artifacts" / "raw").glob("*.json"))
def test_add_failure_preserves_preexisting_parent_folder(tmp_path, monkeypatch):
source = tmp_path / "nested.txt"
source.write_text("nested rollback body", encoding="utf-8")
workspace = tmp_path / "workspace"
filesystem = make_filesystem(workspace)
filesystem.create_folder("/documents")
def fail_status_update(*args, **kwargs):
raise RuntimeError("metadata status update failed")
monkeypatch.setattr(filesystem.store, "update_file_metadata_status", fail_status_update)
with pytest.raises(RuntimeError, match="metadata status update failed"):
filesystem.add_file(source, "/documents/reports")
listing = filesystem.browse("/", recursive=True)
assert listing["files"] == []
assert [folder["path"] for folder in listing["folders"]] == ["/documents"]
assert filesystem.summary_projection_indexer.index.info()["document_count"] == 0
def test_cli_add_uses_workspace_and_prints_added_file(monkeypatch, capsys, tmp_path):
from pageindex.filesystem import cli