mirror of
https://github.com/VectifyAI/PageIndex.git
synced 2026-06-12 19:55:17 +02:00
fix(pifs): roll back add-created folders
This commit is contained in:
parent
1c5ed63ef8
commit
8cdddb5e5b
3 changed files with 110 additions and 0 deletions
|
|
@ -207,6 +207,7 @@ class PageIndexFileSystem:
|
|||
raise RuntimeError("pifs add requires the summary projection index")
|
||||
|
||||
self._ensure_add_completion_defaults()
|
||||
add_created_folder_paths = self._add_created_folder_paths(folder_path)
|
||||
file_ref = make_file_ref(virtual_path.strip("/"))
|
||||
uploads_dir = self.workspace / "artifacts" / "uploads"
|
||||
final_dir = uploads_dir / file_ref
|
||||
|
|
@ -262,6 +263,7 @@ class PageIndexFileSystem:
|
|||
self._cleanup_add_catalog_record(file_ref)
|
||||
self._cleanup_add_summary_projection(records)
|
||||
self._cleanup_failed_register_artifacts(records)
|
||||
self._cleanup_add_created_folders(add_created_folder_paths)
|
||||
if final_dir_created:
|
||||
shutil.rmtree(final_dir, ignore_errors=True)
|
||||
raise
|
||||
|
|
@ -425,6 +427,21 @@ class PageIndexFileSystem:
|
|||
if "summary" not in self.semantic_retrieval_channels():
|
||||
raise RuntimeError("pifs add failed to configure summary semantic retrieval")
|
||||
|
||||
def _add_created_folder_paths(self, folder_path: str) -> list[str]:
|
||||
paths = self._folder_ancestor_paths(folder_path)
|
||||
return [path for path in paths if not self.store.folder_exists(path)]
|
||||
|
||||
@staticmethod
|
||||
def _folder_ancestor_paths(folder_path: str) -> list[str]:
|
||||
normalized = normalize_path(folder_path)
|
||||
if normalized == "/":
|
||||
return []
|
||||
segments = [segment for segment in normalized.strip("/").split("/") if segment]
|
||||
paths: list[str] = []
|
||||
for index in range(1, len(segments) + 1):
|
||||
paths.append("/" + "/".join(segments[:index]))
|
||||
return paths
|
||||
|
||||
def configure_existing_projection_retrieval(self) -> bool:
|
||||
"""Attach semantic retrieval to already-built projection indexes.
|
||||
|
||||
|
|
@ -1711,6 +1728,13 @@ class PageIndexFileSystem:
|
|||
except Exception:
|
||||
continue
|
||||
|
||||
def _cleanup_add_created_folders(self, folder_paths: list[str]) -> None:
|
||||
for folder_path in reversed(folder_paths):
|
||||
try:
|
||||
self.store.delete_empty_folder(folder_path)
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
@staticmethod
|
||||
def _metadata_policy_is_batch(policy: dict[str, Any]) -> bool:
|
||||
return bool(policy.get("batch")) or policy.get("mode") == "batch"
|
||||
|
|
|
|||
|
|
@ -1063,6 +1063,48 @@ class SQLiteFileSystemStore:
|
|||
conn.execute("DELETE FROM metadata_values WHERE file_ref = ?", (file_ref,))
|
||||
conn.execute("DELETE FROM files WHERE file_ref = ?", (file_ref,))
|
||||
|
||||
def folder_exists(self, path: str) -> bool:
|
||||
path = normalize_path(path)
|
||||
with self.connect() as conn:
|
||||
row = conn.execute(
|
||||
"SELECT 1 FROM folders WHERE path = ?",
|
||||
(path,),
|
||||
).fetchone()
|
||||
return row is not None
|
||||
|
||||
def delete_empty_folder(self, path: str) -> bool:
|
||||
path = normalize_path(path)
|
||||
if path == "/":
|
||||
return False
|
||||
with self.connect() as conn:
|
||||
folder = self._folder_by_path(conn, path)
|
||||
if folder is None:
|
||||
return False
|
||||
has_files = conn.execute(
|
||||
"""
|
||||
SELECT 1
|
||||
FROM file_folders
|
||||
WHERE folder_id = ?
|
||||
LIMIT 1
|
||||
""",
|
||||
(folder["folder_id"],),
|
||||
).fetchone()
|
||||
if has_files is not None:
|
||||
return False
|
||||
has_children = conn.execute(
|
||||
"""
|
||||
SELECT 1
|
||||
FROM folders
|
||||
WHERE parent_id = ?
|
||||
LIMIT 1
|
||||
""",
|
||||
(folder["folder_id"],),
|
||||
).fetchone()
|
||||
if has_children is not None:
|
||||
return False
|
||||
conn.execute("DELETE FROM folders WHERE folder_id = ?", (folder["folder_id"],))
|
||||
return True
|
||||
|
||||
def resolve_file_ref(self, target: str) -> str:
|
||||
with self.connect() as conn:
|
||||
return self._resolve_file_ref(conn, target)
|
||||
|
|
|
|||
|
|
@ -256,6 +256,50 @@ def test_add_failure_after_summary_vector_rolls_back_catalog_and_vector(
|
|||
assert not list((workspace / "artifacts" / "raw").glob("*.json"))
|
||||
|
||||
|
||||
def test_add_failure_removes_nested_folders_created_only_for_add(tmp_path, monkeypatch):
|
||||
source = tmp_path / "nested.txt"
|
||||
source.write_text("nested rollback body", encoding="utf-8")
|
||||
workspace = tmp_path / "workspace"
|
||||
filesystem = make_filesystem(workspace)
|
||||
|
||||
def fail_status_update(*args, **kwargs):
|
||||
raise RuntimeError("metadata status update failed")
|
||||
|
||||
monkeypatch.setattr(filesystem.store, "update_file_metadata_status", fail_status_update)
|
||||
|
||||
with pytest.raises(RuntimeError, match="metadata status update failed"):
|
||||
filesystem.add_file(source, "/documents/reports")
|
||||
|
||||
listing = filesystem.browse("/", recursive=True)
|
||||
assert listing["files"] == []
|
||||
assert listing["folders"] == []
|
||||
assert filesystem.summary_projection_indexer.index.info()["document_count"] == 0
|
||||
assert not list((workspace / "artifacts" / "uploads").glob("**/*"))
|
||||
assert not list((workspace / "artifacts" / "text").glob("*.txt"))
|
||||
assert not list((workspace / "artifacts" / "raw").glob("*.json"))
|
||||
|
||||
|
||||
def test_add_failure_preserves_preexisting_parent_folder(tmp_path, monkeypatch):
|
||||
source = tmp_path / "nested.txt"
|
||||
source.write_text("nested rollback body", encoding="utf-8")
|
||||
workspace = tmp_path / "workspace"
|
||||
filesystem = make_filesystem(workspace)
|
||||
filesystem.create_folder("/documents")
|
||||
|
||||
def fail_status_update(*args, **kwargs):
|
||||
raise RuntimeError("metadata status update failed")
|
||||
|
||||
monkeypatch.setattr(filesystem.store, "update_file_metadata_status", fail_status_update)
|
||||
|
||||
with pytest.raises(RuntimeError, match="metadata status update failed"):
|
||||
filesystem.add_file(source, "/documents/reports")
|
||||
|
||||
listing = filesystem.browse("/", recursive=True)
|
||||
assert listing["files"] == []
|
||||
assert [folder["path"] for folder in listing["folders"]] == ["/documents"]
|
||||
assert filesystem.summary_projection_indexer.index.info()["document_count"] == 0
|
||||
|
||||
|
||||
def test_cli_add_uses_workspace_and_prints_added_file(monkeypatch, capsys, tmp_path):
|
||||
from pageindex.filesystem import cli
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue