mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-25 00:36:31 +02:00
feat: enhance folder synchronization by integrating subtree ID retrieval and optimizing empty folder cleanup process
This commit is contained in:
parent
ae98f64760
commit
cab0d1bdfe
2 changed files with 29 additions and 47 deletions
|
|
@ -1675,6 +1675,7 @@ async def folder_sync_finalize(
|
|||
gets deleted.
|
||||
"""
|
||||
from app.indexing_pipeline.document_hashing import compute_identifier_hash
|
||||
from app.services.folder_service import get_folder_subtree_ids
|
||||
from app.tasks.connector_indexers.local_folder_indexer import (
|
||||
_cleanup_empty_folders,
|
||||
)
|
||||
|
|
@ -1687,6 +1688,11 @@ async def folder_sync_finalize(
|
|||
"You don't have permission to delete documents in this search space",
|
||||
)
|
||||
|
||||
if not request.root_folder_id:
|
||||
return {"deleted_count": 0}
|
||||
|
||||
subtree_ids = await get_folder_subtree_ids(session, request.root_folder_id)
|
||||
|
||||
seen_hashes: set[str] = set()
|
||||
for rel_path in request.all_relative_paths:
|
||||
unique_id = f"{request.folder_name}:{rel_path}"
|
||||
|
|
@ -1697,32 +1703,13 @@ async def folder_sync_finalize(
|
|||
)
|
||||
seen_hashes.add(uid_hash)
|
||||
|
||||
all_root_folder_ids: set[int] = set()
|
||||
if request.root_folder_id:
|
||||
all_root_folder_ids.add(request.root_folder_id)
|
||||
|
||||
all_db_folders = (
|
||||
(
|
||||
await session.execute(
|
||||
select(Folder.id).where(
|
||||
Folder.search_space_id == request.search_space_id,
|
||||
)
|
||||
)
|
||||
)
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
all_root_folder_ids.update(all_db_folders)
|
||||
|
||||
all_folder_docs = (
|
||||
(
|
||||
await session.execute(
|
||||
select(Document).where(
|
||||
Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
|
||||
Document.search_space_id == request.search_space_id,
|
||||
Document.folder_id.in_(list(all_root_folder_ids))
|
||||
if all_root_folder_ids
|
||||
else True,
|
||||
Document.folder_id.in_(subtree_ids),
|
||||
)
|
||||
)
|
||||
)
|
||||
|
|
@ -1738,24 +1725,22 @@ async def folder_sync_finalize(
|
|||
|
||||
await session.flush()
|
||||
|
||||
if request.root_folder_id:
|
||||
existing_dirs: set[str] = set()
|
||||
for rel_path in request.all_relative_paths:
|
||||
parent = str(os.path.dirname(rel_path))
|
||||
if parent and parent != ".":
|
||||
existing_dirs.add(parent)
|
||||
existing_dirs: set[str] = set()
|
||||
for rel_path in request.all_relative_paths:
|
||||
parent = str(os.path.dirname(rel_path))
|
||||
if parent and parent != ".":
|
||||
existing_dirs.add(parent)
|
||||
|
||||
folder_mapping: dict[str, int] = {}
|
||||
if request.root_folder_id:
|
||||
folder_mapping[""] = request.root_folder_id
|
||||
folder_mapping: dict[str, int] = {"": request.root_folder_id}
|
||||
|
||||
await _cleanup_empty_folders(
|
||||
session,
|
||||
request.root_folder_id,
|
||||
request.search_space_id,
|
||||
existing_dirs,
|
||||
folder_mapping,
|
||||
)
|
||||
await _cleanup_empty_folders(
|
||||
session,
|
||||
request.root_folder_id,
|
||||
request.search_space_id,
|
||||
existing_dirs,
|
||||
folder_mapping,
|
||||
subtree_ids=subtree_ids,
|
||||
)
|
||||
|
||||
await session.commit()
|
||||
return {"deleted_count": deleted_count}
|
||||
|
|
|
|||
|
|
@ -387,24 +387,21 @@ async def _cleanup_empty_folders(
|
|||
search_space_id: int,
|
||||
existing_dirs_on_disk: set[str],
|
||||
folder_mapping: dict[str, int],
|
||||
subtree_ids: list[int] | None = None,
|
||||
) -> None:
|
||||
"""Delete Folder rows that are empty (no docs, no children) and no longer on disk."""
|
||||
from sqlalchemy import delete as sa_delete
|
||||
|
||||
id_to_rel: dict[int, str] = {fid: rel for rel, fid in folder_mapping.items() if rel}
|
||||
|
||||
all_folders = (
|
||||
(
|
||||
await session.execute(
|
||||
select(Folder).where(
|
||||
Folder.search_space_id == search_space_id,
|
||||
Folder.id != root_folder_id,
|
||||
)
|
||||
)
|
||||
)
|
||||
.scalars()
|
||||
.all()
|
||||
query = select(Folder).where(
|
||||
Folder.search_space_id == search_space_id,
|
||||
Folder.id != root_folder_id,
|
||||
)
|
||||
if subtree_ids is not None:
|
||||
query = query.where(Folder.id.in_(subtree_ids))
|
||||
|
||||
all_folders = (await session.execute(query)).scalars().all()
|
||||
|
||||
candidates: list[Folder] = []
|
||||
for folder in all_folders:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue