Mirror of https://github.com/MODSetter/SurfSense.git (synced 2026-05-11 16:52:38 +02:00)
feat: enhance folder indexing by adding root folder ID support and implementing folder creation and cleanup logic
parent caf2525ab5
commit c27d24a117
5 changed files with 236 additions and 1 deletion
@@ -1280,6 +1280,7 @@ class FolderIndexFileRequest(PydanticBaseModel):
     folder_name: str
     search_space_id: int
     target_file_path: str
+    root_folder_id: int | None = None
     enable_summary: bool = False
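As a quick illustration of what the new field changes at the API boundary — a minimal sketch, assuming `PydanticBaseModel` is the usual `pydantic.BaseModel` and using only fields visible in this diff (`folder_path` appears in the handler hunk below); the example paths are illustrative, not from the commit:

from pydantic import BaseModel

class FolderIndexFileRequest(BaseModel):
    folder_path: str
    folder_name: str
    search_space_id: int
    target_file_path: str
    root_folder_id: int | None = None   # new: id of the already-indexed root folder
    enable_summary: bool = False

# An old-style payload (no root_folder_id) still validates, so existing callers
# are unaffected by the new field:
req = FolderIndexFileRequest(
    folder_path="/data/notes",
    folder_name="notes",
    search_space_id=1,
    target_file_path="/data/notes/daily/today.md",
)
assert req.root_folder_id is None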
@@ -1394,6 +1395,7 @@ async def folder_index_file(
         folder_path=request.folder_path,
         folder_name=request.folder_name,
         target_file_path=request.target_file_path,
+        root_folder_id=request.root_folder_id,
         enable_summary=request.enable_summary,
     )
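The route `/api/v1/documents/folder-index-file` is taken from the frontend hunk at the bottom of this diff; everything else in this sketch (base URL, bearer auth, the `requests` client) is an assumption for illustration only:

import requests

BASE_URL = "http://localhost:8000"   # assumed local dev server

resp = requests.post(
    f"{BASE_URL}/api/v1/documents/folder-index-file",
    headers={"Authorization": "Bearer <token>"},   # assumed auth scheme
    json={
        "folder_path": "/data/notes",
        "folder_name": "notes",
        "search_space_id": 1,
        "target_file_path": "/data/notes/daily/today.md",
        "root_folder_id": 42,   # id returned by the initial full-folder index
    },
)
resp.raise_for_status()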
@@ -312,6 +312,92 @@ async def _mirror_folder_structure(
     return mapping, root_folder_id
 
 
+async def _resolve_folder_for_file(
+    session: AsyncSession,
+    rel_path: str,
+    root_folder_id: int,
+    search_space_id: int,
+    user_id: str,
+) -> int:
+    """Given a file's relative path, ensure all parent Folder rows exist and
+    return the folder_id for the file's immediate parent directory.
+
+    For a file at "notes/daily/today.md", this ensures Folder rows exist for
+    "notes" and "notes/daily", and returns the id of "notes/daily".
+    For a file at "readme.md" (root level), returns root_folder_id.
+    """
+    parent_dir = str(Path(rel_path).parent)
+    if parent_dir == ".":
+        return root_folder_id
+
+    parts = Path(parent_dir).parts
+    current_parent_id = root_folder_id
+
+    for part in parts:
+        existing = (
+            await session.execute(
+                select(Folder).where(
+                    Folder.name == part,
+                    Folder.parent_id == current_parent_id,
+                    Folder.search_space_id == search_space_id,
+                )
+            )
+        ).scalar_one_or_none()
+
+        if existing:
+            current_parent_id = existing.id
+        else:
+            new_folder = Folder(
+                name=part,
+                parent_id=current_parent_id,
+                search_space_id=search_space_id,
+                created_by_id=user_id,
+                position="a0",
+            )
+            session.add(new_folder)
+            await session.flush()
+            current_parent_id = new_folder.id
+
+    return current_parent_id
+
+
+async def _cleanup_empty_folder_chain(
+    session: AsyncSession,
+    folder_id: int,
+    root_folder_id: int,
+) -> None:
+    """Walk up from folder_id toward root, deleting empty folders (no docs, no
+    children). Stops at root_folder_id, which is never deleted."""
+    current_id = folder_id
+    while current_id and current_id != root_folder_id:
+        has_doc = (
+            await session.execute(
+                select(Document.id).where(Document.folder_id == current_id).limit(1)
+            )
+        ).scalar_one_or_none()
+        if has_doc is not None:
+            break
+
+        has_child = (
+            await session.execute(
+                select(Folder.id).where(Folder.parent_id == current_id).limit(1)
+            )
+        ).scalar_one_or_none()
+        if has_child is not None:
+            break
+
+        folder = (
+            await session.execute(select(Folder).where(Folder.id == current_id))
+        ).scalar_one_or_none()
+        if not folder:
+            break
+
+        parent_id = folder.parent_id
+        await session.delete(folder)
+        await session.flush()
+        current_id = parent_id
+
+
 async def _cleanup_empty_folders(
     session: AsyncSession,
     root_folder_id: int,
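To see the path arithmetic `_resolve_folder_for_file` relies on in isolation, here is a self-contained sketch that swaps the database for a dict keyed on (parent_id, name); the pathlib behavior is exactly what the function exploits — `Path("readme.md").parent` stringifies to "." for root-level files:

from pathlib import Path

# (parent_id, name) -> folder_id; stands in for the Folder table in this sketch
folders: dict[tuple[int, str], int] = {}
next_id = 100

def resolve_folder_for_file(rel_path: str, root_folder_id: int) -> int:
    """Same ensure-parents walk: create each missing path segment in turn."""
    global next_id
    parent_dir = str(Path(rel_path).parent)
    if parent_dir == ".":          # root-level file: no subfolder needed
        return root_folder_id
    current = root_folder_id
    for part in Path(parent_dir).parts:
        key = (current, part)
        if key not in folders:     # "INSERT" a missing Folder row
            folders[key] = next_id
            next_id += 1
        current = folders[key]
    return current

root = 1
assert resolve_folder_for_file("readme.md", root) == root
daily = resolve_folder_for_file("notes/daily/today.md", root)
notes = folders[(root, "notes")]
assert folders[(notes, "daily")] == daily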
@@ -427,6 +513,7 @@ async def index_local_folder(
         folder_name=folder_name,
         target_file_path=target_file_path,
         enable_summary=enable_summary,
+        root_folder_id=root_folder_id,
         task_logger=task_logger,
         log_entry=log_entry,
     )
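The tests further down show the full calling convention for single-file mode; schematically (inside an async context, argument names taken from the test code, paths illustrative):

# Initial full-folder index returns the root folder's id as the third element.
_, _, root_folder_id, _ = await index_local_folder(
    session=session,
    search_space_id=search_space_id,
    user_id=user_id,
    folder_path="/data/notes",
    folder_name="notes",
)

# Later, re-index a single changed file, passing root_folder_id through so the
# indexer can create missing parent Folder rows (or clean up empty ones).
count, _, _, _ = await index_local_folder(
    session=session,
    search_space_id=search_space_id,
    user_id=user_id,
    folder_path="/data/notes",
    folder_name="notes",
    target_file_path="/data/notes/daily/today.md",
    root_folder_id=root_folder_id,
)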
@@ -802,6 +889,7 @@ async def _index_single_file(
     folder_name: str,
     target_file_path: str,
     enable_summary: bool,
+    root_folder_id: int | None,
     task_logger,
     log_entry,
 ) -> tuple[int, int, str | None]:
@@ -816,7 +904,13 @@ async def _index_single_file(
     )
     existing = await check_document_by_unique_identifier(session, uid_hash)
     if existing:
+        deleted_folder_id = existing.folder_id
         await session.delete(existing)
         await session.flush()
+        if deleted_folder_id and root_folder_id:
+            await _cleanup_empty_folder_chain(
+                session, deleted_folder_id, root_folder_id
+            )
         await session.commit()
+        return 0, 0, None
     return 0, 0, None
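The walk-up logic invoked here can be illustrated without a database. This sketch models folders as an in-memory table and applies the same stop conditions as `_cleanup_empty_folder_chain` (folder still has documents, folder still has children, or the root is reached — the root is never deleted):

from dataclasses import dataclass

@dataclass
class F:                      # stand-in for a Folder row in this sketch
    id: int
    parent_id: int | None

folders = {1: F(1, None), 2: F(2, 1), 3: F(3, 2)}   # root -> notes -> ephemeral
docs_in = {3: 0}   # document count per folder; the only doc was just deleted

def cleanup_empty_folder_chain(folder_id: int, root_folder_id: int) -> None:
    """Delete empty folders from folder_id up toward (never including) root."""
    current = folder_id
    while current and current != root_folder_id:
        if docs_in.get(current, 0) > 0:                            # stop: has docs
            break
        if any(f.parent_id == current for f in folders.values()):  # stop: has children
            break
        parent = folders[current].parent_id
        del folders[current]                                       # "DELETE" the empty row
        current = parent

cleanup_empty_folder_chain(3, 1)
assert 3 not in folders and 2 not in folders and 1 in folders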
@@ -880,6 +974,12 @@ async def _index_single_file(
         "mtime": mtime,
     }
 
+    folder_id = None
+    if root_folder_id:
+        folder_id = await _resolve_folder_for_file(
+            session, rel_path, root_folder_id, search_space_id, user_id
+        )
+
     if existing:
         existing.title = title
         existing.content = document_string
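This guard is what keeps the change backward compatible: when no `root_folder_id` is supplied, the document's `folder_id` stays `None` and the pre-existing flat layout is preserved. A toy sketch of just that branch logic (all names here are hypothetical):

import asyncio

async def fake_resolve() -> int:
    return 7   # stands in for _resolve_folder_for_file in this toy

async def pick_folder_id(root_folder_id: int | None) -> int | None:
    # Same shape as the guard in the hunk above.
    if root_folder_id:
        return await fake_resolve()
    return None

assert asyncio.run(pick_folder_id(None)) is None   # legacy flat indexing
assert asyncio.run(pick_folder_id(1)) == 7         # hierarchical indexing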
@@ -887,6 +987,7 @@ async def _index_single_file(
         existing.source_markdown = content
         existing.embedding = embedding
         existing.document_metadata = doc_metadata
+        existing.folder_id = folder_id
         await safe_set_chunks(session, existing, chunks)
         existing.updated_at = get_current_timestamp()
         existing.status = DocumentStatus.ready()
@@ -905,6 +1006,7 @@ async def _index_single_file(
             updated_at=get_current_timestamp(),
             created_by_id=user_id,
             connector_id=None,
+            folder_id=folder_id,
         )
         session.add(document)
         await session.flush()
@@ -567,6 +567,136 @@ class TestFolderMirroring:
         ).scalar_one_or_none()
         assert daily_after is not None
 
+    @pytest.mark.usefixtures(
+        "patched_self_hosted",
+        "patched_embed_for_indexer",
+        "patched_chunks_for_indexer",
+        "patched_summary_for_indexer",
+    )
+    async def test_f6_single_file_creates_subfolder(
+        self,
+        db_session: AsyncSession,
+        db_user: User,
+        db_search_space: SearchSpace,
+        tmp_path: Path,
+    ):
+        """F6: Single-file mode creates missing Folder rows and assigns correct folder_id."""
+        from app.tasks.connector_indexers.local_folder_indexer import index_local_folder
+
+        (tmp_path / "root.md").write_text("root")
+
+        _, _, root_folder_id, _ = await index_local_folder(
+            session=db_session,
+            search_space_id=db_search_space.id,
+            user_id=str(db_user.id),
+            folder_path=str(tmp_path),
+            folder_name="test-folder",
+        )
+
+        sub = tmp_path / "notes" / "daily"
+        sub.mkdir(parents=True)
+        (sub / "new.md").write_text("new note in subfolder")
+
+        count, _, _, _ = await index_local_folder(
+            session=db_session,
+            search_space_id=db_search_space.id,
+            user_id=str(db_user.id),
+            folder_path=str(tmp_path),
+            folder_name="test-folder",
+            target_file_path=str(sub / "new.md"),
+            root_folder_id=root_folder_id,
+        )
+        assert count == 1
+
+        doc = (
+            await db_session.execute(
+                select(Document).where(
+                    Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
+                    Document.title == "new.md",
+                )
+            )
+        ).scalar_one()
+
+        daily_folder = (
+            await db_session.execute(
+                select(Folder).where(Folder.name == "daily")
+            )
+        ).scalar_one()
+
+        assert doc.folder_id == daily_folder.id
+        assert daily_folder.parent_id is not None
+
+        notes_folder = (
+            await db_session.execute(
+                select(Folder).where(Folder.name == "notes")
+            )
+        ).scalar_one()
+        assert daily_folder.parent_id == notes_folder.id
+        assert notes_folder.parent_id == root_folder_id
+
+    @pytest.mark.usefixtures(
+        "patched_self_hosted",
+        "patched_embed_for_indexer",
+        "patched_chunks_for_indexer",
+        "patched_summary_for_indexer",
+    )
+    async def test_f7_single_file_delete_cleans_empty_folders(
+        self,
+        db_session: AsyncSession,
+        db_user: User,
+        db_search_space: SearchSpace,
+        tmp_path: Path,
+    ):
+        """F7: Deleting the only file in a subfolder via single-file mode removes empty Folder rows."""
+        from app.tasks.connector_indexers.local_folder_indexer import index_local_folder
+
+        sub = tmp_path / "notes" / "ephemeral"
+        sub.mkdir(parents=True)
+        (sub / "temp.md").write_text("temporary")
+        (tmp_path / "keep.md").write_text("keep this")
+
+        _, _, root_folder_id, _ = await index_local_folder(
+            session=db_session,
+            search_space_id=db_search_space.id,
+            user_id=str(db_user.id),
+            folder_path=str(tmp_path),
+            folder_name="test-folder",
+        )
+
+        eph_folder = (
+            await db_session.execute(
+                select(Folder).where(Folder.name == "ephemeral")
+            )
+        ).scalar_one_or_none()
+        assert eph_folder is not None
+
+        target = sub / "temp.md"
+        target.unlink()
+
+        await index_local_folder(
+            session=db_session,
+            search_space_id=db_search_space.id,
+            user_id=str(db_user.id),
+            folder_path=str(tmp_path),
+            folder_name="test-folder",
+            target_file_path=str(target),
+            root_folder_id=root_folder_id,
+        )
+
+        eph_after = (
+            await db_session.execute(
+                select(Folder).where(Folder.name == "ephemeral")
+            )
+        ).scalar_one_or_none()
+        assert eph_after is None
+
+        notes_after = (
+            await db_session.execute(
+                select(Folder).where(Folder.name == "notes")
+            )
+        ).scalar_one_or_none()
+        assert notes_after is None
+
+
 # ====================================================================
 # Tier 5: Pipeline Integration (P1)
@@ -32,6 +32,7 @@ export function useFolderSync() {
         folder_name: event.folderName,
         search_space_id: event.searchSpaceId,
         target_file_path: event.fullPath,
+        root_folder_id: event.rootFolderId,
       });
     } catch (err) {
       console.error("[FolderSync] Failed to trigger re-index:", err);
@@ -399,7 +399,7 @@ class DocumentsApiService {
     return baseApiService.post(`/api/v1/documents/folder-index`, undefined, { body });
   };
 
-  folderIndexFile = async (searchSpaceId: number, body: { folder_path: string; folder_name: string; search_space_id: number; target_file_path: string; enable_summary?: boolean }) => {
+  folderIndexFile = async (searchSpaceId: number, body: { folder_path: string; folder_name: string; search_space_id: number; target_file_path: string; root_folder_id?: number | null; enable_summary?: boolean }) => {
     return baseApiService.post(`/api/v1/documents/folder-index-file`, undefined, { body });
   };