mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-25 00:36:31 +02:00
feat: refactor folder indexing to support batch processing of multiple files, enhancing performance and error handling
This commit is contained in:
parent
e2ba509314
commit
1fa8e1cc83
6 changed files with 293 additions and 80 deletions
|
|
@ -1275,7 +1275,7 @@ def index_local_folder_task(
|
|||
file_extensions: list[str] | None = None,
|
||||
root_folder_id: int | None = None,
|
||||
enable_summary: bool = False,
|
||||
target_file_path: str | None = None,
|
||||
target_file_paths: list[str] | None = None,
|
||||
):
|
||||
"""Celery task to index a local folder. Config is passed directly — no connector row."""
|
||||
loop = asyncio.new_event_loop()
|
||||
|
|
@ -1292,7 +1292,7 @@ def index_local_folder_task(
|
|||
file_extensions=file_extensions,
|
||||
root_folder_id=root_folder_id,
|
||||
enable_summary=enable_summary,
|
||||
target_file_path=target_file_path,
|
||||
target_file_paths=target_file_paths,
|
||||
)
|
||||
)
|
||||
finally:
|
||||
|
|
@ -1308,19 +1308,103 @@ async def _index_local_folder_async(
|
|||
file_extensions: list[str] | None = None,
|
||||
root_folder_id: int | None = None,
|
||||
enable_summary: bool = False,
|
||||
target_file_path: str | None = None,
|
||||
target_file_paths: list[str] | None = None,
|
||||
):
|
||||
"""Run local folder indexing with a fresh DB session."""
|
||||
"""Run local folder indexing with notification + heartbeat."""
|
||||
is_batch = bool(target_file_paths)
|
||||
is_full_scan = not target_file_paths
|
||||
file_count = len(target_file_paths) if target_file_paths else None
|
||||
|
||||
if is_batch:
|
||||
doc_name = f"{folder_name} ({file_count} file{'s' if file_count != 1 else ''})"
|
||||
else:
|
||||
doc_name = folder_name
|
||||
|
||||
notification = None
|
||||
heartbeat_task = None
|
||||
|
||||
async with get_celery_session_maker()() as session:
|
||||
await index_local_folder(
|
||||
session=session,
|
||||
search_space_id=search_space_id,
|
||||
user_id=user_id,
|
||||
folder_path=folder_path,
|
||||
folder_name=folder_name,
|
||||
exclude_patterns=exclude_patterns,
|
||||
file_extensions=file_extensions,
|
||||
root_folder_id=root_folder_id,
|
||||
enable_summary=enable_summary,
|
||||
target_file_path=target_file_path,
|
||||
)
|
||||
try:
|
||||
notification = (
|
||||
await NotificationService.document_processing.notify_processing_started(
|
||||
session=session,
|
||||
user_id=UUID(user_id),
|
||||
document_type="LOCAL_FOLDER_FILE",
|
||||
document_name=doc_name,
|
||||
search_space_id=search_space_id,
|
||||
)
|
||||
)
|
||||
_start_heartbeat(notification.id)
|
||||
heartbeat_task = asyncio.create_task(
|
||||
_run_heartbeat_loop(notification.id)
|
||||
)
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Failed to create notification for local folder indexing",
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
async def _heartbeat_progress(completed_count: int) -> None:
|
||||
"""Refresh heartbeat and optionally update notification progress."""
|
||||
if notification:
|
||||
try:
|
||||
await NotificationService.document_processing.notify_processing_progress(
|
||||
session=session,
|
||||
notification=notification,
|
||||
stage="indexing",
|
||||
stage_message=f"Syncing files ({completed_count}/{file_count or '?'})",
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
indexed, skipped_or_failed, _rfid, err = await index_local_folder(
|
||||
session=session,
|
||||
search_space_id=search_space_id,
|
||||
user_id=user_id,
|
||||
folder_path=folder_path,
|
||||
folder_name=folder_name,
|
||||
exclude_patterns=exclude_patterns,
|
||||
file_extensions=file_extensions,
|
||||
root_folder_id=root_folder_id,
|
||||
enable_summary=enable_summary,
|
||||
target_file_paths=target_file_paths,
|
||||
on_heartbeat_callback=_heartbeat_progress if (is_batch or is_full_scan) else None,
|
||||
)
|
||||
|
||||
if notification:
|
||||
try:
|
||||
if err:
|
||||
await NotificationService.document_processing.notify_processing_completed(
|
||||
session=session,
|
||||
notification=notification,
|
||||
error_message=err,
|
||||
)
|
||||
else:
|
||||
await NotificationService.document_processing.notify_processing_completed(
|
||||
session=session,
|
||||
notification=notification,
|
||||
)
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Failed to update notification after local folder indexing",
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.exception(f"Local folder indexing failed: {e}")
|
||||
if notification:
|
||||
try:
|
||||
await NotificationService.document_processing.notify_processing_completed(
|
||||
session=session,
|
||||
notification=notification,
|
||||
error_message=str(e)[:200],
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
raise
|
||||
finally:
|
||||
if heartbeat_task:
|
||||
heartbeat_task.cancel()
|
||||
if notification:
|
||||
_stop_heartbeat(notification.id)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue