mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-12 20:45:20 +02:00
feat: enhance Google Drive indexing to track skipped files
- Updated the indexing function to return the count of skipped files alongside indexed files, improving tracking of indexing performance.
- Added logic to accumulate skipped file counts during the indexing process, providing better insights into potential issues.
- Enhanced notification updates to include skipped file counts, ensuring comprehensive progress reporting for users.
This commit is contained in:
parent
83d9c49a50
commit
eac4cb6075
2 changed files with 27 additions and 2 deletions
|
|
@ -2433,6 +2433,7 @@ async def run_google_drive_indexing(
|
|||
else:
|
||||
# Update notification to storing stage
|
||||
if notification:
|
||||
await session.refresh(notification)
|
||||
await NotificationService.connector_indexing.notify_indexing_progress(
|
||||
session=session,
|
||||
notification=notification,
|
||||
|
|
|
|||
|
|
@ -194,6 +194,31 @@ async def index_google_drive_files(
|
|||
on_heartbeat_callback=on_heartbeat_callback,
|
||||
enable_summary=connector_enable_summary,
|
||||
)
|
||||
documents_indexed, documents_skipped = result
|
||||
|
||||
# Reconciliation: full scan re-indexes documents that were manually
|
||||
# deleted from SurfSense but still exist in Google Drive.
|
||||
# Already-indexed files are skipped via md5/modifiedTime checks,
|
||||
# so the overhead is just one API listing call + fast DB lookups.
|
||||
logger.info("Running reconciliation scan after delta sync")
|
||||
reconcile_result = await _index_full_scan(
|
||||
drive_client=drive_client,
|
||||
session=session,
|
||||
connector=connector,
|
||||
connector_id=connector_id,
|
||||
search_space_id=search_space_id,
|
||||
user_id=user_id,
|
||||
folder_id=target_folder_id,
|
||||
folder_name=target_folder_name,
|
||||
task_logger=task_logger,
|
||||
log_entry=log_entry,
|
||||
max_files=max_files,
|
||||
include_subfolders=include_subfolders,
|
||||
on_heartbeat_callback=on_heartbeat_callback,
|
||||
enable_summary=connector_enable_summary,
|
||||
)
|
||||
documents_indexed += reconcile_result[0]
|
||||
documents_skipped += reconcile_result[1]
|
||||
else:
|
||||
logger.info(f"Using full scan for connector {connector_id}")
|
||||
result = await _index_full_scan(
|
||||
|
|
@ -212,8 +237,7 @@ async def index_google_drive_files(
|
|||
on_heartbeat_callback=on_heartbeat_callback,
|
||||
enable_summary=connector_enable_summary,
|
||||
)
|
||||
|
||||
documents_indexed, documents_skipped = result
|
||||
documents_indexed, documents_skipped = result
|
||||
|
||||
if documents_indexed > 0 or can_use_delta_sync:
|
||||
new_token, token_error = await get_start_page_token(drive_client)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue