feat: update Google Drive indexing to include skipped file tracking

This commit is contained in:
Anish Sarkar 2026-03-19 20:27:50 +05:30
parent eac4cb6075
commit e9485ab2df
3 changed files with 14 additions and 10 deletions

View file

@@ -2338,6 +2338,7 @@ async def run_google_drive_indexing(
items = GoogleDriveIndexRequest(**items_dict)
indexing_options = items.indexing_options
total_indexed = 0
total_skipped = 0
errors = []
# Get connector info for notification
@@ -2375,7 +2376,7 @@ async def run_google_drive_indexing(
# Index each folder with indexing options
for folder in items.folders:
try:
indexed_count, error_message = await index_google_drive_files(
indexed_count, skipped_count, error_message = await index_google_drive_files(
session,
connector_id,
search_space_id,
@@ -2387,6 +2388,7 @@ async def run_google_drive_indexing(
max_files=indexing_options.max_files_per_folder,
include_subfolders=indexing_options.include_subfolders,
)
total_skipped += skipped_count
if error_message:
errors.append(f"Folder '{folder.name}': {error_message}")
else:
@@ -2457,6 +2459,7 @@ async def run_google_drive_indexing(
notification=notification,
indexed_count=total_indexed,
error_message=error_message,
skipped_count=total_skipped,
)
except Exception as e:

View file

@@ -62,7 +62,7 @@ async def index_google_drive_files(
max_files: int = 500,
include_subfolders: bool = False,
on_heartbeat_callback: HeartbeatCallbackType | None = None,
) -> tuple[int, str | None]:
) -> tuple[int, int, str | None]:
"""
Index Google Drive files for a specific connector.
@@ -80,7 +80,7 @@ async def index_google_drive_files(
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
Returns:
Tuple of (number_of_indexed_files, error_message)
Tuple of (number_of_indexed_files, number_of_skipped_files, error_message)
"""
task_logger = TaskLoggingService(session, search_space_id)
@@ -110,7 +110,7 @@ async def index_google_drive_files(
await task_logger.log_task_failure(
log_entry, error_msg, None, {"error_type": "ConnectorNotFound"}
)
return 0, error_msg
return 0, 0, error_msg
await task_logger.log_task_progress(
log_entry,
@@ -130,7 +130,7 @@ async def index_google_drive_files(
log_entry, error_msg, "Missing Composio account",
{"error_type": "MissingComposioAccount"},
)
return 0, error_msg
return 0, 0, error_msg
pre_built_credentials = build_composio_credentials(connected_account_id)
else:
token_encrypted = connector.config.get("_token_encrypted", False)
@@ -143,6 +143,7 @@ async def index_google_drive_files(
{"error_type": "MissingSecretKey"},
)
return (
0,
0,
"SECRET_KEY not configured but credentials are marked as encrypted",
)
@@ -161,7 +162,7 @@ async def index_google_drive_files(
await task_logger.log_task_failure(
log_entry, error_msg, {"error_type": "MissingParameter"}
)
return 0, error_msg
return 0, 0, error_msg
target_folder_id = folder_id
target_folder_name = folder_name or "Selected Folder"
@@ -271,7 +272,7 @@ async def index_google_drive_files(
logger.info(
f"Google Drive indexing completed: {documents_indexed} files indexed, {documents_skipped} skipped"
)
return documents_indexed, None
return documents_indexed, documents_skipped, None
except SQLAlchemyError as db_error:
await session.rollback()
@@ -282,7 +283,7 @@ async def index_google_drive_files(
{"error_type": "SQLAlchemyError"},
)
logger.error(f"Database error: {db_error!s}", exc_info=True)
return 0, f"Database error: {db_error!s}"
return 0, 0, f"Database error: {db_error!s}"
except Exception as e:
await session.rollback()
await task_logger.log_task_failure(
@@ -292,7 +293,7 @@ async def index_google_drive_files(
{"error_type": type(e).__name__},
)
logger.error(f"Failed to index Google Drive files: {e!s}", exc_info=True)
return 0, f"Failed to index Google Drive files: {e!s}"
return 0, 0, f"Failed to index Google Drive files: {e!s}"
async def index_google_drive_single_file(

View file

@@ -119,7 +119,7 @@ async def test_composio_connector_without_account_id_returns_error(
maker = make_session_factory(async_engine)
async with maker() as session:
count, error = await index_google_drive_files(
count, _skipped, error = await index_google_drive_files(
session=session,
connector_id=data["connector_id"],
search_space_id=data["search_space_id"],