mirror of https://github.com/MODSetter/SurfSense.git (synced 2026-05-15 18:25:18 +02:00)
feat: update Google Drive indexing to include skipped file tracking
parent eac4cb6075
commit e9485ab2df
3 changed files with 14 additions and 10 deletions
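The change below alters the contract of index_google_drive_files: it now returns (indexed_count, skipped_count, error_message) instead of (indexed_count, error_message), and run_google_drive_indexing accumulates the skipped count into total_skipped and forwards it to the completion notification. A minimal sketch of the new calling convention, using the parameter names visible in the hunks below; the wrapper function and folder.id are illustrative assumptions, not the actual task code:

# Sketch only: mirrors the new three-tuple return; not the actual task body.
async def index_selected_folders(session, connector_id, search_space_id, items, indexing_options):
    total_indexed = 0
    total_skipped = 0
    errors: list[str] = []
    for folder in items.folders:
        indexed_count, skipped_count, error_message = await index_google_drive_files(
            session,
            connector_id,
            search_space_id,
            folder_id=folder.id,  # assumed attribute; the diff only shows folder.name
            max_files=indexing_options.max_files_per_folder,
            include_subfolders=indexing_options.include_subfolders,
        )
        total_indexed += indexed_count
        total_skipped += skipped_count
        if error_message:
            errors.append(f"Folder '{folder.name}': {error_message}")
    return total_indexed, total_skipped, errors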
@@ -2338,6 +2338,7 @@ async def run_google_drive_indexing(
     items = GoogleDriveIndexRequest(**items_dict)
     indexing_options = items.indexing_options
     total_indexed = 0
+    total_skipped = 0
     errors = []
 
     # Get connector info for notification
@@ -2375,7 +2376,7 @@ async def run_google_drive_indexing(
     # Index each folder with indexing options
     for folder in items.folders:
         try:
-            indexed_count, error_message = await index_google_drive_files(
+            indexed_count, skipped_count, error_message = await index_google_drive_files(
                 session,
                 connector_id,
                 search_space_id,
@@ -2387,6 +2388,7 @@ async def run_google_drive_indexing(
                 max_files=indexing_options.max_files_per_folder,
                 include_subfolders=indexing_options.include_subfolders,
             )
+            total_skipped += skipped_count
             if error_message:
                 errors.append(f"Folder '{folder.name}': {error_message}")
             else:
@@ -2457,6 +2459,7 @@ async def run_google_drive_indexing(
             notification=notification,
             indexed_count=total_indexed,
             error_message=error_message,
+            skipped_count=total_skipped,
         )
 
     except Exception as e:
@@ -62,7 +62,7 @@ async def index_google_drive_files(
     max_files: int = 500,
     include_subfolders: bool = False,
     on_heartbeat_callback: HeartbeatCallbackType | None = None,
-) -> tuple[int, str | None]:
+) -> tuple[int, int, str | None]:
     """
     Index Google Drive files for a specific connector.
 
@@ -80,7 +80,7 @@ async def index_google_drive_files(
         on_heartbeat_callback: Optional callback to update notification during long-running indexing.
 
     Returns:
-        Tuple of (number_of_indexed_files, error_message)
+        Tuple of (number_of_indexed_files, number_of_skipped_files, error_message)
     """
     task_logger = TaskLoggingService(session, search_space_id)
 
@@ -110,7 +110,7 @@ async def index_google_drive_files(
         await task_logger.log_task_failure(
             log_entry, error_msg, None, {"error_type": "ConnectorNotFound"}
         )
-        return 0, error_msg
+        return 0, 0, error_msg
 
     await task_logger.log_task_progress(
         log_entry,
@@ -130,7 +130,7 @@ async def index_google_drive_files(
                 log_entry, error_msg, "Missing Composio account",
                 {"error_type": "MissingComposioAccount"},
             )
-            return 0, error_msg
+            return 0, 0, error_msg
         pre_built_credentials = build_composio_credentials(connected_account_id)
     else:
         token_encrypted = connector.config.get("_token_encrypted", False)
@@ -143,6 +143,7 @@ async def index_google_drive_files(
                 {"error_type": "MissingSecretKey"},
             )
             return (
                 0,
+                0,
                 "SECRET_KEY not configured but credentials are marked as encrypted",
             )
@@ -161,7 +162,7 @@ async def index_google_drive_files(
         await task_logger.log_task_failure(
             log_entry, error_msg, {"error_type": "MissingParameter"}
         )
-        return 0, error_msg
+        return 0, 0, error_msg
 
     target_folder_id = folder_id
     target_folder_name = folder_name or "Selected Folder"
@@ -271,7 +272,7 @@ async def index_google_drive_files(
         logger.info(
             f"Google Drive indexing completed: {documents_indexed} files indexed, {documents_skipped} skipped"
         )
-        return documents_indexed, None
+        return documents_indexed, documents_skipped, None
 
     except SQLAlchemyError as db_error:
         await session.rollback()
@@ -282,7 +283,7 @@ async def index_google_drive_files(
             {"error_type": "SQLAlchemyError"},
         )
         logger.error(f"Database error: {db_error!s}", exc_info=True)
-        return 0, f"Database error: {db_error!s}"
+        return 0, 0, f"Database error: {db_error!s}"
     except Exception as e:
         await session.rollback()
         await task_logger.log_task_failure(
@@ -292,7 +293,7 @@ async def index_google_drive_files(
             {"error_type": type(e).__name__},
         )
         logger.error(f"Failed to index Google Drive files: {e!s}", exc_info=True)
-        return 0, f"Failed to index Google Drive files: {e!s}"
+        return 0, 0, f"Failed to index Google Drive files: {e!s}"
 
 
 async def index_google_drive_single_file(
@@ -119,7 +119,7 @@ async def test_composio_connector_without_account_id_returns_error(
 
     maker = make_session_factory(async_engine)
     async with maker() as session:
-        count, error = await index_google_drive_files(
+        count, _skipped, error = await index_google_drive_files(
            session=session,
            connector_id=data["connector_id"],
            search_space_id=data["search_space_id"],
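Since the return arity changed, every remaining call site of index_google_drive_files needs the same three-value unpack; callers that do not use the skipped count can simply discard it, as the test above does. A minimal sketch with placeholder IDs:

# Sketch only: a call site that ignores the skipped count; IDs are placeholders.
indexed, _skipped, error = await index_google_drive_files(
    session=session,
    connector_id=1,
    search_space_id=1,
)
if error:
    logger.warning(f"Google Drive indexing failed: {error}")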