diff --git a/surfsense_backend/app/connectors/dropbox/content_extractor.py b/surfsense_backend/app/connectors/dropbox/content_extractor.py index 8e947eee7..8cbc3e417 100644 --- a/surfsense_backend/app/connectors/dropbox/content_extractor.py +++ b/surfsense_backend/app/connectors/dropbox/content_extractor.py @@ -53,7 +53,8 @@ async def download_and_extract_content( file_name = file.get("name", "Unknown") file_id = file.get("id", "") - if should_skip_file(file): + skip, _unsup_ext = should_skip_file(file) + if skip: return None, {}, "Skipping non-indexable item" logger.info(f"Downloading file for content extraction: {file_name}") diff --git a/surfsense_backend/app/connectors/google_drive/content_extractor.py b/surfsense_backend/app/connectors/google_drive/content_extractor.py index 10f008594..9f49d491d 100644 --- a/surfsense_backend/app/connectors/google_drive/content_extractor.py +++ b/surfsense_backend/app/connectors/google_drive/content_extractor.py @@ -43,7 +43,8 @@ async def download_and_extract_content( if should_skip_file(mime_type): return None, {}, f"Skipping {mime_type}" - if should_skip_by_extension(file_name): + ext_skip, _unsup_ext = should_skip_by_extension(file_name) + if ext_skip: return None, {}, f"Skipping unsupported extension: {file_name}" logger.info(f"Downloading file for content extraction: {file_name} ({mime_type})") @@ -155,7 +156,8 @@ async def download_and_process_file( if should_skip_file(mime_type): return None, f"Skipping {mime_type}", None - if should_skip_by_extension(file_name): + ext_skip, _unsup_ext = should_skip_by_extension(file_name) + if ext_skip: return None, f"Skipping unsupported extension: {file_name}", None logger.info(f"Downloading file: {file_name} ({mime_type})") diff --git a/surfsense_backend/app/connectors/onedrive/content_extractor.py b/surfsense_backend/app/connectors/onedrive/content_extractor.py index 2355993eb..2238b8603 100644 --- a/surfsense_backend/app/connectors/onedrive/content_extractor.py +++ b/surfsense_backend/app/connectors/onedrive/content_extractor.py @@ -24,7 +24,8 @@ async def download_and_extract_content( item_id = file.get("id") file_name = file.get("name", "Unknown") - if should_skip_file(file): + skip, _unsup_ext = should_skip_file(file) + if skip: return None, {}, "Skipping non-indexable item" file_info = file.get("file", {}) diff --git a/surfsense_backend/tests/integration/google_unification/test_drive_indexer_credentials.py b/surfsense_backend/tests/integration/google_unification/test_drive_indexer_credentials.py index 5bb0b6137..e669fa143 100644 --- a/surfsense_backend/tests/integration/google_unification/test_drive_indexer_credentials.py +++ b/surfsense_backend/tests/integration/google_unification/test_drive_indexer_credentials.py @@ -124,7 +124,7 @@ async def test_composio_connector_without_account_id_returns_error( maker = make_session_factory(async_engine) async with maker() as session: - count, _skipped, error = await index_google_drive_files( + count, _skipped, error, _unsupported = await index_google_drive_files( session=session, connector_id=data["connector_id"], search_space_id=data["search_space_id"],