refactor: update file skipping logic in Dropbox, Google Drive, and OneDrive connectors to return unsupported extension information

2026-07-22 23:31:12 +02:00 · 2026-04-07 05:11:15 +05:30 · 2026-04-07 05:11:15 +05:30 · a624c86b04
commit a624c86b04
parent 122be76133
4 changed files with 9 additions and 5 deletions
--- a/surfsense_backend/app/connectors/dropbox/content_extractor.py
+++ b/surfsense_backend/app/connectors/dropbox/content_extractor.py
@@ -53,7 +53,8 @@ async def download_and_extract_content(
    file_name = file.get("name", "Unknown")
    file_id = file.get("id", "")
-    if should_skip_file(file):
+    skip, _unsup_ext = should_skip_file(file)
+    if skip:
        return None, {}, "Skipping non-indexable item"
    logger.info(f"Downloading file for content extraction: {file_name}")
--- a/surfsense_backend/app/connectors/google_drive/content_extractor.py
+++ b/surfsense_backend/app/connectors/google_drive/content_extractor.py
@@ -43,7 +43,8 @@ async def download_and_extract_content(
    if should_skip_file(mime_type):
        return None, {}, f"Skipping {mime_type}"
-    if should_skip_by_extension(file_name):
+    ext_skip, _unsup_ext = should_skip_by_extension(file_name)
+    if ext_skip:
        return None, {}, f"Skipping unsupported extension: {file_name}"
    logger.info(f"Downloading file for content extraction: {file_name} ({mime_type})")
@@ -155,7 +156,8 @@ async def download_and_process_file(
    if should_skip_file(mime_type):
        return None, f"Skipping {mime_type}", None
-    if should_skip_by_extension(file_name):
+    ext_skip, _unsup_ext = should_skip_by_extension(file_name)
+    if ext_skip:
        return None, f"Skipping unsupported extension: {file_name}", None
    logger.info(f"Downloading file: {file_name} ({mime_type})")
--- a/surfsense_backend/app/connectors/onedrive/content_extractor.py
+++ b/surfsense_backend/app/connectors/onedrive/content_extractor.py
@@ -24,7 +24,8 @@ async def download_and_extract_content(
    item_id = file.get("id")
    file_name = file.get("name", "Unknown")
-    if should_skip_file(file):
+    skip, _unsup_ext = should_skip_file(file)
+    if skip:
        return None, {}, "Skipping non-indexable item"
    file_info = file.get("file", {})
--- a/surfsense_backend/tests/integration/google_unification/test_drive_indexer_credentials.py
+++ b/surfsense_backend/tests/integration/google_unification/test_drive_indexer_credentials.py
@@ -124,7 +124,7 @@ async def test_composio_connector_without_account_id_returns_error(
    maker = make_session_factory(async_engine)
    async with maker() as session:
-        count, _skipped, error = await index_google_drive_files(
+        count, _skipped, error, _unsupported = await index_google_drive_files(
            session=session,
            connector_id=data["connector_id"],
            search_space_id=data["search_space_id"],