refactor: update file skipping logic in Dropbox, Google Drive, and OneDrive connectors to return unsupported extension information

This commit is contained in:
Anish Sarkar 2026-04-07 05:11:15 +05:30
parent 122be76133
commit a624c86b04
4 changed files with 9 additions and 5 deletions

View file

@ -53,7 +53,8 @@ async def download_and_extract_content(
file_name = file.get("name", "Unknown") file_name = file.get("name", "Unknown")
file_id = file.get("id", "") file_id = file.get("id", "")
if should_skip_file(file): skip, _unsup_ext = should_skip_file(file)
if skip:
return None, {}, "Skipping non-indexable item" return None, {}, "Skipping non-indexable item"
logger.info(f"Downloading file for content extraction: {file_name}") logger.info(f"Downloading file for content extraction: {file_name}")

View file

@ -43,7 +43,8 @@ async def download_and_extract_content(
if should_skip_file(mime_type): if should_skip_file(mime_type):
return None, {}, f"Skipping {mime_type}" return None, {}, f"Skipping {mime_type}"
if should_skip_by_extension(file_name): ext_skip, _unsup_ext = should_skip_by_extension(file_name)
if ext_skip:
return None, {}, f"Skipping unsupported extension: {file_name}" return None, {}, f"Skipping unsupported extension: {file_name}"
logger.info(f"Downloading file for content extraction: {file_name} ({mime_type})") logger.info(f"Downloading file for content extraction: {file_name} ({mime_type})")
@ -155,7 +156,8 @@ async def download_and_process_file(
if should_skip_file(mime_type): if should_skip_file(mime_type):
return None, f"Skipping {mime_type}", None return None, f"Skipping {mime_type}", None
if should_skip_by_extension(file_name): ext_skip, _unsup_ext = should_skip_by_extension(file_name)
if ext_skip:
return None, f"Skipping unsupported extension: {file_name}", None return None, f"Skipping unsupported extension: {file_name}", None
logger.info(f"Downloading file: {file_name} ({mime_type})") logger.info(f"Downloading file: {file_name} ({mime_type})")

View file

@ -24,7 +24,8 @@ async def download_and_extract_content(
item_id = file.get("id") item_id = file.get("id")
file_name = file.get("name", "Unknown") file_name = file.get("name", "Unknown")
if should_skip_file(file): skip, _unsup_ext = should_skip_file(file)
if skip:
return None, {}, "Skipping non-indexable item" return None, {}, "Skipping non-indexable item"
file_info = file.get("file", {}) file_info = file.get("file", {})

View file

@ -124,7 +124,7 @@ async def test_composio_connector_without_account_id_returns_error(
maker = make_session_factory(async_engine) maker = make_session_factory(async_engine)
async with maker() as session: async with maker() as session:
count, _skipped, error = await index_google_drive_files( count, _skipped, error, _unsupported = await index_google_drive_files(
session=session, session=session,
connector_id=data["connector_id"], connector_id=data["connector_id"],
search_space_id=data["search_space_id"], search_space_id=data["search_space_id"],