refactor: enhance file skipping logic in Google Drive connector so the unsupported-extension check is applied only to files that are not Google Workspace files

This commit is contained in:
Anish Sarkar 2026-04-07 05:36:29 +05:30
parent e4462292e4
commit 1b87719a92
2 changed files with 13 additions and 9 deletions

View file

@@ -43,9 +43,10 @@ async def download_and_extract_content(
 if should_skip_file(mime_type):
 return None, {}, f"Skipping {mime_type}"
-ext_skip, _unsup_ext = should_skip_by_extension(file_name)
-if ext_skip:
-return None, {}, f"Skipping unsupported extension: {file_name}"
+if not is_google_workspace_file(mime_type):
+ext_skip, _unsup_ext = should_skip_by_extension(file_name)
+if ext_skip:
+return None, {}, f"Skipping unsupported extension: {file_name}"
 logger.info(f"Downloading file for content extraction: {file_name} ({mime_type})")
@@ -156,9 +157,10 @@ async def download_and_process_file(
 if should_skip_file(mime_type):
 return None, f"Skipping {mime_type}", None
-ext_skip, _unsup_ext = should_skip_by_extension(file_name)
-if ext_skip:
-return None, f"Skipping unsupported extension: {file_name}", None
+if not is_google_workspace_file(mime_type):
+ext_skip, _unsup_ext = should_skip_by_extension(file_name)
+if ext_skip:
+return None, f"Skipping unsupported extension: {file_name}", None
 logger.info(f"Downloading file: {file_name} ({mime_type})")

View file

@@ -26,6 +26,7 @@ from app.connectors.google_drive import (
 get_start_page_token,
 )
 from app.connectors.google_drive.file_types import (
+is_google_workspace_file,
 should_skip_by_extension,
 should_skip_file as skip_mime,
 )
@@ -81,9 +82,10 @@ async def _should_skip_file(
 if skip_mime(mime_type):
 return True, "folder/shortcut"
-ext_skip, unsup_ext = should_skip_by_extension(file_name)
-if ext_skip:
-return True, f"unsupported:{unsup_ext}"
+if not is_google_workspace_file(mime_type):
+ext_skip, unsup_ext = should_skip_by_extension(file_name)
+if ext_skip:
+return True, f"unsupported:{unsup_ext}"
 if not file_id:
 return True, "missing file_id"