diff --git a/surfsense_backend/app/connectors/google_drive/content_extractor.py b/surfsense_backend/app/connectors/google_drive/content_extractor.py index 9f49d491d..3e0bb39e5 100644 --- a/surfsense_backend/app/connectors/google_drive/content_extractor.py +++ b/surfsense_backend/app/connectors/google_drive/content_extractor.py @@ -43,9 +43,10 @@ async def download_and_extract_content( if should_skip_file(mime_type): return None, {}, f"Skipping {mime_type}" - ext_skip, _unsup_ext = should_skip_by_extension(file_name) - if ext_skip: - return None, {}, f"Skipping unsupported extension: {file_name}" + if not is_google_workspace_file(mime_type): + ext_skip, _unsup_ext = should_skip_by_extension(file_name) + if ext_skip: + return None, {}, f"Skipping unsupported extension: {file_name}" logger.info(f"Downloading file for content extraction: {file_name} ({mime_type})") @@ -156,9 +157,10 @@ async def download_and_process_file( if should_skip_file(mime_type): return None, f"Skipping {mime_type}", None - ext_skip, _unsup_ext = should_skip_by_extension(file_name) - if ext_skip: - return None, f"Skipping unsupported extension: {file_name}", None + if not is_google_workspace_file(mime_type): + ext_skip, _unsup_ext = should_skip_by_extension(file_name) + if ext_skip: + return None, f"Skipping unsupported extension: {file_name}", None logger.info(f"Downloading file: {file_name} ({mime_type})") diff --git a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py index a33859af5..9916e70a0 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py @@ -26,6 +26,7 @@ from app.connectors.google_drive import ( get_start_page_token, ) from app.connectors.google_drive.file_types import ( + is_google_workspace_file, should_skip_by_extension, should_skip_file as skip_mime, ) @@ -81,9 +82,10 @@ async def _should_skip_file( if skip_mime(mime_type): return True, "folder/shortcut" - ext_skip, unsup_ext = should_skip_by_extension(file_name) - if ext_skip: - return True, f"unsupported:{unsup_ext}" + if not is_google_workspace_file(mime_type): + ext_skip, unsup_ext = should_skip_by_extension(file_name) + if ext_skip: + return True, f"unsupported:{unsup_ext}" if not file_id: return True, "missing file_id"