mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-08 20:25:19 +02:00
refactor: enhance file skipping logic in Google Drive connector to check for Google Workspace files before unsupported extensions
This commit is contained in:
parent
e4462292e4
commit
1b87719a92
2 changed files with 13 additions and 9 deletions
|
|
@@ -43,9 +43,10 @@ async def download_and_extract_content(
|
|||
if should_skip_file(mime_type):
|
||||
return None, {}, f"Skipping {mime_type}"
|
||||
|
||||
ext_skip, _unsup_ext = should_skip_by_extension(file_name)
|
||||
if ext_skip:
|
||||
return None, {}, f"Skipping unsupported extension: {file_name}"
|
||||
if not is_google_workspace_file(mime_type):
|
||||
ext_skip, _unsup_ext = should_skip_by_extension(file_name)
|
||||
if ext_skip:
|
||||
return None, {}, f"Skipping unsupported extension: {file_name}"
|
||||
|
||||
logger.info(f"Downloading file for content extraction: {file_name} ({mime_type})")
|
||||
|
||||
|
|
@@ -156,9 +157,10 @@ async def download_and_process_file(
|
|||
if should_skip_file(mime_type):
|
||||
return None, f"Skipping {mime_type}", None
|
||||
|
||||
ext_skip, _unsup_ext = should_skip_by_extension(file_name)
|
||||
if ext_skip:
|
||||
return None, f"Skipping unsupported extension: {file_name}", None
|
||||
if not is_google_workspace_file(mime_type):
|
||||
ext_skip, _unsup_ext = should_skip_by_extension(file_name)
|
||||
if ext_skip:
|
||||
return None, f"Skipping unsupported extension: {file_name}", None
|
||||
|
||||
logger.info(f"Downloading file: {file_name} ({mime_type})")
|
||||
|
||||
|
|
|
|||
|
|
@@ -26,6 +26,7 @@ from app.connectors.google_drive import (
|
|||
get_start_page_token,
|
||||
)
|
||||
from app.connectors.google_drive.file_types import (
|
||||
is_google_workspace_file,
|
||||
should_skip_by_extension,
|
||||
should_skip_file as skip_mime,
|
||||
)
|
||||
|
|
@@ -81,9 +82,10 @@ async def _should_skip_file(
|
|||
|
||||
if skip_mime(mime_type):
|
||||
return True, "folder/shortcut"
|
||||
ext_skip, unsup_ext = should_skip_by_extension(file_name)
|
||||
if ext_skip:
|
||||
return True, f"unsupported:{unsup_ext}"
|
||||
if not is_google_workspace_file(mime_type):
|
||||
ext_skip, unsup_ext = should_skip_by_extension(file_name)
|
||||
if ext_skip:
|
||||
return True, f"unsupported:{unsup_ext}"
|
||||
if not file_id:
|
||||
return True, "missing file_id"
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue