mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-25 19:15:18 +02:00
refactor: update file skipping logic in Dropbox, Google Drive, and OneDrive connectors to return unsupported extension information
This commit is contained in:
parent
122be76133
commit
a624c86b04
4 changed files with 9 additions and 5 deletions
|
|
@@ -53,7 +53,8 @@ async def download_and_extract_content(
|
||||||
file_name = file.get("name", "Unknown")
|
file_name = file.get("name", "Unknown")
|
||||||
file_id = file.get("id", "")
|
file_id = file.get("id", "")
|
||||||
|
|
||||||
if should_skip_file(file):
|
skip, _unsup_ext = should_skip_file(file)
|
||||||
|
if skip:
|
||||||
return None, {}, "Skipping non-indexable item"
|
return None, {}, "Skipping non-indexable item"
|
||||||
|
|
||||||
logger.info(f"Downloading file for content extraction: {file_name}")
|
logger.info(f"Downloading file for content extraction: {file_name}")
|
||||||
|
|
|
||||||
|
|
@@ -43,7 +43,8 @@ async def download_and_extract_content(
|
||||||
if should_skip_file(mime_type):
|
if should_skip_file(mime_type):
|
||||||
return None, {}, f"Skipping {mime_type}"
|
return None, {}, f"Skipping {mime_type}"
|
||||||
|
|
||||||
if should_skip_by_extension(file_name):
|
ext_skip, _unsup_ext = should_skip_by_extension(file_name)
|
||||||
|
if ext_skip:
|
||||||
return None, {}, f"Skipping unsupported extension: {file_name}"
|
return None, {}, f"Skipping unsupported extension: {file_name}"
|
||||||
|
|
||||||
logger.info(f"Downloading file for content extraction: {file_name} ({mime_type})")
|
logger.info(f"Downloading file for content extraction: {file_name} ({mime_type})")
|
||||||
|
|
@@ -155,7 +156,8 @@ async def download_and_process_file(
|
||||||
if should_skip_file(mime_type):
|
if should_skip_file(mime_type):
|
||||||
return None, f"Skipping {mime_type}", None
|
return None, f"Skipping {mime_type}", None
|
||||||
|
|
||||||
if should_skip_by_extension(file_name):
|
ext_skip, _unsup_ext = should_skip_by_extension(file_name)
|
||||||
|
if ext_skip:
|
||||||
return None, f"Skipping unsupported extension: {file_name}", None
|
return None, f"Skipping unsupported extension: {file_name}", None
|
||||||
|
|
||||||
logger.info(f"Downloading file: {file_name} ({mime_type})")
|
logger.info(f"Downloading file: {file_name} ({mime_type})")
|
||||||
|
|
|
||||||
|
|
@@ -24,7 +24,8 @@ async def download_and_extract_content(
|
||||||
item_id = file.get("id")
|
item_id = file.get("id")
|
||||||
file_name = file.get("name", "Unknown")
|
file_name = file.get("name", "Unknown")
|
||||||
|
|
||||||
if should_skip_file(file):
|
skip, _unsup_ext = should_skip_file(file)
|
||||||
|
if skip:
|
||||||
return None, {}, "Skipping non-indexable item"
|
return None, {}, "Skipping non-indexable item"
|
||||||
|
|
||||||
file_info = file.get("file", {})
|
file_info = file.get("file", {})
|
||||||
|
|
|
||||||
|
|
@@ -124,7 +124,7 @@ async def test_composio_connector_without_account_id_returns_error(
|
||||||
|
|
||||||
maker = make_session_factory(async_engine)
|
maker = make_session_factory(async_engine)
|
||||||
async with maker() as session:
|
async with maker() as session:
|
||||||
count, _skipped, error = await index_google_drive_files(
|
count, _skipped, error, _unsupported = await index_google_drive_files(
|
||||||
session=session,
|
session=session,
|
||||||
connector_id=data["connector_id"],
|
connector_id=data["connector_id"],
|
||||||
search_space_id=data["search_space_id"],
|
search_space_id=data["search_space_id"],
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue