refactor: enhance file skipping logic across Dropbox, Google Drive, and OneDrive connectors to return unsupported extensions, improving error reporting and maintainability

This commit is contained in:
Anish Sarkar 2026-04-07 03:16:34 +05:30
parent e7beeb2a36
commit 3a1d700817
14 changed files with 344 additions and 160 deletions

View file

@ -265,7 +265,10 @@ def full_scan_mocks(mock_dropbox_client, monkeypatch):
async def _fake_skip(session, file, search_space_id):
from app.connectors.dropbox.file_types import should_skip_file as _skip
if _skip(file):
item_skip, unsup_ext = _skip(file)
if item_skip:
if unsup_ext:
return True, f"unsupported:{unsup_ext}"
return True, "folder/non-downloadable"
return skip_results.get(file.get("id", ""), (False, None))
@ -541,7 +544,7 @@ async def test_delta_sync_deletions_call_remove_document(monkeypatch):
mock_task_logger = MagicMock()
mock_task_logger.log_task_progress = AsyncMock()
indexed, skipped, cursor = await _index_with_delta_sync(
indexed, skipped, unsupported, cursor = await _index_with_delta_sync(
mock_client,
AsyncMock(),
_CONNECTOR_ID,
@ -578,7 +581,7 @@ async def test_delta_sync_upserts_filtered_and_downloaded(monkeypatch):
mock_task_logger = MagicMock()
mock_task_logger.log_task_progress = AsyncMock()
indexed, skipped, cursor = await _index_with_delta_sync(
indexed, skipped, unsupported, cursor = await _index_with_delta_sync(
mock_client,
AsyncMock(),
_CONNECTOR_ID,
@ -628,7 +631,7 @@ async def test_delta_sync_mix_deletions_and_upserts(monkeypatch):
mock_task_logger = MagicMock()
mock_task_logger.log_task_progress = AsyncMock()
indexed, skipped, cursor = await _index_with_delta_sync(
indexed, skipped, unsupported, cursor = await _index_with_delta_sync(
mock_client,
AsyncMock(),
_CONNECTOR_ID,
@ -662,7 +665,7 @@ async def test_delta_sync_returns_new_cursor(monkeypatch):
mock_task_logger = MagicMock()
mock_task_logger.log_task_progress = AsyncMock()
indexed, skipped, cursor = await _index_with_delta_sync(
indexed, skipped, unsupported, cursor = await _index_with_delta_sync(
mock_client,
AsyncMock(),
_CONNECTOR_ID,

View file

@ -497,7 +497,7 @@ async def test_delta_sync_removals_serial_rest_parallel(monkeypatch):
mock_task_logger = MagicMock()
mock_task_logger.log_task_progress = AsyncMock()
indexed, skipped = await _index_with_delta_sync(
indexed, skipped, unsupported = await _index_with_delta_sync(
MagicMock(),
mock_session,
MagicMock(),

View file

@ -384,7 +384,7 @@ async def test_gdrive_full_scan_skips_over_quota(gdrive_full_scan_mocks, monkeyp
m["download_mock"].return_value = ([], 0)
m["batch_mock"].return_value = ([], 2, 0)
_indexed, skipped = await _run_gdrive_full_scan(m)
_indexed, skipped, _unsup = await _run_gdrive_full_scan(m)
call_files = m["download_mock"].call_args[0][1]
assert len(call_files) == 2
@ -459,7 +459,7 @@ async def test_gdrive_delta_sync_skips_over_quota(monkeypatch):
mock_task_logger = MagicMock()
mock_task_logger.log_task_progress = AsyncMock()
_indexed, skipped = await _mod._index_with_delta_sync(
_indexed, skipped, _unsupported = await _mod._index_with_delta_sync(
MagicMock(),
session,
MagicMock(),