refactor: implement file type classification for supported extensions across Dropbox, Google Drive, and OneDrive connectors, enhancing file handling and error management

This commit is contained in:
Anish Sarkar 2026-04-06 22:03:47 +05:30
parent 47f4be08d9
commit dc7047f64d
14 changed files with 250 additions and 27 deletions

View file

@ -0,0 +1,22 @@
"""Tests for Google Drive file type filtering."""
import pytest
from app.connectors.google_drive.file_types import should_skip_by_extension
# Module-level mark: tags every test in this file with the `unit` marker.
pytestmark = pytest.mark.unit
@pytest.mark.parametrize(
    "filename",
    [
        "malware.exe",
        "archive.zip",
        "video.mov",
        "font.woff2",
        "model.blend",
    ],
)
def test_unsupported_extensions_are_skipped(filename):
    """Check that each listed file name is reported as one to skip."""
    skip_decision = should_skip_by_extension(filename)
    # Identity check: the result must be the bool True, not merely truthy.
    assert skip_decision is True
@pytest.mark.parametrize(
    "filename",
    [
        "report.pdf",
        "doc.docx",
        "sheet.xlsx",
        "slides.pptx",
        "readme.txt",
        "data.csv",
        "photo.png",
        "notes.md",
    ],
)
def test_parseable_extensions_are_not_skipped(filename):
    """Check that each listed file name is NOT reported as one to skip."""
    skip_decision = should_skip_by_extension(filename)
    # Identity check: the result must be the bool False, not merely falsy.
    assert skip_decision is False

View file

@ -0,0 +1,44 @@
"""Tests for OneDrive file type filtering."""
import pytest
from app.connectors.onedrive.file_types import should_skip_file
# Module-level mark: tags every test in this file with the `unit` marker.
pytestmark = pytest.mark.unit
def test_folder_is_skipped():
    """An item dict that carries a ``folder`` key must be skipped."""
    folder_item = {
        "folder": {},
        "name": "My Folder",
    }
    skip_decision = should_skip_file(folder_item)
    assert skip_decision is True
def test_remote_item_is_skipped():
    """An item dict that carries a ``remoteItem`` key must be skipped.

    The name uses an otherwise parseable extension (``.docx``), so the
    expected skip comes from the ``remoteItem`` key rather than the name.
    """
    remote_item = {
        "remoteItem": {},
        "name": "shared.docx",
    }
    skip_decision = should_skip_file(remote_item)
    assert skip_decision is True
def test_package_is_skipped():
    """An item dict that carries a ``package`` key must be skipped."""
    package_item = {
        "package": {},
        "name": "notebook",
    }
    skip_decision = should_skip_file(package_item)
    assert skip_decision is True
def test_onenote_is_skipped():
    """A file whose MIME type is ``application/msonenote`` must be skipped."""
    onenote_item = {
        "name": "notes",
        "file": {"mimeType": "application/msonenote"},
    }
    skip_decision = should_skip_file(onenote_item)
    assert skip_decision is True
@pytest.mark.parametrize(
    "filename",
    [
        "malware.exe",
        "archive.zip",
        "video.mov",
        "font.woff2",
        "model.blend",
    ],
)
def test_unsupported_extensions_are_skipped(filename):
    """Check that a file item with each listed name is skipped.

    Every item uses the same generic ``application/octet-stream`` MIME type;
    only the file name varies between cases.
    """
    file_item = {
        "name": filename,
        "file": {"mimeType": "application/octet-stream"},
    }
    assert should_skip_file(file_item) is True, f"{filename} should be skipped"
@pytest.mark.parametrize(
    "filename",
    [
        "report.pdf",
        "doc.docx",
        "sheet.xlsx",
        "slides.pptx",
        "readme.txt",
        "data.csv",
        "photo.png",
        "notes.md",
    ],
)
def test_parseable_files_are_not_skipped(filename):
    """Check that a file item with each listed name is NOT skipped.

    Every item uses the same generic ``application/octet-stream`` MIME type;
    only the file name varies between cases.
    """
    file_item = {
        "name": filename,
        "file": {"mimeType": "application/octet-stream"},
    }
    assert should_skip_file(file_item) is False, f"{filename} should NOT be skipped"