chore: ran linting

This commit is contained in:
Anish Sarkar 2026-04-07 05:55:39 +05:30
parent 5803fe79da
commit 0a26a6c5bb
54 changed files with 1015 additions and 672 deletions

View file

@ -19,6 +19,7 @@ def _make_client() -> DropboxClient:
# ---------- C1: get_latest_cursor ----------
async def test_get_latest_cursor_returns_cursor_string(monkeypatch):
client = _make_client()
@ -34,12 +35,17 @@ async def test_get_latest_cursor_returns_cursor_string(monkeypatch):
assert error is None
client._request.assert_called_once_with(
"/2/files/list_folder/get_latest_cursor",
{"path": "/my-folder", "recursive": False, "include_non_downloadable_files": True},
{
"path": "/my-folder",
"recursive": False,
"include_non_downloadable_files": True,
},
)
# ---------- C2: get_changes returns entries and new cursor ----------
async def test_get_changes_returns_entries_and_cursor(monkeypatch):
client = _make_client()
@ -66,6 +72,7 @@ async def test_get_changes_returns_entries_and_cursor(monkeypatch):
# ---------- C3: get_changes handles pagination ----------
async def test_get_changes_handles_pagination(monkeypatch):
client = _make_client()
@ -98,6 +105,7 @@ async def test_get_changes_handles_pagination(monkeypatch):
# ---------- C4: get_changes returns error on 401 ----------
async def test_get_changes_returns_error_on_401(monkeypatch):
client = _make_client()

View file

@ -41,15 +41,40 @@ def test_non_downloadable_item_is_skipped():
@pytest.mark.parametrize(
"filename",
[
"archive.zip", "backup.tar", "data.gz", "stuff.rar", "pack.7z",
"program.exe", "lib.dll", "module.so", "image.dmg", "disk.iso",
"movie.mov", "clip.avi", "video.mkv", "film.wmv", "stream.flv",
"archive.zip",
"backup.tar",
"data.gz",
"stuff.rar",
"pack.7z",
"program.exe",
"lib.dll",
"module.so",
"image.dmg",
"disk.iso",
"movie.mov",
"clip.avi",
"video.mkv",
"film.wmv",
"stream.flv",
"favicon.ico",
"raw.cr2", "photo.nef", "image.arw", "pic.dng",
"design.psd", "vector.ai", "mockup.sketch", "proto.fig",
"font.ttf", "font.otf", "font.woff", "font.woff2",
"model.stl", "scene.fbx", "mesh.blend",
"local.db", "data.sqlite", "access.mdb",
"raw.cr2",
"photo.nef",
"image.arw",
"pic.dng",
"design.psd",
"vector.ai",
"mockup.sketch",
"proto.fig",
"font.ttf",
"font.otf",
"font.woff",
"font.woff2",
"model.stl",
"scene.fbx",
"mesh.blend",
"local.db",
"data.sqlite",
"access.mdb",
],
)
def test_non_parseable_extensions_are_skipped(filename, mocker):
@ -63,9 +88,16 @@ def test_non_parseable_extensions_are_skipped(filename, mocker):
@pytest.mark.parametrize(
"filename",
[
"report.pdf", "document.docx", "sheet.xlsx", "slides.pptx",
"readme.txt", "data.csv", "page.html", "notes.md",
"config.json", "feed.xml",
"report.pdf",
"document.docx",
"sheet.xlsx",
"slides.pptx",
"readme.txt",
"data.csv",
"page.html",
"notes.md",
"config.json",
"feed.xml",
],
)
def test_parseable_documents_are_not_skipped(filename, mocker):
@ -92,30 +124,33 @@ def test_universal_images_are_not_skipped(filename, mocker):
assert ext is None
@pytest.mark.parametrize("filename,service,expected_skip", [
("old.doc", "DOCLING", True),
("old.doc", "LLAMACLOUD", False),
("old.doc", "UNSTRUCTURED", False),
("legacy.xls", "DOCLING", True),
("legacy.xls", "LLAMACLOUD", False),
("legacy.xls", "UNSTRUCTURED", False),
("deck.ppt", "DOCLING", True),
("deck.ppt", "LLAMACLOUD", False),
("deck.ppt", "UNSTRUCTURED", False),
("icon.svg", "DOCLING", True),
("icon.svg", "LLAMACLOUD", False),
("anim.gif", "DOCLING", True),
("anim.gif", "LLAMACLOUD", False),
("photo.webp", "DOCLING", False),
("photo.webp", "LLAMACLOUD", False),
("photo.webp", "UNSTRUCTURED", True),
("live.heic", "DOCLING", True),
("live.heic", "UNSTRUCTURED", False),
("macro.docm", "DOCLING", True),
("macro.docm", "LLAMACLOUD", False),
("mail.eml", "DOCLING", True),
("mail.eml", "UNSTRUCTURED", False),
])
@pytest.mark.parametrize(
"filename,service,expected_skip",
[
("old.doc", "DOCLING", True),
("old.doc", "LLAMACLOUD", False),
("old.doc", "UNSTRUCTURED", False),
("legacy.xls", "DOCLING", True),
("legacy.xls", "LLAMACLOUD", False),
("legacy.xls", "UNSTRUCTURED", False),
("deck.ppt", "DOCLING", True),
("deck.ppt", "LLAMACLOUD", False),
("deck.ppt", "UNSTRUCTURED", False),
("icon.svg", "DOCLING", True),
("icon.svg", "LLAMACLOUD", False),
("anim.gif", "DOCLING", True),
("anim.gif", "LLAMACLOUD", False),
("photo.webp", "DOCLING", False),
("photo.webp", "LLAMACLOUD", False),
("photo.webp", "UNSTRUCTURED", True),
("live.heic", "DOCLING", True),
("live.heic", "UNSTRUCTURED", False),
("macro.docm", "DOCLING", True),
("macro.docm", "LLAMACLOUD", False),
("mail.eml", "DOCLING", True),
("mail.eml", "UNSTRUCTURED", False),
],
)
def test_parser_specific_extensions(filename, service, expected_skip, mocker):
mocker.patch("app.config.config.ETL_SERVICE", service)
item = {".tag": "file", "name": filename}

View file

@ -7,21 +7,37 @@ from app.connectors.google_drive.file_types import should_skip_by_extension
pytestmark = pytest.mark.unit
@pytest.mark.parametrize("filename", [
"malware.exe", "archive.zip", "video.mov", "font.woff2", "model.blend",
])
@pytest.mark.parametrize(
"filename",
[
"malware.exe",
"archive.zip",
"video.mov",
"font.woff2",
"model.blend",
],
)
def test_unsupported_extensions_are_skipped_regardless_of_service(filename, mocker):
"""Truly unsupported files are skipped no matter which ETL service is configured."""
for service in ("DOCLING", "LLAMACLOUD", "UNSTRUCTURED"):
mocker.patch("app.config.config.ETL_SERVICE", service)
skip, ext = should_skip_by_extension(filename)
skip, _ext = should_skip_by_extension(filename)
assert skip is True
@pytest.mark.parametrize("filename", [
"report.pdf", "doc.docx", "sheet.xlsx", "slides.pptx",
"readme.txt", "data.csv", "photo.png", "notes.md",
])
@pytest.mark.parametrize(
"filename",
[
"report.pdf",
"doc.docx",
"sheet.xlsx",
"slides.pptx",
"readme.txt",
"data.csv",
"photo.png",
"notes.md",
],
)
def test_universal_extensions_are_not_skipped(filename, mocker):
"""Files supported by all parsers (or handled by plaintext/direct_convert) are never skipped."""
for service in ("DOCLING", "LLAMACLOUD", "UNSTRUCTURED"):
@ -31,16 +47,19 @@ def test_universal_extensions_are_not_skipped(filename, mocker):
assert ext is None
@pytest.mark.parametrize("filename,service,expected_skip", [
("macro.docm", "DOCLING", True),
("macro.docm", "LLAMACLOUD", False),
("mail.eml", "DOCLING", True),
("mail.eml", "UNSTRUCTURED", False),
("photo.gif", "DOCLING", True),
("photo.gif", "LLAMACLOUD", False),
("photo.heic", "UNSTRUCTURED", False),
("photo.heic", "DOCLING", True),
])
@pytest.mark.parametrize(
"filename,service,expected_skip",
[
("macro.docm", "DOCLING", True),
("macro.docm", "LLAMACLOUD", False),
("mail.eml", "DOCLING", True),
("mail.eml", "UNSTRUCTURED", False),
("photo.gif", "DOCLING", True),
("photo.gif", "LLAMACLOUD", False),
("photo.heic", "UNSTRUCTURED", False),
("photo.heic", "DOCLING", True),
],
)
def test_parser_specific_extensions(filename, service, expected_skip, mocker):
mocker.patch("app.config.config.ETL_SERVICE", service)
skip, ext = should_skip_by_extension(filename)

View file

@ -45,9 +45,16 @@ def test_onenote_is_skipped():
# ---------------------------------------------------------------------------
@pytest.mark.parametrize("filename", [
"malware.exe", "archive.zip", "video.mov", "font.woff2", "model.blend",
])
@pytest.mark.parametrize(
"filename",
[
"malware.exe",
"archive.zip",
"video.mov",
"font.woff2",
"model.blend",
],
)
def test_unsupported_extensions_are_skipped(filename, mocker):
mocker.patch("app.config.config.ETL_SERVICE", "DOCLING")
item = {"name": filename, "file": {"mimeType": "application/octet-stream"}}
@ -56,10 +63,19 @@ def test_unsupported_extensions_are_skipped(filename, mocker):
assert ext is not None
@pytest.mark.parametrize("filename", [
"report.pdf", "doc.docx", "sheet.xlsx", "slides.pptx",
"readme.txt", "data.csv", "photo.png", "notes.md",
])
@pytest.mark.parametrize(
"filename",
[
"report.pdf",
"doc.docx",
"sheet.xlsx",
"slides.pptx",
"readme.txt",
"data.csv",
"photo.png",
"notes.md",
],
)
def test_universal_files_are_not_skipped(filename, mocker):
for service in ("DOCLING", "LLAMACLOUD", "UNSTRUCTURED"):
mocker.patch("app.config.config.ETL_SERVICE", service)
@ -69,14 +85,17 @@ def test_universal_files_are_not_skipped(filename, mocker):
assert ext is None
@pytest.mark.parametrize("filename,service,expected_skip", [
("macro.docm", "DOCLING", True),
("macro.docm", "LLAMACLOUD", False),
("mail.eml", "DOCLING", True),
("mail.eml", "UNSTRUCTURED", False),
("photo.heic", "UNSTRUCTURED", False),
("photo.heic", "DOCLING", True),
])
@pytest.mark.parametrize(
"filename,service,expected_skip",
[
("macro.docm", "DOCLING", True),
("macro.docm", "LLAMACLOUD", False),
("mail.eml", "DOCLING", True),
("mail.eml", "UNSTRUCTURED", False),
("photo.heic", "UNSTRUCTURED", False),
("photo.heic", "DOCLING", True),
],
)
def test_parser_specific_extensions(filename, service, expected_skip, mocker):
mocker.patch("app.config.config.ETL_SERVICE", service)
item = {"name": filename, "file": {"mimeType": "application/octet-stream"}}