refactor: unify file-skipping logic across the Dropbox, Google Drive, and OneDrive connectors. Replaces the per-connector classification checks with a centralized, service-based check, improving maintainability and consistency in file handling.

This commit is contained in:
Anish Sarkar 2026-04-07 02:19:31 +05:30
parent f03bf05aaa
commit e7beeb2a36
13 changed files with 388 additions and 67 deletions

View file

@@ -1,6 +1,6 @@
 """File type handlers for Microsoft OneDrive."""
-from app.etl_pipeline.file_classifier import FileCategory, classify_file
+from app.etl_pipeline.file_classifier import should_skip_for_service
 ONEDRIVE_FOLDER_FACET = "folder"
 ONENOTE_MIME = "application/msonenote"
@@ -51,5 +51,7 @@ def should_skip_file(item: dict) -> bool:
     mime = item.get("file", {}).get("mimeType", "")
     if mime in SKIP_MIME_TYPES:
         return True
+    from app.config import config as app_config
     name = item.get("name", "")
-    return classify_file(name) == FileCategory.UNSUPPORTED
+    return should_skip_for_service(name, app_config.ETL_SERVICE)