mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-09 07:42:39 +02:00
refactor: unify file skipping logic across Dropbox, Google Drive, and OneDrive connectors by replacing classification checks with a centralized service-based approach, enhancing maintainability and consistency in file handling
This commit is contained in:
parent
f03bf05aaa
commit
e7beeb2a36
13 changed files with 388 additions and 67 deletions
|
|
@@ -1,6 +1,6 @@
|
|||
"""File type handlers for Microsoft OneDrive."""
|
||||
|
||||
from app.etl_pipeline.file_classifier import FileCategory, classify_file
|
||||
from app.etl_pipeline.file_classifier import should_skip_for_service
|
||||
|
||||
ONEDRIVE_FOLDER_FACET = "folder"
|
||||
ONENOTE_MIME = "application/msonenote"
|
||||
|
|
@@ -51,5 +51,7 @@ def should_skip_file(item: dict) -> bool:
|
|||
mime = item.get("file", {}).get("mimeType", "")
|
||||
if mime in SKIP_MIME_TYPES:
|
||||
return True
|
||||
from app.config import config as app_config
|
||||
|
||||
name = item.get("name", "")
|
||||
return classify_file(name) == FileCategory.UNSUPPORTED
|
||||
return should_skip_for_service(name, app_config.ETL_SERVICE)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue