mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-02 19:55:18 +02:00
refactor: unify file skipping logic across Dropbox, Google Drive, and OneDrive connectors by replacing classification checks with a centralized service-based approach, enhancing maintainability and consistency in file handling
This commit is contained in:
parent
f03bf05aaa
commit
e7beeb2a36
13 changed files with 388 additions and 67 deletions
|
|
@@ -1,6 +1,6 @@
|
|||
"""File type handlers for Google Drive."""
|
||||
|
||||
-from app.etl_pipeline.file_classifier import FileCategory, classify_file
|
||||
+from app.etl_pipeline.file_classifier import should_skip_for_service
|
||||
|
||||
GOOGLE_DOC = "application/vnd.google-apps.document"
|
||||
GOOGLE_SHEET = "application/vnd.google-apps.spreadsheet"
|
||||
|
|
@@ -49,8 +49,10 @@ def should_skip_file(mime_type: str) -> bool:
|
|||
|
||||
|
||||
def should_skip_by_extension(filename: str) -> bool:
|
||||
-    """Return True if the file extension is not parseable by any ETL pipeline."""
|
||||
-    return classify_file(filename) == FileCategory.UNSUPPORTED
|
||||
+    """Return True if the file extension is not parseable by the configured ETL service."""
|
||||
+    from app.config import config as app_config
|
||||
+
|
||||
+    return should_skip_for_service(filename, app_config.ETL_SERVICE)
|
||||
|
||||
|
||||
def get_export_mime_type(mime_type: str) -> str | None:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue