mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-08 07:12:39 +02:00
refactor: implement file type classification for supported extensions across Dropbox, Google Drive, and OneDrive connectors, enhancing file handling and error management
This commit is contained in:
parent
47f4be08d9
commit
dc7047f64d
14 changed files with 250 additions and 27 deletions
|
|
@@ -1,5 +1,7 @@
|
|||
"""File type handlers for Microsoft OneDrive."""
|
||||
|
||||
from app.etl_pipeline.file_classifier import FileCategory, classify_file
|
||||
|
||||
ONEDRIVE_FOLDER_FACET = "folder"
|
||||
ONENOTE_MIME = "application/msonenote"
|
||||
|
||||
|
|
@@ -39,7 +41,7 @@ def is_folder(item: dict) -> bool:
|
|||
|
||||
|
||||
def should_skip_file(item: dict) -> bool:
|
||||
"""Skip folders, OneNote files, remote items (shared links), and packages."""
|
||||
"""Skip folders, OneNote files, remote items (shared links), packages, and unsupported extensions."""
|
||||
if is_folder(item):
|
||||
return True
|
||||
if "remoteItem" in item:
|
||||
|
|
@@ -47,4 +49,7 @@ def should_skip_file(item: dict) -> bool:
|
|||
if "package" in item:
|
||||
return True
|
||||
mime = item.get("file", {}).get("mimeType", "")
|
||||
return mime in SKIP_MIME_TYPES
|
||||
if mime in SKIP_MIME_TYPES:
|
||||
return True
|
||||
name = item.get("name", "")
|
||||
return classify_file(name) == FileCategory.UNSUPPORTED
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue