From 701c3409b386e8a85d725cef37664f95c39157b3 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 15:54:42 +0200 Subject: [PATCH] feat(connectors): add Google Drive file type detection and mapping - Detect Google Workspace files (Docs, Sheets, Slides) - Map to PDF export format to preserve rich content (images, formatting) - Identify files to skip (shortcuts, unsupported types) --- .../app/connectors/google_drive/file_types.py | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 surfsense_backend/app/connectors/google_drive/file_types.py diff --git a/surfsense_backend/app/connectors/google_drive/file_types.py b/surfsense_backend/app/connectors/google_drive/file_types.py new file mode 100644 index 000000000..f66680c6c --- /dev/null +++ b/surfsense_backend/app/connectors/google_drive/file_types.py @@ -0,0 +1,37 @@ +""" +File Type Handlers for Google Drive. + +Simple module for basic file type detection. +""" + +# Google Workspace MIME types that need export +GOOGLE_DOC = "application/vnd.google-apps.document" +GOOGLE_SHEET = "application/vnd.google-apps.spreadsheet" +GOOGLE_SLIDE = "application/vnd.google-apps.presentation" +GOOGLE_FOLDER = "application/vnd.google-apps.folder" +GOOGLE_SHORTCUT = "application/vnd.google-apps.shortcut" + +# Export MIME types for Google Workspace files +# Export as PDF to preserve formatting, images, and structure +EXPORT_FORMATS = { + GOOGLE_DOC: "application/pdf", + GOOGLE_SHEET: "application/pdf", + GOOGLE_SLIDE: "application/pdf", +} + + +def is_google_workspace_file(mime_type: str) -> bool: + """Check if file is a Google Workspace file that needs export.""" + return mime_type.startswith("application/vnd.google-apps") + + +def should_skip_file(mime_type: str) -> bool: + """Check if file should be skipped (folders, shortcuts, etc).""" + return mime_type in [GOOGLE_FOLDER, GOOGLE_SHORTCUT] + + +def get_export_mime_type(mime_type: str) -> str | None: + """Get export MIME type for Google Workspace files.""" + return EXPORT_FORMATS.get(mime_type) + +