feat(connectors): add Google Drive file type detection and mapping

- Detect Google Workspace files (Docs, Sheets, Slides)
- Map to PDF export format to preserve rich content (images, formatting)
- Identify files to skip (shortcuts, unsupported types)
This commit is contained in:
CREDO23 2025-12-28 15:54:42 +02:00
parent 74386affdc
commit 701c3409b3

View file

@ -0,0 +1,37 @@
"""
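Google Drive file type helpers.

Recognises Google Workspace MIME types, identifies entries to skip
(folders and shortcuts), and maps exportable Workspace files to the
MIME type they are exported as.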
"""
# MIME types Google Drive assigns to its native (non-binary) entries.
GOOGLE_DOC = "application/vnd.google-apps.document"
GOOGLE_SHEET = "application/vnd.google-apps.spreadsheet"
GOOGLE_SLIDE = "application/vnd.google-apps.presentation"
GOOGLE_FOLDER = "application/vnd.google-apps.folder"
GOOGLE_SHORTCUT = "application/vnd.google-apps.shortcut"

# Source MIME type -> MIME type to request when exporting.
# Docs, Sheets and Slides are all exported as PDF so that formatting,
# images, and structure are preserved.
EXPORT_FORMATS = dict.fromkeys(
    (GOOGLE_DOC, GOOGLE_SHEET, GOOGLE_SLIDE),
    "application/pdf",
)
def is_google_workspace_file(mime_type: str) -> bool:
"""Check if file is a Google Workspace file that needs export."""
return mime_type.startswith("application/vnd.google-apps")
def should_skip_file(mime_type: str) -> bool:
    """Report whether the MIME type denotes an entry that is skipped.

    Returns:
        ``True`` when ``mime_type`` equals the folder or the shortcut
        MIME type, ``False`` for everything else.
    """
    is_folder = mime_type == GOOGLE_FOLDER
    is_shortcut = mime_type == GOOGLE_SHORTCUT
    return is_folder or is_shortcut
def get_export_mime_type(mime_type: str) -> str | None:
    """Look up the MIME type a Google Workspace file is exported as.

    Returns:
        The target MIME type registered in ``EXPORT_FORMATS`` for
        ``mime_type``, or ``None`` when no export mapping exists.
    """
    if mime_type in EXPORT_FORMATS:
        return EXPORT_FORMATS[mime_type]
    return None