mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-15 18:25:18 +02:00
feat(connectors): add Google Drive file type detection and mapping
- Detect Google Workspace files (Docs, Sheets, Slides)
- Map to PDF export format to preserve rich content (images, formatting)
- Identify files to skip (shortcuts, unsupported types)
This commit is contained in:
parent
74386affdc
commit
701c3409b3
1 changed file with 37 additions and 0 deletions
37
surfsense_backend/app/connectors/google_drive/file_types.py
Normal file
37
surfsense_backend/app/connectors/google_drive/file_types.py
Normal file
|
|
@@ -0,0 +1,37 @@
|
||||||
|
"""
|
||||||
|
File Type Handlers for Google Drive.
|
||||||
|
|
||||||
|
Simple module for basic file type detection.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Google Workspace MIME types that need export
|
||||||
|
GOOGLE_DOC = "application/vnd.google-apps.document"
|
||||||
|
GOOGLE_SHEET = "application/vnd.google-apps.spreadsheet"
|
||||||
|
GOOGLE_SLIDE = "application/vnd.google-apps.presentation"
|
||||||
|
GOOGLE_FOLDER = "application/vnd.google-apps.folder"
|
||||||
|
GOOGLE_SHORTCUT = "application/vnd.google-apps.shortcut"
|
||||||
|
|
||||||
|
# Export MIME types for Google Workspace files
|
||||||
|
# Export as PDF to preserve formatting, images, and structure
|
||||||
|
EXPORT_FORMATS = {
|
||||||
|
GOOGLE_DOC: "application/pdf",
|
||||||
|
GOOGLE_SHEET: "application/pdf",
|
||||||
|
GOOGLE_SLIDE: "application/pdf",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def is_google_workspace_file(mime_type: str) -> bool:
    """Check if a MIME type belongs to the Google Workspace family.

    This is a plain prefix match on ``application/vnd.google-apps``, so it
    returns True for every Google-native type -- including folders and
    shortcuts -- and not only for the types that have an export format.
    Callers that need to know whether a file can actually be exported
    should also consult ``get_export_mime_type``.

    Args:
        mime_type: MIME type string reported for the file.

    Returns:
        True if ``mime_type`` starts with ``application/vnd.google-apps``,
        False otherwise.
    """
    return mime_type.startswith("application/vnd.google-apps")
|
||||||
|
|
||||||
|
|
||||||
|
def should_skip_file(mime_type: str) -> bool:
    """Check if file should be skipped (folders, shortcuts, etc).

    Args:
        mime_type: MIME type string reported for the file.

    Returns:
        True if ``mime_type`` is exactly the folder or shortcut MIME type,
        False for anything else.
    """
    # A tuple literal avoids building a new list on every call.
    return mime_type in (GOOGLE_FOLDER, GOOGLE_SHORTCUT)
|
||||||
|
|
||||||
|
|
||||||
|
def get_export_mime_type(mime_type: str) -> str | None:
    """Get export MIME type for Google Workspace files.

    Args:
        mime_type: MIME type string reported for the file.

    Returns:
        The MIME type to export to, as registered in ``EXPORT_FORMATS``,
        or None when ``mime_type`` has no export mapping.
    """
    return EXPORT_FORMATS.get(mime_type)
|
||||||
|
|
||||||
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue