mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-02 20:32:39 +02:00
feat(connectors): add Google Drive folder browsing and file listing
- List folder contents with full pagination support - Query root folder or specific parent folder - Return both folders and files with metadata (size, icons, links) - Filter out shortcuts and trashed items
This commit is contained in:
parent
40304c6795
commit
84bde67979
1 changed files with 243 additions and 0 deletions
243
surfsense_backend/app/connectors/google_drive/folder_manager.py
Normal file
243
surfsense_backend/app/connectors/google_drive/folder_manager.py
Normal file
|
|
@ -0,0 +1,243 @@
|
||||||
|
"""
|
||||||
|
Folder Management for Google Drive.
|
||||||
|
|
||||||
|
Handles folder listing, selection, and hierarchy operations.
|
||||||
|
Small, focused module for folder-related operations.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from .client import GoogleDriveClient
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
async def list_folders(
|
||||||
|
client: GoogleDriveClient,
|
||||||
|
parent_id: str | None = None,
|
||||||
|
) -> tuple[list[dict[str, Any]], str | None]:
|
||||||
|
"""
|
||||||
|
List folders in Google Drive.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
client: GoogleDriveClient instance
|
||||||
|
parent_id: Parent folder ID (None for root)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (folders list, error message)
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Build query to get only folders
|
||||||
|
query_parts = ["mimeType = 'application/vnd.google-apps.folder'", "trashed = false"]
|
||||||
|
|
||||||
|
if parent_id:
|
||||||
|
query_parts.append(f"'{parent_id}' in parents")
|
||||||
|
|
||||||
|
query = " and ".join(query_parts)
|
||||||
|
|
||||||
|
folders, _, error = await client.list_files(
|
||||||
|
query=query,
|
||||||
|
fields="files(id, name, parents, createdTime, modifiedTime)",
|
||||||
|
page_size=100,
|
||||||
|
)
|
||||||
|
|
||||||
|
if error:
|
||||||
|
return [], error
|
||||||
|
|
||||||
|
return folders, None
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error listing folders: {e!s}", exc_info=True)
|
||||||
|
return [], f"Error listing folders: {e!s}"
|
||||||
|
|
||||||
|
|
||||||
|
async def get_folder_hierarchy(
|
||||||
|
client: GoogleDriveClient,
|
||||||
|
folder_id: str,
|
||||||
|
) -> tuple[list[dict[str, str]], str | None]:
|
||||||
|
"""
|
||||||
|
Get the full path hierarchy for a folder.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
client: GoogleDriveClient instance
|
||||||
|
folder_id: Folder ID to get hierarchy for
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (hierarchy list [{'id': ..., 'name': ...}], error message)
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
hierarchy = []
|
||||||
|
current_id = folder_id
|
||||||
|
|
||||||
|
# Traverse up to root
|
||||||
|
while current_id:
|
||||||
|
file, error = await client.get_file_metadata(
|
||||||
|
current_id,
|
||||||
|
fields="id, name, parents, mimeType"
|
||||||
|
)
|
||||||
|
|
||||||
|
if error:
|
||||||
|
return [], error
|
||||||
|
|
||||||
|
if not file:
|
||||||
|
break
|
||||||
|
|
||||||
|
hierarchy.insert(0, {"id": file["id"], "name": file["name"]})
|
||||||
|
|
||||||
|
# Get parent
|
||||||
|
parents = file.get("parents", [])
|
||||||
|
current_id = parents[0] if parents else None
|
||||||
|
|
||||||
|
return hierarchy, None
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error getting folder hierarchy: {e!s}", exc_info=True)
|
||||||
|
return [], f"Error getting folder hierarchy: {e!s}"
|
||||||
|
|
||||||
|
|
||||||
|
async def get_files_in_folder(
|
||||||
|
client: GoogleDriveClient,
|
||||||
|
folder_id: str,
|
||||||
|
include_subfolders: bool = True,
|
||||||
|
page_token: str | None = None,
|
||||||
|
) -> tuple[list[dict[str, Any]], str | None, str | None]:
|
||||||
|
"""
|
||||||
|
Get all indexable files in a folder.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
client: GoogleDriveClient instance
|
||||||
|
folder_id: Folder ID to search in
|
||||||
|
include_subfolders: Whether to include subfolders
|
||||||
|
page_token: Pagination token
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (files list, next_page_token, error message)
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Build query
|
||||||
|
query_parts = [
|
||||||
|
f"'{folder_id}' in parents",
|
||||||
|
"trashed = false",
|
||||||
|
"mimeType != 'application/vnd.google-apps.shortcut'", # Skip shortcuts
|
||||||
|
]
|
||||||
|
|
||||||
|
if not include_subfolders:
|
||||||
|
query_parts.append("mimeType != 'application/vnd.google-apps.folder'")
|
||||||
|
|
||||||
|
query = " and ".join(query_parts)
|
||||||
|
|
||||||
|
files, next_token, error = await client.list_files(
|
||||||
|
query=query,
|
||||||
|
page_size=100,
|
||||||
|
page_token=page_token,
|
||||||
|
)
|
||||||
|
|
||||||
|
if error:
|
||||||
|
return [], None, error
|
||||||
|
|
||||||
|
return files, next_token, None
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error getting files in folder: {e!s}", exc_info=True)
|
||||||
|
return [], None, f"Error getting files in folder: {e!s}"
|
||||||
|
|
||||||
|
|
||||||
|
def format_folder_path(hierarchy: list[dict[str, str]]) -> str:
|
||||||
|
"""
|
||||||
|
Format folder hierarchy as a path string.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
hierarchy: List of folder dicts with 'id' and 'name'
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Formatted path (e.g., "My Drive / Projects / Documents")
|
||||||
|
"""
|
||||||
|
if not hierarchy:
|
||||||
|
return "My Drive"
|
||||||
|
|
||||||
|
folder_names = [folder["name"] for folder in hierarchy]
|
||||||
|
return " / ".join(folder_names)
|
||||||
|
|
||||||
|
|
||||||
|
async def list_folder_contents(
|
||||||
|
client: GoogleDriveClient,
|
||||||
|
parent_id: str | None = None,
|
||||||
|
) -> tuple[list[dict[str, Any]], str | None]:
|
||||||
|
"""
|
||||||
|
List both folders and files in a Google Drive folder.
|
||||||
|
|
||||||
|
Fetches ALL items using pagination (handles folders with >100 items).
|
||||||
|
Returns items sorted with folders first, then files.
|
||||||
|
Each item includes 'isFolder' boolean for frontend rendering.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
client: GoogleDriveClient instance
|
||||||
|
parent_id: Parent folder ID (None for root)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (items list with folders and files, error message)
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Build query to get folders and files (exclude shortcuts)
|
||||||
|
query_parts = [
|
||||||
|
"trashed = false",
|
||||||
|
"mimeType != 'application/vnd.google-apps.shortcut'",
|
||||||
|
]
|
||||||
|
|
||||||
|
# For root, we need to explicitly query for items in 'root'
|
||||||
|
# For subfolders, query for items with that parent
|
||||||
|
if parent_id:
|
||||||
|
query_parts.append(f"'{parent_id}' in parents")
|
||||||
|
else:
|
||||||
|
# Query for root-level items
|
||||||
|
query_parts.append("'root' in parents")
|
||||||
|
|
||||||
|
query = " and ".join(query_parts)
|
||||||
|
|
||||||
|
# Fetch all items with pagination (max 1000 per page)
|
||||||
|
all_items = []
|
||||||
|
page_token = None
|
||||||
|
|
||||||
|
while True:
|
||||||
|
items, next_token, error = await client.list_files(
|
||||||
|
query=query,
|
||||||
|
fields="files(id, name, mimeType, parents, createdTime, modifiedTime, size, webViewLink, iconLink)",
|
||||||
|
page_size=1000, # Max allowed by Google Drive API
|
||||||
|
page_token=page_token,
|
||||||
|
)
|
||||||
|
|
||||||
|
if error:
|
||||||
|
return [], error
|
||||||
|
|
||||||
|
all_items.extend(items)
|
||||||
|
|
||||||
|
# If no more pages, break
|
||||||
|
if not next_token:
|
||||||
|
break
|
||||||
|
|
||||||
|
page_token = next_token
|
||||||
|
|
||||||
|
# Add 'isFolder' flag and sort (folders first, then files)
|
||||||
|
for item in all_items:
|
||||||
|
item["isFolder"] = item["mimeType"] == "application/vnd.google-apps.folder"
|
||||||
|
|
||||||
|
# Sort: folders first (alphabetically), then files (alphabetically)
|
||||||
|
all_items.sort(key=lambda x: (not x["isFolder"], x["name"].lower()))
|
||||||
|
|
||||||
|
# Count folders and files for logging
|
||||||
|
folder_count = sum(1 for item in all_items if item["isFolder"])
|
||||||
|
file_count = len(all_items) - folder_count
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
f"Listed {len(all_items)} items ({folder_count} folders, {file_count} files) "
|
||||||
|
+ (f"in folder {parent_id}" if parent_id else "in root (My Drive)")
|
||||||
|
)
|
||||||
|
|
||||||
|
return all_items, None
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error listing folder contents: {e!s}", exc_info=True)
|
||||||
|
return [], f"Error listing folder contents: {e!s}"
|
||||||
|
|
||||||
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue