From f54079643f61fbdbbfc52eb067eb1deed1bd7d76 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 15:53:35 +0200 Subject: [PATCH 01/39] feat(db): add GOOGLE_DRIVE_CONNECTOR to DocumentType and SearchSourceConnectorType enums --- surfsense_backend/app/db.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index a2a424c26..a6bc3b938 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -46,6 +46,7 @@ class DocumentType(str, Enum): CLICKUP_CONNECTOR = "CLICKUP_CONNECTOR" GOOGLE_CALENDAR_CONNECTOR = "GOOGLE_CALENDAR_CONNECTOR" GOOGLE_GMAIL_CONNECTOR = "GOOGLE_GMAIL_CONNECTOR" + GOOGLE_DRIVE_CONNECTOR = "GOOGLE_DRIVE_CONNECTOR" AIRTABLE_CONNECTOR = "AIRTABLE_CONNECTOR" LUMA_CONNECTOR = "LUMA_CONNECTOR" ELASTICSEARCH_CONNECTOR = "ELASTICSEARCH_CONNECTOR" @@ -69,6 +70,7 @@ class SearchSourceConnectorType(str, Enum): CLICKUP_CONNECTOR = "CLICKUP_CONNECTOR" GOOGLE_CALENDAR_CONNECTOR = "GOOGLE_CALENDAR_CONNECTOR" GOOGLE_GMAIL_CONNECTOR = "GOOGLE_GMAIL_CONNECTOR" + GOOGLE_DRIVE_CONNECTOR = "GOOGLE_DRIVE_CONNECTOR" AIRTABLE_CONNECTOR = "AIRTABLE_CONNECTOR" LUMA_CONNECTOR = "LUMA_CONNECTOR" ELASTICSEARCH_CONNECTOR = "ELASTICSEARCH_CONNECTOR" From 5dd88386383c7f53dc42b253e35c32cedefaed5e Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 15:53:44 +0200 Subject: [PATCH 02/39] feat(db): add idempotent Alembic migration for GOOGLE_DRIVE_CONNECTOR enums --- .../54_add_google_drive_connector_enums.py | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 surfsense_backend/alembic/versions/54_add_google_drive_connector_enums.py diff --git a/surfsense_backend/alembic/versions/54_add_google_drive_connector_enums.py b/surfsense_backend/alembic/versions/54_add_google_drive_connector_enums.py new file mode 100644 index 000000000..8e7d69340 --- /dev/null +++ b/surfsense_backend/alembic/versions/54_add_google_drive_connector_enums.py @@ -0,0 +1,74 @@ +"""Add Google Drive connector enums + +Revision ID: 54 +Revises: 53 +Create Date: 2025-12-28 12:00:00.000000 + +""" + +from collections.abc import Sequence + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "54" +down_revision: str | None = "53" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """Safely add 'GOOGLE_DRIVE_CONNECTOR' to enum types if missing.""" + + # Add to searchsourceconnectortype enum + op.execute( + """ + DO $$ + BEGIN + IF NOT EXISTS ( + SELECT 1 FROM pg_type t + JOIN pg_enum e ON t.oid = e.enumtypid + WHERE t.typname = 'searchsourceconnectortype' AND e.enumlabel = 'GOOGLE_DRIVE_CONNECTOR' + ) THEN + ALTER TYPE searchsourceconnectortype ADD VALUE 'GOOGLE_DRIVE_CONNECTOR'; + END IF; + END + $$; + """ + ) + + # Add to documenttype enum + op.execute( + """ + DO $$ + BEGIN + IF NOT EXISTS ( + SELECT 1 FROM pg_type t + JOIN pg_enum e ON t.oid = e.enumtypid + WHERE t.typname = 'documenttype' AND e.enumlabel = 'GOOGLE_DRIVE_CONNECTOR' + ) THEN + ALTER TYPE documenttype ADD VALUE 'GOOGLE_DRIVE_CONNECTOR'; + END IF; + END + $$; + """ + ) + + +def downgrade() -> None: + """Remove 'GOOGLE_DRIVE_CONNECTOR' from enum types. + + Note: PostgreSQL doesn't support removing enum values directly. + This would require recreating the enum type, which is complex and risky. + For now, we'll leave the enum values in place. + + In a production environment with strict downgrade requirements, you would need to: + 1. 
Create new enum types without the value + 2. Convert all columns to use the new type + 3. Drop the old enum type + 4. Rename the new type to the old name + + This is left as pass to avoid accidental data loss. + """ + pass + From 28979851270674e2d855d46e94456c91d0d9d89b Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 15:53:51 +0200 Subject: [PATCH 03/39] feat(config): add GOOGLE_DRIVE_REDIRECT_URI environment variable --- surfsense_backend/app/config/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/surfsense_backend/app/config/__init__.py b/surfsense_backend/app/config/__init__.py index 08be26de1..9c503fb18 100644 --- a/surfsense_backend/app/config/__init__.py +++ b/surfsense_backend/app/config/__init__.py @@ -82,6 +82,9 @@ class Config: # Google Gmail redirect URI GOOGLE_GMAIL_REDIRECT_URI = os.getenv("GOOGLE_GMAIL_REDIRECT_URI") + # Google Drive redirect URI + GOOGLE_DRIVE_REDIRECT_URI = os.getenv("GOOGLE_DRIVE_REDIRECT_URI") + # Airtable OAuth AIRTABLE_CLIENT_ID = os.getenv("AIRTABLE_CLIENT_ID") AIRTABLE_CLIENT_SECRET = os.getenv("AIRTABLE_CLIENT_SECRET") From 2c8717b14bf8455bfc113bbe13a0db793f9a8c99 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 15:54:26 +0200 Subject: [PATCH 04/39] feat(connectors): add Google Drive credentials module for OAuth management - Handle Google OAuth credential initialization and validation - Automatic token refresh with database persistence - Reuse existing tokens when valid --- .../app/connectors/google_drive/__init__.py | 24 ++++ .../connectors/google_drive/credentials.py | 109 ++++++++++++++++++ 2 files changed, 133 insertions(+) create mode 100644 surfsense_backend/app/connectors/google_drive/__init__.py create mode 100644 surfsense_backend/app/connectors/google_drive/credentials.py diff --git a/surfsense_backend/app/connectors/google_drive/__init__.py b/surfsense_backend/app/connectors/google_drive/__init__.py new file mode 100644 index 000000000..c50135155 --- /dev/null +++ b/surfsense_backend/app/connectors/google_drive/__init__.py @@ -0,0 +1,24 @@ +""" +Google Drive Connector Module. + +Simple, modular approach to Google Drive indexing. +""" + +from .change_tracker import categorize_change, fetch_all_changes, get_start_page_token +from .client import GoogleDriveClient +from .content_extractor import download_and_process_file +from .credentials import get_valid_credentials, validate_credentials +from .folder_manager import get_files_in_folder, list_folder_contents + +__all__ = [ + "GoogleDriveClient", + "get_valid_credentials", + "validate_credentials", + "download_and_process_file", + "get_files_in_folder", + "list_folder_contents", + "get_start_page_token", + "fetch_all_changes", + "categorize_change", +] + diff --git a/surfsense_backend/app/connectors/google_drive/credentials.py b/surfsense_backend/app/connectors/google_drive/credentials.py new file mode 100644 index 000000000..5d09df881 --- /dev/null +++ b/surfsense_backend/app/connectors/google_drive/credentials.py @@ -0,0 +1,109 @@ +""" +Google Drive OAuth Credentials Management. + +Handles credential validation, token refresh, and persistence to database. +Small, focused module for credential operations only. 
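+
+Illustrative usage (a minimal sketch; the wrapper name is hypothetical and the
+async session and connector id come from the caller):
+
+    from app.connectors.google_drive.credentials import get_valid_credentials
+
+    async def build_drive_credentials(session, connector_id: int):
+        # Loads the connector config, refreshes the access token if it has
+        # expired, and persists the refreshed token back to the connector row.
+        return await get_valid_credentials(session, connector_id)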
+""" + +import json +from datetime import datetime + +from google.auth.transport.requests import Request +from google.oauth2.credentials import Credentials +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.future import select +from sqlalchemy.orm.attributes import flag_modified + +from app.db import SearchSourceConnector, SearchSourceConnectorType + + +async def get_valid_credentials( + session: AsyncSession, + connector_id: int, +) -> Credentials: + """ + Get valid Google OAuth credentials, refreshing if needed. + + Args: + session: Database session + connector_id: Connector ID + + Returns: + Valid Google OAuth credentials + + Raises: + ValueError: If credentials are missing or invalid + Exception: If token refresh fails + """ + # Fetch connector from database + result = await session.execute( + select(SearchSourceConnector).filter( + SearchSourceConnector.id == connector_id + ) + ) + connector = result.scalars().first() + + if not connector: + raise ValueError(f"Connector {connector_id} not found") + + # Extract credentials from config + config_data = connector.config + exp = config_data.get("expiry", "").replace("Z", "") + + # Validate required fields + if not all( + [ + config_data.get("client_id"), + config_data.get("client_secret"), + config_data.get("refresh_token"), + ] + ): + raise ValueError( + "Google OAuth credentials (client_id, client_secret, refresh_token) must be set" + ) + + # Create credentials object + credentials = Credentials( + token=config_data.get("token"), + refresh_token=config_data.get("refresh_token"), + token_uri=config_data.get("token_uri"), + client_id=config_data.get("client_id"), + client_secret=config_data.get("client_secret"), + scopes=config_data.get("scopes", []), + expiry=datetime.fromisoformat(exp) if exp else None, + ) + + # Refresh token if expired + if credentials.expired or not credentials.valid: + try: + credentials.refresh(Request()) + + # Persist refreshed token to database + connector.config = json.loads(credentials.to_json()) + flag_modified(connector, "config") + await session.commit() + + except Exception as e: + raise Exception(f"Failed to refresh Google OAuth credentials: {e!s}") from e + + return credentials + + +def validate_credentials(credentials: Credentials) -> bool: + """ + Validate that credentials have required fields. + + Args: + credentials: Google OAuth credentials + + Returns: + True if valid, False otherwise + """ + return all( + [ + credentials.client_id, + credentials.client_secret, + credentials.refresh_token, + ] + ) + From 74386affdcebbdf422235b94a67729f7f73b4304 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 15:54:32 +0200 Subject: [PATCH 05/39] feat(connectors): add Google Drive API client wrapper - Build and manage Google Drive service with credentials - List files with query support and pagination - Download binary files and export Google Workspace files as PDF - Handle HTTP errors gracefully --- .../app/connectors/google_drive/client.py | 194 ++++++++++++++++++ 1 file changed, 194 insertions(+) create mode 100644 surfsense_backend/app/connectors/google_drive/client.py diff --git a/surfsense_backend/app/connectors/google_drive/client.py b/surfsense_backend/app/connectors/google_drive/client.py new file mode 100644 index 000000000..6d2d0abfd --- /dev/null +++ b/surfsense_backend/app/connectors/google_drive/client.py @@ -0,0 +1,194 @@ +""" +Google Drive API Client. + +Core client for interacting with Google Drive API. +Handles service initialization and basic file operations. 
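+
+Illustrative usage (a sketch only; the connector id and the query string are
+example values, and the async session is supplied by the caller):
+
+    client = GoogleDriveClient(session, connector_id=42)
+    files, next_token, error = await client.list_files(
+        query="mimeType != 'application/vnd.google-apps.folder' and trashed = false",
+        page_size=100,
+    )
+    if error is None:
+        for f in files:
+            print(f["id"], f["name"], f["mimeType"])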
+""" + +from typing import Any + +from google.oauth2.credentials import Credentials +from googleapiclient.discovery import build +from googleapiclient.errors import HttpError +from sqlalchemy.ext.asyncio import AsyncSession + +from .credentials import get_valid_credentials + + +class GoogleDriveClient: + """ + Main client for Google Drive API operations. + + Handles service initialization and provides methods for + listing files, getting metadata, and downloading content. + """ + + def __init__(self, session: AsyncSession, connector_id: int): + """ + Initialize Google Drive client. + + Args: + session: Database session + connector_id: ID of the Drive connector + """ + self.session = session + self.connector_id = connector_id + self.service = None + + async def get_service(self): + """ + Get or create the Drive service instance. + + Returns: + Google Drive service instance + + Raises: + Exception: If service creation fails + """ + if self.service: + return self.service + + try: + credentials = await get_valid_credentials(self.session, self.connector_id) + self.service = build("drive", "v3", credentials=credentials) + return self.service + except Exception as e: + raise Exception(f"Failed to create Google Drive service: {e!s}") from e + + async def list_files( + self, + query: str = "", + fields: str = "nextPageToken, files(id, name, mimeType, modifiedTime, size, webViewLink, parents, owners, createdTime, description)", + page_size: int = 100, + page_token: str | None = None, + ) -> tuple[list[dict[str, Any]], str | None, str | None]: + """ + List files from Google Drive with pagination. + + Args: + query: Search query (e.g., "mimeType != 'application/vnd.google-apps.folder'") + fields: Fields to retrieve + page_size: Number of files per page (max 1000) + page_token: Token for next page + + Returns: + Tuple of (files list, next_page_token, error message) + """ + try: + service = await self.get_service() + + params = { + "pageSize": min(page_size, 1000), + "fields": fields, + "supportsAllDrives": True, + "includeItemsFromAllDrives": True, + } + + if query: + params["q"] = query + if page_token: + params["pageToken"] = page_token + + result = service.files().list(**params).execute() + + files = result.get("files", []) + next_token = result.get("nextPageToken") + + return files, next_token, None + + except HttpError as e: + error_msg = f"HTTP error listing files: {e.resp.status} - {e.error_details}" + return [], None, error_msg + except Exception as e: + return [], None, f"Error listing files: {e!s}" + + async def get_file_metadata( + self, file_id: str, fields: str = "*" + ) -> tuple[dict[str, Any] | None, str | None]: + """ + Get metadata for a specific file. + + Args: + file_id: ID of the file + fields: Fields to retrieve + + Returns: + Tuple of (file metadata, error message) + """ + try: + service = await self.get_service() + file = service.files().get(fileId=file_id, fields=fields, supportsAllDrives=True).execute() + return file, None + except HttpError as e: + return None, f"HTTP error getting file metadata: {e.resp.status}" + except Exception as e: + return None, f"Error getting file metadata: {e!s}" + + async def download_file( + self, file_id: str + ) -> tuple[bytes | None, str | None]: + """ + Download binary file content. 
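+
+        Illustrative sketch (the file id and output path are example values):
+
+            content, error = await client.download_file("1AbCdEfGhIj")
+            if error is None and content is not None:
+                with open("download.bin", "wb") as fh:
+                    fh.write(content)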
+ + Args: + file_id: ID of the file to download + + Returns: + Tuple of (file content bytes, error message) + """ + try: + service = await self.get_service() + request = service.files().get_media(fileId=file_id) + + # Execute the download + import io + + fh = io.BytesIO() + from googleapiclient.http import MediaIoBaseDownload + + downloader = MediaIoBaseDownload(fh, request) + + done = False + while not done: + _, done = downloader.next_chunk() + + return fh.getvalue(), None + + except HttpError as e: + return None, f"HTTP error downloading file: {e.resp.status}" + except Exception as e: + return None, f"Error downloading file: {e!s}" + + async def export_google_file( + self, file_id: str, mime_type: str + ) -> tuple[bytes | None, str | None]: + """ + Export Google Workspace file to specified format. + + Args: + file_id: ID of the Google file + mime_type: Target MIME type (e.g., 'application/pdf', 'text/plain') + + Returns: + Tuple of (exported content as bytes, error message) + """ + try: + service = await self.get_service() + content = ( + service.files() + .export(fileId=file_id, mimeType=mime_type) + .execute() + ) + + # Content is already bytes from the API + # Keep as bytes to support both text and binary formats (like PDF) + if not isinstance(content, bytes): + content = content.encode("utf-8") + + return content, None + + except HttpError as e: + return None, f"HTTP error exporting file: {e.resp.status}" + except Exception as e: + return None, f"Error exporting file: {e!s}" + From 701c3409b386e8a85d725cef37664f95c39157b3 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 15:54:42 +0200 Subject: [PATCH 06/39] feat(connectors): add Google Drive file type detection and mapping - Detect Google Workspace files (Docs, Sheets, Slides) - Map to PDF export format to preserve rich content (images, formatting) - Identify files to skip (shortcuts, unsupported types) --- .../app/connectors/google_drive/file_types.py | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 surfsense_backend/app/connectors/google_drive/file_types.py diff --git a/surfsense_backend/app/connectors/google_drive/file_types.py b/surfsense_backend/app/connectors/google_drive/file_types.py new file mode 100644 index 000000000..f66680c6c --- /dev/null +++ b/surfsense_backend/app/connectors/google_drive/file_types.py @@ -0,0 +1,37 @@ +""" +File Type Handlers for Google Drive. + +Simple module for basic file type detection. 
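+
+Illustrative usage (the MIME type is an example value):
+
+    mime = "application/vnd.google-apps.document"
+    if not should_skip_file(mime) and is_google_workspace_file(mime):
+        export_mime = get_export_mime_type(mime)  # -> "application/pdf"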
+""" + +# Google Workspace MIME types that need export +GOOGLE_DOC = "application/vnd.google-apps.document" +GOOGLE_SHEET = "application/vnd.google-apps.spreadsheet" +GOOGLE_SLIDE = "application/vnd.google-apps.presentation" +GOOGLE_FOLDER = "application/vnd.google-apps.folder" +GOOGLE_SHORTCUT = "application/vnd.google-apps.shortcut" + +# Export MIME types for Google Workspace files +# Export as PDF to preserve formatting, images, and structure +EXPORT_FORMATS = { + GOOGLE_DOC: "application/pdf", + GOOGLE_SHEET: "application/pdf", + GOOGLE_SLIDE: "application/pdf", +} + + +def is_google_workspace_file(mime_type: str) -> bool: + """Check if file is a Google Workspace file that needs export.""" + return mime_type.startswith("application/vnd.google-apps") + + +def should_skip_file(mime_type: str) -> bool: + """Check if file should be skipped (folders, shortcuts, etc).""" + return mime_type in [GOOGLE_FOLDER, GOOGLE_SHORTCUT] + + +def get_export_mime_type(mime_type: str) -> str | None: + """Get export MIME type for Google Workspace files.""" + return EXPORT_FORMATS.get(mime_type) + + From 40304c6795b9ab669fb594ee140abf6d5ce2d41e Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 15:54:50 +0200 Subject: [PATCH 07/39] feat(connectors): add Google Drive content extraction using existing ETL - Download files from Google Drive to temporary location - Export Google Workspace files as PDF - Delegate content extraction to existing process_file_in_background - Reuse Surfsense's ETL services (Unstructured, LlamaCloud, Docling) --- .../google_drive/content_extractor.py | 122 ++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 surfsense_backend/app/connectors/google_drive/content_extractor.py diff --git a/surfsense_backend/app/connectors/google_drive/content_extractor.py b/surfsense_backend/app/connectors/google_drive/content_extractor.py new file mode 100644 index 000000000..82b8d42b3 --- /dev/null +++ b/surfsense_backend/app/connectors/google_drive/content_extractor.py @@ -0,0 +1,122 @@ +""" +Content Extraction for Google Drive Files. + +Downloads files and delegates to Surfsense's existing file processors. +""" + +import logging +import os +import tempfile +from pathlib import Path +from typing import Any + +from sqlalchemy.ext.asyncio import AsyncSession + +from app.db import Log +from app.services.task_logging_service import TaskLoggingService + +from .client import GoogleDriveClient +from .file_types import get_export_mime_type, is_google_workspace_file, should_skip_file + +logger = logging.getLogger(__name__) + + +async def download_and_process_file( + client: GoogleDriveClient, + file: dict[str, Any], + search_space_id: int, + user_id: str, + session: AsyncSession, + task_logger: TaskLoggingService, + log_entry: Log, +) -> tuple[Any, str | None]: + """ + Download Google Drive file and process using Surfsense's existing infrastructure. + + This is the ONLY function needed - it delegates everything to process_file_in_background. 
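+
+    Illustrative call (a sketch; the indexer supplies the real client, file
+    metadata dict, session, task logger, and log entry):
+
+        document, error = await download_and_process_file(
+            client=drive_client,
+            file=file_metadata,
+            search_space_id=search_space_id,
+            user_id=user_id,
+            session=session,
+            task_logger=task_logger,
+            log_entry=log_entry,
+        )
+        if document is None and error:
+            logger.info("Skipped: %s", error)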
+ + Args: + client: GoogleDriveClient instance + file: File metadata from Drive API + search_space_id: ID of the search space + user_id: ID of the user + session: Database session + task_logger: Task logging service + log_entry: Log entry for tracking + + Returns: + Tuple of (Document object if successful, error message if failed) + """ + file_id = file.get("id") + file_name = file.get("name", "Unknown") + mime_type = file.get("mimeType", "") + + # Skip folders and shortcuts + if should_skip_file(mime_type): + return None, f"Skipping {mime_type}" + + logger.info(f"Downloading file: {file_name} ({mime_type})") + + temp_file_path = None + try: + # Step 1: Download or export the file + if is_google_workspace_file(mime_type): + # Google Workspace files need export (as PDF to preserve formatting & images) + export_mime = get_export_mime_type(mime_type) + if not export_mime: + return None, f"Cannot export Google Workspace type: {mime_type}" + + logger.info(f"Exporting Google Workspace file as {export_mime}") + content_bytes, error = await client.export_google_file(file_id, export_mime) + if error: + return None, error + + # Set extension based on export format + extension = ".pdf" if export_mime == "application/pdf" else ".txt" + else: + # Regular files - download directly + content_bytes, error = await client.download_file(file_id) + if error: + return None, error + + # Preserve original file extension + extension = Path(file_name).suffix or ".bin" + + # Save to temporary file + with tempfile.NamedTemporaryFile(delete=False, suffix=extension) as tmp_file: + tmp_file.write(content_bytes) + temp_file_path = tmp_file.name + + # Step 2: Delegate to Surfsense's existing file processor + # This handles ALL file types: markdown, audio, PDFs, Office docs, images, etc. 
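+        # The import is kept local to this function (mirroring the lazy-import
+        # pattern used elsewhere in this series); a plausible reason is avoiding
+        # a circular import with the document-processing tasks, but that is an
+        # assumption, not something stated in this patch.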
+ from app.tasks.document_processors.file_processors import ( + process_file_in_background, + ) + + logger.info(f"Processing {file_name} with Surfsense's file processor") + result = await process_file_in_background( + file_path=temp_file_path, + filename=file_name, + search_space_id=search_space_id, + user_id=user_id, + session=session, + task_logger=task_logger, + log_entry=log_entry, + ) + + # process_file_in_background returns None on duplicate/error, Document on success + return result, None + + except Exception as e: + logger.warning(f"Failed to process {file_name}: {e!s}") + return None, str(e) + + finally: + # Cleanup temp file (if process_file_in_background didn't already delete it) + if temp_file_path and os.path.exists(temp_file_path): + try: + os.unlink(temp_file_path) + except Exception as e: + logger.debug(f"Could not delete temp file {temp_file_path}: {e}") + + From 84bde67979e82cd4010baa340506499a7d1830db Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 15:54:58 +0200 Subject: [PATCH 08/39] feat(connectors): add Google Drive folder browsing and file listing - List folder contents with full pagination support - Query root folder or specific parent folder - Return both folders and files with metadata (size, icons, links) - Filter out shortcuts and trashed items --- .../connectors/google_drive/folder_manager.py | 243 ++++++++++++++++++ 1 file changed, 243 insertions(+) create mode 100644 surfsense_backend/app/connectors/google_drive/folder_manager.py diff --git a/surfsense_backend/app/connectors/google_drive/folder_manager.py b/surfsense_backend/app/connectors/google_drive/folder_manager.py new file mode 100644 index 000000000..da9deb75d --- /dev/null +++ b/surfsense_backend/app/connectors/google_drive/folder_manager.py @@ -0,0 +1,243 @@ +""" +Folder Management for Google Drive. + +Handles folder listing, selection, and hierarchy operations. +Small, focused module for folder-related operations. +""" + +import logging +from typing import Any + +from .client import GoogleDriveClient + +logger = logging.getLogger(__name__) + + +async def list_folders( + client: GoogleDriveClient, + parent_id: str | None = None, +) -> tuple[list[dict[str, Any]], str | None]: + """ + List folders in Google Drive. + + Args: + client: GoogleDriveClient instance + parent_id: Parent folder ID (None for root) + + Returns: + Tuple of (folders list, error message) + """ + try: + # Build query to get only folders + query_parts = ["mimeType = 'application/vnd.google-apps.folder'", "trashed = false"] + + if parent_id: + query_parts.append(f"'{parent_id}' in parents") + + query = " and ".join(query_parts) + + folders, _, error = await client.list_files( + query=query, + fields="files(id, name, parents, createdTime, modifiedTime)", + page_size=100, + ) + + if error: + return [], error + + return folders, None + + except Exception as e: + logger.error(f"Error listing folders: {e!s}", exc_info=True) + return [], f"Error listing folders: {e!s}" + + +async def get_folder_hierarchy( + client: GoogleDriveClient, + folder_id: str, +) -> tuple[list[dict[str, str]], str | None]: + """ + Get the full path hierarchy for a folder. 
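+
+    Illustrative usage (ids and folder names are example values):
+
+        hierarchy, error = await get_folder_hierarchy(client, "folder-id-123")
+        # hierarchy -> [{"id": "...", "name": "My Drive"},
+        #               {"id": "...", "name": "Projects"},
+        #               {"id": "...", "name": "Documents"}]
+        print(format_folder_path(hierarchy))  # "My Drive / Projects / Documents"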
+ + Args: + client: GoogleDriveClient instance + folder_id: Folder ID to get hierarchy for + + Returns: + Tuple of (hierarchy list [{'id': ..., 'name': ...}], error message) + """ + try: + hierarchy = [] + current_id = folder_id + + # Traverse up to root + while current_id: + file, error = await client.get_file_metadata( + current_id, + fields="id, name, parents, mimeType" + ) + + if error: + return [], error + + if not file: + break + + hierarchy.insert(0, {"id": file["id"], "name": file["name"]}) + + # Get parent + parents = file.get("parents", []) + current_id = parents[0] if parents else None + + return hierarchy, None + + except Exception as e: + logger.error(f"Error getting folder hierarchy: {e!s}", exc_info=True) + return [], f"Error getting folder hierarchy: {e!s}" + + +async def get_files_in_folder( + client: GoogleDriveClient, + folder_id: str, + include_subfolders: bool = True, + page_token: str | None = None, +) -> tuple[list[dict[str, Any]], str | None, str | None]: + """ + Get all indexable files in a folder. + + Args: + client: GoogleDriveClient instance + folder_id: Folder ID to search in + include_subfolders: Whether to include subfolders + page_token: Pagination token + + Returns: + Tuple of (files list, next_page_token, error message) + """ + try: + # Build query + query_parts = [ + f"'{folder_id}' in parents", + "trashed = false", + "mimeType != 'application/vnd.google-apps.shortcut'", # Skip shortcuts + ] + + if not include_subfolders: + query_parts.append("mimeType != 'application/vnd.google-apps.folder'") + + query = " and ".join(query_parts) + + files, next_token, error = await client.list_files( + query=query, + page_size=100, + page_token=page_token, + ) + + if error: + return [], None, error + + return files, next_token, None + + except Exception as e: + logger.error(f"Error getting files in folder: {e!s}", exc_info=True) + return [], None, f"Error getting files in folder: {e!s}" + + +def format_folder_path(hierarchy: list[dict[str, str]]) -> str: + """ + Format folder hierarchy as a path string. + + Args: + hierarchy: List of folder dicts with 'id' and 'name' + + Returns: + Formatted path (e.g., "My Drive / Projects / Documents") + """ + if not hierarchy: + return "My Drive" + + folder_names = [folder["name"] for folder in hierarchy] + return " / ".join(folder_names) + + +async def list_folder_contents( + client: GoogleDriveClient, + parent_id: str | None = None, +) -> tuple[list[dict[str, Any]], str | None]: + """ + List both folders and files in a Google Drive folder. + + Fetches ALL items using pagination (handles folders with >100 items). + Returns items sorted with folders first, then files. + Each item includes 'isFolder' boolean for frontend rendering. 
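+
+    Illustrative usage (passing None lists the root of My Drive; a real folder
+    id would replace it):
+
+        items, error = await list_folder_contents(client, parent_id=None)
+        folders = [item for item in items if item["isFolder"]]
+        files = [item for item in items if not item["isFolder"]]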
+ + Args: + client: GoogleDriveClient instance + parent_id: Parent folder ID (None for root) + + Returns: + Tuple of (items list with folders and files, error message) + """ + try: + # Build query to get folders and files (exclude shortcuts) + query_parts = [ + "trashed = false", + "mimeType != 'application/vnd.google-apps.shortcut'", + ] + + # For root, we need to explicitly query for items in 'root' + # For subfolders, query for items with that parent + if parent_id: + query_parts.append(f"'{parent_id}' in parents") + else: + # Query for root-level items + query_parts.append("'root' in parents") + + query = " and ".join(query_parts) + + # Fetch all items with pagination (max 1000 per page) + all_items = [] + page_token = None + + while True: + items, next_token, error = await client.list_files( + query=query, + fields="files(id, name, mimeType, parents, createdTime, modifiedTime, size, webViewLink, iconLink)", + page_size=1000, # Max allowed by Google Drive API + page_token=page_token, + ) + + if error: + return [], error + + all_items.extend(items) + + # If no more pages, break + if not next_token: + break + + page_token = next_token + + # Add 'isFolder' flag and sort (folders first, then files) + for item in all_items: + item["isFolder"] = item["mimeType"] == "application/vnd.google-apps.folder" + + # Sort: folders first (alphabetically), then files (alphabetically) + all_items.sort(key=lambda x: (not x["isFolder"], x["name"].lower())) + + # Count folders and files for logging + folder_count = sum(1 for item in all_items if item["isFolder"]) + file_count = len(all_items) - folder_count + + logger.info( + f"Listed {len(all_items)} items ({folder_count} folders, {file_count} files) " + + (f"in folder {parent_id}" if parent_id else "in root (My Drive)") + ) + + return all_items, None + + except Exception as e: + logger.error(f"Error listing folder contents: {e!s}", exc_info=True) + return [], f"Error listing folder contents: {e!s}" + + From 3e67d5f31ec9792c5a063f2ebcc7172b3c2fc57a Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 15:55:06 +0200 Subject: [PATCH 09/39] feat(connectors): add Google Drive delta sync with change tracking - Get start page token for change tracking baseline - Fetch incremental changes using Google Drive Changes API - Categorize changes into added, modified, and removed files - Enable efficient re-indexing of only changed content --- .../connectors/google_drive/change_tracker.py | 213 ++++++++++++++++++ 1 file changed, 213 insertions(+) create mode 100644 surfsense_backend/app/connectors/google_drive/change_tracker.py diff --git a/surfsense_backend/app/connectors/google_drive/change_tracker.py b/surfsense_backend/app/connectors/google_drive/change_tracker.py new file mode 100644 index 000000000..1c697af5f --- /dev/null +++ b/surfsense_backend/app/connectors/google_drive/change_tracker.py @@ -0,0 +1,213 @@ +""" +Change Tracking for Google Drive - Delta Sync Support. + +Handles change detection and incremental syncing using Drive API's changes endpoint. +Small, focused module for tracking file modifications. +""" + +import logging +from datetime import datetime +from typing import Any + +from .client import GoogleDriveClient + +logger = logging.getLogger(__name__) + + +async def get_start_page_token( + client: GoogleDriveClient, +) -> tuple[str | None, str | None]: + """ + Get the starting page token for change tracking. + + This token represents the current state and is used for future delta syncs. 
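+
+    Illustrative usage (a sketch of how this series stores the baseline token
+    on the connector's JSON config; connector and session come from the caller):
+
+        from sqlalchemy.orm.attributes import flag_modified
+
+        token, error = await get_start_page_token(client)
+        if token and not error:
+            connector.config["start_page_token"] = token
+            flag_modified(connector, "config")  # JSON mutation must be flagged
+            await session.commit()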
+ + Args: + client: GoogleDriveClient instance + + Returns: + Tuple of (start_page_token, error message) + """ + try: + service = await client.get_service() + response = service.changes().getStartPageToken(supportsAllDrives=True).execute() + token = response.get("startPageToken") + + logger.info(f"Got start page token: {token}") + return token, None + + except Exception as e: + logger.error(f"Error getting start page token: {e!s}", exc_info=True) + return None, f"Error getting start page token: {e!s}" + + +async def get_changes( + client: GoogleDriveClient, + page_token: str, + folder_id: str | None = None, +) -> tuple[list[dict[str, Any]], str | None, str | None]: + """ + Get list of changes since the given page token. + + Args: + client: GoogleDriveClient instance + page_token: Page token from previous sync + folder_id: Optional folder ID to filter changes + + Returns: + Tuple of (changes list, new_page_token, error message) + """ + try: + service = await client.get_service() + + params = { + "pageToken": page_token, + "pageSize": 100, + "fields": "nextPageToken, newStartPageToken, changes(fileId, removed, file(id, name, mimeType, modifiedTime, size, webViewLink, parents, trashed))", + "supportsAllDrives": True, + "includeItemsFromAllDrives": True, + } + + response = service.changes().list(**params).execute() + + changes = response.get("changes", []) + next_token = response.get("nextPageToken") + new_start_token = response.get("newStartPageToken") + + # Use new start token if this is the last page + token_to_return = new_start_token if new_start_token else next_token + + # Filter changes by folder if specified + if folder_id: + changes = await _filter_changes_by_folder(client, changes, folder_id) + + logger.info(f"Got {len(changes)} changes, next token: {token_to_return}") + return changes, token_to_return, None + + except Exception as e: + logger.error(f"Error getting changes: {e!s}", exc_info=True) + return [], None, f"Error getting changes: {e!s}" + + +async def _filter_changes_by_folder( + client: GoogleDriveClient, + changes: list[dict[str, Any]], + folder_id: str, +) -> list[dict[str, Any]]: + """ + Filter changes to only include files within the specified folder. + + Args: + client: GoogleDriveClient instance + changes: List of changes from API + folder_id: Folder ID to filter by + + Returns: + Filtered list of changes + """ + filtered = [] + + for change in changes: + file = change.get("file") + if not file: + # File was removed + filtered.append(change) + continue + + # Check if file is in the folder (or subfolder) + parents = file.get("parents", []) + if folder_id in parents: + filtered.append(change) + else: + # Check if any parent is a descendant of folder_id + # This is a simplified check - full implementation would traverse hierarchy + # For now, we'll include it and let indexer validate + filtered.append(change) + + return filtered + + +def categorize_change(change: dict[str, Any]) -> str: + """ + Categorize a change event. 
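+
+    Illustrative dispatch (a sketch of how an indexer reacts to each category;
+    the handler bodies are placeholders):
+
+        kind = categorize_change(change)
+        if kind in ("removed", "trashed"):
+            ...  # delete the previously indexed document
+        else:
+            ...  # "new" or "modified": download and (re)index the file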
+ + Args: + change: Change event from Drive API + + Returns: + Category: 'removed', 'trashed', 'modified', 'new' + """ + if change.get("removed"): + return "removed" + + file = change.get("file") + if not file: + return "removed" + + if file.get("trashed"): + return "trashed" + + # Check if file was recently created + created_time = file.get("createdTime") + modified_time = file.get("modifiedTime") + + if created_time and modified_time: + try: + created = datetime.fromisoformat(created_time.replace("Z", "+00:00")) + modified = datetime.fromisoformat(modified_time.replace("Z", "+00:00")) + + # If created and modified times are very close, it's likely a new file + time_diff = abs((modified - created).total_seconds()) + if time_diff < 60: # Within 1 minute + return "new" + except Exception: + pass + + return "modified" + + +async def fetch_all_changes( + client: GoogleDriveClient, + start_token: str, + folder_id: str | None = None, +) -> tuple[list[dict[str, Any]], str | None, str | None]: + """ + Fetch all changes from start token, handling pagination. + + Args: + client: GoogleDriveClient instance + start_token: Starting page token + folder_id: Optional folder ID to filter changes + + Returns: + Tuple of (all changes, final_page_token, error message) + """ + all_changes = [] + current_token = start_token + error = None + + try: + while current_token: + changes, next_token, err = await get_changes( + client, current_token, folder_id + ) + + if err: + error = err + break + + all_changes.extend(changes) + + # If next_token is None, we've reached the end + if not next_token or next_token == current_token: + break + + current_token = next_token + + logger.info(f"Fetched total of {len(all_changes)} changes") + return all_changes, current_token, error + + except Exception as e: + logger.error(f"Error fetching all changes: {e!s}", exc_info=True) + return all_changes, current_token, f"Error fetching all changes: {e!s}" + From bf02005d82ddb5c8329176b5469492535753c5f7 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 15:55:13 +0200 Subject: [PATCH 10/39] feat(routes): add Google Drive OAuth and folder listing endpoints - OAuth initialization and callback handling - Folder and file browsing with parent_id support - Validate credentials and handle token refresh - Return folder contents with metadata for UI tree view --- .../google_drive_add_connector_route.py | 315 ++++++++++++++++++ 1 file changed, 315 insertions(+) create mode 100644 surfsense_backend/app/routes/google_drive_add_connector_route.py diff --git a/surfsense_backend/app/routes/google_drive_add_connector_route.py b/surfsense_backend/app/routes/google_drive_add_connector_route.py new file mode 100644 index 000000000..d11404781 --- /dev/null +++ b/surfsense_backend/app/routes/google_drive_add_connector_route.py @@ -0,0 +1,315 @@ +""" +Google Drive Connector OAuth Routes. + +Handles OAuth 2.0 authentication flow for Google Drive connector. +Folder selection happens at index time on the manage connector page. 
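+
+Illustrative client-side sketch (httpx, the backend base URL, the bearer-token
+auth header, and the space id are assumptions made only for this example):
+
+    import httpx
+
+    api_base = "http://localhost:8000"  # assumed backend URL, including any API prefix
+    headers = {"Authorization": "Bearer <session-token>"}  # assumed auth scheme
+
+    resp = httpx.get(
+        f"{api_base}/auth/google/drive/connector/add",
+        params={"space_id": 7},
+        headers=headers,
+    )
+    auth_url = resp.json()["auth_url"]  # send the browser here to grant access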
+ +Endpoints: +- GET /auth/google/drive/connector/add - Initiate OAuth +- GET /auth/google/drive/connector/callback - Handle OAuth callback +- GET /connectors/{connector_id}/google-drive/folders - List user's folders (for index-time selection) +""" + +import base64 +import json +import logging +import os +from uuid import UUID + +from fastapi import APIRouter, Depends, HTTPException, Request +from fastapi.responses import RedirectResponse +from google_auth_oauthlib.flow import Flow +from pydantic import ValidationError +from sqlalchemy.exc import IntegrityError +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.future import select + +from app.config import config +from app.connectors.google_drive import ( + GoogleDriveClient, + get_start_page_token, + get_valid_credentials, + list_folder_contents, +) +from app.connectors.google_drive.folder_manager import list_folders +from app.db import ( + SearchSourceConnector, + SearchSourceConnectorType, + User, + get_async_session, +) +from app.users import current_active_user + +# Relax token scope validation for Google OAuth +os.environ["OAUTHLIB_RELAX_TOKEN_SCOPE"] = "1" + +logger = logging.getLogger(__name__) +router = APIRouter() + +# Google Drive OAuth scopes +SCOPES = [ + "https://www.googleapis.com/auth/drive.readonly", # Read-only access to Drive + "https://www.googleapis.com/auth/userinfo.email", # User email + "https://www.googleapis.com/auth/userinfo.profile", # User profile + "openid", +] + + +def get_google_flow(): + """Create and return a Google OAuth flow for Drive API.""" + try: + return Flow.from_client_config( + { + "web": { + "client_id": config.GOOGLE_OAUTH_CLIENT_ID, + "client_secret": config.GOOGLE_OAUTH_CLIENT_SECRET, + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token", + "redirect_uris": [config.GOOGLE_DRIVE_REDIRECT_URI], + } + }, + scopes=SCOPES, + redirect_uri=config.GOOGLE_DRIVE_REDIRECT_URI, + ) + except Exception as e: + raise HTTPException( + status_code=500, detail=f"Failed to create Google OAuth flow: {e!s}" + ) from e + + +@router.get("/auth/google/drive/connector/add") +async def connect_drive(space_id: int, user: User = Depends(current_active_user)): + """ + Initiate Google Drive OAuth flow. 
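+
+    Illustrative response (shape only; the URL is truncated):
+
+        {"auth_url": "https://accounts.google.com/o/oauth2/auth?..."}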
+ + Query params: + space_id: Search space ID to add connector to + + Returns: + JSON with auth_url to redirect user to Google authorization + """ + try: + if not space_id: + raise HTTPException(status_code=400, detail="space_id is required") + + flow = get_google_flow() + + # Encode space_id and user_id in state parameter + state_payload = json.dumps( + { + "space_id": space_id, + "user_id": str(user.id), + } + ) + state_encoded = base64.urlsafe_b64encode(state_payload.encode()).decode() + + # Generate authorization URL + auth_url, _ = flow.authorization_url( + access_type="offline", # Get refresh token + prompt="consent", # Force consent screen to get refresh token + include_granted_scopes="true", + state=state_encoded, + ) + + logger.info(f"Initiating Google Drive OAuth for user {user.id}, space {space_id}") + return {"auth_url": auth_url} + + except Exception as e: + logger.error(f"Failed to initiate Google Drive OAuth: {e!s}", exc_info=True) + raise HTTPException( + status_code=500, detail=f"Failed to initiate Google OAuth: {e!s}" + ) from e + + +@router.get("/auth/google/drive/connector/callback") +async def drive_callback( + request: Request, + code: str, + state: str, + session: AsyncSession = Depends(get_async_session), +): + """ + Handle Google Drive OAuth callback. + + Query params: + code: Authorization code from Google + state: Encoded state with space_id and user_id + + Returns: + Redirect to frontend success page + """ + try: + # Decode and parse state + decoded_state = base64.urlsafe_b64decode(state.encode()).decode() + data = json.loads(decoded_state) + + user_id = UUID(data["user_id"]) + space_id = data["space_id"] + + logger.info(f"Processing Google Drive callback for user {user_id}, space {space_id}") + + # Exchange authorization code for tokens + flow = get_google_flow() + flow.fetch_token(code=code) + + creds = flow.credentials + creds_dict = json.loads(creds.to_json()) + + # Check if connector already exists for this space/user + result = await session.execute( + select(SearchSourceConnector).filter( + SearchSourceConnector.search_space_id == space_id, + SearchSourceConnector.user_id == user_id, + SearchSourceConnector.connector_type + == SearchSourceConnectorType.GOOGLE_DRIVE_CONNECTOR, + ) + ) + existing_connector = result.scalars().first() + + if existing_connector: + raise HTTPException( + status_code=409, + detail="A GOOGLE_DRIVE_CONNECTOR already exists in this search space. 
Each search space can have only one connector of each type per user.", + ) + + # Create new connector (NO folder selection here - happens at index time) + db_connector = SearchSourceConnector( + name="Google Drive Connector", + connector_type=SearchSourceConnectorType.GOOGLE_DRIVE_CONNECTOR, + config={ + **creds_dict, + "start_page_token": None, # Will be set on first index + }, + search_space_id=space_id, + user_id=user_id, + is_indexable=True, + ) + + session.add(db_connector) + await session.commit() + await session.refresh(db_connector) + + # Get initial start page token for delta sync + try: + drive_client = GoogleDriveClient(session, db_connector.id) + start_token, token_error = await get_start_page_token(drive_client) + + if start_token and not token_error: + db_connector.config["start_page_token"] = start_token + from sqlalchemy.orm.attributes import flag_modified + + flag_modified(db_connector, "config") + await session.commit() + logger.info(f"Set initial start page token for connector {db_connector.id}") + except Exception as e: + logger.warning(f"Failed to get initial start page token: {e!s}") + + logger.info( + f"Successfully created Google Drive connector {db_connector.id} for user {user_id}" + ) + + # Redirect to connectors management page (not to folder selection) + return RedirectResponse( + url=f"{config.NEXT_FRONTEND_URL}/dashboard/{space_id}/connectors?success=google-drive-connected" + ) + + except HTTPException: + await session.rollback() + raise + except ValidationError as e: + await session.rollback() + logger.error(f"Validation error: {e!s}", exc_info=True) + raise HTTPException( + status_code=400, detail=f"Invalid connector configuration: {e!s}" + ) from e + except IntegrityError as e: + await session.rollback() + logger.error(f"Database integrity error: {e!s}", exc_info=True) + raise HTTPException( + status_code=409, + detail="A connector with this configuration already exists.", + ) from e + except Exception as e: + await session.rollback() + logger.error(f"Unexpected error in Drive callback: {e!s}", exc_info=True) + raise HTTPException( + status_code=500, detail=f"Failed to complete Google OAuth: {e!s}" + ) from e + + +@router.get("/connectors/{connector_id}/google-drive/folders") +async def list_google_drive_folders( + connector_id: int, + parent_id: str | None = None, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """ + List folders AND files in user's Google Drive with hierarchical support. + + This is called at index time from the manage connector page to display + the complete file system (folders and files). Only folders are selectable. + + Args: + connector_id: ID of the Google Drive connector + parent_id: Optional parent folder ID to list contents (None for root) + + Returns: + JSON with list of items: { + "items": [ + {"id": str, "name": str, "mimeType": str, "isFolder": bool, ...}, + ... 
+ ] + } + """ + try: + # Get connector and verify ownership + result = await session.execute( + select(SearchSourceConnector).filter( + SearchSourceConnector.id == connector_id, + SearchSourceConnector.user_id == user.id, + SearchSourceConnector.connector_type + == SearchSourceConnectorType.GOOGLE_DRIVE_CONNECTOR, + ) + ) + connector = result.scalars().first() + + if not connector: + raise HTTPException( + status_code=404, + detail="Google Drive connector not found or access denied", + ) + + # Initialize Drive client (credentials will be loaded on first API call) + drive_client = GoogleDriveClient(session, connector_id) + + # List both folders and files (sorted: folders first) + items, error = await list_folder_contents(drive_client, parent_id=parent_id) + + if error: + raise HTTPException( + status_code=500, detail=f"Failed to list folder contents: {error}" + ) + + # Count folders and files for better logging + folder_count = sum(1 for item in items if item.get("isFolder", False)) + file_count = len(items) - folder_count + + logger.info( + f"✅ Listed {len(items)} total items ({folder_count} folders, {file_count} files) for connector {connector_id}" + + (f" in folder {parent_id}" if parent_id else " in ROOT") + ) + + # Log first few items for debugging + if items: + logger.info(f"First 3 items: {[item.get('name') for item in items[:3]]}") + + return {"items": items} + + except HTTPException: + raise + except Exception as e: + logger.error(f"Error listing Drive contents: {e!s}", exc_info=True) + raise HTTPException( + status_code=500, detail=f"Failed to list Drive contents: {e!s}" + ) from e From 1696c7056a8e448ca7bec7c7f00bf046a3e54e26 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 15:55:25 +0200 Subject: [PATCH 11/39] feat(indexer): add Google Drive folder indexing with delta sync - Full folder scan on first index - Delta sync using change tracking for subsequent indexes - Process files in parallel batches - Handle file additions, modifications, and deletions - Store change tracking token for efficient re-indexing --- .../google_drive_indexer.py | 448 ++++++++++++++++++ 1 file changed, 448 insertions(+) create mode 100644 surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py diff --git a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py new file mode 100644 index 000000000..9c4d446de --- /dev/null +++ b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py @@ -0,0 +1,448 @@ +""" +Google Drive Indexer - Delegates all processing to Surfsense's file processors. 
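+
+Illustrative entry-point call (a sketch; the ids and folder are example values,
+and in this series the Celery connector task is the usual caller):
+
+    indexed_count, error = await index_google_drive_files(
+        session=session,
+        connector_id=42,
+        search_space_id=7,
+        user_id=str(user.id),
+        folder_id="1AbCdEfGhIj",
+        folder_name="Projects",
+    )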
+ +Handles: +- Folder-specific indexing (user selects folder) +- Delta sync (only index changed files) +- Delegates file processing to process_file_in_background +""" + +import logging +from datetime import datetime + +from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy.ext.asyncio import AsyncSession + +from app.connectors.google_drive import ( + GoogleDriveClient, + categorize_change, + download_and_process_file, + fetch_all_changes, + get_files_in_folder, + get_start_page_token, +) +from app.db import DocumentType, SearchSourceConnectorType +from app.services.task_logging_service import TaskLoggingService +from app.tasks.connector_indexers.base import ( + check_document_by_unique_identifier, + get_connector_by_id, + update_connector_last_indexed, +) +from app.utils.document_converters import generate_unique_identifier_hash + +logger = logging.getLogger(__name__) + + +async def index_google_drive_files( + session: AsyncSession, + connector_id: int, + search_space_id: int, + user_id: str, + folder_id: str | None = None, + folder_name: str | None = None, + use_delta_sync: bool = True, + update_last_indexed: bool = True, + max_files: int = 500, +) -> tuple[int, str | None]: + """ + Index Google Drive files for a specific connector. + + Args: + session: Database session + connector_id: ID of the Drive connector + search_space_id: ID of the search space + user_id: ID of the user + folder_id: Specific folder to index (from UI/request, takes precedence) + folder_name: Folder name for display (from UI/request) + use_delta_sync: Whether to use change tracking for incremental sync + update_last_indexed: Whether to update last_indexed_at timestamp + max_files: Maximum number of files to index + + Returns: + Tuple of (number_of_indexed_files, error_message) + """ + task_logger = TaskLoggingService(session, search_space_id) + + # Log task start + log_entry = await task_logger.log_task_start( + task_name="google_drive_files_indexing", + source="connector_indexing_task", + message=f"Starting Google Drive indexing for connector {connector_id}", + metadata={ + "connector_id": connector_id, + "user_id": str(user_id), + "folder_id": folder_id, + "use_delta_sync": use_delta_sync, + "max_files": max_files, + }, + ) + + try: + # Get connector from database + connector = await get_connector_by_id( + session, connector_id, SearchSourceConnectorType.GOOGLE_DRIVE_CONNECTOR + ) + + if not connector: + error_msg = f"Google Drive connector with ID {connector_id} not found" + await task_logger.log_task_failure( + log_entry, error_msg, {"error_type": "ConnectorNotFound"} + ) + return 0, error_msg + + # Initialize Drive client + await task_logger.log_task_progress( + log_entry, + f"Initializing Google Drive client for connector {connector_id}", + {"stage": "client_initialization"}, + ) + + drive_client = GoogleDriveClient(session, connector_id) + + # Use folder from request params (required for Google Drive) + if not folder_id: + error_msg = "folder_id is required for Google Drive indexing" + await task_logger.log_task_failure( + log_entry, error_msg, {"error_type": "MissingParameter"} + ) + return 0, error_msg + + target_folder_id = folder_id + target_folder_name = folder_name or "Selected Folder" + + logger.info(f"Indexing Google Drive folder: {target_folder_name} ({target_folder_id})") + + # Decide sync strategy + start_page_token = connector.config.get("start_page_token") + can_use_delta_sync = use_delta_sync and start_page_token and connector.last_indexed_at + + if can_use_delta_sync: + logger.info(f"Using 
delta sync for connector {connector_id}") + result = await _index_with_delta_sync( + drive_client=drive_client, + session=session, + connector=connector, + connector_id=connector_id, + search_space_id=search_space_id, + user_id=user_id, + folder_id=target_folder_id, + start_page_token=start_page_token, + task_logger=task_logger, + log_entry=log_entry, + max_files=max_files, + ) + else: + logger.info(f"Using full scan for connector {connector_id}") + result = await _index_full_scan( + drive_client=drive_client, + session=session, + connector=connector, + connector_id=connector_id, + search_space_id=search_space_id, + user_id=user_id, + folder_id=target_folder_id, + folder_name=target_folder_name, + task_logger=task_logger, + log_entry=log_entry, + max_files=max_files, + ) + + documents_indexed, documents_skipped = result + + # Update last indexed timestamp and get new start page token + if documents_indexed > 0 or can_use_delta_sync: + # Get new start page token for next sync + new_token, token_error = await get_start_page_token(drive_client) + if new_token and not token_error: + from sqlalchemy.orm.attributes import flag_modified + + connector.config["start_page_token"] = new_token + flag_modified(connector, "config") + + await update_connector_last_indexed(session, connector, update_last_indexed) + + # Final commit + await session.commit() + logger.info( + f"Successfully committed Google Drive indexing changes to database" + ) + + # Log success + await task_logger.log_task_success( + log_entry, + f"Successfully completed Google Drive indexing for connector {connector_id}", + { + "files_processed": documents_indexed, + "files_skipped": documents_skipped, + "sync_type": "delta" if can_use_delta_sync else "full", + "folder": target_folder_name, + }, + ) + + logger.info( + f"Google Drive indexing completed: {documents_indexed} files indexed, {documents_skipped} skipped" + ) + return documents_indexed, None + + except SQLAlchemyError as db_error: + await session.rollback() + await task_logger.log_task_failure( + log_entry, + f"Database error during Google Drive indexing for connector {connector_id}", + str(db_error), + {"error_type": "SQLAlchemyError"}, + ) + logger.error(f"Database error: {db_error!s}", exc_info=True) + return 0, f"Database error: {db_error!s}" + except Exception as e: + await session.rollback() + await task_logger.log_task_failure( + log_entry, + f"Failed to index Google Drive files for connector {connector_id}", + str(e), + {"error_type": type(e).__name__}, + ) + logger.error(f"Failed to index Google Drive files: {e!s}", exc_info=True) + return 0, f"Failed to index Google Drive files: {e!s}" + + +async def _index_full_scan( + drive_client: GoogleDriveClient, + session: AsyncSession, + connector: any, + connector_id: int, + search_space_id: int, + user_id: str, + folder_id: str | None, + folder_name: str, + task_logger: TaskLoggingService, + log_entry: any, + max_files: int, +) -> tuple[int, int]: + """Perform full scan indexing of a folder.""" + await task_logger.log_task_progress( + log_entry, + f"Starting full scan of folder: {folder_name}", + {"stage": "full_scan", "folder_id": folder_id}, + ) + + documents_indexed = 0 + documents_skipped = 0 + page_token = None + files_processed = 0 + + # Paginate through all files in folder + while files_processed < max_files: + files, next_token, error = await get_files_in_folder( + drive_client, folder_id, include_subfolders=False, page_token=page_token + ) + + if error: + logger.error(f"Error listing files: {error}") + break + + if 
not files: + break + + for file in files: + if files_processed >= max_files: + break + + files_processed += 1 + + # Process file + indexed, skipped = await _process_single_file( + drive_client=drive_client, + session=session, + file=file, + connector_id=connector_id, + search_space_id=search_space_id, + user_id=user_id, + task_logger=task_logger, + log_entry=log_entry, + ) + + documents_indexed += indexed + documents_skipped += skipped + + # Batch commit every 10 files + if documents_indexed % 10 == 0 and documents_indexed > 0: + await session.commit() + logger.info(f"Committed batch: {documents_indexed} files indexed so far") + + page_token = next_token + if not page_token: + break + + logger.info( + f"Full scan complete: {documents_indexed} indexed, {documents_skipped} skipped" + ) + return documents_indexed, documents_skipped + + +async def _index_with_delta_sync( + drive_client: GoogleDriveClient, + session: AsyncSession, + connector: any, + connector_id: int, + search_space_id: int, + user_id: str, + folder_id: str | None, + start_page_token: str, + task_logger: TaskLoggingService, + log_entry: any, + max_files: int, +) -> tuple[int, int]: + """Perform delta sync indexing using change tracking.""" + await task_logger.log_task_progress( + log_entry, + f"Starting delta sync from token: {start_page_token[:20]}...", + {"stage": "delta_sync", "start_token": start_page_token}, + ) + + # Fetch all changes since last sync + changes, final_token, error = await fetch_all_changes( + drive_client, start_page_token, folder_id + ) + + if error: + logger.error(f"Error fetching changes: {error}") + return 0, 0 + + if not changes: + logger.info("No changes detected since last sync") + return 0, 0 + + logger.info(f"Processing {len(changes)} changes") + + documents_indexed = 0 + documents_skipped = 0 + files_processed = 0 + + for change in changes: + if files_processed >= max_files: + break + + files_processed += 1 + change_type = categorize_change(change) + + # Handle removed/trashed files + if change_type in ["removed", "trashed"]: + file_id = change.get("fileId") + if file_id: + await _remove_document(session, file_id, search_space_id) + continue + + # Handle modified/new files + file = change.get("file") + if not file: + continue + + indexed, skipped = await _process_single_file( + drive_client=drive_client, + session=session, + file=file, + connector_id=connector_id, + search_space_id=search_space_id, + user_id=user_id, + task_logger=task_logger, + log_entry=log_entry, + ) + + documents_indexed += indexed + documents_skipped += skipped + + # Batch commit every 10 files + if documents_indexed % 10 == 0 and documents_indexed > 0: + await session.commit() + logger.info(f"Committed batch: {documents_indexed} changes processed") + + logger.info( + f"Delta sync complete: {documents_indexed} indexed, {documents_skipped} skipped" + ) + return documents_indexed, documents_skipped + + +async def _process_single_file( + drive_client: GoogleDriveClient, + session: AsyncSession, + file: dict, + connector_id: int, + search_space_id: int, + user_id: str, + task_logger: TaskLoggingService, + log_entry: any, +) -> tuple[int, int]: + """ + Process a single file by downloading and using Surfsense's file processor. 
+ + Returns: + Tuple of (indexed_count, skipped_count) + """ + file_name = file.get("name", "Unknown") + mime_type = file.get("mimeType", "") + + try: + logger.info(f"Processing file: {file_name} ({mime_type})") + + # Download and process using Surfsense's existing infrastructure + # This handles: markdown, audio, PDFs, Office docs, images, etc. + # It also handles: deduplication, chunking, summarization, embedding + document, error = await download_and_process_file( + client=drive_client, + file=file, + search_space_id=search_space_id, + user_id=user_id, + session=session, + task_logger=task_logger, + log_entry=log_entry, + ) + + if error: + # Log and skip - not an error, just unsupported or empty + await task_logger.log_task_progress( + log_entry, + f"Skipped {file_name}: {error}", + {"status": "skipped", "reason": error}, + ) + return 0, 1 + + if document: + # Successfully indexed + await task_logger.log_task_progress( + log_entry, + f"Successfully indexed: {file_name}", + { + "status": "indexed", + "document_id": document.id, + "file_name": file_name, + }, + ) + return 1, 0 + else: + # Likely a duplicate or unsupported type + logger.info(f"No document created for {file_name} (duplicate or unsupported)") + return 0, 1 + + except Exception as e: + logger.error(f"Error processing file {file_name}: {e!s}", exc_info=True) + return 0, 1 + + +async def _remove_document( + session: AsyncSession, file_id: str, search_space_id: int +): + """Remove a document that was deleted in Drive.""" + unique_identifier_hash = generate_unique_identifier_hash( + DocumentType.GOOGLE_DRIVE_CONNECTOR, file_id, search_space_id + ) + + existing_document = await check_document_by_unique_identifier( + session, unique_identifier_hash + ) + + if existing_document: + await session.delete(existing_document) + logger.info(f"Removed deleted file document: {file_id}") + + From 501d08f2f4b52d939a6adede37b7f6bb96ce1326 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 15:55:38 +0200 Subject: [PATCH 12/39] feat(routes): register Google Drive OAuth router --- surfsense_backend/app/routes/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/surfsense_backend/app/routes/__init__.py b/surfsense_backend/app/routes/__init__.py index a055bf549..24751e596 100644 --- a/surfsense_backend/app/routes/__init__.py +++ b/surfsense_backend/app/routes/__init__.py @@ -11,6 +11,9 @@ from .google_calendar_add_connector_route import ( from .google_gmail_add_connector_route import ( router as google_gmail_add_connector_router, ) +from .google_drive_add_connector_route import ( + router as google_drive_add_connector_router, +) from .logs_routes import router as logs_router from .luma_add_connector_route import router as luma_add_connector_router from .new_chat_routes import router as new_chat_router @@ -33,6 +36,7 @@ router.include_router(podcasts_router) # Podcast task status and audio router.include_router(search_source_connectors_router) router.include_router(google_calendar_add_connector_router) router.include_router(google_gmail_add_connector_router) +router.include_router(google_drive_add_connector_router) router.include_router(airtable_add_connector_router) router.include_router(luma_add_connector_router) router.include_router(new_llm_config_router) # LLM configs with prompt configuration From 7b8900d51f119c9c0549eec37f6a8756aeda8221 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 15:55:46 +0200 Subject: [PATCH 13/39] feat(indexer): export Google Drive indexer function --- 
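For context, a minimal sketch of how the indexer exported here can be called directly. It mirrors the wrapper call added later in this series (run_google_drive_indexing); the session factory, connector/search-space IDs, and folder name below are placeholders, not real application objects.

# Hypothetical caller (illustrative only); assumes an async SQLAlchemy session factory.
from app.tasks.connector_indexers import index_google_drive_files


async def reindex_drive_folder(
    session_maker, connector_id: int, search_space_id: int, user_id: str
) -> None:
    async with session_maker() as session:
        indexed_count, error = await index_google_drive_files(
            session,
            connector_id,
            search_space_id,
            user_id,
            None,          # folder_id: per the route docs, None falls back to the connector's saved folder
            "My Drive",    # folder_name (display only)
            use_delta_sync=True,
            update_last_indexed=False,
        )
        if error:
            raise RuntimeError(f"Google Drive indexing failed: {error}")
        print(f"Indexed {indexed_count} Google Drive files")
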
surfsense_backend/app/tasks/connector_indexers/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/surfsense_backend/app/tasks/connector_indexers/__init__.py b/surfsense_backend/app/tasks/connector_indexers/__init__.py index dcfca33c3..80a9eaf19 100644 --- a/surfsense_backend/app/tasks/connector_indexers/__init__.py +++ b/surfsense_backend/app/tasks/connector_indexers/__init__.py @@ -35,6 +35,7 @@ from .elasticsearch_indexer import index_elasticsearch_documents from .github_indexer import index_github_repos from .google_calendar_indexer import index_google_calendar_events from .google_gmail_indexer import index_google_gmail_messages +from .google_drive_indexer import index_google_drive_files from .jira_indexer import index_jira_issues # Issue tracking and project management @@ -57,6 +58,7 @@ __all__ = [ # noqa: RUF022 "index_github_repos", # Calendar and scheduling "index_google_calendar_events", + "index_google_drive_files", "index_luma_events", "index_jira_issues", # Issue tracking and project management From 358abdf02f4124d99c280e5ee019874f582bf62b Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 15:55:57 +0200 Subject: [PATCH 14/39] feat(routes): add Google Drive indexing support with folder selection - Accept folder_id and folder_name as indexing parameters - Hide date range for Google Drive connectors - Create wrapper function to avoid circular imports - Trigger Google Drive indexing Celery task --- .../routes/search_source_connectors_routes.py | 72 +++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py index 5a7db7f37..d530163f4 100644 --- a/surfsense_backend/app/routes/search_source_connectors_routes.py +++ b/surfsense_backend/app/routes/search_source_connectors_routes.py @@ -45,6 +45,7 @@ from app.tasks.connector_indexers import ( index_github_repos, index_google_calendar_events, index_google_gmail_messages, + index_google_drive_files, index_jira_issues, index_linear_issues, index_luma_events, @@ -542,6 +543,14 @@ async def index_connector_content( None, description="End date for indexing (YYYY-MM-DD format). If not provided, uses today's date", ), + folder_id: str = Query( + None, + description="[Google Drive only] Folder ID to index. If not provided, uses the connector's saved selected_folder_id", + ), + folder_name: str = Query( + None, + description="[Google Drive only] Folder name for display purposes", + ), session: AsyncSession = Depends(get_async_session), user: User = Depends(current_active_user), ): @@ -747,6 +756,25 @@ async def index_connector_content( ) response_message = "Google Gmail indexing started in the background." + elif ( + connector.connector_type == SearchSourceConnectorType.GOOGLE_DRIVE_CONNECTOR + ): + from app.tasks.celery_tasks.connector_tasks import ( + index_google_drive_files_task, + ) + + logger.info( + f"Triggering Google Drive indexing for connector {connector_id} into search space {search_space_id}, folder: {folder_name or 'default'}" + ) + index_google_drive_files_task.delay( + connector_id, + search_space_id, + str(user.id), + folder_id, + folder_name, + ) + response_message = "Google Drive indexing started in the background." 
+ elif connector.connector_type == SearchSourceConnectorType.DISCORD_CONNECTOR: from app.tasks.celery_tasks.connector_tasks import ( index_discord_messages_task, @@ -1515,6 +1543,50 @@ async def run_google_gmail_indexing( # Optionally update status in DB to indicate failure +async def run_google_drive_indexing( + session: AsyncSession, + connector_id: int, + search_space_id: int, + user_id: str, + folder_id: str, + folder_name: str, +): + """Runs the Google Drive indexing task and updates the timestamp.""" + try: + from app.tasks.connector_indexers.google_drive_indexer import ( + index_google_drive_files, + ) + + indexed_count, error_message = await index_google_drive_files( + session, + connector_id, + search_space_id, + user_id, + folder_id, + folder_name, + use_delta_sync=True, + update_last_indexed=False, + ) + if error_message: + logger.error( + f"Google Drive indexing failed for connector {connector_id}: {error_message}" + ) + # Optionally update status in DB to indicate failure + else: + logger.info( + f"Google Drive indexing successful for connector {connector_id}. Indexed {indexed_count} documents." + ) + # Update the last indexed timestamp only on success + await update_connector_last_indexed(session, connector_id) + await session.commit() # Commit timestamp update + except Exception as e: + logger.error( + f"Critical error in run_google_drive_indexing for connector {connector_id}: {e}", + exc_info=True, + ) + # Optionally update status in DB to indicate failure + + # Add new helper functions for luma indexing async def run_luma_indexing_with_new_session( connector_id: int, From 1c83327fc7dc6c3272c27503e61269cbf543d463 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 15:56:11 +0200 Subject: [PATCH 15/39] feat(celery): add Google Drive indexing Celery task - Create async task for Google Drive folder indexing - Accept folder_id and folder_name parameters - Call indexing wrapper to avoid circular imports --- .../app/tasks/celery_tasks/connector_tasks.py | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/surfsense_backend/app/tasks/celery_tasks/connector_tasks.py b/surfsense_backend/app/tasks/celery_tasks/connector_tasks.py index 6cd557dc4..8e507915f 100644 --- a/surfsense_backend/app/tasks/celery_tasks/connector_tasks.py +++ b/surfsense_backend/app/tasks/celery_tasks/connector_tasks.py @@ -473,6 +473,58 @@ async def _index_google_gmail_messages( ) +@celery_app.task(name="index_google_drive_files", bind=True) +def index_google_drive_files_task( + self, + connector_id: int, + search_space_id: int, + user_id: str, + folder_id: str, + folder_name: str, +): + """Celery task to index Google Drive files.""" + import asyncio + + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + try: + loop.run_until_complete( + _index_google_drive_files( + connector_id, + search_space_id, + user_id, + folder_id, + folder_name, + ) + ) + finally: + loop.close() + + +async def _index_google_drive_files( + connector_id: int, + search_space_id: int, + user_id: str, + folder_id: str, + folder_name: str, +): + """Index Google Drive files with new session.""" + from app.routes.search_source_connectors_routes import ( + run_google_drive_indexing, + ) + + async with get_celery_session_maker()() as session: + await run_google_drive_indexing( + session, + connector_id, + search_space_id, + user_id, + folder_id, + folder_name, + ) + + @celery_app.task(name="index_discord_messages", bind=True) def index_discord_messages_task( self, From 
2d24f9ac7921d4c8cc1f3296e43c27b303ca1e3d Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 15:56:30 +0200 Subject: [PATCH 16/39] feat(types): add GOOGLE_DRIVE_CONNECTOR to frontend enum --- surfsense_web/contracts/enums/connector.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/surfsense_web/contracts/enums/connector.ts b/surfsense_web/contracts/enums/connector.ts index 6cdbc5656..eb2cf7ad8 100644 --- a/surfsense_web/contracts/enums/connector.ts +++ b/surfsense_web/contracts/enums/connector.ts @@ -14,6 +14,7 @@ export enum EnumConnectorName { CLICKUP_CONNECTOR = "CLICKUP_CONNECTOR", GOOGLE_CALENDAR_CONNECTOR = "GOOGLE_CALENDAR_CONNECTOR", GOOGLE_GMAIL_CONNECTOR = "GOOGLE_GMAIL_CONNECTOR", + GOOGLE_DRIVE_CONNECTOR = "GOOGLE_DRIVE_CONNECTOR", AIRTABLE_CONNECTOR = "AIRTABLE_CONNECTOR", LUMA_CONNECTOR = "LUMA_CONNECTOR", ELASTICSEARCH_CONNECTOR = "ELASTICSEARCH_CONNECTOR", From 11d94e0ea6ed8a5146001c2c228674aa2071b30d Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 15:56:36 +0200 Subject: [PATCH 17/39] feat(ui): add Google Drive icon to connector icons mapping --- surfsense_web/contracts/enums/connectorIcons.tsx | 3 +++ 1 file changed, 3 insertions(+) diff --git a/surfsense_web/contracts/enums/connectorIcons.tsx b/surfsense_web/contracts/enums/connectorIcons.tsx index 87840d7e4..661be5253 100644 --- a/surfsense_web/contracts/enums/connectorIcons.tsx +++ b/surfsense_web/contracts/enums/connectorIcons.tsx @@ -26,6 +26,7 @@ import { Sparkles, Telescope, Webhook, + HardDrive, } from "lucide-react"; import { EnumConnectorName } from "./connector"; @@ -57,6 +58,8 @@ export const getConnectorIcon = (connectorType: EnumConnectorName | string, clas return ; case EnumConnectorName.GOOGLE_GMAIL_CONNECTOR: return ; + case EnumConnectorName.GOOGLE_DRIVE_CONNECTOR: + return ; case EnumConnectorName.AIRTABLE_CONNECTOR: return ; case EnumConnectorName.CONFLUENCE_CONNECTOR: From bfbd813f4297605522b665cb532731739447dee0 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 15:56:42 +0200 Subject: [PATCH 18/39] feat(i18n): add Google Drive connector translation keys --- surfsense_web/messages/en.json | 1 + surfsense_web/messages/zh.json | 1 + 2 files changed, 2 insertions(+) diff --git a/surfsense_web/messages/en.json b/surfsense_web/messages/en.json index eac362b9c..f70c854e0 100644 --- a/surfsense_web/messages/en.json +++ b/surfsense_web/messages/en.json @@ -303,6 +303,7 @@ "luma_desc": "Connect to Luma to search events, meetups and gatherings.", "calendar_desc": "Connect to Google Calendar to search events, meetings and schedules.", "gmail_desc": "Connect to your Gmail account to search through your emails.", + "google_drive_desc": "Connect to Google Drive to search and index your files and documents.", "zoom_desc": "Connect to Zoom to access meeting recordings and transcripts.", "webcrawler_desc": "Crawl and index content from any public web pages." 
}, diff --git a/surfsense_web/messages/zh.json b/surfsense_web/messages/zh.json index b943a3c2c..483a10a10 100644 --- a/surfsense_web/messages/zh.json +++ b/surfsense_web/messages/zh.json @@ -303,6 +303,7 @@ "luma_desc": "连接到 Luma 以搜索活动、聚会和集会。", "calendar_desc": "连接到 Google 日历以搜索活动、会议和日程。", "gmail_desc": "连接到您的 Gmail 账户以搜索您的电子邮件。", + "google_drive_desc": "连接到 Google 云端硬盘以搜索和索引您的文件和文档。", "zoom_desc": "连接到 Zoom 以访问会议录制和转录。", "webcrawler_desc": "爬取和索引任何公开网页的内容。" }, From 48112f66df4096b6b44f898e11ec01d18f175e7c Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 15:56:52 +0200 Subject: [PATCH 19/39] feat(ui): add Google Drive connector card to Productivity category --- surfsense_web/components/sources/connector-data.tsx | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/surfsense_web/components/sources/connector-data.tsx b/surfsense_web/components/sources/connector-data.tsx index 338c3ae20..7fca3e6b9 100644 --- a/surfsense_web/components/sources/connector-data.tsx +++ b/surfsense_web/components/sources/connector-data.tsx @@ -183,6 +183,13 @@ export const connectorCategories: ConnectorCategory[] = [ icon: getConnectorIcon(EnumConnectorName.GOOGLE_GMAIL_CONNECTOR, "h-6 w-6"), status: "available", }, + { + id: "google-drive-connector", + title: "Google Drive", + description: "google_drive_desc", + icon: getConnectorIcon(EnumConnectorName.GOOGLE_DRIVE_CONNECTOR, "h-6 w-6"), + status: "available", + }, { id: "luma-connector", title: "Luma", From 90b3474b47d9d34e8182b0adda2251faee8feaed Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 15:57:02 +0200 Subject: [PATCH 20/39] feat(hooks): add folder parameters to indexConnector function - Accept folderId and folderName for Google Drive indexing - Pass folder parameters to backend API --- surfsense_web/hooks/use-search-source-connectors.ts | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/surfsense_web/hooks/use-search-source-connectors.ts b/surfsense_web/hooks/use-search-source-connectors.ts index 2f77d7d82..ee8ce5518 100644 --- a/surfsense_web/hooks/use-search-source-connectors.ts +++ b/surfsense_web/hooks/use-search-source-connectors.ts @@ -267,7 +267,9 @@ export const useSearchSourceConnectors = (lazy: boolean = false, searchSpaceId?: connectorId: number, searchSpaceId: string | number, startDate?: string, - endDate?: string + endDate?: string, + folderId?: string, + folderName?: string ) => { try { // Build query parameters @@ -280,6 +282,12 @@ export const useSearchSourceConnectors = (lazy: boolean = false, searchSpaceId?: if (endDate) { params.append("end_date", endDate); } + if (folderId) { + params.append("folder_id", folderId); + } + if (folderName) { + params.append("folder_name", folderName); + } const response = await authenticatedFetch( `${ From ad4d424d3815b35335c703975dedd561ceb7aadb Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 15:57:10 +0200 Subject: [PATCH 21/39] feat(ui): add Google Drive OAuth connection page - Handle OAuth flow similar to Gmail/Calendar - Show connection status and redirect to manage page - Display connector features and file type support - No folder selection at connection time (done at index time) --- .../add/google-drive-connector/page.tsx | 218 ++++++++++++++++++ 1 file changed, 218 insertions(+) create mode 100644 surfsense_web/app/dashboard/[search_space_id]/connectors/add/google-drive-connector/page.tsx diff --git a/surfsense_web/app/dashboard/[search_space_id]/connectors/add/google-drive-connector/page.tsx 
b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/google-drive-connector/page.tsx new file mode 100644 index 000000000..b9fb8d953 --- /dev/null +++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/add/google-drive-connector/page.tsx @@ -0,0 +1,218 @@ +"use client"; + +import { ArrowLeft, Check, ExternalLink, Loader2 } from "lucide-react"; +import { motion } from "motion/react"; +import Link from "next/link"; +import { useParams, useRouter, useSearchParams } from "next/navigation"; +import { useEffect, useState } from "react"; +import { toast } from "sonner"; +import { Button } from "@/components/ui/button"; +import { + Card, + CardContent, + CardDescription, + CardFooter, + CardHeader, + CardTitle, +} from "@/components/ui/card"; +import { EnumConnectorName } from "@/contracts/enums/connector"; +import { getConnectorIcon } from "@/contracts/enums/connectorIcons"; +import { + type SearchSourceConnector, + useSearchSourceConnectors, +} from "@/hooks/use-search-source-connectors"; +import { authenticatedFetch } from "@/lib/auth-utils"; + +export default function GoogleDriveConnectorPage() { + const router = useRouter(); + const params = useParams(); + const searchParams = useSearchParams(); + const searchSpaceId = params.search_space_id as string; + + const [isConnecting, setIsConnecting] = useState(false); + const [doesConnectorExist, setDoesConnectorExist] = useState(false); + + const { fetchConnectors } = useSearchSourceConnectors(true, Number.parseInt(searchSpaceId)); + + // Check if connector exists and handle OAuth success + useEffect(() => { + const success = searchParams.get("success"); + + fetchConnectors(Number.parseInt(searchSpaceId)).then((data) => { + const driveConnector = data.find( + (c: SearchSourceConnector) => c.connector_type === EnumConnectorName.GOOGLE_DRIVE_CONNECTOR + ); + + if (driveConnector) { + setDoesConnectorExist(true); + + // If just connected, show success and redirect + if (success === "true") { + toast.success("Google Drive connected successfully!"); + setTimeout(() => { + router.push(`/dashboard/${searchSpaceId}/connectors`); + }, 1500); + } + } + }); + }, [searchParams, fetchConnectors, searchSpaceId, router]); + + const handleConnectGoogle = async () => { + try { + setIsConnecting(true); + const response = await authenticatedFetch( + `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/auth/google/drive/connector/add/?space_id=${searchSpaceId}`, + { method: "GET" } + ); + + if (!response.ok) { + throw new Error("Failed to initiate Google OAuth"); + } + + const data = await response.json(); + window.location.href = data.auth_url; + } catch (error) { + console.error("Error connecting to Google:", error); + toast.error("Failed to connect to Google Drive"); + } finally { + setIsConnecting(false); + } + }; + + return ( +
+ + {/* Header */} +
+ + + Back to connectors + +
+
+ {getConnectorIcon(EnumConnectorName.GOOGLE_DRIVE_CONNECTOR, "h-6 w-6")} +
+
+

Connect Google Drive

+

+ Securely connect your Google Drive account +

+
+
+
+ + {/* Connection Card */} + {!doesConnectorExist ? ( + + + Connect Your Google Account + + Authorize read-only access to your Google Drive. You'll select which folder to + index when you start indexing. + + + +
+ + Read-only access to your Drive files +
+
+ + Index documents, spreadsheets, presentations, PDFs & more +
+
+ + Automatic updates with change tracking +
+
+ + Secure OAuth 2.0 authentication +
+
+ + + + +
+ ) : ( + + + ✅ Already Connected + + Your Google Drive connector is already set up. Go to the connectors page to + start indexing. + + + + + + + )} + + {/* Information Card */} + + + How Google Drive Integration Works + + +
+

1️⃣ Connect Your Account

+

+ First, securely connect your Google Drive account using OAuth 2.0. We only + request read-only access. +

+
+
+

2️⃣ Select Folder to Index

+

+ When you're ready to index, go to the connectors page and click "Index". You'll + choose which folder to process. +

+
+
+

3️⃣ Automatic Change Detection

+

+ We use Google Drive's change tracking API to detect when files are modified, + added, or deleted. Only changed files are re-indexed. +

+
+
+

📄 Comprehensive File Support

+

+ Supports Google Workspace files (Docs, Sheets, Slides), Microsoft Office + documents, PDFs, text files, images (with OCR), and more. +

+
+
+
+
+
+ ); +} From 5df04c3caa54573723c0a0158cebf6e6a4d2647c Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 15:57:18 +0200 Subject: [PATCH 22/39] feat(ui): add hierarchical Google Drive folder tree browser - Display folders and files with lazy loading - Show different icons for file types (docs, sheets, slides, etc) - Expandable folder tree with proper indentation - Selectable folders for indexing - Handle overflow with proper truncation - Full pagination support for large folder structures --- .../connectors/google-drive-folder-tree.tsx | 340 ++++++++++++++++++ 1 file changed, 340 insertions(+) create mode 100644 surfsense_web/components/connectors/google-drive-folder-tree.tsx diff --git a/surfsense_web/components/connectors/google-drive-folder-tree.tsx b/surfsense_web/components/connectors/google-drive-folder-tree.tsx new file mode 100644 index 000000000..22ef97556 --- /dev/null +++ b/surfsense_web/components/connectors/google-drive-folder-tree.tsx @@ -0,0 +1,340 @@ +"use client"; + +import { + ChevronDown, + ChevronRight, + File, + FileText, + Folder, + FolderOpen, + HardDrive, + Image, + Loader2, + Sheet, + Presentation, +} from "lucide-react"; +import { useState } from "react"; +import { Button } from "@/components/ui/button"; +import { ScrollArea } from "@/components/ui/scroll-area"; +import { cn } from "@/lib/utils"; +import { authenticatedFetch } from "@/lib/auth-utils"; + +interface DriveItem { + id: string; + name: string; + mimeType: string; + isFolder: boolean; + parents?: string[]; + size?: number; + iconLink?: string; +} + +interface ItemTreeNode { + item: DriveItem; + children: DriveItem[] | null; // null = not loaded, [] = loaded but empty + isExpanded: boolean; + isLoading: boolean; +} + +interface GoogleDriveFolderTreeProps { + connectorId: number; + selectedFolderId: string | null; + onSelectFolder: (folderId: string, folderName: string) => void; +} + +// Helper to get appropriate icon for file type +function getFileIcon(mimeType: string, className: string = "h-4 w-4") { + if (mimeType.includes("spreadsheet") || mimeType.includes("excel")) { + return ; + } + if (mimeType.includes("presentation") || mimeType.includes("powerpoint")) { + return ; + } + if (mimeType.includes("document") || mimeType.includes("word") || mimeType.includes("text")) { + return ; + } + if (mimeType.includes("image")) { + return ; + } + return ; +} + +// Helper to format file size +function formatFileSize(bytes: number | undefined): string { + if (!bytes) return ""; + if (bytes < 1024) return `${bytes} B`; + if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`; + if (bytes < 1024 * 1024 * 1024) return `${(bytes / (1024 * 1024)).toFixed(1)} MB`; + return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)} GB`; +} + +export function GoogleDriveFolderTree({ + connectorId, + selectedFolderId, + onSelectFolder, +}: GoogleDriveFolderTreeProps) { + const [rootItems, setRootItems] = useState([]); + const [itemStates, setItemStates] = useState>(new Map()); + const [isLoadingRoot, setIsLoadingRoot] = useState(false); + const [isInitialized, setIsInitialized] = useState(false); + + // Load root items (folders and files) on mount + const loadRootItems = async () => { + if (isInitialized) return; // Already loaded + + setIsLoadingRoot(true); + try { + const response = await authenticatedFetch( + `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/connectors/${connectorId}/google-drive/folders` + ); + if (!response.ok) throw new Error("Failed to load items"); + + const data = await 
response.json(); + setRootItems(data.items || []); + setIsInitialized(true); + } catch (error) { + console.error("Error loading root items:", error); + } finally { + setIsLoadingRoot(false); + } + }; + + // Helper function to find an item recursively through all loaded items + const findItem = (itemId: string): DriveItem | undefined => { + // First check if we have it in itemStates + const state = itemStates.get(itemId); + if (state?.item) return state.item; + + // Check root items + const rootItem = rootItems.find((item) => item.id === itemId); + if (rootItem) return rootItem; + + // Recursively search through all loaded children + for (const [, nodeState] of itemStates) { + if (nodeState.children) { + const found = nodeState.children.find((child) => child.id === itemId); + if (found) return found; + } + } + + return undefined; + }; + + // Load children (folders and files) for a specific folder + const loadFolderContents = async (folderId: string) => { + try { + // Set loading state + setItemStates((prev) => { + const newMap = new Map(prev); + const existing = newMap.get(folderId); + if (existing) { + newMap.set(folderId, { ...existing, isLoading: true }); + } else { + // First time loading this folder - create initial state + const item = findItem(folderId); + if (item) { + newMap.set(folderId, { + item, + children: null, + isExpanded: false, + isLoading: true, + }); + } + } + return newMap; + }); + + const response = await authenticatedFetch( + `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/connectors/${connectorId}/google-drive/folders?parent_id=${folderId}` + ); + if (!response.ok) throw new Error("Failed to load folder contents"); + + const data = await response.json(); + const items = data.items || []; + + // Check if folder only contains files (no subfolders) + const hasSubfolders = items.some((item: DriveItem) => item.isFolder); + + // Update item state with loaded children + setItemStates((prev) => { + const newMap = new Map(prev); + const existing = newMap.get(folderId); + const item = existing?.item || findItem(folderId); + + if (item) { + newMap.set(folderId, { + item, + children: items, + isExpanded: true, // Always expand after loading + isLoading: false, + }); + } else { + console.error(`Could not find item for folderId: ${folderId}`); + } + return newMap; + }); + } catch (error) { + console.error("Error loading folder contents:", error); + // Clear loading state on error + setItemStates((prev) => { + const newMap = new Map(prev); + const existing = newMap.get(folderId); + if (existing) { + newMap.set(folderId, { ...existing, isLoading: false }); + } + return newMap; + }); + } + }; + + // Toggle folder expansion + const toggleFolder = async (item: DriveItem) => { + if (!item.isFolder) return; // Only folders can be expanded + + const state = itemStates.get(item.id); + + if (!state || state.children === null) { + // First time expanding - load children + await loadFolderContents(item.id); + } else { + // Toggle expansion state + setItemStates((prev) => { + const newMap = new Map(prev); + newMap.set(item.id, { + ...state, + isExpanded: !state.isExpanded, + }); + return newMap; + }); + } + }; + + // Recursive render function for item tree + const renderItem = (item: DriveItem, level: number = 0) => { + const state = itemStates.get(item.id); + const isExpanded = state?.isExpanded || false; + const isLoading = state?.isLoading || false; + const children = state?.children; + const isSelected = selectedFolderId === item.id; + const isFolder = item.isFolder; + + // Separate 
folders and files for children + const childFolders = children?.filter((c) => c.isFolder) || []; + const childFiles = children?.filter((c) => !c.isFolder) || []; + + return ( +
+ + + {/* Render children if expanded (folders first, then files) */} + {isExpanded && isFolder && children && ( +
+ {/* Render folders first */} + {childFolders.map((child) => renderItem(child, level + 1))} + + {/* Render files */} + {childFiles.map((child) => renderItem(child, level + 1))} + + {/* Empty state */} + {children.length === 0 && ( +
+ Empty folder +
+ )} +
+ )} +
+ ); + }; + + // Initialize on first render + if (!isInitialized && !isLoadingRoot) { + loadRootItems(); + } + + return ( +
+ +
+ {/* My Drive Header (always visible, selectable) */} +
+ +
+ + {/* Loading indicator */} + {isLoadingRoot && ( +
+ +
+ )} + + {/* Root items (folders and files) - same level as Google Drive shows */} +
+ {!isLoadingRoot && rootItems.map((item) => renderItem(item, 0))} +
+ + {/* Empty state */} + {!isLoadingRoot && rootItems.length === 0 && ( +
+ No files or folders found in your Google Drive +
+ )} +
+
+
+ ); +} From c4a95ecc024ca9ef8b0f0705bb4200a7279d9aa4 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 15:57:26 +0200 Subject: [PATCH 23/39] feat(ui): integrate Google Drive folder selection into manage connectors page - Add folder selection dialog for Google Drive indexing - Hide date picker and quick index for Google Drive - Show folder tree browser in modal - Pass selected folder to indexing API - Adjust modal size to prevent overflow --- .../connectors/(manage)/page.tsx | 215 ++++++++++++++++-- 1 file changed, 190 insertions(+), 25 deletions(-) diff --git a/surfsense_web/app/dashboard/[search_space_id]/connectors/(manage)/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/connectors/(manage)/page.tsx index e2f219448..fd1f7da1d 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/connectors/(manage)/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/(manage)/page.tsx @@ -5,6 +5,8 @@ import { Calendar as CalendarIcon, Clock, Edit, + Folder, + HardDrive, Loader2, Plus, RefreshCw, @@ -61,6 +63,13 @@ import { EnumConnectorName } from "@/contracts/enums/connector"; import { getConnectorIcon } from "@/contracts/enums/connectorIcons"; import { useSearchSourceConnectors } from "@/hooks/use-search-source-connectors"; import { cn } from "@/lib/utils"; +import { authenticatedFetch } from "@/lib/auth-utils"; +import { GoogleDriveFolderTree } from "@/components/connectors/google-drive-folder-tree"; + +interface DriveFolder { + id: string; + name: string; +} export default function ConnectorsPage() { const t = useTranslations("connectors"); @@ -105,6 +114,13 @@ export default function ConnectorsPage() { const [customFrequency, setCustomFrequency] = useState(""); const [isSavingPeriodic, setIsSavingPeriodic] = useState(false); + // Google Drive folder selection state + const [driveFolderDialogOpen, setDriveFolderDialogOpen] = useState(false); + const [driveFolders, setDriveFolders] = useState([]); + const [selectedFolderId, setSelectedFolderId] = useState(""); + const [selectedFolderName, setSelectedFolderName] = useState(""); + const [isLoadingFolders, setIsLoadingFolders] = useState(false); + useEffect(() => { if (error) { toast.error(t("failed_load")); @@ -129,8 +145,78 @@ export default function ConnectorsPage() { // Handle opening date picker for indexing const handleOpenDatePicker = (connectorId: number) => { + // Check if this is a Google Drive connector + const connector = connectors.find((c) => c.id === connectorId); + if (connector?.connector_type === EnumConnectorName.GOOGLE_DRIVE_CONNECTOR) { + // Open folder selection dialog for Google Drive + handleOpenDriveFolderDialog(connectorId); + } else { + // Open date picker for other connectors + setSelectedConnectorForIndexing(connectorId); + setDatePickerOpen(true); + } + }; + + // Handle opening Google Drive folder selection dialog + const handleOpenDriveFolderDialog = async (connectorId: number) => { setSelectedConnectorForIndexing(connectorId); - setDatePickerOpen(true); + setDriveFolderDialogOpen(true); + setIsLoadingFolders(true); + + try { + const response = await authenticatedFetch( + `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/connectors/${connectorId}/google-drive/folders`, + { method: "GET" } + ); + + if (!response.ok) { + throw new Error("Failed to load folders"); + } + + const data = await response.json(); + setDriveFolders(data.folders || []); + } catch (error) { + console.error("Error loading folders:", error); + toast.error("Failed to load Google Drive folders"); + 
setDriveFolderDialogOpen(false); + } finally { + setIsLoadingFolders(false); + } + }; + + // Handle Google Drive folder indexing + const handleIndexDriveFolder = async () => { + if (selectedConnectorForIndexing === null || !selectedFolderId) { + toast.error("Please select a folder"); + return; + } + + setDriveFolderDialogOpen(false); + + try { + setIndexingConnectorId(selectedConnectorForIndexing); + const selectedFolder = driveFolders.find((f) => f.id === selectedFolderId); + const folderName = selectedFolder?.name || "Selected Folder"; + + // Call indexConnector with folder_id and folder_name as query params + await indexConnector( + selectedConnectorForIndexing, + searchSpaceId, + undefined, + undefined, + selectedFolderId, + folderName + ); + toast.success(t("indexing_started")); + } catch (error) { + console.error("Error indexing connector content:", error); + toast.error(error instanceof Error ? error.message : t("indexing_failed")); + } finally { + setIndexingConnectorId(null); + setSelectedConnectorForIndexing(null); + setSelectedFolderId(""); + setDriveFolders([]); + } }; // Handle connector indexing with dates @@ -361,39 +447,52 @@ export default function ConnectorsPage() { > {indexingConnectorId === connector.id ? ( + ) : connector.connector_type === EnumConnectorName.GOOGLE_DRIVE_CONNECTOR ? ( + ) : ( )} - {t("index_date_range")} + + {connector.connector_type === EnumConnectorName.GOOGLE_DRIVE_CONNECTOR + ? "Select folder to index" + : t("index_date_range")} + -

{t("index_date_range")}

-
- - - - - - - - -

{t("quick_index_auto")}

+

+ {connector.connector_type === EnumConnectorName.GOOGLE_DRIVE_CONNECTOR + ? "Select folder to index" + : t("index_date_range")} +

+ {/* Hide quick index button for Google Drive (requires folder selection) */} + {connector.connector_type !== EnumConnectorName.GOOGLE_DRIVE_CONNECTOR && ( + + + + + + +

{t("quick_index_auto")}

+
+
+
+ )} )} {connector.is_indexable && ( @@ -581,6 +680,72 @@ export default function ConnectorsPage() { + {/* Google Drive Folder Selection Dialog */} + + + + Select Google Drive Folder + + Browse and select a folder to index. Click folders to expand and see subfolders. + + +
+
+ + {selectedConnectorForIndexing && ( + { + setSelectedFolderId(folderId); + setSelectedFolderName(folderName); + }} + /> + )} +

+ Changes to files in this folder will be automatically detected and re-indexed. +

+
+ {selectedFolderId && selectedFolderName && ( +
+
+

Selected folder:

+

+ {selectedFolderName} +

+
+
+

What will be indexed:

+
    +
  • Google Docs, Sheets, Slides (as PDFs)
  • +
  • PDFs, Word, Excel, PowerPoint files
  • +
  • Text files, markdown, code files
  • +
  • Images (with OCR if enabled)
  • +
+
+
+ )} +
+ + + + +
+
+ {/* Periodic Indexing Configuration Dialog */} From e0edfef5fcce0d40e09505dd871a4f44bf7dad4a Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 16:48:34 +0200 Subject: [PATCH 24/39] feat(ui): add multiple folder selection with checkboxes to Google Drive tree - Replace single folder selection with multi-select checkboxes - Remove cascading auto-select for clearer UX - Each folder must be selected individually - Visual indicators for selected folders --- .../connectors/google-drive-folder-tree.tsx | 118 +++++++++++------- 1 file changed, 72 insertions(+), 46 deletions(-) diff --git a/surfsense_web/components/connectors/google-drive-folder-tree.tsx b/surfsense_web/components/connectors/google-drive-folder-tree.tsx index 22ef97556..793fdc750 100644 --- a/surfsense_web/components/connectors/google-drive-folder-tree.tsx +++ b/surfsense_web/components/connectors/google-drive-folder-tree.tsx @@ -15,6 +15,7 @@ import { } from "lucide-react"; import { useState } from "react"; import { Button } from "@/components/ui/button"; +import { Checkbox } from "@/components/ui/checkbox"; import { ScrollArea } from "@/components/ui/scroll-area"; import { cn } from "@/lib/utils"; import { authenticatedFetch } from "@/lib/auth-utils"; @@ -36,10 +37,15 @@ interface ItemTreeNode { isLoading: boolean; } +interface SelectedFolder { + id: string; + name: string; +} + interface GoogleDriveFolderTreeProps { connectorId: number; - selectedFolderId: string | null; - onSelectFolder: (folderId: string, folderName: string) => void; + selectedFolders: SelectedFolder[]; + onSelectFolders: (folders: SelectedFolder[]) => void; } // Helper to get appropriate icon for file type @@ -59,25 +65,32 @@ function getFileIcon(mimeType: string, className: string = "h-4 w-4") { return ; } -// Helper to format file size -function formatFileSize(bytes: number | undefined): string { - if (!bytes) return ""; - if (bytes < 1024) return `${bytes} B`; - if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`; - if (bytes < 1024 * 1024 * 1024) return `${(bytes / (1024 * 1024)).toFixed(1)} MB`; - return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)} GB`; -} - export function GoogleDriveFolderTree({ connectorId, - selectedFolderId, - onSelectFolder, + selectedFolders, + onSelectFolders, }: GoogleDriveFolderTreeProps) { const [rootItems, setRootItems] = useState([]); const [itemStates, setItemStates] = useState>(new Map()); const [isLoadingRoot, setIsLoadingRoot] = useState(false); const [isInitialized, setIsInitialized] = useState(false); + // Helper to check if a folder is selected + const isFolderSelected = (folderId: string): boolean => { + return selectedFolders.some((f) => f.id === folderId); + }; + + // Handle folder checkbox toggle + const toggleFolderSelection = (folderId: string, folderName: string) => { + if (isFolderSelected(folderId)) { + // Remove from selection + onSelectFolders(selectedFolders.filter((f) => f.id !== folderId)); + } else { + // Add to selection + onSelectFolders([...selectedFolders, { id: folderId, name: folderName }]); + } + }; + // Load root items (folders and files) on mount const loadRootItems = async () => { if (isInitialized) return; // Already loaded @@ -215,7 +228,7 @@ export function GoogleDriveFolderTree({ const isExpanded = state?.isExpanded || false; const isLoading = state?.isLoading || false; const children = state?.children; - const isSelected = selectedFolderId === item.id; + const isSelected = isFolderSelected(item.id); const isFolder = item.isFolder; // Separate folders and files 
for children @@ -224,15 +237,13 @@ export function GoogleDriveFolderTree({ return (
- + isFolder && toggleFolder(item)} + > + {item.name} + +
{/* Render children if expanded (folders first, then files) */} {isExpanded && isFolder && children && (
{/* Render folders first */} {childFolders.map((child) => renderItem(child, level + 1))} - + {/* Render files */} {childFiles.map((child) => renderItem(child, level + 1))} - + {/* Empty state */} {children.length === 0 && ( -
- Empty folder -
+
Empty folder
)}
)} @@ -302,17 +328,17 @@ export function GoogleDriveFolderTree({
{/* My Drive Header (always visible, selectable) */}
- + toggleFolderSelection("root", "My Drive")}> + My Drive + +
{/* Loading indicator */} From 27a4bcdfc20466f936c0e4a3cf608264aa89b0f4 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 16:48:56 +0200 Subject: [PATCH 25/39] feat(ui): support multiple folder selection in Google Drive indexing - Update manage page to handle array of selected folders - Add info icon with clear description about folder-level indexing - Display list of all selected folders before indexing - Remove unnecessary file type details section - Pass comma-separated folder IDs and names to backend --- .../connectors/(manage)/page.tsx | 128 +++++++++--------- 1 file changed, 61 insertions(+), 67 deletions(-) diff --git a/surfsense_web/app/dashboard/[search_space_id]/connectors/(manage)/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/connectors/(manage)/page.tsx index fd1f7da1d..bbbfd61e0 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/connectors/(manage)/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/(manage)/page.tsx @@ -7,6 +7,7 @@ import { Edit, Folder, HardDrive, + Info, Loader2, Plus, RefreshCw, @@ -117,8 +118,7 @@ export default function ConnectorsPage() { // Google Drive folder selection state const [driveFolderDialogOpen, setDriveFolderDialogOpen] = useState(false); const [driveFolders, setDriveFolders] = useState([]); - const [selectedFolderId, setSelectedFolderId] = useState(""); - const [selectedFolderName, setSelectedFolderName] = useState(""); + const [selectedFolders, setSelectedFolders] = useState>([]); const [isLoadingFolders, setIsLoadingFolders] = useState(false); useEffect(() => { @@ -186,8 +186,8 @@ export default function ConnectorsPage() { // Handle Google Drive folder indexing const handleIndexDriveFolder = async () => { - if (selectedConnectorForIndexing === null || !selectedFolderId) { - toast.error("Please select a folder"); + if (selectedConnectorForIndexing === null || selectedFolders.length === 0) { + toast.error("Please select at least one folder"); return; } @@ -195,28 +195,26 @@ export default function ConnectorsPage() { try { setIndexingConnectorId(selectedConnectorForIndexing); - const selectedFolder = driveFolders.find((f) => f.id === selectedFolderId); - const folderName = selectedFolder?.name || "Selected Folder"; - // Call indexConnector with folder_id and folder_name as query params + // Call indexConnector with folder_ids and folder_names as query params await indexConnector( selectedConnectorForIndexing, searchSpaceId, undefined, undefined, - selectedFolderId, - folderName + selectedFolders.map((f) => f.id).join(","), + selectedFolders.map((f) => f.name).join(", ") ); toast.success(t("indexing_started")); } catch (error) { console.error("Error indexing connector content:", error); toast.error(error instanceof Error ? error.message : t("indexing_failed")); } finally { - setIndexingConnectorId(null); - setSelectedConnectorForIndexing(null); - setSelectedFolderId(""); - setDriveFolders([]); - } + setIndexingConnectorId(null); + setSelectedConnectorForIndexing(null); + setSelectedFolders([]); + setDriveFolders([]); + } }; // Handle connector indexing with dates @@ -683,66 +681,62 @@ export default function ConnectorsPage() { {/* Google Drive Folder Selection Dialog */} - - Select Google Drive Folder - - Browse and select a folder to index. Click folders to expand and see subfolders. - - -
+ + Select Google Drive Folders + + + + Select folders to index. Only files directly in each folder will be + processed—subfolders must be selected separately. + + + +
- {selectedConnectorForIndexing && ( - { - setSelectedFolderId(folderId); - setSelectedFolderName(folderName); - }} - /> - )} -

- Changes to files in this folder will be automatically detected and re-indexed. -

-
- {selectedFolderId && selectedFolderName && ( -
-
-

Selected folder:

-

- {selectedFolderName} -

-
-
-

What will be indexed:

-
    -
  • Google Docs, Sheets, Slides (as PDFs)
  • -
  • PDFs, Word, Excel, PowerPoint files
  • -
  • Text files, markdown, code files
  • -
  • Images (with OCR if enabled)
  • -
-
-
+ {selectedConnectorForIndexing && ( + { + setSelectedFolders(folders); + }} + /> )}
+ {selectedFolders.length > 0 && ( +
+
+

+ Selected {selectedFolders.length} folder{selectedFolders.length > 1 ? "s" : ""}: +

+
+ {selectedFolders.map((folder) => ( +

+ • {folder.name} +

+ ))} +
+
+
+ )} +
- - + onClick={() => { + setDriveFolderDialogOpen(false); + setSelectedConnectorForIndexing(null); + setSelectedFolders([]); + setDriveFolders([]); + }} + > + {tCommon("cancel")} + + +
From 634eeb887e35ebc173c2de43e255d0d3739021e1 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 16:49:20 +0200 Subject: [PATCH 26/39] feat(routes): support multiple Google Drive folder indexing - Accept comma-separated folder_ids and folder_names - Loop through each folder and index sequentially - Collect total indexed count and errors - Update timestamp only on full success --- .../routes/search_source_connectors_routes.py | 56 +++++++++++++------ 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py index d530163f4..af1f18513 100644 --- a/surfsense_backend/app/routes/search_source_connectors_routes.py +++ b/surfsense_backend/app/routes/search_source_connectors_routes.py @@ -1548,35 +1548,55 @@ async def run_google_drive_indexing( connector_id: int, search_space_id: int, user_id: str, - folder_id: str, - folder_name: str, + folder_ids: str, # Comma-separated folder IDs + folder_names: str, # Comma-separated folder names ): - """Runs the Google Drive indexing task and updates the timestamp.""" + """Runs the Google Drive indexing task for multiple folders and updates the timestamp.""" try: from app.tasks.connector_indexers.google_drive_indexer import ( index_google_drive_files, ) - indexed_count, error_message = await index_google_drive_files( - session, - connector_id, - search_space_id, - user_id, - folder_id, - folder_name, - use_delta_sync=True, - update_last_indexed=False, - ) - if error_message: + # Split comma-separated IDs and names into lists + folder_id_list = [fid.strip() for fid in folder_ids.split(",")] + folder_name_list = [fname.strip() for fname in folder_names.split(",")] + + total_indexed = 0 + errors = [] + + # Index each folder + for folder_id, folder_name in zip(folder_id_list, folder_name_list): + try: + indexed_count, error_message = await index_google_drive_files( + session, + connector_id, + search_space_id, + user_id, + folder_id, + folder_name, + use_delta_sync=True, + update_last_indexed=False, + ) + if error_message: + errors.append(f"{folder_name}: {error_message}") + else: + total_indexed += indexed_count + except Exception as e: + errors.append(f"{folder_name}: {str(e)}") + logger.error( + f"Error indexing folder {folder_name} ({folder_id}): {e}", + exc_info=True, + ) + + if errors: logger.error( - f"Google Drive indexing failed for connector {connector_id}: {error_message}" + f"Google Drive indexing completed with errors for connector {connector_id}: {'; '.join(errors)}" ) - # Optionally update status in DB to indicate failure else: logger.info( - f"Google Drive indexing successful for connector {connector_id}. Indexed {indexed_count} documents." + f"Google Drive indexing successful for connector {connector_id}. Indexed {total_indexed} documents from {len(folder_id_list)} folder(s)." 
) - # Update the last indexed timestamp only on success + # Update the last indexed timestamp only on full success await update_connector_last_indexed(session, connector_id) await session.commit() # Commit timestamp update except Exception as e: From c9815fd6fb78037629409dd25673807122514dc4 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 16:49:47 +0200 Subject: [PATCH 27/39] feat(celery): update Google Drive task for multiple folders - Accept comma-separated folder_ids and folder_names parameters - Pass through to indexing function for batch processing --- .../app/tasks/celery_tasks/connector_tasks.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/surfsense_backend/app/tasks/celery_tasks/connector_tasks.py b/surfsense_backend/app/tasks/celery_tasks/connector_tasks.py index 8e507915f..44f57d464 100644 --- a/surfsense_backend/app/tasks/celery_tasks/connector_tasks.py +++ b/surfsense_backend/app/tasks/celery_tasks/connector_tasks.py @@ -479,10 +479,10 @@ def index_google_drive_files_task( connector_id: int, search_space_id: int, user_id: str, - folder_id: str, - folder_name: str, + folder_ids: str, # Comma-separated folder IDs + folder_names: str, # Comma-separated folder names ): - """Celery task to index Google Drive files.""" + """Celery task to index Google Drive files from multiple folders.""" import asyncio loop = asyncio.new_event_loop() @@ -494,8 +494,8 @@ def index_google_drive_files_task( connector_id, search_space_id, user_id, - folder_id, - folder_name, + folder_ids, + folder_names, ) ) finally: @@ -506,10 +506,10 @@ async def _index_google_drive_files( connector_id: int, search_space_id: int, user_id: str, - folder_id: str, - folder_name: str, + folder_ids: str, # Comma-separated folder IDs + folder_names: str, # Comma-separated folder names ): - """Index Google Drive files with new session.""" + """Index Google Drive files from multiple folders with new session.""" from app.routes.search_source_connectors_routes import ( run_google_drive_indexing, ) @@ -520,8 +520,8 @@ async def _index_google_drive_files( connector_id, search_space_id, user_id, - folder_id, - folder_name, + folder_ids, + folder_names, ) From 9f1fd20944d46a9475ec68b826addcfb3ce61f6c Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 16:55:14 +0200 Subject: [PATCH 28/39] feat(connectors): mark Google Drive documents with GOOGLE_DRIVE_CONNECTOR type - Change document_type from file type (PDF, DOCX) to GOOGLE_DRIVE_CONNECTOR - Store original file type in metadata for reference - Add Google Drive specific metadata (file_id, mime_type, source) - Include export format info for Google Workspace files - Enables proper source tracking and bulk management --- .../google_drive/content_extractor.py | 34 +++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/surfsense_backend/app/connectors/google_drive/content_extractor.py b/surfsense_backend/app/connectors/google_drive/content_extractor.py index 82b8d42b3..88aca8f46 100644 --- a/surfsense_backend/app/connectors/google_drive/content_extractor.py +++ b/surfsense_backend/app/connectors/google_drive/content_extractor.py @@ -94,7 +94,7 @@ async def download_and_process_file( ) logger.info(f"Processing {file_name} with Surfsense's file processor") - result = await process_file_in_background( + document = await process_file_in_background( file_path=temp_file_path, filename=file_name, search_space_id=search_space_id, @@ -104,8 +104,38 @@ async def download_and_process_file( 
log_entry=log_entry, ) + # Step 3: Update document type to GOOGLE_DRIVE_CONNECTOR and add metadata + if document: + from app.db import DocumentType + + # Store original file type in metadata before changing document_type + original_type = document.document_type + + # Update document type to mark it as from Google Drive + document.document_type = DocumentType.GOOGLE_DRIVE_CONNECTOR + + # Add Google Drive specific metadata + if not document.metadata: + document.metadata = {} + + document.metadata.update({ + "google_drive_file_id": file_id, + "google_drive_file_name": file_name, + "google_drive_mime_type": mime_type, + "original_document_type": original_type, + "source_connector": "google_drive", + }) + + # If it was a Google Workspace file, note the export format + if is_google_workspace_file(mime_type): + document.metadata["exported_as"] = "pdf" + document.metadata["original_workspace_type"] = mime_type.split(".")[-1] # e.g., "document", "spreadsheet" + + await session.flush() # Persist the changes + logger.info(f"Updated document type to GOOGLE_DRIVE_CONNECTOR for {file_name}") + # process_file_in_background returns None on duplicate/error, Document on success - return result, None + return document, None except Exception as e: logger.warning(f"Failed to process {file_name}: {e!s}") From b2b891e4d746b0d2add1f7f3bf0fb6f341e9ee85 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 17:15:29 +0200 Subject: [PATCH 29/39] fix(connectors): properly commit Google Drive document type changes - Return file metadata from content_extractor for indexer to use - Update document type and metadata in indexer after processing - Explicitly commit changes to database - Ensures documents are properly marked as GOOGLE_DRIVE_CONNECTOR type --- .../google_drive/content_extractor.py | 55 +++++++------------ .../google_drive_indexer.py | 26 ++++++++- 2 files changed, 44 insertions(+), 37 deletions(-) diff --git a/surfsense_backend/app/connectors/google_drive/content_extractor.py b/surfsense_backend/app/connectors/google_drive/content_extractor.py index 88aca8f46..005e7b0ae 100644 --- a/surfsense_backend/app/connectors/google_drive/content_extractor.py +++ b/surfsense_backend/app/connectors/google_drive/content_extractor.py @@ -29,7 +29,7 @@ async def download_and_process_file( session: AsyncSession, task_logger: TaskLoggingService, log_entry: Log, -) -> tuple[Any, str | None]: +) -> tuple[Any, str | None, dict[str, Any] | None]: """ Download Google Drive file and process using Surfsense's existing infrastructure. 
@@ -45,7 +45,7 @@ async def download_and_process_file( log_entry: Log entry for tracking Returns: - Tuple of (Document object if successful, error message if failed) + Tuple of (Document object if successful, error message if failed, file metadata dict) """ file_id = file.get("id") file_name = file.get("name", "Unknown") @@ -53,7 +53,7 @@ async def download_and_process_file( # Skip folders and shortcuts if should_skip_file(mime_type): - return None, f"Skipping {mime_type}" + return None, f"Skipping {mime_type}", None logger.info(f"Downloading file: {file_name} ({mime_type})") @@ -104,42 +104,27 @@ async def download_and_process_file( log_entry=log_entry, ) - # Step 3: Update document type to GOOGLE_DRIVE_CONNECTOR and add metadata - if document: - from app.db import DocumentType - - # Store original file type in metadata before changing document_type - original_type = document.document_type - - # Update document type to mark it as from Google Drive - document.document_type = DocumentType.GOOGLE_DRIVE_CONNECTOR - - # Add Google Drive specific metadata - if not document.metadata: - document.metadata = {} - - document.metadata.update({ - "google_drive_file_id": file_id, - "google_drive_file_name": file_name, - "google_drive_mime_type": mime_type, - "original_document_type": original_type, - "source_connector": "google_drive", - }) - - # If it was a Google Workspace file, note the export format - if is_google_workspace_file(mime_type): - document.metadata["exported_as"] = "pdf" - document.metadata["original_workspace_type"] = mime_type.split(".")[-1] # e.g., "document", "spreadsheet" - - await session.flush() # Persist the changes - logger.info(f"Updated document type to GOOGLE_DRIVE_CONNECTOR for {file_name}") - + # Note: Document type update happens in the indexer after this returns + # to ensure proper session management and commit timing + + # Prepare file metadata for the indexer to use + file_metadata = { + "google_drive_file_id": file_id, + "google_drive_file_name": file_name, + "google_drive_mime_type": mime_type, + } + + # If it was a Google Workspace file, note the export format + if is_google_workspace_file(mime_type): + file_metadata["exported_as"] = "pdf" + file_metadata["original_workspace_type"] = mime_type.split(".")[-1] # e.g., "document", "spreadsheet" + # process_file_in_background returns None on duplicate/error, Document on success - return document, None + return document, None, file_metadata except Exception as e: logger.warning(f"Failed to process {file_name}: {e!s}") - return None, str(e) + return None, str(e), None finally: # Cleanup temp file (if process_file_in_background didn't already delete it) diff --git a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py index 9c4d446de..9ed295424 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py @@ -388,7 +388,7 @@ async def _process_single_file( # Download and process using Surfsense's existing infrastructure # This handles: markdown, audio, PDFs, Office docs, images, etc. 
# It also handles: deduplication, chunking, summarization, embedding - document, error = await download_and_process_file( + document, error, file_metadata = await download_and_process_file( client=drive_client, file=file, search_space_id=search_space_id, @@ -407,7 +407,28 @@ async def _process_single_file( ) return 0, 1 - if document: + if document and file_metadata: + # Update document type to GOOGLE_DRIVE_CONNECTOR and add metadata + original_type = document.document_type + document.document_type = DocumentType.GOOGLE_DRIVE_CONNECTOR + + # Add Google Drive specific metadata + if not document.metadata: + document.metadata = {} + + document.metadata.update({ + **file_metadata, + "original_document_type": original_type, + "source_connector": "google_drive", + }) + + # Commit the document type and metadata changes + await session.commit() + + logger.info( + f"Updated document {document.id} to GOOGLE_DRIVE_CONNECTOR type with metadata" + ) + # Successfully indexed await task_logger.log_task_progress( log_entry, @@ -416,6 +437,7 @@ async def _process_single_file( "status": "indexed", "document_id": document.id, "file_name": file_name, + "document_type": DocumentType.GOOGLE_DRIVE_CONNECTOR, }, ) return 1, 0 From 8da58be9e01406161b99d73bc6521b0f45511f16 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 17:21:44 +0200 Subject: [PATCH 30/39] fix(connectors): refresh document from DB before updating type - Query document from database to ensure it's attached to session - Prevents detached instance errors after process_file_in_background commits - Properly updates document_type and metadata with session management --- .../connector_indexers/google_drive_indexer.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py index 9ed295424..190792f1a 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py @@ -408,6 +408,20 @@ async def _process_single_file( return 0, 1 if document and file_metadata: + # Refresh document from database to ensure it's attached to session + from app.db import Document + from sqlalchemy import select + + # Get fresh document from database + result = await session.execute( + select(Document).where(Document.id == document.id) + ) + document = result.scalar_one_or_none() + + if not document: + logger.error(f"Could not find document {document.id} in database") + return 0, 1 + # Update document type to GOOGLE_DRIVE_CONNECTOR and add metadata original_type = document.document_type document.document_type = DocumentType.GOOGLE_DRIVE_CONNECTOR From a5935bc6775d13e9c321e49a0ef6809012042f1a Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 18:01:39 +0200 Subject: [PATCH 31/39] feat(connectors): add connector parameter to file processor for source tracking - Add optional 'connector' parameter with 'type' and 'metadata' fields - Create helper function _update_document_from_connector - Use document_metadata column (not metadata) for JSON field - Merge metadata with existing using dict spread operator - Google Drive documents now marked as GOOGLE_DRIVE_CONNECTOR - Backward compatible - no changes to existing logic - Simple and clean implementation --- .../google_drive/content_extractor.py | 39 +++++++------ .../google_drive_indexer.py | 58 ++----------------- .../document_processors/file_processors.py | 34 
+++++++++++ 3 files changed, 60 insertions(+), 71 deletions(-) diff --git a/surfsense_backend/app/connectors/google_drive/content_extractor.py b/surfsense_backend/app/connectors/google_drive/content_extractor.py index 005e7b0ae..04c48f47f 100644 --- a/surfsense_backend/app/connectors/google_drive/content_extractor.py +++ b/surfsense_backend/app/connectors/google_drive/content_extractor.py @@ -92,9 +92,26 @@ async def download_and_process_file( from app.tasks.document_processors.file_processors import ( process_file_in_background, ) + from app.db import DocumentType + + # Prepare connector info + connector_info = { + "type": DocumentType.GOOGLE_DRIVE_CONNECTOR, + "metadata": { + "google_drive_file_id": file_id, + "google_drive_file_name": file_name, + "google_drive_mime_type": mime_type, + "source_connector": "google_drive", + }, + } + + # If it was a Google Workspace file, note the export format + if is_google_workspace_file(mime_type): + connector_info["metadata"]["exported_as"] = "pdf" + connector_info["metadata"]["original_workspace_type"] = mime_type.split(".")[-1] logger.info(f"Processing {file_name} with Surfsense's file processor") - document = await process_file_in_background( + await process_file_in_background( file_path=temp_file_path, filename=file_name, search_space_id=search_space_id, @@ -102,25 +119,11 @@ async def download_and_process_file( session=session, task_logger=task_logger, log_entry=log_entry, + connector=connector_info, # Pass connector info ) - # Note: Document type update happens in the indexer after this returns - # to ensure proper session management and commit timing - - # Prepare file metadata for the indexer to use - file_metadata = { - "google_drive_file_id": file_id, - "google_drive_file_name": file_name, - "google_drive_mime_type": mime_type, - } - - # If it was a Google Workspace file, note the export format - if is_google_workspace_file(mime_type): - file_metadata["exported_as"] = "pdf" - file_metadata["original_workspace_type"] = mime_type.split(".")[-1] # e.g., "document", "spreadsheet" - - # process_file_in_background returns None on duplicate/error, Document on success - return document, None, file_metadata + # process_file_in_background doesn't return the document + return None, None, connector_info["metadata"] except Exception as e: logger.warning(f"Failed to process {file_name}: {e!s}") diff --git a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py index 190792f1a..a2899853e 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py @@ -388,7 +388,8 @@ async def _process_single_file( # Download and process using Surfsense's existing infrastructure # This handles: markdown, audio, PDFs, Office docs, images, etc. 
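    # Sketch of the connector payload that process_file_in_background now applies via
    # _update_document_from_connector (shape as defined in content_extractor.py above;
    # the metadata values here are placeholders, not real Drive IDs):
    #
    #     connector_info = {
    #         "type": DocumentType.GOOGLE_DRIVE_CONNECTOR,
    #         "metadata": {
    #             "google_drive_file_id": "<drive-file-id>",
    #             "google_drive_file_name": "report.pdf",
    #             "google_drive_mime_type": "application/pdf",
    #             "source_connector": "google_drive",
    #         },
    #     }
    #     await process_file_in_background(..., connector=connector_info)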
# It also handles: deduplication, chunking, summarization, embedding - document, error, file_metadata = await download_and_process_file( + # Document type is set to GOOGLE_DRIVE_CONNECTOR during processing + _, error, _ = await download_and_process_file( client=drive_client, file=file, search_space_id=search_space_id, @@ -407,58 +408,9 @@ async def _process_single_file( ) return 0, 1 - if document and file_metadata: - # Refresh document from database to ensure it's attached to session - from app.db import Document - from sqlalchemy import select - - # Get fresh document from database - result = await session.execute( - select(Document).where(Document.id == document.id) - ) - document = result.scalar_one_or_none() - - if not document: - logger.error(f"Could not find document {document.id} in database") - return 0, 1 - - # Update document type to GOOGLE_DRIVE_CONNECTOR and add metadata - original_type = document.document_type - document.document_type = DocumentType.GOOGLE_DRIVE_CONNECTOR - - # Add Google Drive specific metadata - if not document.metadata: - document.metadata = {} - - document.metadata.update({ - **file_metadata, - "original_document_type": original_type, - "source_connector": "google_drive", - }) - - # Commit the document type and metadata changes - await session.commit() - - logger.info( - f"Updated document {document.id} to GOOGLE_DRIVE_CONNECTOR type with metadata" - ) - - # Successfully indexed - await task_logger.log_task_progress( - log_entry, - f"Successfully indexed: {file_name}", - { - "status": "indexed", - "document_id": document.id, - "file_name": file_name, - "document_type": DocumentType.GOOGLE_DRIVE_CONNECTOR, - }, - ) - return 1, 0 - else: - # Likely a duplicate or unsupported type - logger.info(f"No document created for {file_name} (duplicate or unsupported)") - return 0, 1 + # File was processed successfully (document type already set in processor) + logger.info(f"Successfully indexed Google Drive file: {file_name}") + return 1, 0 except Exception as e: logger.error(f"Error processing file {file_name}: {e!s}", exc_info=True) diff --git a/surfsense_backend/app/tasks/document_processors/file_processors.py b/surfsense_backend/app/tasks/document_processors/file_processors.py index a32e75a32..61f484ae1 100644 --- a/surfsense_backend/app/tasks/document_processors/file_processors.py +++ b/surfsense_backend/app/tasks/document_processors/file_processors.py @@ -447,6 +447,24 @@ async def add_received_file_document_using_docling( ) from e +async def _update_document_from_connector( + document: Document | None, connector: dict | None, session: AsyncSession +) -> None: + """Helper to update document type and metadata from connector info.""" + if document and connector: + if "type" in connector: + document.document_type = connector["type"] + if "metadata" in connector: + # Merge with existing document_metadata (the actual column name) + if not document.document_metadata: + document.document_metadata = connector["metadata"] + else: + # Expand existing metadata with connector metadata + merged = {**document.document_metadata, **connector["metadata"]} + document.document_metadata = merged + await session.commit() + + async def process_file_in_background( file_path: str, filename: str, @@ -455,6 +473,7 @@ async def process_file_in_background( session: AsyncSession, task_logger: TaskLoggingService, log_entry: Log, + connector: dict | None = None, # Optional: {"type": "GOOGLE_DRIVE_CONNECTOR", "metadata": {...}} ): try: # Check if the file is a markdown or text file @@ -492,6 
+511,9 @@ async def process_file_in_background( session, filename, markdown_content, search_space_id, user_id ) + # Update from connector if provided + await _update_document_from_connector(result, connector, session) + if result: await task_logger.log_task_success( log_entry, @@ -608,6 +630,9 @@ async def process_file_in_background( session, filename, transcribed_text, search_space_id, user_id ) + # Update from connector if provided + await _update_document_from_connector(result, connector, session) + if result: await task_logger.log_task_success( log_entry, @@ -753,6 +778,9 @@ async def process_file_in_background( session, filename, docs, search_space_id, user_id ) + # Update from connector if provided + await _update_document_from_connector(result, connector, session) + if result: # Update page usage after successful processing # allow_exceed=True because document was already created after passing initial check @@ -897,6 +925,9 @@ async def process_file_in_background( user_id, final_page_count, allow_exceed=True ) + # Update from connector if provided + await _update_document_from_connector(last_created_doc, connector, session) + await task_logger.log_task_success( log_entry, f"Successfully processed file with LlamaCloud: {filename}", @@ -1021,6 +1052,9 @@ async def process_file_in_background( user_id, final_page_count, allow_exceed=True ) + # Update from connector if provided + await _update_document_from_connector(doc_result, connector, session) + await task_logger.log_task_success( log_entry, f"Successfully processed file with Docling: {filename}", From 506a9297a90c6fcf64a983a8b9d850c9398ad7dc Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 18:32:59 +0200 Subject: [PATCH 32/39] fix(connectors): track delta sync tokens per folder for Google Drive - Store tokens in folder_tokens dict instead of single global token - Each folder now tracks its own sync state independently - Fixes issue where indexing folder 2 incorrectly used delta sync after folder 1 was indexed - First-time indexing now correctly uses full scan for each new folder --- .../tasks/connector_indexers/google_drive_indexer.py | 10 +++++++--- surfsense_web/contracts/types/document.types.ts | 1 + 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py index a2899853e..335c3b41d 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py @@ -112,8 +112,9 @@ async def index_google_drive_files( logger.info(f"Indexing Google Drive folder: {target_folder_name} ({target_folder_id})") - # Decide sync strategy - start_page_token = connector.config.get("start_page_token") + # Decide sync strategy - track tokens per folder + folder_tokens = connector.config.get("folder_tokens", {}) + start_page_token = folder_tokens.get(target_folder_id) can_use_delta_sync = use_delta_sync and start_page_token and connector.last_indexed_at if can_use_delta_sync: @@ -156,7 +157,10 @@ async def index_google_drive_files( if new_token and not token_error: from sqlalchemy.orm.attributes import flag_modified - connector.config["start_page_token"] = new_token + # Store token per folder + if "folder_tokens" not in connector.config: + connector.config["folder_tokens"] = {} + connector.config["folder_tokens"][target_folder_id] = new_token flag_modified(connector, "config") await 
update_connector_last_indexed(session, connector, update_last_indexed) diff --git a/surfsense_web/contracts/types/document.types.ts b/surfsense_web/contracts/types/document.types.ts index 3ce5388dd..b2cdb79c3 100644 --- a/surfsense_web/contracts/types/document.types.ts +++ b/surfsense_web/contracts/types/document.types.ts @@ -15,6 +15,7 @@ export const documentTypeEnum = z.enum([ "CLICKUP_CONNECTOR", "GOOGLE_CALENDAR_CONNECTOR", "GOOGLE_GMAIL_CONNECTOR", + "GOOGLE_DRIVE_CONNECTOR", "AIRTABLE_CONNECTOR", "LUMA_CONNECTOR", "ELASTICSEARCH_CONNECTOR", From acf47e3b0cb6b4ba24defee4d38f07b10abad493 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 18:53:13 +0200 Subject: [PATCH 33/39] refactor(connectors): remove verbose docstrings and obvious comments - Simplify module docstrings (remove meta-commentary about 'small focused modules') - Remove redundant inline comments (e.g., 'Log task start', 'Get connector from database') - Trim verbose function docstrings to essential information only - Remove over-explanatory comments that restate what code does - Keep necessary documentation, remove noise for better readability --- .../app/connectors/google_drive/__init__.py | 6 +--- .../connectors/google_drive/change_tracker.py | 10 +----- .../app/connectors/google_drive/client.py | 15 ++------- .../google_drive/content_extractor.py | 20 ++---------- .../connectors/google_drive/credentials.py | 13 +------- .../app/connectors/google_drive/file_types.py | 9 +----- .../connectors/google_drive/folder_manager.py | 17 ++-------- .../google_drive_indexer.py | 32 +------------------ 8 files changed, 12 insertions(+), 110 deletions(-) diff --git a/surfsense_backend/app/connectors/google_drive/__init__.py b/surfsense_backend/app/connectors/google_drive/__init__.py index c50135155..6e0d25725 100644 --- a/surfsense_backend/app/connectors/google_drive/__init__.py +++ b/surfsense_backend/app/connectors/google_drive/__init__.py @@ -1,8 +1,4 @@ -""" -Google Drive Connector Module. - -Simple, modular approach to Google Drive indexing. -""" +"""Google Drive Connector Module.""" from .change_tracker import categorize_change, fetch_all_changes, get_start_page_token from .client import GoogleDriveClient diff --git a/surfsense_backend/app/connectors/google_drive/change_tracker.py b/surfsense_backend/app/connectors/google_drive/change_tracker.py index 1c697af5f..860e2dbef 100644 --- a/surfsense_backend/app/connectors/google_drive/change_tracker.py +++ b/surfsense_backend/app/connectors/google_drive/change_tracker.py @@ -1,9 +1,4 @@ -""" -Change Tracking for Google Drive - Delta Sync Support. - -Handles change detection and incremental syncing using Drive API's changes endpoint. -Small, focused module for tracking file modifications. 
-""" +"""Change tracking for Google Drive delta sync.""" import logging from datetime import datetime @@ -110,7 +105,6 @@ async def _filter_changes_by_folder( for change in changes: file = change.get("file") if not file: - # File was removed filtered.append(change) continue @@ -147,7 +141,6 @@ def categorize_change(change: dict[str, Any]) -> str: if file.get("trashed"): return "trashed" - # Check if file was recently created created_time = file.get("createdTime") modified_time = file.get("modifiedTime") @@ -198,7 +191,6 @@ async def fetch_all_changes( all_changes.extend(changes) - # If next_token is None, we've reached the end if not next_token or next_token == current_token: break diff --git a/surfsense_backend/app/connectors/google_drive/client.py b/surfsense_backend/app/connectors/google_drive/client.py index 6d2d0abfd..5053aa449 100644 --- a/surfsense_backend/app/connectors/google_drive/client.py +++ b/surfsense_backend/app/connectors/google_drive/client.py @@ -1,9 +1,4 @@ -""" -Google Drive API Client. - -Core client for interacting with Google Drive API. -Handles service initialization and basic file operations. -""" +"""Google Drive API client.""" from typing import Any @@ -16,12 +11,7 @@ from .credentials import get_valid_credentials class GoogleDriveClient: - """ - Main client for Google Drive API operations. - - Handles service initialization and provides methods for - listing files, getting metadata, and downloading content. - """ + """Client for Google Drive API operations.""" def __init__(self, session: AsyncSession, connector_id: int): """ @@ -140,7 +130,6 @@ class GoogleDriveClient: service = await self.get_service() request = service.files().get_media(fileId=file_id) - # Execute the download import io fh = io.BytesIO() diff --git a/surfsense_backend/app/connectors/google_drive/content_extractor.py b/surfsense_backend/app/connectors/google_drive/content_extractor.py index 04c48f47f..00211957a 100644 --- a/surfsense_backend/app/connectors/google_drive/content_extractor.py +++ b/surfsense_backend/app/connectors/google_drive/content_extractor.py @@ -1,8 +1,4 @@ -""" -Content Extraction for Google Drive Files. - -Downloads files and delegates to Surfsense's existing file processors. -""" +"""Content extraction for Google Drive files.""" import logging import os @@ -31,9 +27,7 @@ async def download_and_process_file( log_entry: Log, ) -> tuple[Any, str | None, dict[str, Any] | None]: """ - Download Google Drive file and process using Surfsense's existing infrastructure. - - This is the ONLY function needed - it delegates everything to process_file_in_background. + Download Google Drive file and process using Surfsense file processors. Args: client: GoogleDriveClient instance @@ -71,10 +65,8 @@ async def download_and_process_file( if error: return None, error - # Set extension based on export format extension = ".pdf" if export_mime == "application/pdf" else ".txt" else: - # Regular files - download directly content_bytes, error = await client.download_file(file_id) if error: return None, error @@ -82,19 +74,15 @@ async def download_and_process_file( # Preserve original file extension extension = Path(file_name).suffix or ".bin" - # Save to temporary file with tempfile.NamedTemporaryFile(delete=False, suffix=extension) as tmp_file: tmp_file.write(content_bytes) temp_file_path = tmp_file.name - # Step 2: Delegate to Surfsense's existing file processor - # This handles ALL file types: markdown, audio, PDFs, Office docs, images, etc. 
from app.tasks.document_processors.file_processors import ( process_file_in_background, ) from app.db import DocumentType - # Prepare connector info connector_info = { "type": DocumentType.GOOGLE_DRIVE_CONNECTOR, "metadata": { @@ -105,7 +93,6 @@ async def download_and_process_file( }, } - # If it was a Google Workspace file, note the export format if is_google_workspace_file(mime_type): connector_info["metadata"]["exported_as"] = "pdf" connector_info["metadata"]["original_workspace_type"] = mime_type.split(".")[-1] @@ -119,10 +106,9 @@ async def download_and_process_file( session=session, task_logger=task_logger, log_entry=log_entry, - connector=connector_info, # Pass connector info + connector=connector_info, ) - # process_file_in_background doesn't return the document return None, None, connector_info["metadata"] except Exception as e: diff --git a/surfsense_backend/app/connectors/google_drive/credentials.py b/surfsense_backend/app/connectors/google_drive/credentials.py index 5d09df881..4c1ef9c03 100644 --- a/surfsense_backend/app/connectors/google_drive/credentials.py +++ b/surfsense_backend/app/connectors/google_drive/credentials.py @@ -1,9 +1,4 @@ -""" -Google Drive OAuth Credentials Management. - -Handles credential validation, token refresh, and persistence to database. -Small, focused module for credential operations only. -""" +"""Google Drive OAuth credential management.""" import json from datetime import datetime @@ -35,7 +30,6 @@ async def get_valid_credentials( ValueError: If credentials are missing or invalid Exception: If token refresh fails """ - # Fetch connector from database result = await session.execute( select(SearchSourceConnector).filter( SearchSourceConnector.id == connector_id @@ -46,11 +40,9 @@ async def get_valid_credentials( if not connector: raise ValueError(f"Connector {connector_id} not found") - # Extract credentials from config config_data = connector.config exp = config_data.get("expiry", "").replace("Z", "") - # Validate required fields if not all( [ config_data.get("client_id"), @@ -62,7 +54,6 @@ async def get_valid_credentials( "Google OAuth credentials (client_id, client_secret, refresh_token) must be set" ) - # Create credentials object credentials = Credentials( token=config_data.get("token"), refresh_token=config_data.get("refresh_token"), @@ -73,12 +64,10 @@ async def get_valid_credentials( expiry=datetime.fromisoformat(exp) if exp else None, ) - # Refresh token if expired if credentials.expired or not credentials.valid: try: credentials.refresh(Request()) - # Persist refreshed token to database connector.config = json.loads(credentials.to_json()) flag_modified(connector, "config") await session.commit() diff --git a/surfsense_backend/app/connectors/google_drive/file_types.py b/surfsense_backend/app/connectors/google_drive/file_types.py index f66680c6c..cb2354585 100644 --- a/surfsense_backend/app/connectors/google_drive/file_types.py +++ b/surfsense_backend/app/connectors/google_drive/file_types.py @@ -1,18 +1,11 @@ -""" -File Type Handlers for Google Drive. +"""File type handlers for Google Drive.""" -Simple module for basic file type detection. 
-""" - -# Google Workspace MIME types that need export GOOGLE_DOC = "application/vnd.google-apps.document" GOOGLE_SHEET = "application/vnd.google-apps.spreadsheet" GOOGLE_SLIDE = "application/vnd.google-apps.presentation" GOOGLE_FOLDER = "application/vnd.google-apps.folder" GOOGLE_SHORTCUT = "application/vnd.google-apps.shortcut" -# Export MIME types for Google Workspace files -# Export as PDF to preserve formatting, images, and structure EXPORT_FORMATS = { GOOGLE_DOC: "application/pdf", GOOGLE_SHEET: "application/pdf", diff --git a/surfsense_backend/app/connectors/google_drive/folder_manager.py b/surfsense_backend/app/connectors/google_drive/folder_manager.py index da9deb75d..599475a46 100644 --- a/surfsense_backend/app/connectors/google_drive/folder_manager.py +++ b/surfsense_backend/app/connectors/google_drive/folder_manager.py @@ -1,9 +1,4 @@ -""" -Folder Management for Google Drive. - -Handles folder listing, selection, and hierarchy operations. -Small, focused module for folder-related operations. -""" +"""Folder management for Google Drive.""" import logging from typing import Any @@ -165,11 +160,7 @@ async def list_folder_contents( parent_id: str | None = None, ) -> tuple[list[dict[str, Any]], str | None]: """ - List both folders and files in a Google Drive folder. - - Fetches ALL items using pagination (handles folders with >100 items). - Returns items sorted with folders first, then files. - Each item includes 'isFolder' boolean for frontend rendering. + List folders and files in a Google Drive folder with pagination support. Args: client: GoogleDriveClient instance @@ -212,20 +203,16 @@ async def list_folder_contents( all_items.extend(items) - # If no more pages, break if not next_token: break page_token = next_token - # Add 'isFolder' flag and sort (folders first, then files) for item in all_items: item["isFolder"] = item["mimeType"] == "application/vnd.google-apps.folder" - # Sort: folders first (alphabetically), then files (alphabetically) all_items.sort(key=lambda x: (not x["isFolder"], x["name"].lower())) - # Count folders and files for logging folder_count = sum(1 for item in all_items if item["isFolder"]) file_count = len(all_items) - folder_count diff --git a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py index 335c3b41d..cd862e372 100644 --- a/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/google_drive_indexer.py @@ -1,11 +1,4 @@ -""" -Google Drive Indexer - Delegates all processing to Surfsense's file processors. 
- -Handles: -- Folder-specific indexing (user selects folder) -- Delta sync (only index changed files) -- Delegates file processing to process_file_in_background -""" +"""Google Drive indexer using Surfsense file processors.""" import logging from datetime import datetime @@ -63,7 +56,6 @@ async def index_google_drive_files( """ task_logger = TaskLoggingService(session, search_space_id) - # Log task start log_entry = await task_logger.log_task_start( task_name="google_drive_files_indexing", source="connector_indexing_task", @@ -78,7 +70,6 @@ async def index_google_drive_files( ) try: - # Get connector from database connector = await get_connector_by_id( session, connector_id, SearchSourceConnectorType.GOOGLE_DRIVE_CONNECTOR ) @@ -90,7 +81,6 @@ async def index_google_drive_files( ) return 0, error_msg - # Initialize Drive client await task_logger.log_task_progress( log_entry, f"Initializing Google Drive client for connector {connector_id}", @@ -99,7 +89,6 @@ async def index_google_drive_files( drive_client = GoogleDriveClient(session, connector_id) - # Use folder from request params (required for Google Drive) if not folder_id: error_msg = "folder_id is required for Google Drive indexing" await task_logger.log_task_failure( @@ -112,7 +101,6 @@ async def index_google_drive_files( logger.info(f"Indexing Google Drive folder: {target_folder_name} ({target_folder_id})") - # Decide sync strategy - track tokens per folder folder_tokens = connector.config.get("folder_tokens", {}) start_page_token = folder_tokens.get(target_folder_id) can_use_delta_sync = use_delta_sync and start_page_token and connector.last_indexed_at @@ -150,14 +138,11 @@ async def index_google_drive_files( documents_indexed, documents_skipped = result - # Update last indexed timestamp and get new start page token if documents_indexed > 0 or can_use_delta_sync: - # Get new start page token for next sync new_token, token_error = await get_start_page_token(drive_client) if new_token and not token_error: from sqlalchemy.orm.attributes import flag_modified - # Store token per folder if "folder_tokens" not in connector.config: connector.config["folder_tokens"] = {} connector.config["folder_tokens"][target_folder_id] = new_token @@ -165,13 +150,11 @@ async def index_google_drive_files( await update_connector_last_indexed(session, connector, update_last_indexed) - # Final commit await session.commit() logger.info( f"Successfully committed Google Drive indexing changes to database" ) - # Log success await task_logger.log_task_success( log_entry, f"Successfully completed Google Drive indexing for connector {connector_id}", @@ -235,7 +218,6 @@ async def _index_full_scan( page_token = None files_processed = 0 - # Paginate through all files in folder while files_processed < max_files: files, next_token, error = await get_files_in_folder( drive_client, folder_id, include_subfolders=False, page_token=page_token @@ -254,7 +236,6 @@ async def _index_full_scan( files_processed += 1 - # Process file indexed, skipped = await _process_single_file( drive_client=drive_client, session=session, @@ -269,7 +250,6 @@ async def _index_full_scan( documents_indexed += indexed documents_skipped += skipped - # Batch commit every 10 files if documents_indexed % 10 == 0 and documents_indexed > 0: await session.commit() logger.info(f"Committed batch: {documents_indexed} files indexed so far") @@ -304,7 +284,6 @@ async def _index_with_delta_sync( {"stage": "delta_sync", "start_token": start_page_token}, ) - # Fetch all changes since last sync changes, final_token, 
error = await fetch_all_changes( drive_client, start_page_token, folder_id ) @@ -330,14 +309,12 @@ async def _index_with_delta_sync( files_processed += 1 change_type = categorize_change(change) - # Handle removed/trashed files if change_type in ["removed", "trashed"]: file_id = change.get("fileId") if file_id: await _remove_document(session, file_id, search_space_id) continue - # Handle modified/new files file = change.get("file") if not file: continue @@ -356,7 +333,6 @@ async def _index_with_delta_sync( documents_indexed += indexed documents_skipped += skipped - # Batch commit every 10 files if documents_indexed % 10 == 0 and documents_indexed > 0: await session.commit() logger.info(f"Committed batch: {documents_indexed} changes processed") @@ -389,10 +365,6 @@ async def _process_single_file( try: logger.info(f"Processing file: {file_name} ({mime_type})") - # Download and process using Surfsense's existing infrastructure - # This handles: markdown, audio, PDFs, Office docs, images, etc. - # It also handles: deduplication, chunking, summarization, embedding - # Document type is set to GOOGLE_DRIVE_CONNECTOR during processing _, error, _ = await download_and_process_file( client=drive_client, file=file, @@ -404,7 +376,6 @@ async def _process_single_file( ) if error: - # Log and skip - not an error, just unsupported or empty await task_logger.log_task_progress( log_entry, f"Skipped {file_name}: {error}", @@ -412,7 +383,6 @@ async def _process_single_file( ) return 0, 1 - # File was processed successfully (document type already set in processor) logger.info(f"Successfully indexed Google Drive file: {file_name}") return 1, 0 From 0b006de32dbfd0aba418920da65107acb2654db8 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 18:59:30 +0200 Subject: [PATCH 34/39] refactor(web): clean up Google Drive folder tree component - Replace inline comments with JSDoc multiline comments for main functions - Remove obvious/noisy inline comments from JSX - Simplify component documentation while keeping it clear - Improve readability by reducing comment clutter --- .../connectors/google-drive-folder-tree.tsx | 59 ++++++------------- 1 file changed, 19 insertions(+), 40 deletions(-) diff --git a/surfsense_web/components/connectors/google-drive-folder-tree.tsx b/surfsense_web/components/connectors/google-drive-folder-tree.tsx index 793fdc750..05f4cc9e2 100644 --- a/surfsense_web/components/connectors/google-drive-folder-tree.tsx +++ b/surfsense_web/components/connectors/google-drive-folder-tree.tsx @@ -75,25 +75,23 @@ export function GoogleDriveFolderTree({ const [isLoadingRoot, setIsLoadingRoot] = useState(false); const [isInitialized, setIsInitialized] = useState(false); - // Helper to check if a folder is selected const isFolderSelected = (folderId: string): boolean => { return selectedFolders.some((f) => f.id === folderId); }; - // Handle folder checkbox toggle const toggleFolderSelection = (folderId: string, folderName: string) => { if (isFolderSelected(folderId)) { - // Remove from selection onSelectFolders(selectedFolders.filter((f) => f.id !== folderId)); } else { - // Add to selection onSelectFolders([...selectedFolders, { id: folderId, name: folderName }]); } }; - // Load root items (folders and files) on mount + /** + * Load root-level folders and files from Google Drive. 
+ */ const loadRootItems = async () => { - if (isInitialized) return; // Already loaded + if (isInitialized) return; setIsLoadingRoot(true); try { @@ -112,17 +110,16 @@ export function GoogleDriveFolderTree({ } }; - // Helper function to find an item recursively through all loaded items + /** + * Find an item by ID across all loaded items (root and nested). + */ const findItem = (itemId: string): DriveItem | undefined => { - // First check if we have it in itemStates const state = itemStates.get(itemId); if (state?.item) return state.item; - // Check root items const rootItem = rootItems.find((item) => item.id === itemId); if (rootItem) return rootItem; - // Recursively search through all loaded children for (const [, nodeState] of itemStates) { if (nodeState.children) { const found = nodeState.children.find((child) => child.id === itemId); @@ -133,17 +130,17 @@ export function GoogleDriveFolderTree({ return undefined; }; - // Load children (folders and files) for a specific folder + /** + * Load and display contents of a specific folder. + */ const loadFolderContents = async (folderId: string) => { try { - // Set loading state setItemStates((prev) => { const newMap = new Map(prev); const existing = newMap.get(folderId); if (existing) { newMap.set(folderId, { ...existing, isLoading: true }); } else { - // First time loading this folder - create initial state const item = findItem(folderId); if (item) { newMap.set(folderId, { @@ -165,10 +162,6 @@ export function GoogleDriveFolderTree({ const data = await response.json(); const items = data.items || []; - // Check if folder only contains files (no subfolders) - const hasSubfolders = items.some((item: DriveItem) => item.isFolder); - - // Update item state with loaded children setItemStates((prev) => { const newMap = new Map(prev); const existing = newMap.get(folderId); @@ -178,7 +171,7 @@ export function GoogleDriveFolderTree({ newMap.set(folderId, { item, children: items, - isExpanded: true, // Always expand after loading + isExpanded: true, isLoading: false, }); } else { @@ -188,7 +181,6 @@ export function GoogleDriveFolderTree({ }); } catch (error) { console.error("Error loading folder contents:", error); - // Clear loading state on error setItemStates((prev) => { const newMap = new Map(prev); const existing = newMap.get(folderId); @@ -200,17 +192,17 @@ export function GoogleDriveFolderTree({ } }; - // Toggle folder expansion + /** + * Toggle folder expand/collapse state. + */ const toggleFolder = async (item: DriveItem) => { - if (!item.isFolder) return; // Only folders can be expanded + if (!item.isFolder) return; const state = itemStates.get(item.id); if (!state || state.children === null) { - // First time expanding - load children await loadFolderContents(item.id); } else { - // Toggle expansion state setItemStates((prev) => { const newMap = new Map(prev); newMap.set(item.id, { @@ -222,7 +214,9 @@ export function GoogleDriveFolderTree({ } }; - // Recursive render function for item tree + /** + * Render a single item (folder or file) with its children. 
+ */ const renderItem = (item: DriveItem, level: number = 0) => { const state = itemStates.get(item.id); const isExpanded = state?.isExpanded || false; @@ -231,7 +225,6 @@ export function GoogleDriveFolderTree({ const isSelected = isFolderSelected(item.id); const isFolder = item.isFolder; - // Separate folders and files for children const childFolders = children?.filter((c) => c.isFolder) || []; const childFiles = children?.filter((c) => !c.isFolder) || []; @@ -245,7 +238,6 @@ export function GoogleDriveFolderTree({ isSelected && isFolder && "bg-accent/50" )} > - {/* Expand/Collapse Icon (only for folders) */} {isFolder ? ( ) : ( - // Empty space for alignment + )} - {/* Checkbox (only for folders) */} {isFolder && ( )} - {/* Icon */}
{isFolder ? ( isExpanded ? ( @@ -289,7 +279,6 @@ export function GoogleDriveFolderTree({ )}
- {/* Item Name */} isFolder && toggleFolder(item)} @@ -298,16 +287,11 @@ export function GoogleDriveFolderTree({ - {/* Render children if expanded (folders first, then files) */} {isExpanded && isFolder && children && (
- {/* Render folders first */} {childFolders.map((child) => renderItem(child, level + 1))} - - {/* Render files */} {childFiles.map((child) => renderItem(child, level + 1))} - {/* Empty state */} {children.length === 0 && (
Empty folder
)} @@ -317,7 +301,6 @@ export function GoogleDriveFolderTree({ ); }; - // Initialize on first render if (!isInitialized && !isLoadingRoot) { loadRootItems(); } @@ -326,7 +309,6 @@ export function GoogleDriveFolderTree({
- {/* My Drive Header (always visible, selectable) */}
- {/* Loading indicator */} {isLoadingRoot && (
)} - {/* Root items (folders and files) - same level as Google Drive shows */}
{!isLoadingRoot && rootItems.map((item) => renderItem(item, 0))}
- {/* Empty state */} {!isLoadingRoot && rootItems.length === 0 && (
No files or folders found in your Google Drive From 10c98745cdc3a2e7231d27dc8b05d1c9b6b609b8 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Sun, 28 Dec 2025 19:17:37 +0200 Subject: [PATCH 35/39] refactor(web): use React Query for Google Drive folder operations - Fix errors in connectors-api.service (use .issues instead of .errors) - Create useGoogleDriveFolders hook with proper React Query integration - Add Google Drive folders cache keys with proper query invalidation - Refactor GoogleDriveFolderTree to use React Query hook for root data - Remove manual state management (isInitialized, setRootItems, loadRootItems) - Remove unused state (driveFolders, isLoadingFolders) from manage page - Simplify handleOpenDriveFolderDialog function - Automatic loading, caching, error handling, and refetching via React Query - Better performance with proper caching and state management --- .../connectors/(manage)/page.tsx | 68 ++++++------------- .../connectors/google-drive-folder-tree.tsx | 49 ++++--------- .../contracts/types/connector.types.ts | 32 +++++++++ .../hooks/use-google-drive-folders.ts | 29 ++++++++ .../lib/apis/connectors-api.service.ts | 40 +++++++++-- surfsense_web/lib/query-client/cache-keys.ts | 4 ++ 6 files changed, 129 insertions(+), 93 deletions(-) create mode 100644 surfsense_web/hooks/use-google-drive-folders.ts diff --git a/surfsense_web/app/dashboard/[search_space_id]/connectors/(manage)/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/connectors/(manage)/page.tsx index 5854cb706..1e0e76ca9 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/connectors/(manage)/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/connectors/(manage)/page.tsx @@ -70,14 +70,8 @@ import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "@/comp import { EnumConnectorName } from "@/contracts/enums/connector"; import { getConnectorIcon } from "@/contracts/enums/connectorIcons"; import { cn } from "@/lib/utils"; -import { authenticatedFetch } from "@/lib/auth-utils"; import { GoogleDriveFolderTree } from "@/components/connectors/google-drive-folder-tree"; -interface DriveFolder { - id: string; - name: string; -} - export default function ConnectorsPage() { const t = useTranslations("connectors"); const tCommon = useTranslations("common"); @@ -127,9 +121,7 @@ export default function ConnectorsPage() { // Google Drive folder selection state const [driveFolderDialogOpen, setDriveFolderDialogOpen] = useState(false); - const [driveFolders, setDriveFolders] = useState([]); const [selectedFolders, setSelectedFolders] = useState>([]); - const [isLoadingFolders, setIsLoadingFolders] = useState(false); useEffect(() => { if (error) { @@ -165,31 +157,9 @@ export default function ConnectorsPage() { } }; - // Handle opening Google Drive folder selection dialog - const handleOpenDriveFolderDialog = async (connectorId: number) => { + const handleOpenDriveFolderDialog = (connectorId: number) => { setSelectedConnectorForIndexing(connectorId); setDriveFolderDialogOpen(true); - setIsLoadingFolders(true); - - try { - const response = await authenticatedFetch( - `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/connectors/${connectorId}/google-drive/folders`, - { method: "GET" } - ); - - if (!response.ok) { - throw new Error("Failed to load folders"); - } - - const data = await response.json(); - setDriveFolders(data.folders || []); - } catch (error) { - console.error("Error loading folders:", error); - toast.error("Failed to load Google Drive folders"); - 
setDriveFolderDialogOpen(false); - } finally { - setIsLoadingFolders(false); - } }; // Handle Google Drive folder indexing @@ -204,15 +174,17 @@ export default function ConnectorsPage() { try { setIndexingConnectorId(selectedConnectorForIndexing); - // Call indexConnector with folder_ids and folder_names as query params - await indexConnector( - selectedConnectorForIndexing, - searchSpaceId, - undefined, - undefined, - selectedFolders.map((f) => f.id).join(","), - selectedFolders.map((f) => f.name).join(", ") - ); + const folderIds = selectedFolders.map((f) => f.id).join(","); + const folderNames = selectedFolders.map((f) => f.name).join(", "); + + await indexConnector({ + connector_id: selectedConnectorForIndexing, + queryParams: { + search_space_id: searchSpaceId, + folder_ids: folderIds, + folder_names: folderNames, + }, + }); toast.success(t("indexing_started")); } catch (error) { console.error("Error indexing connector content:", error); @@ -221,7 +193,6 @@ export default function ConnectorsPage() { setIndexingConnectorId(null); setSelectedConnectorForIndexing(null); setSelectedFolders([]); - setDriveFolders([]); } }; @@ -747,14 +718,13 @@ export default function ConnectorsPage() {