mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-17 18:35:19 +02:00
Merge pull request #640 from CREDO23/google-drive-connector
[Feat] Add Google drive connector
This commit is contained in:
commit
23870042f3
35 changed files with 2877 additions and 26 deletions
|
|
@ -37,6 +37,7 @@ GOOGLE_OAUTH_CLIENT_SECRET=GOCSV
|
|||
# Connector Specific Configs
|
||||
GOOGLE_CALENDAR_REDIRECT_URI=http://localhost:8000/api/v1/auth/google/calendar/connector/callback
|
||||
GOOGLE_GMAIL_REDIRECT_URI=http://localhost:8000/api/v1/auth/google/gmail/connector/callback
|
||||
GOOGLE_DRIVE_REDIRECT_URI=http://localhost:8000/api/v1/auth/google/drive/connector/callback
|
||||
|
||||
# Airtable OAuth for Aitable Connector
|
||||
AIRTABLE_CLIENT_ID=your_airtable_client_id
|
||||
|
|
|
|||
|
|
@ -0,0 +1,74 @@
|
|||
"""Add Google Drive connector enums
|
||||
|
||||
Revision ID: 54
|
||||
Revises: 53
|
||||
Create Date: 2025-12-28 12:00:00.000000
|
||||
|
||||
"""
|
||||
|
||||
from collections.abc import Sequence
|
||||
|
||||
from alembic import op
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = "54"
|
||||
down_revision: str | None = "53"
|
||||
branch_labels: str | Sequence[str] | None = None
|
||||
depends_on: str | Sequence[str] | None = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
"""Safely add 'GOOGLE_DRIVE_CONNECTOR' to enum types if missing."""
|
||||
|
||||
# Add to searchsourceconnectortype enum
|
||||
op.execute(
|
||||
"""
|
||||
DO $$
|
||||
BEGIN
|
||||
IF NOT EXISTS (
|
||||
SELECT 1 FROM pg_type t
|
||||
JOIN pg_enum e ON t.oid = e.enumtypid
|
||||
WHERE t.typname = 'searchsourceconnectortype' AND e.enumlabel = 'GOOGLE_DRIVE_CONNECTOR'
|
||||
) THEN
|
||||
ALTER TYPE searchsourceconnectortype ADD VALUE 'GOOGLE_DRIVE_CONNECTOR';
|
||||
END IF;
|
||||
END
|
||||
$$;
|
||||
"""
|
||||
)
|
||||
|
||||
# Add to documenttype enum
|
||||
op.execute(
|
||||
"""
|
||||
DO $$
|
||||
BEGIN
|
||||
IF NOT EXISTS (
|
||||
SELECT 1 FROM pg_type t
|
||||
JOIN pg_enum e ON t.oid = e.enumtypid
|
||||
WHERE t.typname = 'documenttype' AND e.enumlabel = 'GOOGLE_DRIVE_CONNECTOR'
|
||||
) THEN
|
||||
ALTER TYPE documenttype ADD VALUE 'GOOGLE_DRIVE_CONNECTOR';
|
||||
END IF;
|
||||
END
|
||||
$$;
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
"""Remove 'GOOGLE_DRIVE_CONNECTOR' from enum types.
|
||||
|
||||
Note: PostgreSQL doesn't support removing enum values directly.
|
||||
This would require recreating the enum type, which is complex and risky.
|
||||
For now, we'll leave the enum values in place.
|
||||
|
||||
In a production environment with strict downgrade requirements, you would need to:
|
||||
1. Create new enum types without the value
|
||||
2. Convert all columns to use the new type
|
||||
3. Drop the old enum type
|
||||
4. Rename the new type to the old name
|
||||
|
||||
This is left as pass to avoid accidental data loss.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
|
@ -0,0 +1,74 @@
|
|||
"""Rename GOOGLE_DRIVE_CONNECTOR document type to GOOGLE_DRIVE_FILE
|
||||
|
||||
Revision ID: 55
|
||||
Revises: 54
|
||||
Create Date: 2025-12-29 12:00:00.000000
|
||||
|
||||
"""
|
||||
|
||||
from collections.abc import Sequence
|
||||
|
||||
from alembic import op
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = "55"
|
||||
down_revision: str | None = "54"
|
||||
branch_labels: str | Sequence[str] | None = None
|
||||
depends_on: str | Sequence[str] | None = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
from sqlalchemy import text
|
||||
|
||||
connection = op.get_bind()
|
||||
|
||||
connection.execute(
|
||||
text(
|
||||
"""
|
||||
DO $$
|
||||
BEGIN
|
||||
IF NOT EXISTS (
|
||||
SELECT 1 FROM pg_type t
|
||||
JOIN pg_enum e ON t.oid = e.enumtypid
|
||||
WHERE t.typname = 'documenttype' AND e.enumlabel = 'GOOGLE_DRIVE_FILE'
|
||||
) THEN
|
||||
ALTER TYPE documenttype ADD VALUE IF NOT EXISTS 'GOOGLE_DRIVE_FILE';
|
||||
END IF;
|
||||
END
|
||||
$$;
|
||||
"""
|
||||
)
|
||||
)
|
||||
|
||||
connection.commit()
|
||||
|
||||
connection.execute(
|
||||
text(
|
||||
"""
|
||||
UPDATE documents
|
||||
SET document_type = 'GOOGLE_DRIVE_FILE'
|
||||
WHERE document_type = 'GOOGLE_DRIVE_CONNECTOR';
|
||||
"""
|
||||
)
|
||||
)
|
||||
|
||||
connection.commit()
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
from sqlalchemy import text
|
||||
|
||||
connection = op.get_bind()
|
||||
|
||||
connection.execute(
|
||||
text(
|
||||
"""
|
||||
UPDATE documents
|
||||
SET document_type = 'GOOGLE_DRIVE_CONNECTOR'
|
||||
WHERE document_type = 'GOOGLE_DRIVE_FILE';
|
||||
"""
|
||||
)
|
||||
)
|
||||
|
||||
connection.commit()
|
||||
|
||||
|
|
@ -36,6 +36,7 @@ _ALL_CONNECTORS: list[str] = [
|
|||
"CLICKUP_CONNECTOR",
|
||||
"GOOGLE_CALENDAR_CONNECTOR",
|
||||
"GOOGLE_GMAIL_CONNECTOR",
|
||||
"GOOGLE_DRIVE_FILE",
|
||||
"DISCORD_CONNECTOR",
|
||||
"AIRTABLE_CONNECTOR",
|
||||
"TAVILY_API",
|
||||
|
|
@ -425,6 +426,16 @@ async def search_knowledge_base_async(
|
|||
)
|
||||
all_documents.extend(chunks)
|
||||
|
||||
elif connector == "GOOGLE_DRIVE_FILE":
|
||||
_, chunks = await connector_service.search_google_drive(
|
||||
user_query=query,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
start_date=resolved_start_date,
|
||||
end_date=resolved_end_date,
|
||||
)
|
||||
all_documents.extend(chunks)
|
||||
|
||||
elif connector == "CONFLUENCE_CONNECTOR":
|
||||
_, chunks = await connector_service.search_confluence(
|
||||
user_query=query,
|
||||
|
|
@ -561,6 +572,7 @@ def create_search_knowledge_base_tool(
|
|||
- CLICKUP_CONNECTOR: "ClickUp tasks and project data" (personal task management)
|
||||
- GOOGLE_CALENDAR_CONNECTOR: "Google Calendar events, meetings, and schedules" (personal calendar and time management)
|
||||
- GOOGLE_GMAIL_CONNECTOR: "Google Gmail emails and conversations" (personal emails and communications)
|
||||
- GOOGLE_DRIVE_FILE: "Google Drive files and documents" (personal cloud storage and file management)
|
||||
- DISCORD_CONNECTOR: "Discord server conversations and shared content" (personal community communications)
|
||||
- AIRTABLE_CONNECTOR: "Airtable records, tables, and database content" (personal data management and organization)
|
||||
- TAVILY_API: "Tavily search API results" (personalized search results)
|
||||
|
|
|
|||
|
|
@ -82,6 +82,9 @@ class Config:
|
|||
# Google Gmail redirect URI
|
||||
GOOGLE_GMAIL_REDIRECT_URI = os.getenv("GOOGLE_GMAIL_REDIRECT_URI")
|
||||
|
||||
# Google Drive redirect URI
|
||||
GOOGLE_DRIVE_REDIRECT_URI = os.getenv("GOOGLE_DRIVE_REDIRECT_URI")
|
||||
|
||||
# Airtable OAuth
|
||||
AIRTABLE_CLIENT_ID = os.getenv("AIRTABLE_CLIENT_ID")
|
||||
AIRTABLE_CLIENT_SECRET = os.getenv("AIRTABLE_CLIENT_SECRET")
|
||||
|
|
|
|||
20
surfsense_backend/app/connectors/google_drive/__init__.py
Normal file
20
surfsense_backend/app/connectors/google_drive/__init__.py
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
"""Google Drive Connector Module."""
|
||||
|
||||
from .change_tracker import categorize_change, fetch_all_changes, get_start_page_token
|
||||
from .client import GoogleDriveClient
|
||||
from .content_extractor import download_and_process_file
|
||||
from .credentials import get_valid_credentials, validate_credentials
|
||||
from .folder_manager import get_files_in_folder, list_folder_contents
|
||||
|
||||
__all__ = [
|
||||
"GoogleDriveClient",
|
||||
"get_valid_credentials",
|
||||
"validate_credentials",
|
||||
"download_and_process_file",
|
||||
"get_files_in_folder",
|
||||
"list_folder_contents",
|
||||
"get_start_page_token",
|
||||
"fetch_all_changes",
|
||||
"categorize_change",
|
||||
]
|
||||
|
||||
205
surfsense_backend/app/connectors/google_drive/change_tracker.py
Normal file
205
surfsense_backend/app/connectors/google_drive/change_tracker.py
Normal file
|
|
@ -0,0 +1,205 @@
|
|||
"""Change tracking for Google Drive delta sync."""
|
||||
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from .client import GoogleDriveClient
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def get_start_page_token(
|
||||
client: GoogleDriveClient,
|
||||
) -> tuple[str | None, str | None]:
|
||||
"""
|
||||
Get the starting page token for change tracking.
|
||||
|
||||
This token represents the current state and is used for future delta syncs.
|
||||
|
||||
Args:
|
||||
client: GoogleDriveClient instance
|
||||
|
||||
Returns:
|
||||
Tuple of (start_page_token, error message)
|
||||
"""
|
||||
try:
|
||||
service = await client.get_service()
|
||||
response = service.changes().getStartPageToken(supportsAllDrives=True).execute()
|
||||
token = response.get("startPageToken")
|
||||
|
||||
logger.info(f"Got start page token: {token}")
|
||||
return token, None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting start page token: {e!s}", exc_info=True)
|
||||
return None, f"Error getting start page token: {e!s}"
|
||||
|
||||
|
||||
async def get_changes(
|
||||
client: GoogleDriveClient,
|
||||
page_token: str,
|
||||
folder_id: str | None = None,
|
||||
) -> tuple[list[dict[str, Any]], str | None, str | None]:
|
||||
"""
|
||||
Get list of changes since the given page token.
|
||||
|
||||
Args:
|
||||
client: GoogleDriveClient instance
|
||||
page_token: Page token from previous sync
|
||||
folder_id: Optional folder ID to filter changes
|
||||
|
||||
Returns:
|
||||
Tuple of (changes list, new_page_token, error message)
|
||||
"""
|
||||
try:
|
||||
service = await client.get_service()
|
||||
|
||||
params = {
|
||||
"pageToken": page_token,
|
||||
"pageSize": 100,
|
||||
"fields": "nextPageToken, newStartPageToken, changes(fileId, removed, file(id, name, mimeType, modifiedTime, size, webViewLink, parents, trashed))",
|
||||
"supportsAllDrives": True,
|
||||
"includeItemsFromAllDrives": True,
|
||||
}
|
||||
|
||||
response = service.changes().list(**params).execute()
|
||||
|
||||
changes = response.get("changes", [])
|
||||
next_token = response.get("nextPageToken")
|
||||
new_start_token = response.get("newStartPageToken")
|
||||
|
||||
# Use new start token if this is the last page
|
||||
token_to_return = new_start_token if new_start_token else next_token
|
||||
|
||||
# Filter changes by folder if specified
|
||||
if folder_id:
|
||||
changes = await _filter_changes_by_folder(client, changes, folder_id)
|
||||
|
||||
logger.info(f"Got {len(changes)} changes, next token: {token_to_return}")
|
||||
return changes, token_to_return, None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting changes: {e!s}", exc_info=True)
|
||||
return [], None, f"Error getting changes: {e!s}"
|
||||
|
||||
|
||||
async def _filter_changes_by_folder(
|
||||
client: GoogleDriveClient,
|
||||
changes: list[dict[str, Any]],
|
||||
folder_id: str,
|
||||
) -> list[dict[str, Any]]:
|
||||
"""
|
||||
Filter changes to only include files within the specified folder.
|
||||
|
||||
Args:
|
||||
client: GoogleDriveClient instance
|
||||
changes: List of changes from API
|
||||
folder_id: Folder ID to filter by
|
||||
|
||||
Returns:
|
||||
Filtered list of changes
|
||||
"""
|
||||
filtered = []
|
||||
|
||||
for change in changes:
|
||||
file = change.get("file")
|
||||
if not file:
|
||||
filtered.append(change)
|
||||
continue
|
||||
|
||||
# Check if file is in the folder (or subfolder)
|
||||
parents = file.get("parents", [])
|
||||
if folder_id in parents:
|
||||
filtered.append(change)
|
||||
else:
|
||||
# Check if any parent is a descendant of folder_id
|
||||
# This is a simplified check - full implementation would traverse hierarchy
|
||||
# For now, we'll include it and let indexer validate
|
||||
filtered.append(change)
|
||||
|
||||
return filtered
|
||||
|
||||
|
||||
def categorize_change(change: dict[str, Any]) -> str:
|
||||
"""
|
||||
Categorize a change event.
|
||||
|
||||
Args:
|
||||
change: Change event from Drive API
|
||||
|
||||
Returns:
|
||||
Category: 'removed', 'trashed', 'modified', 'new'
|
||||
"""
|
||||
if change.get("removed"):
|
||||
return "removed"
|
||||
|
||||
file = change.get("file")
|
||||
if not file:
|
||||
return "removed"
|
||||
|
||||
if file.get("trashed"):
|
||||
return "trashed"
|
||||
|
||||
created_time = file.get("createdTime")
|
||||
modified_time = file.get("modifiedTime")
|
||||
|
||||
if created_time and modified_time:
|
||||
try:
|
||||
created = datetime.fromisoformat(created_time.replace("Z", "+00:00"))
|
||||
modified = datetime.fromisoformat(modified_time.replace("Z", "+00:00"))
|
||||
|
||||
# If created and modified times are very close, it's likely a new file
|
||||
time_diff = abs((modified - created).total_seconds())
|
||||
if time_diff < 60: # Within 1 minute
|
||||
return "new"
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return "modified"
|
||||
|
||||
|
||||
async def fetch_all_changes(
|
||||
client: GoogleDriveClient,
|
||||
start_token: str,
|
||||
folder_id: str | None = None,
|
||||
) -> tuple[list[dict[str, Any]], str | None, str | None]:
|
||||
"""
|
||||
Fetch all changes from start token, handling pagination.
|
||||
|
||||
Args:
|
||||
client: GoogleDriveClient instance
|
||||
start_token: Starting page token
|
||||
folder_id: Optional folder ID to filter changes
|
||||
|
||||
Returns:
|
||||
Tuple of (all changes, final_page_token, error message)
|
||||
"""
|
||||
all_changes = []
|
||||
current_token = start_token
|
||||
error = None
|
||||
|
||||
try:
|
||||
while current_token:
|
||||
changes, next_token, err = await get_changes(
|
||||
client, current_token, folder_id
|
||||
)
|
||||
|
||||
if err:
|
||||
error = err
|
||||
break
|
||||
|
||||
all_changes.extend(changes)
|
||||
|
||||
if not next_token or next_token == current_token:
|
||||
break
|
||||
|
||||
current_token = next_token
|
||||
|
||||
logger.info(f"Fetched total of {len(all_changes)} changes")
|
||||
return all_changes, current_token, error
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error fetching all changes: {e!s}", exc_info=True)
|
||||
return all_changes, current_token, f"Error fetching all changes: {e!s}"
|
||||
|
||||
183
surfsense_backend/app/connectors/google_drive/client.py
Normal file
183
surfsense_backend/app/connectors/google_drive/client.py
Normal file
|
|
@ -0,0 +1,183 @@
|
|||
"""Google Drive API client."""
|
||||
|
||||
from typing import Any
|
||||
|
||||
from google.oauth2.credentials import Credentials
|
||||
from googleapiclient.discovery import build
|
||||
from googleapiclient.errors import HttpError
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from .credentials import get_valid_credentials
|
||||
|
||||
|
||||
class GoogleDriveClient:
|
||||
"""Client for Google Drive API operations."""
|
||||
|
||||
def __init__(self, session: AsyncSession, connector_id: int):
|
||||
"""
|
||||
Initialize Google Drive client.
|
||||
|
||||
Args:
|
||||
session: Database session
|
||||
connector_id: ID of the Drive connector
|
||||
"""
|
||||
self.session = session
|
||||
self.connector_id = connector_id
|
||||
self.service = None
|
||||
|
||||
async def get_service(self):
|
||||
"""
|
||||
Get or create the Drive service instance.
|
||||
|
||||
Returns:
|
||||
Google Drive service instance
|
||||
|
||||
Raises:
|
||||
Exception: If service creation fails
|
||||
"""
|
||||
if self.service:
|
||||
return self.service
|
||||
|
||||
try:
|
||||
credentials = await get_valid_credentials(self.session, self.connector_id)
|
||||
self.service = build("drive", "v3", credentials=credentials)
|
||||
return self.service
|
||||
except Exception as e:
|
||||
raise Exception(f"Failed to create Google Drive service: {e!s}") from e
|
||||
|
||||
async def list_files(
|
||||
self,
|
||||
query: str = "",
|
||||
fields: str = "nextPageToken, files(id, name, mimeType, modifiedTime, size, webViewLink, parents, owners, createdTime, description)",
|
||||
page_size: int = 100,
|
||||
page_token: str | None = None,
|
||||
) -> tuple[list[dict[str, Any]], str | None, str | None]:
|
||||
"""
|
||||
List files from Google Drive with pagination.
|
||||
|
||||
Args:
|
||||
query: Search query (e.g., "mimeType != 'application/vnd.google-apps.folder'")
|
||||
fields: Fields to retrieve
|
||||
page_size: Number of files per page (max 1000)
|
||||
page_token: Token for next page
|
||||
|
||||
Returns:
|
||||
Tuple of (files list, next_page_token, error message)
|
||||
"""
|
||||
try:
|
||||
service = await self.get_service()
|
||||
|
||||
params = {
|
||||
"pageSize": min(page_size, 1000),
|
||||
"fields": fields,
|
||||
"supportsAllDrives": True,
|
||||
"includeItemsFromAllDrives": True,
|
||||
}
|
||||
|
||||
if query:
|
||||
params["q"] = query
|
||||
if page_token:
|
||||
params["pageToken"] = page_token
|
||||
|
||||
result = service.files().list(**params).execute()
|
||||
|
||||
files = result.get("files", [])
|
||||
next_token = result.get("nextPageToken")
|
||||
|
||||
return files, next_token, None
|
||||
|
||||
except HttpError as e:
|
||||
error_msg = f"HTTP error listing files: {e.resp.status} - {e.error_details}"
|
||||
return [], None, error_msg
|
||||
except Exception as e:
|
||||
return [], None, f"Error listing files: {e!s}"
|
||||
|
||||
async def get_file_metadata(
|
||||
self, file_id: str, fields: str = "*"
|
||||
) -> tuple[dict[str, Any] | None, str | None]:
|
||||
"""
|
||||
Get metadata for a specific file.
|
||||
|
||||
Args:
|
||||
file_id: ID of the file
|
||||
fields: Fields to retrieve
|
||||
|
||||
Returns:
|
||||
Tuple of (file metadata, error message)
|
||||
"""
|
||||
try:
|
||||
service = await self.get_service()
|
||||
file = service.files().get(fileId=file_id, fields=fields, supportsAllDrives=True).execute()
|
||||
return file, None
|
||||
except HttpError as e:
|
||||
return None, f"HTTP error getting file metadata: {e.resp.status}"
|
||||
except Exception as e:
|
||||
return None, f"Error getting file metadata: {e!s}"
|
||||
|
||||
async def download_file(
|
||||
self, file_id: str
|
||||
) -> tuple[bytes | None, str | None]:
|
||||
"""
|
||||
Download binary file content.
|
||||
|
||||
Args:
|
||||
file_id: ID of the file to download
|
||||
|
||||
Returns:
|
||||
Tuple of (file content bytes, error message)
|
||||
"""
|
||||
try:
|
||||
service = await self.get_service()
|
||||
request = service.files().get_media(fileId=file_id)
|
||||
|
||||
import io
|
||||
|
||||
fh = io.BytesIO()
|
||||
from googleapiclient.http import MediaIoBaseDownload
|
||||
|
||||
downloader = MediaIoBaseDownload(fh, request)
|
||||
|
||||
done = False
|
||||
while not done:
|
||||
_, done = downloader.next_chunk()
|
||||
|
||||
return fh.getvalue(), None
|
||||
|
||||
except HttpError as e:
|
||||
return None, f"HTTP error downloading file: {e.resp.status}"
|
||||
except Exception as e:
|
||||
return None, f"Error downloading file: {e!s}"
|
||||
|
||||
async def export_google_file(
|
||||
self, file_id: str, mime_type: str
|
||||
) -> tuple[bytes | None, str | None]:
|
||||
"""
|
||||
Export Google Workspace file to specified format.
|
||||
|
||||
Args:
|
||||
file_id: ID of the Google file
|
||||
mime_type: Target MIME type (e.g., 'application/pdf', 'text/plain')
|
||||
|
||||
Returns:
|
||||
Tuple of (exported content as bytes, error message)
|
||||
"""
|
||||
try:
|
||||
service = await self.get_service()
|
||||
content = (
|
||||
service.files()
|
||||
.export(fileId=file_id, mimeType=mime_type)
|
||||
.execute()
|
||||
)
|
||||
|
||||
# Content is already bytes from the API
|
||||
# Keep as bytes to support both text and binary formats (like PDF)
|
||||
if not isinstance(content, bytes):
|
||||
content = content.encode("utf-8")
|
||||
|
||||
return content, None
|
||||
|
||||
except HttpError as e:
|
||||
return None, f"HTTP error exporting file: {e.resp.status}"
|
||||
except Exception as e:
|
||||
return None, f"Error exporting file: {e!s}"
|
||||
|
||||
|
|
@ -0,0 +1,136 @@
|
|||
"""Content extraction for Google Drive files."""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.db import Log
|
||||
from app.services.task_logging_service import TaskLoggingService
|
||||
|
||||
from .client import GoogleDriveClient
|
||||
from .file_types import get_export_mime_type, is_google_workspace_file, should_skip_file
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def download_and_process_file(
|
||||
client: GoogleDriveClient,
|
||||
file: dict[str, Any],
|
||||
search_space_id: int,
|
||||
user_id: str,
|
||||
session: AsyncSession,
|
||||
task_logger: TaskLoggingService,
|
||||
log_entry: Log,
|
||||
) -> tuple[Any, str | None, dict[str, Any] | None]:
|
||||
"""
|
||||
Download Google Drive file and process using Surfsense file processors.
|
||||
|
||||
Args:
|
||||
client: GoogleDriveClient instance
|
||||
file: File metadata from Drive API
|
||||
search_space_id: ID of the search space
|
||||
user_id: ID of the user
|
||||
session: Database session
|
||||
task_logger: Task logging service
|
||||
log_entry: Log entry for tracking
|
||||
|
||||
Returns:
|
||||
Tuple of (Document object if successful, error message if failed, file metadata dict)
|
||||
"""
|
||||
file_id = file.get("id")
|
||||
file_name = file.get("name", "Unknown")
|
||||
mime_type = file.get("mimeType", "")
|
||||
|
||||
# Skip folders and shortcuts
|
||||
if should_skip_file(mime_type):
|
||||
return None, f"Skipping {mime_type}", None
|
||||
|
||||
logger.info(f"Downloading file: {file_name} ({mime_type})")
|
||||
|
||||
temp_file_path = None
|
||||
try:
|
||||
# Step 1: Download or export the file
|
||||
if is_google_workspace_file(mime_type):
|
||||
# Google Workspace files need export (as PDF to preserve formatting & images)
|
||||
export_mime = get_export_mime_type(mime_type)
|
||||
if not export_mime:
|
||||
return None, f"Cannot export Google Workspace type: {mime_type}"
|
||||
|
||||
logger.info(f"Exporting Google Workspace file as {export_mime}")
|
||||
content_bytes, error = await client.export_google_file(file_id, export_mime)
|
||||
if error:
|
||||
return None, error
|
||||
|
||||
extension = ".pdf" if export_mime == "application/pdf" else ".txt"
|
||||
else:
|
||||
content_bytes, error = await client.download_file(file_id)
|
||||
if error:
|
||||
return None, error
|
||||
|
||||
# Preserve original file extension
|
||||
extension = Path(file_name).suffix or ".bin"
|
||||
|
||||
with tempfile.NamedTemporaryFile(delete=False, suffix=extension) as tmp_file:
|
||||
tmp_file.write(content_bytes)
|
||||
temp_file_path = tmp_file.name
|
||||
|
||||
from app.tasks.document_processors.file_processors import (
|
||||
process_file_in_background,
|
||||
)
|
||||
from app.db import DocumentType
|
||||
|
||||
connector_info = {
|
||||
"type": DocumentType.GOOGLE_DRIVE_FILE,
|
||||
"metadata": {
|
||||
"google_drive_file_id": file_id,
|
||||
"google_drive_file_name": file_name,
|
||||
"google_drive_mime_type": mime_type,
|
||||
"source_connector": "google_drive",
|
||||
},
|
||||
}
|
||||
|
||||
# Add additional Drive metadata if available
|
||||
if "modifiedTime" in file:
|
||||
connector_info["metadata"]["modified_time"] = file["modifiedTime"]
|
||||
if "createdTime" in file:
|
||||
connector_info["metadata"]["created_time"] = file["createdTime"]
|
||||
if "size" in file:
|
||||
connector_info["metadata"]["file_size"] = file["size"]
|
||||
if "webViewLink" in file:
|
||||
connector_info["metadata"]["web_view_link"] = file["webViewLink"]
|
||||
|
||||
if is_google_workspace_file(mime_type):
|
||||
connector_info["metadata"]["exported_as"] = "pdf"
|
||||
connector_info["metadata"]["original_workspace_type"] = mime_type.split(".")[-1]
|
||||
|
||||
logger.info(f"Processing {file_name} with Surfsense's file processor")
|
||||
await process_file_in_background(
|
||||
file_path=temp_file_path,
|
||||
filename=file_name,
|
||||
search_space_id=search_space_id,
|
||||
user_id=user_id,
|
||||
session=session,
|
||||
task_logger=task_logger,
|
||||
log_entry=log_entry,
|
||||
connector=connector_info,
|
||||
)
|
||||
|
||||
return None, None, connector_info["metadata"]
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to process {file_name}: {e!s}")
|
||||
return None, str(e), None
|
||||
|
||||
finally:
|
||||
# Cleanup temp file (if process_file_in_background didn't already delete it)
|
||||
if temp_file_path and os.path.exists(temp_file_path):
|
||||
try:
|
||||
os.unlink(temp_file_path)
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not delete temp file {temp_file_path}: {e}")
|
||||
|
||||
|
||||
98
surfsense_backend/app/connectors/google_drive/credentials.py
Normal file
98
surfsense_backend/app/connectors/google_drive/credentials.py
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
"""Google Drive OAuth credential management."""
|
||||
|
||||
import json
|
||||
from datetime import datetime
|
||||
|
||||
from google.auth.transport.requests import Request
|
||||
from google.oauth2.credentials import Credentials
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.future import select
|
||||
from sqlalchemy.orm.attributes import flag_modified
|
||||
|
||||
from app.db import SearchSourceConnector, SearchSourceConnectorType
|
||||
|
||||
|
||||
async def get_valid_credentials(
|
||||
session: AsyncSession,
|
||||
connector_id: int,
|
||||
) -> Credentials:
|
||||
"""
|
||||
Get valid Google OAuth credentials, refreshing if needed.
|
||||
|
||||
Args:
|
||||
session: Database session
|
||||
connector_id: Connector ID
|
||||
|
||||
Returns:
|
||||
Valid Google OAuth credentials
|
||||
|
||||
Raises:
|
||||
ValueError: If credentials are missing or invalid
|
||||
Exception: If token refresh fails
|
||||
"""
|
||||
result = await session.execute(
|
||||
select(SearchSourceConnector).filter(
|
||||
SearchSourceConnector.id == connector_id
|
||||
)
|
||||
)
|
||||
connector = result.scalars().first()
|
||||
|
||||
if not connector:
|
||||
raise ValueError(f"Connector {connector_id} not found")
|
||||
|
||||
config_data = connector.config
|
||||
exp = config_data.get("expiry", "").replace("Z", "")
|
||||
|
||||
if not all(
|
||||
[
|
||||
config_data.get("client_id"),
|
||||
config_data.get("client_secret"),
|
||||
config_data.get("refresh_token"),
|
||||
]
|
||||
):
|
||||
raise ValueError(
|
||||
"Google OAuth credentials (client_id, client_secret, refresh_token) must be set"
|
||||
)
|
||||
|
||||
credentials = Credentials(
|
||||
token=config_data.get("token"),
|
||||
refresh_token=config_data.get("refresh_token"),
|
||||
token_uri=config_data.get("token_uri"),
|
||||
client_id=config_data.get("client_id"),
|
||||
client_secret=config_data.get("client_secret"),
|
||||
scopes=config_data.get("scopes", []),
|
||||
expiry=datetime.fromisoformat(exp) if exp else None,
|
||||
)
|
||||
|
||||
if credentials.expired or not credentials.valid:
|
||||
try:
|
||||
credentials.refresh(Request())
|
||||
|
||||
connector.config = json.loads(credentials.to_json())
|
||||
flag_modified(connector, "config")
|
||||
await session.commit()
|
||||
|
||||
except Exception as e:
|
||||
raise Exception(f"Failed to refresh Google OAuth credentials: {e!s}") from e
|
||||
|
||||
return credentials
|
||||
|
||||
|
||||
def validate_credentials(credentials: Credentials) -> bool:
|
||||
"""
|
||||
Validate that credentials have required fields.
|
||||
|
||||
Args:
|
||||
credentials: Google OAuth credentials
|
||||
|
||||
Returns:
|
||||
True if valid, False otherwise
|
||||
"""
|
||||
return all(
|
||||
[
|
||||
credentials.client_id,
|
||||
credentials.client_secret,
|
||||
credentials.refresh_token,
|
||||
]
|
||||
)
|
||||
|
||||
30
surfsense_backend/app/connectors/google_drive/file_types.py
Normal file
30
surfsense_backend/app/connectors/google_drive/file_types.py
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
"""File type handlers for Google Drive."""
|
||||
|
||||
GOOGLE_DOC = "application/vnd.google-apps.document"
|
||||
GOOGLE_SHEET = "application/vnd.google-apps.spreadsheet"
|
||||
GOOGLE_SLIDE = "application/vnd.google-apps.presentation"
|
||||
GOOGLE_FOLDER = "application/vnd.google-apps.folder"
|
||||
GOOGLE_SHORTCUT = "application/vnd.google-apps.shortcut"
|
||||
|
||||
EXPORT_FORMATS = {
|
||||
GOOGLE_DOC: "application/pdf",
|
||||
GOOGLE_SHEET: "application/pdf",
|
||||
GOOGLE_SLIDE: "application/pdf",
|
||||
}
|
||||
|
||||
|
||||
def is_google_workspace_file(mime_type: str) -> bool:
|
||||
"""Check if file is a Google Workspace file that needs export."""
|
||||
return mime_type.startswith("application/vnd.google-apps")
|
||||
|
||||
|
||||
def should_skip_file(mime_type: str) -> bool:
|
||||
"""Check if file should be skipped (folders, shortcuts, etc)."""
|
||||
return mime_type in [GOOGLE_FOLDER, GOOGLE_SHORTCUT]
|
||||
|
||||
|
||||
def get_export_mime_type(mime_type: str) -> str | None:
|
||||
"""Get export MIME type for Google Workspace files."""
|
||||
return EXPORT_FORMATS.get(mime_type)
|
||||
|
||||
|
||||
230
surfsense_backend/app/connectors/google_drive/folder_manager.py
Normal file
230
surfsense_backend/app/connectors/google_drive/folder_manager.py
Normal file
|
|
@ -0,0 +1,230 @@
|
|||
"""Folder management for Google Drive."""
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from .client import GoogleDriveClient
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def list_folders(
|
||||
client: GoogleDriveClient,
|
||||
parent_id: str | None = None,
|
||||
) -> tuple[list[dict[str, Any]], str | None]:
|
||||
"""
|
||||
List folders in Google Drive.
|
||||
|
||||
Args:
|
||||
client: GoogleDriveClient instance
|
||||
parent_id: Parent folder ID (None for root)
|
||||
|
||||
Returns:
|
||||
Tuple of (folders list, error message)
|
||||
"""
|
||||
try:
|
||||
# Build query to get only folders
|
||||
query_parts = ["mimeType = 'application/vnd.google-apps.folder'", "trashed = false"]
|
||||
|
||||
if parent_id:
|
||||
query_parts.append(f"'{parent_id}' in parents")
|
||||
|
||||
query = " and ".join(query_parts)
|
||||
|
||||
folders, _, error = await client.list_files(
|
||||
query=query,
|
||||
fields="files(id, name, parents, createdTime, modifiedTime)",
|
||||
page_size=100,
|
||||
)
|
||||
|
||||
if error:
|
||||
return [], error
|
||||
|
||||
return folders, None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error listing folders: {e!s}", exc_info=True)
|
||||
return [], f"Error listing folders: {e!s}"
|
||||
|
||||
|
||||
async def get_folder_hierarchy(
|
||||
client: GoogleDriveClient,
|
||||
folder_id: str,
|
||||
) -> tuple[list[dict[str, str]], str | None]:
|
||||
"""
|
||||
Get the full path hierarchy for a folder.
|
||||
|
||||
Args:
|
||||
client: GoogleDriveClient instance
|
||||
folder_id: Folder ID to get hierarchy for
|
||||
|
||||
Returns:
|
||||
Tuple of (hierarchy list [{'id': ..., 'name': ...}], error message)
|
||||
"""
|
||||
try:
|
||||
hierarchy = []
|
||||
current_id = folder_id
|
||||
|
||||
# Traverse up to root
|
||||
while current_id:
|
||||
file, error = await client.get_file_metadata(
|
||||
current_id,
|
||||
fields="id, name, parents, mimeType"
|
||||
)
|
||||
|
||||
if error:
|
||||
return [], error
|
||||
|
||||
if not file:
|
||||
break
|
||||
|
||||
hierarchy.insert(0, {"id": file["id"], "name": file["name"]})
|
||||
|
||||
# Get parent
|
||||
parents = file.get("parents", [])
|
||||
current_id = parents[0] if parents else None
|
||||
|
||||
return hierarchy, None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting folder hierarchy: {e!s}", exc_info=True)
|
||||
return [], f"Error getting folder hierarchy: {e!s}"
|
||||
|
||||
|
||||
async def get_files_in_folder(
|
||||
client: GoogleDriveClient,
|
||||
folder_id: str,
|
||||
include_subfolders: bool = True,
|
||||
page_token: str | None = None,
|
||||
) -> tuple[list[dict[str, Any]], str | None, str | None]:
|
||||
"""
|
||||
Get all indexable files in a folder.
|
||||
|
||||
Args:
|
||||
client: GoogleDriveClient instance
|
||||
folder_id: Folder ID to search in
|
||||
include_subfolders: Whether to include subfolders
|
||||
page_token: Pagination token
|
||||
|
||||
Returns:
|
||||
Tuple of (files list, next_page_token, error message)
|
||||
"""
|
||||
try:
|
||||
# Build query
|
||||
query_parts = [
|
||||
f"'{folder_id}' in parents",
|
||||
"trashed = false",
|
||||
"mimeType != 'application/vnd.google-apps.shortcut'", # Skip shortcuts
|
||||
]
|
||||
|
||||
if not include_subfolders:
|
||||
query_parts.append("mimeType != 'application/vnd.google-apps.folder'")
|
||||
|
||||
query = " and ".join(query_parts)
|
||||
|
||||
files, next_token, error = await client.list_files(
|
||||
query=query,
|
||||
page_size=100,
|
||||
page_token=page_token,
|
||||
)
|
||||
|
||||
if error:
|
||||
return [], None, error
|
||||
|
||||
return files, next_token, None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting files in folder: {e!s}", exc_info=True)
|
||||
return [], None, f"Error getting files in folder: {e!s}"
|
||||
|
||||
|
||||
def format_folder_path(hierarchy: list[dict[str, str]]) -> str:
|
||||
"""
|
||||
Format folder hierarchy as a path string.
|
||||
|
||||
Args:
|
||||
hierarchy: List of folder dicts with 'id' and 'name'
|
||||
|
||||
Returns:
|
||||
Formatted path (e.g., "My Drive / Projects / Documents")
|
||||
"""
|
||||
if not hierarchy:
|
||||
return "My Drive"
|
||||
|
||||
folder_names = [folder["name"] for folder in hierarchy]
|
||||
return " / ".join(folder_names)
|
||||
|
||||
|
||||
async def list_folder_contents(
|
||||
client: GoogleDriveClient,
|
||||
parent_id: str | None = None,
|
||||
) -> tuple[list[dict[str, Any]], str | None]:
|
||||
"""
|
||||
List folders and files in a Google Drive folder with pagination support.
|
||||
|
||||
Args:
|
||||
client: GoogleDriveClient instance
|
||||
parent_id: Parent folder ID (None for root)
|
||||
|
||||
Returns:
|
||||
Tuple of (items list with folders and files, error message)
|
||||
"""
|
||||
try:
|
||||
# Build query to get folders and files (exclude shortcuts)
|
||||
query_parts = [
|
||||
"trashed = false",
|
||||
"mimeType != 'application/vnd.google-apps.shortcut'",
|
||||
]
|
||||
|
||||
# For root, we need to explicitly query for items in 'root'
|
||||
# For subfolders, query for items with that parent
|
||||
if parent_id:
|
||||
query_parts.append(f"'{parent_id}' in parents")
|
||||
else:
|
||||
# Query for root-level items
|
||||
query_parts.append("'root' in parents")
|
||||
|
||||
query = " and ".join(query_parts)
|
||||
|
||||
# Fetch all items with pagination (max 1000 per page)
|
||||
all_items = []
|
||||
page_token = None
|
||||
|
||||
while True:
|
||||
items, next_token, error = await client.list_files(
|
||||
query=query,
|
||||
fields="files(id, name, mimeType, parents, createdTime, modifiedTime, size, webViewLink, iconLink)",
|
||||
page_size=1000, # Max allowed by Google Drive API
|
||||
page_token=page_token,
|
||||
)
|
||||
|
||||
if error:
|
||||
return [], error
|
||||
|
||||
all_items.extend(items)
|
||||
|
||||
if not next_token:
|
||||
break
|
||||
|
||||
page_token = next_token
|
||||
|
||||
for item in all_items:
|
||||
item["isFolder"] = item["mimeType"] == "application/vnd.google-apps.folder"
|
||||
|
||||
all_items.sort(key=lambda x: (not x["isFolder"], x["name"].lower()))
|
||||
|
||||
folder_count = sum(1 for item in all_items if item["isFolder"])
|
||||
file_count = len(all_items) - folder_count
|
||||
|
||||
logger.info(
|
||||
f"Listed {len(all_items)} items ({folder_count} folders, {file_count} files) "
|
||||
+ (f"in folder {parent_id}" if parent_id else "in root (My Drive)")
|
||||
)
|
||||
|
||||
return all_items, None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error listing folder contents: {e!s}", exc_info=True)
|
||||
return [], f"Error listing folder contents: {e!s}"
|
||||
|
||||
|
||||
|
|
@ -46,6 +46,7 @@ class DocumentType(str, Enum):
|
|||
CLICKUP_CONNECTOR = "CLICKUP_CONNECTOR"
|
||||
GOOGLE_CALENDAR_CONNECTOR = "GOOGLE_CALENDAR_CONNECTOR"
|
||||
GOOGLE_GMAIL_CONNECTOR = "GOOGLE_GMAIL_CONNECTOR"
|
||||
GOOGLE_DRIVE_FILE = "GOOGLE_DRIVE_FILE"
|
||||
AIRTABLE_CONNECTOR = "AIRTABLE_CONNECTOR"
|
||||
LUMA_CONNECTOR = "LUMA_CONNECTOR"
|
||||
ELASTICSEARCH_CONNECTOR = "ELASTICSEARCH_CONNECTOR"
|
||||
|
|
@ -69,6 +70,7 @@ class SearchSourceConnectorType(str, Enum):
|
|||
CLICKUP_CONNECTOR = "CLICKUP_CONNECTOR"
|
||||
GOOGLE_CALENDAR_CONNECTOR = "GOOGLE_CALENDAR_CONNECTOR"
|
||||
GOOGLE_GMAIL_CONNECTOR = "GOOGLE_GMAIL_CONNECTOR"
|
||||
GOOGLE_DRIVE_CONNECTOR = "GOOGLE_DRIVE_CONNECTOR"
|
||||
AIRTABLE_CONNECTOR = "AIRTABLE_CONNECTOR"
|
||||
LUMA_CONNECTOR = "LUMA_CONNECTOR"
|
||||
ELASTICSEARCH_CONNECTOR = "ELASTICSEARCH_CONNECTOR"
|
||||
|
|
|
|||
|
|
@ -11,6 +11,9 @@ from .google_calendar_add_connector_route import (
|
|||
from .google_gmail_add_connector_route import (
|
||||
router as google_gmail_add_connector_router,
|
||||
)
|
||||
from .google_drive_add_connector_route import (
|
||||
router as google_drive_add_connector_router,
|
||||
)
|
||||
from .logs_routes import router as logs_router
|
||||
from .luma_add_connector_route import router as luma_add_connector_router
|
||||
from .new_chat_routes import router as new_chat_router
|
||||
|
|
@ -33,6 +36,7 @@ router.include_router(podcasts_router) # Podcast task status and audio
|
|||
router.include_router(search_source_connectors_router)
|
||||
router.include_router(google_calendar_add_connector_router)
|
||||
router.include_router(google_gmail_add_connector_router)
|
||||
router.include_router(google_drive_add_connector_router)
|
||||
router.include_router(airtable_add_connector_router)
|
||||
router.include_router(luma_add_connector_router)
|
||||
router.include_router(new_llm_config_router) # LLM configs with prompt configuration
|
||||
|
|
|
|||
315
surfsense_backend/app/routes/google_drive_add_connector_route.py
Normal file
315
surfsense_backend/app/routes/google_drive_add_connector_route.py
Normal file
|
|
@ -0,0 +1,315 @@
|
|||
"""
|
||||
Google Drive Connector OAuth Routes.
|
||||
|
||||
Handles OAuth 2.0 authentication flow for Google Drive connector.
|
||||
Folder selection happens at index time on the manage connector page.
|
||||
|
||||
Endpoints:
|
||||
- GET /auth/google/drive/connector/add - Initiate OAuth
|
||||
- GET /auth/google/drive/connector/callback - Handle OAuth callback
|
||||
- GET /connectors/{connector_id}/google-drive/folders - List user's folders (for index-time selection)
|
||||
"""
|
||||
|
||||
import base64
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from uuid import UUID
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request
|
||||
from fastapi.responses import RedirectResponse
|
||||
from google_auth_oauthlib.flow import Flow
|
||||
from pydantic import ValidationError
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.future import select
|
||||
|
||||
from app.config import config
|
||||
from app.connectors.google_drive import (
|
||||
GoogleDriveClient,
|
||||
get_start_page_token,
|
||||
get_valid_credentials,
|
||||
list_folder_contents,
|
||||
)
|
||||
from app.connectors.google_drive.folder_manager import list_folders
|
||||
from app.db import (
|
||||
SearchSourceConnector,
|
||||
SearchSourceConnectorType,
|
||||
User,
|
||||
get_async_session,
|
||||
)
|
||||
from app.users import current_active_user
|
||||
|
||||
# Relax token scope validation for Google OAuth
|
||||
os.environ["OAUTHLIB_RELAX_TOKEN_SCOPE"] = "1"
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter()
|
||||
|
||||
# Google Drive OAuth scopes
|
||||
SCOPES = [
|
||||
"https://www.googleapis.com/auth/drive.readonly", # Read-only access to Drive
|
||||
"https://www.googleapis.com/auth/userinfo.email", # User email
|
||||
"https://www.googleapis.com/auth/userinfo.profile", # User profile
|
||||
"openid",
|
||||
]
|
||||
|
||||
|
||||
def get_google_flow():
|
||||
"""Create and return a Google OAuth flow for Drive API."""
|
||||
try:
|
||||
return Flow.from_client_config(
|
||||
{
|
||||
"web": {
|
||||
"client_id": config.GOOGLE_OAUTH_CLIENT_ID,
|
||||
"client_secret": config.GOOGLE_OAUTH_CLIENT_SECRET,
|
||||
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
||||
"token_uri": "https://oauth2.googleapis.com/token",
|
||||
"redirect_uris": [config.GOOGLE_DRIVE_REDIRECT_URI],
|
||||
}
|
||||
},
|
||||
scopes=SCOPES,
|
||||
redirect_uri=config.GOOGLE_DRIVE_REDIRECT_URI,
|
||||
)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to create Google OAuth flow: {e!s}"
|
||||
) from e
|
||||
|
||||
|
||||
@router.get("/auth/google/drive/connector/add")
|
||||
async def connect_drive(space_id: int, user: User = Depends(current_active_user)):
|
||||
"""
|
||||
Initiate Google Drive OAuth flow.
|
||||
|
||||
Query params:
|
||||
space_id: Search space ID to add connector to
|
||||
|
||||
Returns:
|
||||
JSON with auth_url to redirect user to Google authorization
|
||||
"""
|
||||
try:
|
||||
if not space_id:
|
||||
raise HTTPException(status_code=400, detail="space_id is required")
|
||||
|
||||
flow = get_google_flow()
|
||||
|
||||
# Encode space_id and user_id in state parameter
|
||||
state_payload = json.dumps(
|
||||
{
|
||||
"space_id": space_id,
|
||||
"user_id": str(user.id),
|
||||
}
|
||||
)
|
||||
state_encoded = base64.urlsafe_b64encode(state_payload.encode()).decode()
|
||||
|
||||
# Generate authorization URL
|
||||
auth_url, _ = flow.authorization_url(
|
||||
access_type="offline", # Get refresh token
|
||||
prompt="consent", # Force consent screen to get refresh token
|
||||
include_granted_scopes="true",
|
||||
state=state_encoded,
|
||||
)
|
||||
|
||||
logger.info(f"Initiating Google Drive OAuth for user {user.id}, space {space_id}")
|
||||
return {"auth_url": auth_url}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to initiate Google Drive OAuth: {e!s}", exc_info=True)
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to initiate Google OAuth: {e!s}"
|
||||
) from e
|
||||
|
||||
|
||||
@router.get("/auth/google/drive/connector/callback")
|
||||
async def drive_callback(
|
||||
request: Request,
|
||||
code: str,
|
||||
state: str,
|
||||
session: AsyncSession = Depends(get_async_session),
|
||||
):
|
||||
"""
|
||||
Handle Google Drive OAuth callback.
|
||||
|
||||
Query params:
|
||||
code: Authorization code from Google
|
||||
state: Encoded state with space_id and user_id
|
||||
|
||||
Returns:
|
||||
Redirect to frontend success page
|
||||
"""
|
||||
try:
|
||||
# Decode and parse state
|
||||
decoded_state = base64.urlsafe_b64decode(state.encode()).decode()
|
||||
data = json.loads(decoded_state)
|
||||
|
||||
user_id = UUID(data["user_id"])
|
||||
space_id = data["space_id"]
|
||||
|
||||
logger.info(f"Processing Google Drive callback for user {user_id}, space {space_id}")
|
||||
|
||||
# Exchange authorization code for tokens
|
||||
flow = get_google_flow()
|
||||
flow.fetch_token(code=code)
|
||||
|
||||
creds = flow.credentials
|
||||
creds_dict = json.loads(creds.to_json())
|
||||
|
||||
# Check if connector already exists for this space/user
|
||||
result = await session.execute(
|
||||
select(SearchSourceConnector).filter(
|
||||
SearchSourceConnector.search_space_id == space_id,
|
||||
SearchSourceConnector.user_id == user_id,
|
||||
SearchSourceConnector.connector_type
|
||||
== SearchSourceConnectorType.GOOGLE_DRIVE_CONNECTOR,
|
||||
)
|
||||
)
|
||||
existing_connector = result.scalars().first()
|
||||
|
||||
if existing_connector:
|
||||
raise HTTPException(
|
||||
status_code=409,
|
||||
detail="A GOOGLE_DRIVE_CONNECTOR already exists in this search space. Each search space can have only one connector of each type per user.",
|
||||
)
|
||||
|
||||
# Create new connector (NO folder selection here - happens at index time)
|
||||
db_connector = SearchSourceConnector(
|
||||
name="Google Drive Connector",
|
||||
connector_type=SearchSourceConnectorType.GOOGLE_DRIVE_CONNECTOR,
|
||||
config={
|
||||
**creds_dict,
|
||||
"start_page_token": None, # Will be set on first index
|
||||
},
|
||||
search_space_id=space_id,
|
||||
user_id=user_id,
|
||||
is_indexable=True,
|
||||
)
|
||||
|
||||
session.add(db_connector)
|
||||
await session.commit()
|
||||
await session.refresh(db_connector)
|
||||
|
||||
# Get initial start page token for delta sync
|
||||
try:
|
||||
drive_client = GoogleDriveClient(session, db_connector.id)
|
||||
start_token, token_error = await get_start_page_token(drive_client)
|
||||
|
||||
if start_token and not token_error:
|
||||
db_connector.config["start_page_token"] = start_token
|
||||
from sqlalchemy.orm.attributes import flag_modified
|
||||
|
||||
flag_modified(db_connector, "config")
|
||||
await session.commit()
|
||||
logger.info(f"Set initial start page token for connector {db_connector.id}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to get initial start page token: {e!s}")
|
||||
|
||||
logger.info(
|
||||
f"Successfully created Google Drive connector {db_connector.id} for user {user_id}"
|
||||
)
|
||||
|
||||
# Redirect to connectors management page (not to folder selection)
|
||||
return RedirectResponse(
|
||||
url=f"{config.NEXT_FRONTEND_URL}/dashboard/{space_id}/connectors?success=google-drive-connected"
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
await session.rollback()
|
||||
raise
|
||||
except ValidationError as e:
|
||||
await session.rollback()
|
||||
logger.error(f"Validation error: {e!s}", exc_info=True)
|
||||
raise HTTPException(
|
||||
status_code=400, detail=f"Invalid connector configuration: {e!s}"
|
||||
) from e
|
||||
except IntegrityError as e:
|
||||
await session.rollback()
|
||||
logger.error(f"Database integrity error: {e!s}", exc_info=True)
|
||||
raise HTTPException(
|
||||
status_code=409,
|
||||
detail="A connector with this configuration already exists.",
|
||||
) from e
|
||||
except Exception as e:
|
||||
await session.rollback()
|
||||
logger.error(f"Unexpected error in Drive callback: {e!s}", exc_info=True)
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to complete Google OAuth: {e!s}"
|
||||
) from e
|
||||
|
||||
|
||||
@router.get("/connectors/{connector_id}/google-drive/folders")
|
||||
async def list_google_drive_folders(
|
||||
connector_id: int,
|
||||
parent_id: str | None = None,
|
||||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""
|
||||
List folders AND files in user's Google Drive with hierarchical support.
|
||||
|
||||
This is called at index time from the manage connector page to display
|
||||
the complete file system (folders and files). Only folders are selectable.
|
||||
|
||||
Args:
|
||||
connector_id: ID of the Google Drive connector
|
||||
parent_id: Optional parent folder ID to list contents (None for root)
|
||||
|
||||
Returns:
|
||||
JSON with list of items: {
|
||||
"items": [
|
||||
{"id": str, "name": str, "mimeType": str, "isFolder": bool, ...},
|
||||
...
|
||||
]
|
||||
}
|
||||
"""
|
||||
try:
|
||||
# Get connector and verify ownership
|
||||
result = await session.execute(
|
||||
select(SearchSourceConnector).filter(
|
||||
SearchSourceConnector.id == connector_id,
|
||||
SearchSourceConnector.user_id == user.id,
|
||||
SearchSourceConnector.connector_type
|
||||
== SearchSourceConnectorType.GOOGLE_DRIVE_CONNECTOR,
|
||||
)
|
||||
)
|
||||
connector = result.scalars().first()
|
||||
|
||||
if not connector:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail="Google Drive connector not found or access denied",
|
||||
)
|
||||
|
||||
# Initialize Drive client (credentials will be loaded on first API call)
|
||||
drive_client = GoogleDriveClient(session, connector_id)
|
||||
|
||||
# List both folders and files (sorted: folders first)
|
||||
items, error = await list_folder_contents(drive_client, parent_id=parent_id)
|
||||
|
||||
if error:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to list folder contents: {error}"
|
||||
)
|
||||
|
||||
# Count folders and files for better logging
|
||||
folder_count = sum(1 for item in items if item.get("isFolder", False))
|
||||
file_count = len(items) - folder_count
|
||||
|
||||
logger.info(
|
||||
f"✅ Listed {len(items)} total items ({folder_count} folders, {file_count} files) for connector {connector_id}"
|
||||
+ (f" in folder {parent_id}" if parent_id else " in ROOT")
|
||||
)
|
||||
|
||||
# Log first few items for debugging
|
||||
if items:
|
||||
logger.info(f"First 3 items: {[item.get('name') for item in items[:3]]}")
|
||||
|
||||
return {"items": items}
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error listing Drive contents: {e!s}", exc_info=True)
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to list Drive contents: {e!s}"
|
||||
) from e
|
||||
|
|
@ -45,6 +45,7 @@ from app.tasks.connector_indexers import (
|
|||
index_github_repos,
|
||||
index_google_calendar_events,
|
||||
index_google_gmail_messages,
|
||||
index_google_drive_files,
|
||||
index_jira_issues,
|
||||
index_linear_issues,
|
||||
index_luma_events,
|
||||
|
|
@ -542,6 +543,14 @@ async def index_connector_content(
|
|||
None,
|
||||
description="End date for indexing (YYYY-MM-DD format). If not provided, uses today's date",
|
||||
),
|
||||
folder_ids: str = Query(
|
||||
None,
|
||||
description="[Google Drive only] Comma-separated folder IDs to index",
|
||||
),
|
||||
folder_names: str = Query(
|
||||
None,
|
||||
description="[Google Drive only] Comma-separated folder names for display purposes",
|
||||
),
|
||||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
|
|
@ -747,6 +756,32 @@ async def index_connector_content(
|
|||
)
|
||||
response_message = "Google Gmail indexing started in the background."
|
||||
|
||||
elif (
|
||||
connector.connector_type == SearchSourceConnectorType.GOOGLE_DRIVE_CONNECTOR
|
||||
):
|
||||
from app.tasks.celery_tasks.connector_tasks import (
|
||||
index_google_drive_files_task,
|
||||
)
|
||||
|
||||
if not folder_ids or not folder_names:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="Google Drive indexing requires folder_ids and folder_names parameters",
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"Triggering Google Drive indexing for connector {connector_id} into search space {search_space_id}, folders: {folder_names}"
|
||||
)
|
||||
# Pass comma-separated strings directly to Celery task
|
||||
index_google_drive_files_task.delay(
|
||||
connector_id,
|
||||
search_space_id,
|
||||
str(user.id),
|
||||
folder_ids, # Pass as comma-separated string
|
||||
folder_names, # Pass as comma-separated string
|
||||
)
|
||||
response_message = "Google Drive indexing started in the background."
|
||||
|
||||
elif connector.connector_type == SearchSourceConnectorType.DISCORD_CONNECTOR:
|
||||
from app.tasks.celery_tasks.connector_tasks import (
|
||||
index_discord_messages_task,
|
||||
|
|
@ -1515,6 +1550,70 @@ async def run_google_gmail_indexing(
|
|||
# Optionally update status in DB to indicate failure
|
||||
|
||||
|
||||
async def run_google_drive_indexing(
|
||||
session: AsyncSession,
|
||||
connector_id: int,
|
||||
search_space_id: int,
|
||||
user_id: str,
|
||||
folder_ids: str, # Comma-separated folder IDs
|
||||
folder_names: str, # Comma-separated folder names
|
||||
):
|
||||
"""Runs the Google Drive indexing task for multiple folders and updates the timestamp."""
|
||||
try:
|
||||
from app.tasks.connector_indexers.google_drive_indexer import (
|
||||
index_google_drive_files,
|
||||
)
|
||||
|
||||
# Split comma-separated IDs and names into lists
|
||||
folder_id_list = [fid.strip() for fid in folder_ids.split(",")]
|
||||
folder_name_list = [fname.strip() for fname in folder_names.split(",")]
|
||||
|
||||
total_indexed = 0
|
||||
errors = []
|
||||
|
||||
# Index each folder
|
||||
for folder_id, folder_name in zip(folder_id_list, folder_name_list):
|
||||
try:
|
||||
indexed_count, error_message = await index_google_drive_files(
|
||||
session,
|
||||
connector_id,
|
||||
search_space_id,
|
||||
user_id,
|
||||
folder_id,
|
||||
folder_name,
|
||||
use_delta_sync=True,
|
||||
update_last_indexed=False,
|
||||
)
|
||||
if error_message:
|
||||
errors.append(f"{folder_name}: {error_message}")
|
||||
else:
|
||||
total_indexed += indexed_count
|
||||
except Exception as e:
|
||||
errors.append(f"{folder_name}: {str(e)}")
|
||||
logger.error(
|
||||
f"Error indexing folder {folder_name} ({folder_id}): {e}",
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
if errors:
|
||||
logger.error(
|
||||
f"Google Drive indexing completed with errors for connector {connector_id}: {'; '.join(errors)}"
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
f"Google Drive indexing successful for connector {connector_id}. Indexed {total_indexed} documents from {len(folder_id_list)} folder(s)."
|
||||
)
|
||||
# Update the last indexed timestamp only on full success
|
||||
await update_connector_last_indexed(session, connector_id)
|
||||
await session.commit() # Commit timestamp update
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Critical error in run_google_drive_indexing for connector {connector_id}: {e}",
|
||||
exc_info=True,
|
||||
)
|
||||
# Optionally update status in DB to indicate failure
|
||||
|
||||
|
||||
# Add new helper functions for luma indexing
|
||||
async def run_luma_indexing_with_new_session(
|
||||
connector_id: int,
|
||||
|
|
|
|||
|
|
@ -1808,6 +1808,106 @@ class ConnectorService:
|
|||
|
||||
return result_object, gmail_docs
|
||||
|
||||
async def search_google_drive(
|
||||
self,
|
||||
user_query: str,
|
||||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
start_date: datetime | None = None,
|
||||
end_date: datetime | None = None,
|
||||
) -> tuple:
|
||||
"""
|
||||
Search for Google Drive files and return both the source information and langchain documents.
|
||||
|
||||
Uses combined chunk-level and document-level hybrid search with RRF fusion.
|
||||
|
||||
Args:
|
||||
user_query: The user's query
|
||||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
start_date: Optional start date for filtering documents by updated_at
|
||||
end_date: Optional end date for filtering documents by updated_at
|
||||
|
||||
Returns:
|
||||
tuple: (sources_info, langchain_documents)
|
||||
"""
|
||||
drive_docs = await self._combined_rrf_search(
|
||||
query_text=user_query,
|
||||
search_space_id=search_space_id,
|
||||
document_type="GOOGLE_DRIVE_FILE",
|
||||
top_k=top_k,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
||||
# Early return if no results
|
||||
if not drive_docs:
|
||||
return {
|
||||
"id": 33,
|
||||
"name": "Google Drive Files",
|
||||
"type": "GOOGLE_DRIVE_FILE",
|
||||
"sources": [],
|
||||
}, []
|
||||
|
||||
def _title_fn(doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
|
||||
return (
|
||||
doc_info.get("title")
|
||||
or metadata.get("google_drive_file_name")
|
||||
or metadata.get("FILE_NAME")
|
||||
or "Untitled File"
|
||||
)
|
||||
|
||||
def _url_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
|
||||
file_id = metadata.get("google_drive_file_id", "")
|
||||
return f"https://drive.google.com/file/d/{file_id}/view" if file_id else ""
|
||||
|
||||
def _description_fn(
|
||||
chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any]
|
||||
) -> str:
|
||||
description = self._chunk_preview(chunk.get("content", ""))
|
||||
info_parts = []
|
||||
mime_type = metadata.get("google_drive_mime_type", "")
|
||||
modified_time = metadata.get("modified_time", "")
|
||||
if mime_type:
|
||||
# Simplify mime type for display
|
||||
if "google-apps" in mime_type:
|
||||
file_type = mime_type.split(".")[-1].title()
|
||||
else:
|
||||
file_type = mime_type.split("/")[-1].upper()
|
||||
info_parts.append(f"Type: {file_type}")
|
||||
if modified_time:
|
||||
info_parts.append(f"Modified: {modified_time}")
|
||||
if info_parts:
|
||||
description = (description + " | " + " | ".join(info_parts)).strip(" |")
|
||||
return description
|
||||
|
||||
def _extra_fields_fn(
|
||||
_chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any]
|
||||
) -> dict[str, Any]:
|
||||
return {
|
||||
"google_drive_file_id": metadata.get("google_drive_file_id", ""),
|
||||
"google_drive_mime_type": metadata.get("google_drive_mime_type", ""),
|
||||
"modified_time": metadata.get("modified_time", ""),
|
||||
}
|
||||
|
||||
sources_list = self._build_chunk_sources_from_documents(
|
||||
drive_docs,
|
||||
title_fn=_title_fn,
|
||||
url_fn=_url_fn,
|
||||
description_fn=_description_fn,
|
||||
extra_fields_fn=_extra_fields_fn,
|
||||
)
|
||||
|
||||
# Create result object
|
||||
result_object = {
|
||||
"id": 33, # Assign a unique ID for the Google Drive connector
|
||||
"name": "Google Drive Files",
|
||||
"type": "GOOGLE_DRIVE_FILE",
|
||||
"sources": sources_list,
|
||||
}
|
||||
|
||||
return result_object, drive_docs
|
||||
|
||||
async def search_confluence(
|
||||
self,
|
||||
user_query: str,
|
||||
|
|
|
|||
|
|
@ -473,6 +473,58 @@ async def _index_google_gmail_messages(
|
|||
)
|
||||
|
||||
|
||||
@celery_app.task(name="index_google_drive_files", bind=True)
|
||||
def index_google_drive_files_task(
|
||||
self,
|
||||
connector_id: int,
|
||||
search_space_id: int,
|
||||
user_id: str,
|
||||
folder_ids: str, # Comma-separated folder IDs
|
||||
folder_names: str, # Comma-separated folder names
|
||||
):
|
||||
"""Celery task to index Google Drive files from multiple folders."""
|
||||
import asyncio
|
||||
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
|
||||
try:
|
||||
loop.run_until_complete(
|
||||
_index_google_drive_files(
|
||||
connector_id,
|
||||
search_space_id,
|
||||
user_id,
|
||||
folder_ids,
|
||||
folder_names,
|
||||
)
|
||||
)
|
||||
finally:
|
||||
loop.close()
|
||||
|
||||
|
||||
async def _index_google_drive_files(
|
||||
connector_id: int,
|
||||
search_space_id: int,
|
||||
user_id: str,
|
||||
folder_ids: str, # Comma-separated folder IDs
|
||||
folder_names: str, # Comma-separated folder names
|
||||
):
|
||||
"""Index Google Drive files from multiple folders with new session."""
|
||||
from app.routes.search_source_connectors_routes import (
|
||||
run_google_drive_indexing,
|
||||
)
|
||||
|
||||
async with get_celery_session_maker()() as session:
|
||||
await run_google_drive_indexing(
|
||||
session,
|
||||
connector_id,
|
||||
search_space_id,
|
||||
user_id,
|
||||
folder_ids,
|
||||
folder_names,
|
||||
)
|
||||
|
||||
|
||||
@celery_app.task(name="index_discord_messages", bind=True)
|
||||
def index_discord_messages_task(
|
||||
self,
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ from .elasticsearch_indexer import index_elasticsearch_documents
|
|||
from .github_indexer import index_github_repos
|
||||
from .google_calendar_indexer import index_google_calendar_events
|
||||
from .google_gmail_indexer import index_google_gmail_messages
|
||||
from .google_drive_indexer import index_google_drive_files
|
||||
from .jira_indexer import index_jira_issues
|
||||
|
||||
# Issue tracking and project management
|
||||
|
|
@ -57,6 +58,7 @@ __all__ = [ # noqa: RUF022
|
|||
"index_github_repos",
|
||||
# Calendar and scheduling
|
||||
"index_google_calendar_events",
|
||||
"index_google_drive_files",
|
||||
"index_luma_events",
|
||||
"index_jira_issues",
|
||||
# Issue tracking and project management
|
||||
|
|
|
|||
|
|
@ -0,0 +1,410 @@
|
|||
"""Google Drive indexer using Surfsense file processors."""
|
||||
|
||||
import logging
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.connectors.google_drive import (
|
||||
GoogleDriveClient,
|
||||
categorize_change,
|
||||
download_and_process_file,
|
||||
fetch_all_changes,
|
||||
get_files_in_folder,
|
||||
get_start_page_token,
|
||||
)
|
||||
from app.db import DocumentType, SearchSourceConnectorType
|
||||
from app.services.task_logging_service import TaskLoggingService
|
||||
from app.tasks.connector_indexers.base import (
|
||||
check_document_by_unique_identifier,
|
||||
get_connector_by_id,
|
||||
update_connector_last_indexed,
|
||||
)
|
||||
from app.utils.document_converters import generate_unique_identifier_hash
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def index_google_drive_files(
|
||||
session: AsyncSession,
|
||||
connector_id: int,
|
||||
search_space_id: int,
|
||||
user_id: str,
|
||||
folder_id: str | None = None,
|
||||
folder_name: str | None = None,
|
||||
use_delta_sync: bool = True,
|
||||
update_last_indexed: bool = True,
|
||||
max_files: int = 500,
|
||||
) -> tuple[int, str | None]:
|
||||
"""
|
||||
Index Google Drive files for a specific connector.
|
||||
|
||||
Args:
|
||||
session: Database session
|
||||
connector_id: ID of the Drive connector
|
||||
search_space_id: ID of the search space
|
||||
user_id: ID of the user
|
||||
folder_id: Specific folder to index (from UI/request, takes precedence)
|
||||
folder_name: Folder name for display (from UI/request)
|
||||
use_delta_sync: Whether to use change tracking for incremental sync
|
||||
update_last_indexed: Whether to update last_indexed_at timestamp
|
||||
max_files: Maximum number of files to index
|
||||
|
||||
Returns:
|
||||
Tuple of (number_of_indexed_files, error_message)
|
||||
"""
|
||||
task_logger = TaskLoggingService(session, search_space_id)
|
||||
|
||||
log_entry = await task_logger.log_task_start(
|
||||
task_name="google_drive_files_indexing",
|
||||
source="connector_indexing_task",
|
||||
message=f"Starting Google Drive indexing for connector {connector_id}",
|
||||
metadata={
|
||||
"connector_id": connector_id,
|
||||
"user_id": str(user_id),
|
||||
"folder_id": folder_id,
|
||||
"use_delta_sync": use_delta_sync,
|
||||
"max_files": max_files,
|
||||
},
|
||||
)
|
||||
|
||||
try:
|
||||
connector = await get_connector_by_id(
|
||||
session, connector_id, SearchSourceConnectorType.GOOGLE_DRIVE_CONNECTOR
|
||||
)
|
||||
|
||||
if not connector:
|
||||
error_msg = f"Google Drive connector with ID {connector_id} not found"
|
||||
await task_logger.log_task_failure(
|
||||
log_entry, error_msg, {"error_type": "ConnectorNotFound"}
|
||||
)
|
||||
return 0, error_msg
|
||||
|
||||
await task_logger.log_task_progress(
|
||||
log_entry,
|
||||
f"Initializing Google Drive client for connector {connector_id}",
|
||||
{"stage": "client_initialization"},
|
||||
)
|
||||
|
||||
drive_client = GoogleDriveClient(session, connector_id)
|
||||
|
||||
if not folder_id:
|
||||
error_msg = "folder_id is required for Google Drive indexing"
|
||||
await task_logger.log_task_failure(
|
||||
log_entry, error_msg, {"error_type": "MissingParameter"}
|
||||
)
|
||||
return 0, error_msg
|
||||
|
||||
target_folder_id = folder_id
|
||||
target_folder_name = folder_name or "Selected Folder"
|
||||
|
||||
logger.info(f"Indexing Google Drive folder: {target_folder_name} ({target_folder_id})")
|
||||
|
||||
folder_tokens = connector.config.get("folder_tokens", {})
|
||||
start_page_token = folder_tokens.get(target_folder_id)
|
||||
can_use_delta_sync = use_delta_sync and start_page_token and connector.last_indexed_at
|
||||
|
||||
if can_use_delta_sync:
|
||||
logger.info(f"Using delta sync for connector {connector_id}")
|
||||
result = await _index_with_delta_sync(
|
||||
drive_client=drive_client,
|
||||
session=session,
|
||||
connector=connector,
|
||||
connector_id=connector_id,
|
||||
search_space_id=search_space_id,
|
||||
user_id=user_id,
|
||||
folder_id=target_folder_id,
|
||||
start_page_token=start_page_token,
|
||||
task_logger=task_logger,
|
||||
log_entry=log_entry,
|
||||
max_files=max_files,
|
||||
)
|
||||
else:
|
||||
logger.info(f"Using full scan for connector {connector_id}")
|
||||
result = await _index_full_scan(
|
||||
drive_client=drive_client,
|
||||
session=session,
|
||||
connector=connector,
|
||||
connector_id=connector_id,
|
||||
search_space_id=search_space_id,
|
||||
user_id=user_id,
|
||||
folder_id=target_folder_id,
|
||||
folder_name=target_folder_name,
|
||||
task_logger=task_logger,
|
||||
log_entry=log_entry,
|
||||
max_files=max_files,
|
||||
)
|
||||
|
||||
documents_indexed, documents_skipped = result
|
||||
|
||||
if documents_indexed > 0 or can_use_delta_sync:
|
||||
new_token, token_error = await get_start_page_token(drive_client)
|
||||
if new_token and not token_error:
|
||||
from sqlalchemy.orm.attributes import flag_modified
|
||||
|
||||
if "folder_tokens" not in connector.config:
|
||||
connector.config["folder_tokens"] = {}
|
||||
connector.config["folder_tokens"][target_folder_id] = new_token
|
||||
flag_modified(connector, "config")
|
||||
|
||||
await update_connector_last_indexed(session, connector, update_last_indexed)
|
||||
|
||||
await session.commit()
|
||||
logger.info(
|
||||
f"Successfully committed Google Drive indexing changes to database"
|
||||
)
|
||||
|
||||
await task_logger.log_task_success(
|
||||
log_entry,
|
||||
f"Successfully completed Google Drive indexing for connector {connector_id}",
|
||||
{
|
||||
"files_processed": documents_indexed,
|
||||
"files_skipped": documents_skipped,
|
||||
"sync_type": "delta" if can_use_delta_sync else "full",
|
||||
"folder": target_folder_name,
|
||||
},
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"Google Drive indexing completed: {documents_indexed} files indexed, {documents_skipped} skipped"
|
||||
)
|
||||
return documents_indexed, None
|
||||
|
||||
except SQLAlchemyError as db_error:
|
||||
await session.rollback()
|
||||
await task_logger.log_task_failure(
|
||||
log_entry,
|
||||
f"Database error during Google Drive indexing for connector {connector_id}",
|
||||
str(db_error),
|
||||
{"error_type": "SQLAlchemyError"},
|
||||
)
|
||||
logger.error(f"Database error: {db_error!s}", exc_info=True)
|
||||
return 0, f"Database error: {db_error!s}"
|
||||
except Exception as e:
|
||||
await session.rollback()
|
||||
await task_logger.log_task_failure(
|
||||
log_entry,
|
||||
f"Failed to index Google Drive files for connector {connector_id}",
|
||||
str(e),
|
||||
{"error_type": type(e).__name__},
|
||||
)
|
||||
logger.error(f"Failed to index Google Drive files: {e!s}", exc_info=True)
|
||||
return 0, f"Failed to index Google Drive files: {e!s}"
|
||||
|
||||
|
||||
async def _index_full_scan(
|
||||
drive_client: GoogleDriveClient,
|
||||
session: AsyncSession,
|
||||
connector: any,
|
||||
connector_id: int,
|
||||
search_space_id: int,
|
||||
user_id: str,
|
||||
folder_id: str | None,
|
||||
folder_name: str,
|
||||
task_logger: TaskLoggingService,
|
||||
log_entry: any,
|
||||
max_files: int,
|
||||
) -> tuple[int, int]:
|
||||
"""Perform full scan indexing of a folder."""
|
||||
await task_logger.log_task_progress(
|
||||
log_entry,
|
||||
f"Starting full scan of folder: {folder_name}",
|
||||
{"stage": "full_scan", "folder_id": folder_id},
|
||||
)
|
||||
|
||||
documents_indexed = 0
|
||||
documents_skipped = 0
|
||||
page_token = None
|
||||
files_processed = 0
|
||||
|
||||
while files_processed < max_files:
|
||||
files, next_token, error = await get_files_in_folder(
|
||||
drive_client, folder_id, include_subfolders=False, page_token=page_token
|
||||
)
|
||||
|
||||
if error:
|
||||
logger.error(f"Error listing files: {error}")
|
||||
break
|
||||
|
||||
if not files:
|
||||
break
|
||||
|
||||
for file in files:
|
||||
if files_processed >= max_files:
|
||||
break
|
||||
|
||||
files_processed += 1
|
||||
|
||||
indexed, skipped = await _process_single_file(
|
||||
drive_client=drive_client,
|
||||
session=session,
|
||||
file=file,
|
||||
connector_id=connector_id,
|
||||
search_space_id=search_space_id,
|
||||
user_id=user_id,
|
||||
task_logger=task_logger,
|
||||
log_entry=log_entry,
|
||||
)
|
||||
|
||||
documents_indexed += indexed
|
||||
documents_skipped += skipped
|
||||
|
||||
if documents_indexed % 10 == 0 and documents_indexed > 0:
|
||||
await session.commit()
|
||||
logger.info(f"Committed batch: {documents_indexed} files indexed so far")
|
||||
|
||||
page_token = next_token
|
||||
if not page_token:
|
||||
break
|
||||
|
||||
logger.info(
|
||||
f"Full scan complete: {documents_indexed} indexed, {documents_skipped} skipped"
|
||||
)
|
||||
return documents_indexed, documents_skipped
|
||||
|
||||
|
||||
async def _index_with_delta_sync(
|
||||
drive_client: GoogleDriveClient,
|
||||
session: AsyncSession,
|
||||
connector: any,
|
||||
connector_id: int,
|
||||
search_space_id: int,
|
||||
user_id: str,
|
||||
folder_id: str | None,
|
||||
start_page_token: str,
|
||||
task_logger: TaskLoggingService,
|
||||
log_entry: any,
|
||||
max_files: int,
|
||||
) -> tuple[int, int]:
|
||||
"""Perform delta sync indexing using change tracking."""
|
||||
await task_logger.log_task_progress(
|
||||
log_entry,
|
||||
f"Starting delta sync from token: {start_page_token[:20]}...",
|
||||
{"stage": "delta_sync", "start_token": start_page_token},
|
||||
)
|
||||
|
||||
changes, final_token, error = await fetch_all_changes(
|
||||
drive_client, start_page_token, folder_id
|
||||
)
|
||||
|
||||
if error:
|
||||
logger.error(f"Error fetching changes: {error}")
|
||||
return 0, 0
|
||||
|
||||
if not changes:
|
||||
logger.info("No changes detected since last sync")
|
||||
return 0, 0
|
||||
|
||||
logger.info(f"Processing {len(changes)} changes")
|
||||
|
||||
documents_indexed = 0
|
||||
documents_skipped = 0
|
||||
files_processed = 0
|
||||
|
||||
for change in changes:
|
||||
if files_processed >= max_files:
|
||||
break
|
||||
|
||||
files_processed += 1
|
||||
change_type = categorize_change(change)
|
||||
|
||||
if change_type in ["removed", "trashed"]:
|
||||
file_id = change.get("fileId")
|
||||
if file_id:
|
||||
await _remove_document(session, file_id, search_space_id)
|
||||
continue
|
||||
|
||||
file = change.get("file")
|
||||
if not file:
|
||||
continue
|
||||
|
||||
indexed, skipped = await _process_single_file(
|
||||
drive_client=drive_client,
|
||||
session=session,
|
||||
file=file,
|
||||
connector_id=connector_id,
|
||||
search_space_id=search_space_id,
|
||||
user_id=user_id,
|
||||
task_logger=task_logger,
|
||||
log_entry=log_entry,
|
||||
)
|
||||
|
||||
documents_indexed += indexed
|
||||
documents_skipped += skipped
|
||||
|
||||
if documents_indexed % 10 == 0 and documents_indexed > 0:
|
||||
await session.commit()
|
||||
logger.info(f"Committed batch: {documents_indexed} changes processed")
|
||||
|
||||
logger.info(
|
||||
f"Delta sync complete: {documents_indexed} indexed, {documents_skipped} skipped"
|
||||
)
|
||||
return documents_indexed, documents_skipped
|
||||
|
||||
|
||||
async def _process_single_file(
|
||||
drive_client: GoogleDriveClient,
|
||||
session: AsyncSession,
|
||||
file: dict,
|
||||
connector_id: int,
|
||||
search_space_id: int,
|
||||
user_id: str,
|
||||
task_logger: TaskLoggingService,
|
||||
log_entry: any,
|
||||
) -> tuple[int, int]:
|
||||
"""
|
||||
Process a single file by downloading and using Surfsense's file processor.
|
||||
|
||||
Returns:
|
||||
Tuple of (indexed_count, skipped_count)
|
||||
"""
|
||||
file_name = file.get("name", "Unknown")
|
||||
mime_type = file.get("mimeType", "")
|
||||
|
||||
try:
|
||||
logger.info(f"Processing file: {file_name} ({mime_type})")
|
||||
|
||||
_, error, _ = await download_and_process_file(
|
||||
client=drive_client,
|
||||
file=file,
|
||||
search_space_id=search_space_id,
|
||||
user_id=user_id,
|
||||
session=session,
|
||||
task_logger=task_logger,
|
||||
log_entry=log_entry,
|
||||
)
|
||||
|
||||
if error:
|
||||
await task_logger.log_task_progress(
|
||||
log_entry,
|
||||
f"Skipped {file_name}: {error}",
|
||||
{"status": "skipped", "reason": error},
|
||||
)
|
||||
return 0, 1
|
||||
|
||||
logger.info(f"Successfully indexed Google Drive file: {file_name}")
|
||||
return 1, 0
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing file {file_name}: {e!s}", exc_info=True)
|
||||
return 0, 1
|
||||
|
||||
|
||||
async def _remove_document(
|
||||
session: AsyncSession, file_id: str, search_space_id: int
|
||||
):
|
||||
"""Remove a document that was deleted in Drive."""
|
||||
unique_identifier_hash = generate_unique_identifier_hash(
|
||||
DocumentType.GOOGLE_DRIVE_FILE, file_id, search_space_id
|
||||
)
|
||||
|
||||
existing_document = await check_document_by_unique_identifier(
|
||||
session, unique_identifier_hash
|
||||
)
|
||||
|
||||
if existing_document:
|
||||
await session.delete(existing_document)
|
||||
logger.info(f"Removed deleted file document: {file_id}")
|
||||
|
||||
|
||||
|
|
@ -447,6 +447,24 @@ async def add_received_file_document_using_docling(
|
|||
) from e
|
||||
|
||||
|
||||
async def _update_document_from_connector(
|
||||
document: Document | None, connector: dict | None, session: AsyncSession
|
||||
) -> None:
|
||||
"""Helper to update document type and metadata from connector info."""
|
||||
if document and connector:
|
||||
if "type" in connector:
|
||||
document.document_type = connector["type"]
|
||||
if "metadata" in connector:
|
||||
# Merge with existing document_metadata (the actual column name)
|
||||
if not document.document_metadata:
|
||||
document.document_metadata = connector["metadata"]
|
||||
else:
|
||||
# Expand existing metadata with connector metadata
|
||||
merged = {**document.document_metadata, **connector["metadata"]}
|
||||
document.document_metadata = merged
|
||||
await session.commit()
|
||||
|
||||
|
||||
async def process_file_in_background(
|
||||
file_path: str,
|
||||
filename: str,
|
||||
|
|
@ -455,6 +473,7 @@ async def process_file_in_background(
|
|||
session: AsyncSession,
|
||||
task_logger: TaskLoggingService,
|
||||
log_entry: Log,
|
||||
connector: dict | None = None, # Optional: {"type": "GOOGLE_DRIVE_FILE", "metadata": {...}}
|
||||
):
|
||||
try:
|
||||
# Check if the file is a markdown or text file
|
||||
|
|
@ -492,6 +511,9 @@ async def process_file_in_background(
|
|||
session, filename, markdown_content, search_space_id, user_id
|
||||
)
|
||||
|
||||
if connector:
|
||||
await _update_document_from_connector(result, connector, session)
|
||||
|
||||
if result:
|
||||
await task_logger.log_task_success(
|
||||
log_entry,
|
||||
|
|
@ -608,6 +630,9 @@ async def process_file_in_background(
|
|||
session, filename, transcribed_text, search_space_id, user_id
|
||||
)
|
||||
|
||||
if connector:
|
||||
await _update_document_from_connector(result, connector, session)
|
||||
|
||||
if result:
|
||||
await task_logger.log_task_success(
|
||||
log_entry,
|
||||
|
|
@ -753,6 +778,9 @@ async def process_file_in_background(
|
|||
session, filename, docs, search_space_id, user_id
|
||||
)
|
||||
|
||||
if connector:
|
||||
await _update_document_from_connector(result, connector, session)
|
||||
|
||||
if result:
|
||||
# Update page usage after successful processing
|
||||
# allow_exceed=True because document was already created after passing initial check
|
||||
|
|
@ -897,6 +925,9 @@ async def process_file_in_background(
|
|||
user_id, final_page_count, allow_exceed=True
|
||||
)
|
||||
|
||||
if connector:
|
||||
await _update_document_from_connector(last_created_doc, connector, session)
|
||||
|
||||
await task_logger.log_task_success(
|
||||
log_entry,
|
||||
f"Successfully processed file with LlamaCloud: {filename}",
|
||||
|
|
@ -1021,6 +1052,9 @@ async def process_file_in_background(
|
|||
user_id, final_page_count, allow_exceed=True
|
||||
)
|
||||
|
||||
if connector:
|
||||
await _update_document_from_connector(doc_result, connector, session)
|
||||
|
||||
await task_logger.log_task_success(
|
||||
log_entry,
|
||||
f"Successfully processed file with Docling: {filename}",
|
||||
|
|
|
|||
|
|
@ -6,6 +6,9 @@ import {
|
|||
Calendar as CalendarIcon,
|
||||
Clock,
|
||||
Edit,
|
||||
Folder,
|
||||
HardDrive,
|
||||
Info,
|
||||
Loader2,
|
||||
Plus,
|
||||
RefreshCw,
|
||||
|
|
@ -67,6 +70,7 @@ import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "@/comp
|
|||
import { EnumConnectorName } from "@/contracts/enums/connector";
|
||||
import { getConnectorIcon } from "@/contracts/enums/connectorIcons";
|
||||
import { cn } from "@/lib/utils";
|
||||
import { GoogleDriveFolderTree } from "@/components/connectors/google-drive-folder-tree";
|
||||
|
||||
export default function ConnectorsPage() {
|
||||
const t = useTranslations("connectors");
|
||||
|
|
@ -115,6 +119,10 @@ export default function ConnectorsPage() {
|
|||
const [customFrequency, setCustomFrequency] = useState<string>("");
|
||||
const [isSavingPeriodic, setIsSavingPeriodic] = useState(false);
|
||||
|
||||
// Google Drive folder selection state
|
||||
const [driveFolderDialogOpen, setDriveFolderDialogOpen] = useState(false);
|
||||
const [selectedFolders, setSelectedFolders] = useState<Array<{ id: string; name: string }>>([]);
|
||||
|
||||
useEffect(() => {
|
||||
if (error) {
|
||||
toast.error(t("failed_load"));
|
||||
|
|
@ -137,8 +145,55 @@ export default function ConnectorsPage() {
|
|||
|
||||
// Handle opening date picker for indexing
|
||||
const handleOpenDatePicker = (connectorId: number) => {
|
||||
// Check if this is a Google Drive connector
|
||||
const connector = connectors.find((c) => c.id === connectorId);
|
||||
if (connector?.connector_type === EnumConnectorName.GOOGLE_DRIVE_CONNECTOR) {
|
||||
// Open folder selection dialog for Google Drive
|
||||
handleOpenDriveFolderDialog(connectorId);
|
||||
} else {
|
||||
// Open date picker for other connectors
|
||||
setSelectedConnectorForIndexing(connectorId);
|
||||
setDatePickerOpen(true);
|
||||
}
|
||||
};
|
||||
|
||||
const handleOpenDriveFolderDialog = (connectorId: number) => {
|
||||
setSelectedConnectorForIndexing(connectorId);
|
||||
setDatePickerOpen(true);
|
||||
setDriveFolderDialogOpen(true);
|
||||
};
|
||||
|
||||
// Handle Google Drive folder indexing
|
||||
const handleIndexDriveFolder = async () => {
|
||||
if (selectedConnectorForIndexing === null || selectedFolders.length === 0) {
|
||||
toast.error("Please select at least one folder");
|
||||
return;
|
||||
}
|
||||
|
||||
setDriveFolderDialogOpen(false);
|
||||
|
||||
try {
|
||||
setIndexingConnectorId(selectedConnectorForIndexing);
|
||||
|
||||
const folderIds = selectedFolders.map((f) => f.id).join(",");
|
||||
const folderNames = selectedFolders.map((f) => f.name).join(", ");
|
||||
|
||||
await indexConnector({
|
||||
connector_id: selectedConnectorForIndexing,
|
||||
queryParams: {
|
||||
search_space_id: searchSpaceId,
|
||||
folder_ids: folderIds,
|
||||
folder_names: folderNames,
|
||||
},
|
||||
});
|
||||
toast.success(t("indexing_started"));
|
||||
} catch (error) {
|
||||
console.error("Error indexing connector content:", error);
|
||||
toast.error(error instanceof Error ? error.message : t("indexing_failed"));
|
||||
} finally {
|
||||
setIndexingConnectorId(null);
|
||||
setSelectedConnectorForIndexing(null);
|
||||
setSelectedFolders([]);
|
||||
}
|
||||
};
|
||||
|
||||
// Handle connector indexing with dates
|
||||
|
|
@ -387,39 +442,52 @@ export default function ConnectorsPage() {
|
|||
>
|
||||
{indexingConnectorId === connector.id ? (
|
||||
<RefreshCw className="h-4 w-4 animate-spin" />
|
||||
) : connector.connector_type === EnumConnectorName.GOOGLE_DRIVE_CONNECTOR ? (
|
||||
<Folder className="h-4 w-4" />
|
||||
) : (
|
||||
<CalendarIcon className="h-4 w-4" />
|
||||
)}
|
||||
<span className="sr-only">{t("index_date_range")}</span>
|
||||
<span className="sr-only">
|
||||
{connector.connector_type === EnumConnectorName.GOOGLE_DRIVE_CONNECTOR
|
||||
? "Select folder to index"
|
||||
: t("index_date_range")}
|
||||
</span>
|
||||
</Button>
|
||||
</TooltipTrigger>
|
||||
<TooltipContent>
|
||||
<p>{t("index_date_range")}</p>
|
||||
</TooltipContent>
|
||||
</Tooltip>
|
||||
</TooltipProvider>
|
||||
<TooltipProvider>
|
||||
<Tooltip>
|
||||
<TooltipTrigger asChild>
|
||||
<Button
|
||||
variant="outline"
|
||||
size="sm"
|
||||
onClick={() => handleQuickIndexConnector(connector.id)}
|
||||
disabled={indexingConnectorId === connector.id}
|
||||
>
|
||||
{indexingConnectorId === connector.id ? (
|
||||
<RefreshCw className="h-4 w-4 animate-spin" />
|
||||
) : (
|
||||
<RefreshCw className="h-4 w-4" />
|
||||
)}
|
||||
<span className="sr-only">{t("quick_index")}</span>
|
||||
</Button>
|
||||
</TooltipTrigger>
|
||||
<TooltipContent>
|
||||
<p>{t("quick_index_auto")}</p>
|
||||
<p>
|
||||
{connector.connector_type === EnumConnectorName.GOOGLE_DRIVE_CONNECTOR
|
||||
? "Select folder to index"
|
||||
: t("index_date_range")}
|
||||
</p>
|
||||
</TooltipContent>
|
||||
</Tooltip>
|
||||
</TooltipProvider>
|
||||
{/* Hide quick index button for Google Drive (requires folder selection) */}
|
||||
{connector.connector_type !== EnumConnectorName.GOOGLE_DRIVE_CONNECTOR && (
|
||||
<TooltipProvider>
|
||||
<Tooltip>
|
||||
<TooltipTrigger asChild>
|
||||
<Button
|
||||
variant="outline"
|
||||
size="sm"
|
||||
onClick={() => handleQuickIndexConnector(connector.id)}
|
||||
disabled={indexingConnectorId === connector.id}
|
||||
>
|
||||
{indexingConnectorId === connector.id ? (
|
||||
<RefreshCw className="h-4 w-4 animate-spin" />
|
||||
) : (
|
||||
<RefreshCw className="h-4 w-4" />
|
||||
)}
|
||||
<span className="sr-only">{t("quick_index")}</span>
|
||||
</Button>
|
||||
</TooltipTrigger>
|
||||
<TooltipContent>
|
||||
<p>{t("quick_index_auto")}</p>
|
||||
</TooltipContent>
|
||||
</Tooltip>
|
||||
</TooltipProvider>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
{connector.is_indexable && (
|
||||
|
|
@ -607,6 +675,67 @@ export default function ConnectorsPage() {
|
|||
</DialogContent>
|
||||
</Dialog>
|
||||
|
||||
{/* Google Drive Folder Selection Dialog */}
|
||||
<Dialog open={driveFolderDialogOpen} onOpenChange={setDriveFolderDialogOpen}>
|
||||
<DialogContent className="w-auto max-w-full">
|
||||
<DialogHeader>
|
||||
<DialogTitle>Select Google Drive Folders</DialogTitle>
|
||||
<DialogDescription className="flex items-start gap-2 text-sm p-2 border mt-1 rounded ">
|
||||
<Info className="h-4 w-4 shrink-0 text-blue-500" />
|
||||
<span>
|
||||
Select folders to index. Only files <strong>directly in each folder</strong> will be
|
||||
processed—subfolders must be selected separately.
|
||||
</span>
|
||||
</DialogDescription>
|
||||
</DialogHeader>
|
||||
<div className="grid gap-4 overflow-hidden w-full">
|
||||
<div className="space-y-3 w-full overflow-hidden">
|
||||
<Label>Browse Folders</Label>
|
||||
{selectedConnectorForIndexing && (
|
||||
<GoogleDriveFolderTree
|
||||
connectorId={selectedConnectorForIndexing}
|
||||
selectedFolders={selectedFolders}
|
||||
onSelectFolders={(folders) => {
|
||||
setSelectedFolders(folders);
|
||||
}}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
{selectedFolders.length > 0 && (
|
||||
<div className="p-3 bg-muted rounded-lg text-sm space-y-2">
|
||||
<div>
|
||||
<p className="font-medium mb-1">
|
||||
Selected {selectedFolders.length} folder{selectedFolders.length > 1 ? "s" : ""}:
|
||||
</p>
|
||||
<div className="max-h-24 overflow-y-auto">
|
||||
{selectedFolders.map((folder) => (
|
||||
<p key={folder.id} className="text-sm text-muted-foreground truncate" title={folder.name}>
|
||||
• {folder.name}
|
||||
</p>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
<DialogFooter>
|
||||
<Button
|
||||
variant="outline"
|
||||
onClick={() => {
|
||||
setDriveFolderDialogOpen(false);
|
||||
setSelectedConnectorForIndexing(null);
|
||||
setSelectedFolders([]);
|
||||
}}
|
||||
>
|
||||
{tCommon("cancel")}
|
||||
</Button>
|
||||
<Button onClick={handleIndexDriveFolder} disabled={selectedFolders.length === 0}>
|
||||
{t("start_indexing")}
|
||||
</Button>
|
||||
</DialogFooter>
|
||||
</DialogContent>
|
||||
</Dialog>
|
||||
|
||||
{/* Periodic Indexing Configuration Dialog */}
|
||||
<Dialog open={periodicDialogOpen} onOpenChange={setPeriodicDialogOpen}>
|
||||
<DialogContent className="sm:max-w-[500px]">
|
||||
|
|
|
|||
|
|
@ -0,0 +1,205 @@
|
|||
"use client";
|
||||
|
||||
import { useAtomValue } from "jotai";
|
||||
import { ArrowLeft, Check, ExternalLink, Loader2 } from "lucide-react";
|
||||
import { motion } from "motion/react";
|
||||
import Link from "next/link";
|
||||
import { useParams, useRouter } from "next/navigation";
|
||||
import { useEffect, useState } from "react";
|
||||
import { toast } from "sonner";
|
||||
import { connectorsAtom } from "@/atoms/connectors/connector-query.atoms";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import {
|
||||
Card,
|
||||
CardContent,
|
||||
CardDescription,
|
||||
CardFooter,
|
||||
CardHeader,
|
||||
CardTitle,
|
||||
} from "@/components/ui/card";
|
||||
import { EnumConnectorName } from "@/contracts/enums/connector";
|
||||
import { getConnectorIcon } from "@/contracts/enums/connectorIcons";
|
||||
import type { SearchSourceConnector } from "@/contracts/types/connector.types";
|
||||
import { authenticatedFetch } from "@/lib/auth-utils";
|
||||
|
||||
export default function GoogleDriveConnectorPage() {
|
||||
const router = useRouter();
|
||||
const params = useParams();
|
||||
const searchSpaceId = params.search_space_id as string;
|
||||
|
||||
const [isConnecting, setIsConnecting] = useState(false);
|
||||
const [doesConnectorExist, setDoesConnectorExist] = useState(false);
|
||||
|
||||
const { refetch: fetchConnectors } = useAtomValue(connectorsAtom);
|
||||
|
||||
useEffect(() => {
|
||||
fetchConnectors().then((data) => {
|
||||
const connectors = data.data || [];
|
||||
const connector = connectors.find(
|
||||
(c: SearchSourceConnector) => c.connector_type === EnumConnectorName.GOOGLE_DRIVE_CONNECTOR
|
||||
);
|
||||
if (connector) {
|
||||
setDoesConnectorExist(true);
|
||||
}
|
||||
});
|
||||
}, []);
|
||||
|
||||
const handleConnectGoogle = async () => {
|
||||
try {
|
||||
setIsConnecting(true);
|
||||
const response = await authenticatedFetch(
|
||||
`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/auth/google/drive/connector/add/?space_id=${searchSpaceId}`,
|
||||
{ method: "GET" }
|
||||
);
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error("Failed to initiate Google OAuth");
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
window.location.href = data.auth_url;
|
||||
} catch (error) {
|
||||
console.error("Error connecting to Google:", error);
|
||||
toast.error("Failed to connect to Google Drive");
|
||||
} finally {
|
||||
setIsConnecting(false);
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="container mx-auto py-8 max-w-2xl">
|
||||
<motion.div
|
||||
initial={{ opacity: 0, y: 20 }}
|
||||
animate={{ opacity: 1, y: 0 }}
|
||||
transition={{ duration: 0.5 }}
|
||||
>
|
||||
{/* Header */}
|
||||
<div className="mb-8">
|
||||
<Link
|
||||
href={`/dashboard/${searchSpaceId}/connectors/add`}
|
||||
className="inline-flex items-center text-sm text-muted-foreground hover:text-foreground mb-4"
|
||||
>
|
||||
<ArrowLeft className="mr-2 h-4 w-4" />
|
||||
Back to connectors
|
||||
</Link>
|
||||
<div className="flex items-center gap-4">
|
||||
<div className="flex h-12 w-12 items-center justify-center rounded-lg">
|
||||
{getConnectorIcon(EnumConnectorName.GOOGLE_DRIVE_CONNECTOR, "h-6 w-6")}
|
||||
</div>
|
||||
<div>
|
||||
<h1 className="text-3xl font-bold tracking-tight">Connect Google Drive</h1>
|
||||
<p className="text-muted-foreground">
|
||||
Securely connect your Google Drive account
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Connection Card */}
|
||||
{!doesConnectorExist ? (
|
||||
<Card>
|
||||
<CardHeader>
|
||||
<CardTitle>Connect Your Google Account</CardTitle>
|
||||
<CardDescription>
|
||||
Authorize read-only access to your Google Drive. You'll select which folder to
|
||||
index when you start indexing.
|
||||
</CardDescription>
|
||||
</CardHeader>
|
||||
<CardContent className="space-y-4">
|
||||
<div className="flex items-center gap-3 text-sm text-muted-foreground">
|
||||
<Check className="h-4 w-4 text-green-500" />
|
||||
<span>Read-only access to your Drive files</span>
|
||||
</div>
|
||||
<div className="flex items-center gap-3 text-sm text-muted-foreground">
|
||||
<Check className="h-4 w-4 text-green-500" />
|
||||
<span>Index documents, spreadsheets, presentations, PDFs & more</span>
|
||||
</div>
|
||||
<div className="flex items-center gap-3 text-sm text-muted-foreground">
|
||||
<Check className="h-4 w-4 text-green-500" />
|
||||
<span>Automatic updates with change tracking</span>
|
||||
</div>
|
||||
<div className="flex items-center gap-3 text-sm text-muted-foreground">
|
||||
<Check className="h-4 w-4 text-green-500" />
|
||||
<span>Secure OAuth 2.0 authentication</span>
|
||||
</div>
|
||||
</CardContent>
|
||||
<CardFooter className="flex justify-between">
|
||||
<Button
|
||||
type="button"
|
||||
variant="outline"
|
||||
onClick={() => router.push(`/dashboard/${searchSpaceId}/connectors/add`)}
|
||||
>
|
||||
Cancel
|
||||
</Button>
|
||||
<Button onClick={handleConnectGoogle} disabled={isConnecting}>
|
||||
{isConnecting ? (
|
||||
<>
|
||||
<Loader2 className="mr-2 h-4 w-4 animate-spin" />
|
||||
Connecting...
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
<ExternalLink className="mr-2 h-4 w-4" />
|
||||
Connect Google Drive
|
||||
</>
|
||||
)}
|
||||
</Button>
|
||||
</CardFooter>
|
||||
</Card>
|
||||
) : (
|
||||
<Card>
|
||||
<CardHeader>
|
||||
<CardTitle>✅ Already Connected</CardTitle>
|
||||
<CardDescription>
|
||||
Your Google Drive connector is already set up. Go to the connectors page to
|
||||
start indexing.
|
||||
</CardDescription>
|
||||
</CardHeader>
|
||||
<CardFooter>
|
||||
<Button onClick={() => router.push(`/dashboard/${searchSpaceId}/connectors`)}>
|
||||
Go to Connectors
|
||||
</Button>
|
||||
</CardFooter>
|
||||
</Card>
|
||||
)}
|
||||
|
||||
{/* Information Card */}
|
||||
<Card className="mt-6">
|
||||
<CardHeader>
|
||||
<CardTitle>How Google Drive Integration Works</CardTitle>
|
||||
</CardHeader>
|
||||
<CardContent className="space-y-4">
|
||||
<div className="space-y-2">
|
||||
<h4 className="font-medium">1️⃣ Connect Your Account</h4>
|
||||
<p className="text-sm text-muted-foreground">
|
||||
First, securely connect your Google Drive account using OAuth 2.0. We only
|
||||
request read-only access.
|
||||
</p>
|
||||
</div>
|
||||
<div className="space-y-2">
|
||||
<h4 className="font-medium">2️⃣ Select Folder to Index</h4>
|
||||
<p className="text-sm text-muted-foreground">
|
||||
When you're ready to index, go to the connectors page and click "Index". You'll
|
||||
choose which folder to process.
|
||||
</p>
|
||||
</div>
|
||||
<div className="space-y-2">
|
||||
<h4 className="font-medium">3️⃣ Automatic Change Detection</h4>
|
||||
<p className="text-sm text-muted-foreground">
|
||||
We use Google Drive's change tracking API to detect when files are modified,
|
||||
added, or deleted. Only changed files are re-indexed.
|
||||
</p>
|
||||
</div>
|
||||
<div className="space-y-2">
|
||||
<h4 className="font-medium">📄 Comprehensive File Support</h4>
|
||||
<p className="text-sm text-muted-foreground">
|
||||
Supports Google Workspace files (Docs, Sheets, Slides), Microsoft Office
|
||||
documents, PDFs, text files, images (with OCR), and more.
|
||||
</p>
|
||||
</div>
|
||||
</CardContent>
|
||||
</Card>
|
||||
</motion.div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
320
surfsense_web/components/connectors/google-drive-folder-tree.tsx
Normal file
320
surfsense_web/components/connectors/google-drive-folder-tree.tsx
Normal file
|
|
@ -0,0 +1,320 @@
|
|||
"use client";
|
||||
|
||||
import {
|
||||
ChevronDown,
|
||||
ChevronRight,
|
||||
File,
|
||||
FileText,
|
||||
Folder,
|
||||
FolderOpen,
|
||||
HardDrive,
|
||||
Image,
|
||||
Loader2,
|
||||
Sheet,
|
||||
Presentation,
|
||||
} from "lucide-react";
|
||||
import { useState } from "react";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import { Checkbox } from "@/components/ui/checkbox";
|
||||
import { ScrollArea } from "@/components/ui/scroll-area";
|
||||
import { cn } from "@/lib/utils";
|
||||
import { useGoogleDriveFolders } from "@/hooks/use-google-drive-folders";
|
||||
import { connectorsApiService } from "@/lib/apis/connectors-api.service";
|
||||
|
||||
interface DriveItem {
|
||||
id: string;
|
||||
name: string;
|
||||
mimeType: string;
|
||||
isFolder: boolean;
|
||||
parents?: string[];
|
||||
size?: number;
|
||||
iconLink?: string;
|
||||
}
|
||||
|
||||
interface ItemTreeNode {
|
||||
item: DriveItem;
|
||||
children: DriveItem[] | null; // null = not loaded, [] = loaded but empty
|
||||
isExpanded: boolean;
|
||||
isLoading: boolean;
|
||||
}
|
||||
|
||||
interface SelectedFolder {
|
||||
id: string;
|
||||
name: string;
|
||||
}
|
||||
|
||||
interface GoogleDriveFolderTreeProps {
|
||||
connectorId: number;
|
||||
selectedFolders: SelectedFolder[];
|
||||
onSelectFolders: (folders: SelectedFolder[]) => void;
|
||||
}
|
||||
|
||||
// Helper to get appropriate icon for file type
|
||||
function getFileIcon(mimeType: string, className: string = "h-4 w-4") {
|
||||
if (mimeType.includes("spreadsheet") || mimeType.includes("excel")) {
|
||||
return <Sheet className={`${className} text-green-600`} />;
|
||||
}
|
||||
if (mimeType.includes("presentation") || mimeType.includes("powerpoint")) {
|
||||
return <Presentation className={`${className} text-orange-600`} />;
|
||||
}
|
||||
if (mimeType.includes("document") || mimeType.includes("word") || mimeType.includes("text")) {
|
||||
return <FileText className={`${className} text-blue-600`} />;
|
||||
}
|
||||
if (mimeType.includes("image")) {
|
||||
return <Image className={`${className} text-purple-600`} />;
|
||||
}
|
||||
return <File className={`${className} text-gray-500`} />;
|
||||
}
|
||||
|
||||
export function GoogleDriveFolderTree({
|
||||
connectorId,
|
||||
selectedFolders,
|
||||
onSelectFolders,
|
||||
}: GoogleDriveFolderTreeProps) {
|
||||
const [itemStates, setItemStates] = useState<Map<string, ItemTreeNode>>(new Map());
|
||||
|
||||
const { data: rootData, isLoading: isLoadingRoot } = useGoogleDriveFolders({
|
||||
connectorId,
|
||||
});
|
||||
|
||||
const rootItems = rootData?.items || [];
|
||||
|
||||
const isFolderSelected = (folderId: string): boolean => {
|
||||
return selectedFolders.some((f) => f.id === folderId);
|
||||
};
|
||||
|
||||
const toggleFolderSelection = (folderId: string, folderName: string) => {
|
||||
if (isFolderSelected(folderId)) {
|
||||
onSelectFolders(selectedFolders.filter((f) => f.id !== folderId));
|
||||
} else {
|
||||
onSelectFolders([...selectedFolders, { id: folderId, name: folderName }]);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Find an item by ID across all loaded items (root and nested).
|
||||
*/
|
||||
const findItem = (itemId: string): DriveItem | undefined => {
|
||||
const state = itemStates.get(itemId);
|
||||
if (state?.item) return state.item;
|
||||
|
||||
const rootItem = rootItems.find((item) => item.id === itemId);
|
||||
if (rootItem) return rootItem;
|
||||
|
||||
for (const [, nodeState] of itemStates) {
|
||||
if (nodeState.children) {
|
||||
const found = nodeState.children.find((child) => child.id === itemId);
|
||||
if (found) return found;
|
||||
}
|
||||
}
|
||||
|
||||
return undefined;
|
||||
};
|
||||
|
||||
/**
|
||||
* Load and display contents of a specific folder.
|
||||
*/
|
||||
const loadFolderContents = async (folderId: string) => {
|
||||
try {
|
||||
setItemStates((prev) => {
|
||||
const newMap = new Map(prev);
|
||||
const existing = newMap.get(folderId);
|
||||
if (existing) {
|
||||
newMap.set(folderId, { ...existing, isLoading: true });
|
||||
} else {
|
||||
const item = findItem(folderId);
|
||||
if (item) {
|
||||
newMap.set(folderId, {
|
||||
item,
|
||||
children: null,
|
||||
isExpanded: false,
|
||||
isLoading: true,
|
||||
});
|
||||
}
|
||||
}
|
||||
return newMap;
|
||||
});
|
||||
|
||||
const data = await connectorsApiService.listGoogleDriveFolders({
|
||||
connector_id: connectorId,
|
||||
parent_id: folderId,
|
||||
});
|
||||
const items = data.items || [];
|
||||
|
||||
setItemStates((prev) => {
|
||||
const newMap = new Map(prev);
|
||||
const existing = newMap.get(folderId);
|
||||
const item = existing?.item || findItem(folderId);
|
||||
|
||||
if (item) {
|
||||
newMap.set(folderId, {
|
||||
item,
|
||||
children: items,
|
||||
isExpanded: true,
|
||||
isLoading: false,
|
||||
});
|
||||
} else {
|
||||
console.error(`Could not find item for folderId: ${folderId}`);
|
||||
}
|
||||
return newMap;
|
||||
});
|
||||
} catch (error) {
|
||||
console.error("Error loading folder contents:", error);
|
||||
setItemStates((prev) => {
|
||||
const newMap = new Map(prev);
|
||||
const existing = newMap.get(folderId);
|
||||
if (existing) {
|
||||
newMap.set(folderId, { ...existing, isLoading: false });
|
||||
}
|
||||
return newMap;
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Toggle folder expand/collapse state.
|
||||
*/
|
||||
const toggleFolder = async (item: DriveItem) => {
|
||||
if (!item.isFolder) return;
|
||||
|
||||
const state = itemStates.get(item.id);
|
||||
|
||||
if (!state || state.children === null) {
|
||||
await loadFolderContents(item.id);
|
||||
} else {
|
||||
setItemStates((prev) => {
|
||||
const newMap = new Map(prev);
|
||||
newMap.set(item.id, {
|
||||
...state,
|
||||
isExpanded: !state.isExpanded,
|
||||
});
|
||||
return newMap;
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Render a single item (folder or file) with its children.
|
||||
*/
|
||||
const renderItem = (item: DriveItem, level: number = 0) => {
|
||||
const state = itemStates.get(item.id);
|
||||
const isExpanded = state?.isExpanded || false;
|
||||
const isLoading = state?.isLoading || false;
|
||||
const children = state?.children;
|
||||
const isSelected = isFolderSelected(item.id);
|
||||
const isFolder = item.isFolder;
|
||||
|
||||
const childFolders = children?.filter((c) => c.isFolder) || [];
|
||||
const childFiles = children?.filter((c) => !c.isFolder) || [];
|
||||
|
||||
return (
|
||||
<div key={item.id} className="w-full" style={{ marginLeft: `${level * 1.25}rem` }}>
|
||||
<div
|
||||
className={cn(
|
||||
"flex items-center gap-2 h-auto py-2 px-2 rounded-md",
|
||||
isFolder && "hover:bg-accent cursor-pointer",
|
||||
!isFolder && "cursor-default opacity-60",
|
||||
isSelected && isFolder && "bg-accent/50"
|
||||
)}
|
||||
>
|
||||
{isFolder ? (
|
||||
<span
|
||||
className="flex items-center justify-center w-4 h-4 shrink-0"
|
||||
onClick={(e) => {
|
||||
e.stopPropagation();
|
||||
toggleFolder(item);
|
||||
}}
|
||||
>
|
||||
{isLoading ? (
|
||||
<Loader2 className="h-3 w-3 animate-spin" />
|
||||
) : isExpanded ? (
|
||||
<ChevronDown className="h-4 w-4" />
|
||||
) : (
|
||||
<ChevronRight className="h-4 w-4" />
|
||||
)}
|
||||
</span>
|
||||
) : (
|
||||
<span className="w-4 h-4 shrink-0" />
|
||||
)}
|
||||
|
||||
{isFolder && (
|
||||
<Checkbox
|
||||
checked={isSelected}
|
||||
onCheckedChange={() => toggleFolderSelection(item.id, item.name)}
|
||||
className="shrink-0"
|
||||
onClick={(e) => e.stopPropagation()}
|
||||
/>
|
||||
)}
|
||||
|
||||
<div className="shrink-0" style={{ marginLeft: isFolder ? "0" : "1.25rem" }}>
|
||||
{isFolder ? (
|
||||
isExpanded ? (
|
||||
<FolderOpen className="h-4 w-4 text-blue-500" />
|
||||
) : (
|
||||
<Folder className="h-4 w-4 text-gray-500" />
|
||||
)
|
||||
) : (
|
||||
getFileIcon(item.mimeType, "h-4 w-4")
|
||||
)}
|
||||
</div>
|
||||
|
||||
<span
|
||||
className="truncate flex-1 text-left text-sm min-w-0"
|
||||
onClick={() => isFolder && toggleFolder(item)}
|
||||
>
|
||||
{item.name}
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{isExpanded && isFolder && children && (
|
||||
<div className="w-full">
|
||||
{childFolders.map((child) => renderItem(child, level + 1))}
|
||||
{childFiles.map((child) => renderItem(child, level + 1))}
|
||||
|
||||
{children.length === 0 && (
|
||||
<div className="text-xs text-muted-foreground py-2 pl-2">Empty folder</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="border rounded-md w-full overflow-hidden">
|
||||
<ScrollArea className="h-[450px] w-full">
|
||||
<div className="p-2 pr-4 w-full overflow-x-hidden">
|
||||
<div className="mb-2 pb-2 border-b">
|
||||
<div className="flex items-center gap-2 h-auto py-2 px-2 rounded-md hover:bg-accent cursor-pointer">
|
||||
<Checkbox
|
||||
checked={isFolderSelected("root")}
|
||||
onCheckedChange={() => toggleFolderSelection("root", "My Drive")}
|
||||
className="shrink-0"
|
||||
/>
|
||||
<HardDrive className="h-4 w-4 text-primary shrink-0" />
|
||||
<span className="font-semibold truncate" onClick={() => toggleFolderSelection("root", "My Drive")}>
|
||||
My Drive
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{isLoadingRoot && (
|
||||
<div className="flex items-center justify-center py-8">
|
||||
<Loader2 className="h-6 w-6 animate-spin text-muted-foreground" />
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div className="w-full overflow-x-hidden">
|
||||
{!isLoadingRoot && rootItems.map((item) => renderItem(item, 0))}
|
||||
</div>
|
||||
|
||||
{!isLoadingRoot && rootItems.length === 0 && (
|
||||
<div className="text-center text-sm text-muted-foreground py-8">
|
||||
No files or folders found in your Google Drive
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</ScrollArea>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
|
@ -183,6 +183,13 @@ export const connectorCategories: ConnectorCategory[] = [
|
|||
icon: getConnectorIcon(EnumConnectorName.GOOGLE_GMAIL_CONNECTOR, "h-6 w-6"),
|
||||
status: "available",
|
||||
},
|
||||
{
|
||||
id: "google-drive-connector",
|
||||
title: "Google Drive",
|
||||
description: "google_drive_desc",
|
||||
icon: getConnectorIcon(EnumConnectorName.GOOGLE_DRIVE_CONNECTOR, "h-6 w-6"),
|
||||
status: "available",
|
||||
},
|
||||
{
|
||||
id: "luma-connector",
|
||||
title: "Luma",
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ export enum EnumConnectorName {
|
|||
CLICKUP_CONNECTOR = "CLICKUP_CONNECTOR",
|
||||
GOOGLE_CALENDAR_CONNECTOR = "GOOGLE_CALENDAR_CONNECTOR",
|
||||
GOOGLE_GMAIL_CONNECTOR = "GOOGLE_GMAIL_CONNECTOR",
|
||||
GOOGLE_DRIVE_CONNECTOR = "GOOGLE_DRIVE_CONNECTOR",
|
||||
AIRTABLE_CONNECTOR = "AIRTABLE_CONNECTOR",
|
||||
LUMA_CONNECTOR = "LUMA_CONNECTOR",
|
||||
ELASTICSEARCH_CONNECTOR = "ELASTICSEARCH_CONNECTOR",
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ import {
|
|||
Sparkles,
|
||||
Telescope,
|
||||
Webhook,
|
||||
HardDrive,
|
||||
} from "lucide-react";
|
||||
import { EnumConnectorName } from "./connector";
|
||||
|
||||
|
|
@ -57,6 +58,8 @@ export const getConnectorIcon = (connectorType: EnumConnectorName | string, clas
|
|||
return <IconCalendar {...iconProps} />;
|
||||
case EnumConnectorName.GOOGLE_GMAIL_CONNECTOR:
|
||||
return <IconMail {...iconProps} />;
|
||||
case EnumConnectorName.GOOGLE_DRIVE_CONNECTOR:
|
||||
return <HardDrive {...iconProps} />;
|
||||
case EnumConnectorName.AIRTABLE_CONNECTOR:
|
||||
return <IconTable {...iconProps} />;
|
||||
case EnumConnectorName.CONFLUENCE_CONNECTOR:
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ export const searchSourceConnectorTypeEnum = z.enum([
|
|||
"CLICKUP_CONNECTOR",
|
||||
"GOOGLE_CALENDAR_CONNECTOR",
|
||||
"GOOGLE_GMAIL_CONNECTOR",
|
||||
"GOOGLE_DRIVE_CONNECTOR",
|
||||
"AIRTABLE_CONNECTOR",
|
||||
"LUMA_CONNECTOR",
|
||||
"ELASTICSEARCH_CONNECTOR",
|
||||
|
|
@ -39,6 +40,19 @@ export const searchSourceConnector = z.object({
|
|||
created_at: z.string(),
|
||||
});
|
||||
|
||||
export const googleDriveItem = z.object({
|
||||
id: z.string(),
|
||||
name: z.string(),
|
||||
mimeType: z.string(),
|
||||
isFolder: z.boolean(),
|
||||
parents: z.array(z.string()).optional(),
|
||||
size: z.number().optional(),
|
||||
iconLink: z.string().optional(),
|
||||
webViewLink: z.string().optional(),
|
||||
createdTime: z.string().optional(),
|
||||
modifiedTime: z.string().optional(),
|
||||
});
|
||||
|
||||
/**
|
||||
* Get connectors
|
||||
*/
|
||||
|
|
@ -120,6 +134,9 @@ export const indexConnectorRequest = z.object({
|
|||
search_space_id: z.number().or(z.string()),
|
||||
start_date: z.string().optional(),
|
||||
end_date: z.string().optional(),
|
||||
// Google Drive only
|
||||
folder_ids: z.string().optional(),
|
||||
folder_names: z.string().optional(),
|
||||
}),
|
||||
});
|
||||
|
||||
|
|
@ -140,6 +157,18 @@ export const listGitHubRepositoriesRequest = z.object({
|
|||
|
||||
export const listGitHubRepositoriesResponse = z.array(z.record(z.string(), z.any()));
|
||||
|
||||
/**
|
||||
* List Google Drive folders
|
||||
*/
|
||||
export const listGoogleDriveFoldersRequest = z.object({
|
||||
connector_id: z.number(),
|
||||
parent_id: z.string().optional(),
|
||||
});
|
||||
|
||||
export const listGoogleDriveFoldersResponse = z.object({
|
||||
items: z.array(googleDriveItem),
|
||||
});
|
||||
|
||||
// Inferred types
|
||||
export type SearchSourceConnectorType = z.infer<typeof searchSourceConnectorTypeEnum>;
|
||||
export type SearchSourceConnector = z.infer<typeof searchSourceConnector>;
|
||||
|
|
@ -157,3 +186,6 @@ export type IndexConnectorRequest = z.infer<typeof indexConnectorRequest>;
|
|||
export type IndexConnectorResponse = z.infer<typeof indexConnectorResponse>;
|
||||
export type ListGitHubRepositoriesRequest = z.infer<typeof listGitHubRepositoriesRequest>;
|
||||
export type ListGitHubRepositoriesResponse = z.infer<typeof listGitHubRepositoriesResponse>;
|
||||
export type ListGoogleDriveFoldersRequest = z.infer<typeof listGoogleDriveFoldersRequest>;
|
||||
export type ListGoogleDriveFoldersResponse = z.infer<typeof listGoogleDriveFoldersResponse>;
|
||||
export type GoogleDriveItem = z.infer<typeof googleDriveItem>;
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ export const documentTypeEnum = z.enum([
|
|||
"CLICKUP_CONNECTOR",
|
||||
"GOOGLE_CALENDAR_CONNECTOR",
|
||||
"GOOGLE_GMAIL_CONNECTOR",
|
||||
"GOOGLE_DRIVE_FILE",
|
||||
"AIRTABLE_CONNECTOR",
|
||||
"LUMA_CONNECTOR",
|
||||
"ELASTICSEARCH_CONNECTOR",
|
||||
|
|
|
|||
29
surfsense_web/hooks/use-google-drive-folders.ts
Normal file
29
surfsense_web/hooks/use-google-drive-folders.ts
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
import { useQuery } from "@tanstack/react-query";
|
||||
import { connectorsApiService } from "@/lib/apis/connectors-api.service";
|
||||
import { cacheKeys } from "@/lib/query-client/cache-keys";
|
||||
|
||||
interface UseGoogleDriveFoldersOptions {
|
||||
connectorId: number;
|
||||
parentId?: string;
|
||||
enabled?: boolean;
|
||||
}
|
||||
|
||||
export function useGoogleDriveFolders({
|
||||
connectorId,
|
||||
parentId,
|
||||
enabled = true,
|
||||
}: UseGoogleDriveFoldersOptions) {
|
||||
return useQuery({
|
||||
queryKey: cacheKeys.connectors.googleDrive.folders(connectorId, parentId),
|
||||
queryFn: async () => {
|
||||
return connectorsApiService.listGoogleDriveFolders({
|
||||
connector_id: connectorId,
|
||||
parent_id: parentId,
|
||||
});
|
||||
},
|
||||
enabled: enabled && !!connectorId,
|
||||
staleTime: 5 * 60 * 1000, // 5 minutes
|
||||
retry: 2,
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -267,7 +267,9 @@ export const useSearchSourceConnectors = (lazy: boolean = false, searchSpaceId?:
|
|||
connectorId: number,
|
||||
searchSpaceId: string | number,
|
||||
startDate?: string,
|
||||
endDate?: string
|
||||
endDate?: string,
|
||||
folderIds?: string,
|
||||
folderNames?: string
|
||||
) => {
|
||||
try {
|
||||
// Build query parameters
|
||||
|
|
@ -280,6 +282,12 @@ export const useSearchSourceConnectors = (lazy: boolean = false, searchSpaceId?:
|
|||
if (endDate) {
|
||||
params.append("end_date", endDate);
|
||||
}
|
||||
if (folderIds) {
|
||||
params.append("folder_ids", folderIds);
|
||||
}
|
||||
if (folderNames) {
|
||||
params.append("folder_names", folderNames);
|
||||
}
|
||||
|
||||
const response = await authenticatedFetch(
|
||||
`${
|
||||
|
|
|
|||
|
|
@ -17,6 +17,9 @@ import {
|
|||
type ListGitHubRepositoriesRequest,
|
||||
listGitHubRepositoriesRequest,
|
||||
listGitHubRepositoriesResponse,
|
||||
type ListGoogleDriveFoldersRequest,
|
||||
listGoogleDriveFoldersRequest,
|
||||
listGoogleDriveFoldersResponse,
|
||||
type UpdateConnectorRequest,
|
||||
updateConnectorRequest,
|
||||
updateConnectorResponse,
|
||||
|
|
@ -195,6 +198,29 @@ class ConnectorsApiService {
|
|||
body: parsedRequest.data,
|
||||
});
|
||||
};
|
||||
|
||||
/**
|
||||
* List Google Drive folders and files
|
||||
*/
|
||||
listGoogleDriveFolders = async (request: ListGoogleDriveFoldersRequest) => {
|
||||
const parsedRequest = listGoogleDriveFoldersRequest.safeParse(request);
|
||||
|
||||
if (!parsedRequest.success) {
|
||||
console.error("Invalid request:", parsedRequest.error);
|
||||
|
||||
const errorMessage = parsedRequest.error.issues.map((issue) => issue.message).join(", ");
|
||||
throw new ValidationError(`Invalid request: ${errorMessage}`);
|
||||
}
|
||||
|
||||
const { connector_id, parent_id } = parsedRequest.data;
|
||||
|
||||
const queryParams = parent_id ? `?parent_id=${encodeURIComponent(parent_id)}` : "";
|
||||
|
||||
return baseApiService.get(
|
||||
`/api/v1/connectors/${connector_id}/google-drive/folders${queryParams}`,
|
||||
listGoogleDriveFoldersResponse
|
||||
);
|
||||
};
|
||||
}
|
||||
|
||||
export const connectorsApiService = new ConnectorsApiService();
|
||||
|
|
|
|||
|
|
@ -67,5 +67,9 @@ export const cacheKeys = {
|
|||
["connectors", ...(queries ? Object.values(queries) : [])] as const,
|
||||
byId: (connectorId: string) => ["connector", connectorId] as const,
|
||||
index: () => ["connector", "index"] as const,
|
||||
googleDrive: {
|
||||
folders: (connectorId: number, parentId?: string) =>
|
||||
["connectors", "google-drive", connectorId, "folders", parentId] as const,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
|
|
|||
|
|
@ -307,6 +307,7 @@
|
|||
"luma_desc": "Connect to Luma to search events, meetups and gatherings.",
|
||||
"calendar_desc": "Connect to Google Calendar to search events, meetings and schedules.",
|
||||
"gmail_desc": "Connect to your Gmail account to search through your emails.",
|
||||
"google_drive_desc": "Connect to Google Drive to search and index your files and documents.",
|
||||
"zoom_desc": "Connect to Zoom to access meeting recordings and transcripts.",
|
||||
"webcrawler_desc": "Crawl and index content from any public web pages."
|
||||
},
|
||||
|
|
|
|||
|
|
@ -307,6 +307,7 @@
|
|||
"luma_desc": "连接到 Luma 以搜索活动、聚会和集会。",
|
||||
"calendar_desc": "连接到 Google 日历以搜索活动、会议和日程。",
|
||||
"gmail_desc": "连接到您的 Gmail 账户以搜索您的电子邮件。",
|
||||
"google_drive_desc": "连接到 Google 云端硬盘以搜索和索引您的文件和文档。",
|
||||
"zoom_desc": "连接到 Zoom 以访问会议录制和转录。",
|
||||
"webcrawler_desc": "爬取和索引任何公开网页的内容。"
|
||||
},
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue