Mirror of https://github.com/MODSetter/SurfSense.git, synced 2026-04-30 03:16:25 +02:00.
feat: enhance Google Drive indexing with new options
- Updated the Google Drive indexing functionality to include indexing options such as max files per folder, incremental sync, and inclusion of subfolders.
- Modified the API to accept a new 'indexing_options' parameter in the request body.
- Enhanced the UI to allow users to configure these options when selecting folders and files for indexing.
- Updated related components and tasks to support the new indexing options, ensuring a more flexible and efficient indexing process.
This commit is contained in: parent cf53338119, commit a3112a24fe.
9 changed files with 381 additions and 178 deletions.
|
|
@ -461,7 +461,7 @@ def index_google_drive_files_task(
|
|||
connector_id: int,
|
||||
search_space_id: int,
|
||||
user_id: str,
|
||||
items_dict: dict, # Dictionary with 'folders' and 'files' lists
|
||||
items_dict: dict, # Dictionary with 'folders', 'files', and 'indexing_options'
|
||||
):
|
||||
"""Celery task to index Google Drive folders and files."""
|
||||
import asyncio
|
||||
|
|
@ -486,7 +486,7 @@ async def _index_google_drive_files(
|
|||
connector_id: int,
|
||||
search_space_id: int,
|
||||
user_id: str,
|
||||
items_dict: dict, # Dictionary with 'folders' and 'files' lists
|
||||
items_dict: dict, # Dictionary with 'folders', 'files', and 'indexing_options'
|
||||
):
|
||||
"""Index Google Drive folders and files with new session."""
|
||||
from app.routes.search_source_connectors_routes import (
|
||||
|
|
|
|||
|
|
@ -72,6 +72,7 @@ async def _check_and_trigger_schedules():
|
|||
index_elasticsearch_documents_task,
|
||||
index_github_repos_task,
|
||||
index_google_calendar_events_task,
|
||||
index_google_drive_files_task,
|
||||
index_google_gmail_messages_task,
|
||||
index_jira_issues_task,
|
||||
index_linear_issues_task,
|
||||
|
|
@ -96,6 +97,7 @@ async def _check_and_trigger_schedules():
|
|||
SearchSourceConnectorType.LUMA_CONNECTOR: index_luma_events_task,
|
||||
SearchSourceConnectorType.ELASTICSEARCH_CONNECTOR: index_elasticsearch_documents_task,
|
||||
SearchSourceConnectorType.WEBCRAWLER_CONNECTOR: index_crawled_urls_task,
|
||||
SearchSourceConnectorType.GOOGLE_DRIVE_CONNECTOR: index_google_drive_files_task,
|
||||
}
|
||||
|
||||
# Trigger indexing for each due connector
|
||||
|
|
@ -106,13 +108,42 @@ async def _check_and_trigger_schedules():
|
|||
f"Triggering periodic indexing for connector {connector.id} "
|
||||
f"({connector.connector_type.value})"
|
||||
)
|
||||
task.delay(
|
||||
connector.id,
|
||||
connector.search_space_id,
|
||||
str(connector.user_id),
|
||||
None, # start_date - uses last_indexed_at
|
||||
None, # end_date - uses now
|
||||
)
|
||||
|
||||
# Special handling for Google Drive - uses config for folder/file selection
|
||||
if connector.connector_type == SearchSourceConnectorType.GOOGLE_DRIVE_CONNECTOR:
|
||||
config = connector.config or {}
|
||||
selected_folders = config.get("selected_folders", [])
|
||||
selected_files = config.get("selected_files", [])
|
||||
indexing_options = config.get("indexing_options", {
|
||||
"max_files_per_folder": 100,
|
||||
"incremental_sync": True,
|
||||
"include_subfolders": True,
|
||||
})
|
||||
|
||||
if selected_folders or selected_files:
|
||||
task.delay(
|
||||
connector.id,
|
||||
connector.search_space_id,
|
||||
str(connector.user_id),
|
||||
{
|
||||
"folders": selected_folders,
|
||||
"files": selected_files,
|
||||
"indexing_options": indexing_options,
|
||||
},
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
f"Google Drive connector {connector.id} has no folders or files selected, skipping periodic indexing"
|
||||
)
|
||||
continue
|
||||
else:
|
||||
task.delay(
|
||||
connector.id,
|
||||
connector.search_space_id,
|
||||
str(connector.user_id),
|
||||
None, # start_date - uses last_indexed_at
|
||||
None, # end_date - uses now
|
||||
)
|
||||
|
||||
# Update next_scheduled_at for next run
|
||||
from datetime import timedelta
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue