mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-25 00:36:31 +02:00
feat: introduce incremental sync option for Dropbox indexing, enhancing performance and user control
This commit is contained in:
parent
b950299072
commit
d8d5102416
3 changed files with 64 additions and 27 deletions
|
|
@ -1071,6 +1071,7 @@ async def index_connector_content(
|
|||
"indexing_options",
|
||||
{
|
||||
"max_files_per_folder": 100,
|
||||
"incremental_sync": True,
|
||||
"include_subfolders": True,
|
||||
},
|
||||
)
|
||||
|
|
|
|||
|
|
@ -262,6 +262,7 @@ async def _index_full_scan(
|
|||
log_entry: object,
|
||||
max_files: int,
|
||||
include_subfolders: bool = True,
|
||||
incremental_sync: bool = True,
|
||||
on_heartbeat_callback: HeartbeatCallbackType | None = None,
|
||||
enable_summary: bool = True,
|
||||
) -> tuple[int, int]:
|
||||
|
|
@ -273,6 +274,7 @@ async def _index_full_scan(
|
|||
"stage": "full_scan",
|
||||
"folder_path": folder_path,
|
||||
"include_subfolders": include_subfolders,
|
||||
"incremental_sync": incremental_sync,
|
||||
},
|
||||
)
|
||||
|
||||
|
|
@ -294,12 +296,16 @@ async def _index_full_scan(
|
|||
raise Exception(f"Failed to list Dropbox files: {error}")
|
||||
|
||||
for file in all_files[:max_files]:
|
||||
skip, msg = await _should_skip_file(session, file, search_space_id)
|
||||
if skip:
|
||||
if msg and "renamed" in msg.lower():
|
||||
renamed_count += 1
|
||||
else:
|
||||
skipped += 1
|
||||
if incremental_sync:
|
||||
skip, msg = await _should_skip_file(session, file, search_space_id)
|
||||
if skip:
|
||||
if msg and "renamed" in msg.lower():
|
||||
renamed_count += 1
|
||||
else:
|
||||
skipped += 1
|
||||
continue
|
||||
elif skip_item(file):
|
||||
skipped += 1
|
||||
continue
|
||||
files_to_download.append(file)
|
||||
|
||||
|
|
@ -330,6 +336,7 @@ async def _index_selected_files(
|
|||
search_space_id: int,
|
||||
user_id: str,
|
||||
enable_summary: bool,
|
||||
incremental_sync: bool = True,
|
||||
on_heartbeat: HeartbeatCallbackType | None = None,
|
||||
) -> tuple[int, int, list[str]]:
|
||||
"""Index user-selected files using the parallel pipeline."""
|
||||
|
|
@ -345,12 +352,16 @@ async def _index_selected_files(
|
|||
errors.append(f"File '{display}': {error or 'File not found'}")
|
||||
continue
|
||||
|
||||
skip, msg = await _should_skip_file(session, file, search_space_id)
|
||||
if skip:
|
||||
if msg and "renamed" in msg.lower():
|
||||
renamed_count += 1
|
||||
else:
|
||||
skipped += 1
|
||||
if incremental_sync:
|
||||
skip, msg = await _should_skip_file(session, file, search_space_id)
|
||||
if skip:
|
||||
if msg and "renamed" in msg.lower():
|
||||
renamed_count += 1
|
||||
else:
|
||||
skipped += 1
|
||||
continue
|
||||
elif skip_item(file):
|
||||
skipped += 1
|
||||
continue
|
||||
|
||||
files_to_download.append(file)
|
||||
|
|
@ -382,7 +393,11 @@ async def index_dropbox_files(
|
|||
{
|
||||
"folders": [{"path": "...", "name": "..."}, ...],
|
||||
"files": [{"path": "...", "name": "..."}, ...],
|
||||
"indexing_options": {"max_files": 500, "include_subfolders": true}
|
||||
"indexing_options": {
|
||||
"max_files": 500,
|
||||
"incremental_sync": true,
|
||||
"include_subfolders": true
|
||||
}
|
||||
}
|
||||
"""
|
||||
task_logger = TaskLoggingService(session, search_space_id)
|
||||
|
|
@ -420,6 +435,7 @@ async def index_dropbox_files(
|
|||
|
||||
indexing_options = items_dict.get("indexing_options", {})
|
||||
max_files = indexing_options.get("max_files", 500)
|
||||
incremental_sync = indexing_options.get("incremental_sync", True)
|
||||
include_subfolders = indexing_options.get("include_subfolders", True)
|
||||
|
||||
total_indexed = 0
|
||||
|
|
@ -439,6 +455,7 @@ async def index_dropbox_files(
|
|||
search_space_id=search_space_id,
|
||||
user_id=user_id,
|
||||
enable_summary=connector_enable_summary,
|
||||
incremental_sync=incremental_sync,
|
||||
)
|
||||
total_indexed += indexed
|
||||
total_skipped += skipped
|
||||
|
|
@ -461,6 +478,7 @@ async def index_dropbox_files(
|
|||
log_entry,
|
||||
max_files,
|
||||
include_subfolders,
|
||||
incremental_sync=incremental_sync,
|
||||
enable_summary=connector_enable_summary,
|
||||
)
|
||||
total_indexed += indexed
|
||||
|
|
|
|||
|
|
@ -28,11 +28,13 @@ import type { ConnectorConfigProps } from "../index";
|
|||
|
||||
interface IndexingOptions {
|
||||
max_files_per_folder: number;
|
||||
incremental_sync: boolean;
|
||||
include_subfolders: boolean;
|
||||
}
|
||||
|
||||
const DEFAULT_INDEXING_OPTIONS: IndexingOptions = {
|
||||
max_files_per_folder: 100,
|
||||
incremental_sync: true,
|
||||
include_subfolders: true,
|
||||
};
|
||||
|
||||
|
|
@ -295,21 +297,37 @@ export const DropboxConfig: FC<ConnectorConfigProps> = ({ connector, onConfigCha
|
|||
</div>
|
||||
</div>
|
||||
|
||||
<div className="flex items-center justify-between pt-2 border-t border-slate-400/20">
|
||||
<div className="space-y-0.5">
|
||||
<Label htmlFor="db-include-subfolders" className="text-sm font-medium">
|
||||
Include subfolders
|
||||
</Label>
|
||||
<p className="text-xs text-muted-foreground">
|
||||
Recursively index files in subfolders of selected folders
|
||||
</p>
|
||||
</div>
|
||||
<Switch
|
||||
id="db-include-subfolders"
|
||||
checked={indexingOptions.include_subfolders}
|
||||
onCheckedChange={(checked) => handleIndexingOptionChange("include_subfolders", checked)}
|
||||
/>
|
||||
<div className="flex items-center justify-between pt-2 border-t border-slate-400/20">
|
||||
<div className="space-y-0.5">
|
||||
<Label htmlFor="db-incremental-sync" className="text-sm font-medium">
|
||||
Incremental sync
|
||||
</Label>
|
||||
<p className="text-xs text-muted-foreground">
|
||||
Only sync changes since last index (faster). Disable for a full re-index.
|
||||
</p>
|
||||
</div>
|
||||
<Switch
|
||||
id="db-incremental-sync"
|
||||
checked={indexingOptions.incremental_sync}
|
||||
onCheckedChange={(checked) => handleIndexingOptionChange("incremental_sync", checked)}
|
||||
/>
|
||||
</div>
|
||||
|
||||
<div className="flex items-center justify-between pt-2 border-t border-slate-400/20">
|
||||
<div className="space-y-0.5">
|
||||
<Label htmlFor="db-include-subfolders" className="text-sm font-medium">
|
||||
Include subfolders
|
||||
</Label>
|
||||
<p className="text-xs text-muted-foreground">
|
||||
Recursively index files in subfolders of selected folders
|
||||
</p>
|
||||
</div>
|
||||
<Switch
|
||||
id="db-include-subfolders"
|
||||
checked={indexingOptions.include_subfolders}
|
||||
onCheckedChange={(checked) => handleIndexingOptionChange("include_subfolders", checked)}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue