From ef358e067a66edf5a6142e4ce347a1f4cbb360e5 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 8 Apr 2026 14:43:21 +0530 Subject: [PATCH 01/18] feat: add chat session and message synchronization hooks --- .../[search_space_id]/new-chat/[[...chat_id]]/page.tsx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx index 7bef1fff2..080a36167 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx @@ -38,6 +38,8 @@ import { removeChatTabAtom, updateChatTabTitleAtom } from "@/atoms/tabs/tabs.ato import { currentUserAtom } from "@/atoms/user/user-query.atoms"; import { ThinkingStepsDataUI } from "@/components/assistant-ui/thinking-steps"; import { Thread } from "@/components/assistant-ui/thread"; +import { useChatSessionStateSync } from "@/hooks/use-chat-session-state"; +import { useMessagesSync } from "@/hooks/use-messages-sync"; import Loading from "../loading"; const MobileEditorPanel = dynamic( From cd71893985c50c9baa6edb6784a3e7c006693b13 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 8 Apr 2026 14:48:40 +0530 Subject: [PATCH 02/18] fix: update relationship backref to enable passive deletes for document versions --- surfsense_backend/app/db.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index 77a001a0d..01a6bbda0 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -25,7 +25,7 @@ from sqlalchemy import ( ) from sqlalchemy.dialects.postgresql import JSONB, UUID from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine -from sqlalchemy.orm import DeclarativeBase, Mapped, declared_attr, relationship +from sqlalchemy.orm import DeclarativeBase, Mapped, backref, declared_attr, relationship from app.config import config @@ -1086,7 +1086,9 @@ class DocumentVersion(BaseModel, TimestampMixin): content_hash = Column(String, nullable=False) title = Column(String, nullable=True) - document = relationship("Document", backref="versions") + document = relationship( + "Document", backref=backref("versions", passive_deletes=True) + ) class Chunk(BaseModel, TimestampMixin): From b3925654dd122fddf4e33744afe080840b941b5a Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 8 Apr 2026 15:20:19 +0530 Subject: [PATCH 03/18] fix: update MoreHorizontal icon styling in FolderNode component --- surfsense_web/components/documents/FolderNode.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/surfsense_web/components/documents/FolderNode.tsx b/surfsense_web/components/documents/FolderNode.tsx index d9df69e9c..4b0103ce5 100644 --- a/surfsense_web/components/documents/FolderNode.tsx +++ b/surfsense_web/components/documents/FolderNode.tsx @@ -354,7 +354,7 @@ export const FolderNode = React.memo(function FolderNode({ className="hidden sm:inline-flex h-6 w-6 shrink-0 opacity-0 group-hover:opacity-100 transition-opacity" onClick={(e) => e.stopPropagation()} > - + From 5f5954e9327a509b6be366cf7c97719aa28b44fb Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 8 Apr 2026 15:46:52 +0530 Subject: [PATCH 04/18] feat: implement upload-based folder indexing and synchronization features --- .../app/routes/documents_routes.py | 376 ++++++++++++++++++ .../app/tasks/celery_tasks/document_tasks.py | 134 ++++++- .../local_folder_indexer.py | 291 ++++++++++++++ surfsense_desktop/src/ipc/channels.ts | 1 + surfsense_desktop/src/ipc/handlers.ts | 6 + .../src/modules/folder-watcher.ts | 25 ++ surfsense_desktop/src/preload.ts | 1 + .../layout/ui/sidebar/DocumentsSidebar.tsx | 21 +- .../components/sources/FolderWatchDialog.tsx | 104 ++++- surfsense_web/hooks/use-folder-sync.ts | 67 +++- .../lib/apis/documents-api.service.ts | 70 ++++ surfsense_web/lib/folder-sync-upload.ts | 214 ++++++++++ surfsense_web/types/window.d.ts | 8 + 13 files changed, 1273 insertions(+), 45 deletions(-) create mode 100644 surfsense_web/lib/folder-sync-upload.ts diff --git a/surfsense_backend/app/routes/documents_routes.py b/surfsense_backend/app/routes/documents_routes.py index 5008b1a10..c28fddfe0 100644 --- a/surfsense_backend/app/routes/documents_routes.py +++ b/surfsense_backend/app/routes/documents_routes.py @@ -1543,3 +1543,379 @@ async def folder_index_files( "status": "processing", "file_count": len(request.target_file_paths), } + + +# ===== Upload-based local folder indexing endpoints ===== +# These work for ALL deployment modes (cloud, self-hosted remote, self-hosted local). +# The desktop app reads files locally and uploads them here. + + +class FolderMtimeCheckFile(PydanticBaseModel): + relative_path: str + mtime: float + + +class FolderMtimeCheckRequest(PydanticBaseModel): + folder_name: str + search_space_id: int + files: list[FolderMtimeCheckFile] + + +class FolderUnlinkRequest(PydanticBaseModel): + folder_name: str + search_space_id: int + root_folder_id: int | None = None + relative_paths: list[str] + + +class FolderSyncFinalizeRequest(PydanticBaseModel): + folder_name: str + search_space_id: int + root_folder_id: int | None = None + all_relative_paths: list[str] + + +@router.post("/documents/folder-mtime-check") +async def folder_mtime_check( + request: FolderMtimeCheckRequest, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """Pre-upload optimization: check which files need uploading based on mtime. + + Returns the subset of relative paths where the file is new or has a + different mtime, so the client can skip reading/uploading unchanged files. + """ + from app.indexing_pipeline.document_hashing import compute_identifier_hash + + await check_permission( + session, + user, + request.search_space_id, + Permission.DOCUMENTS_CREATE.value, + "You don't have permission to create documents in this search space", + ) + + uid_hashes = {} + for f in request.files: + uid = f"{request.folder_name}:{f.relative_path}" + uid_hash = compute_identifier_hash( + DocumentType.LOCAL_FOLDER_FILE.value, uid, request.search_space_id + ) + uid_hashes[uid_hash] = f + + existing_docs = ( + ( + await session.execute( + select(Document).where( + Document.unique_identifier_hash.in_(list(uid_hashes.keys())), + Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + ) + ) + ) + .scalars() + .all() + ) + + existing_by_hash = {doc.unique_identifier_hash: doc for doc in existing_docs} + + MTIME_TOLERANCE = 1.0 + files_to_upload: list[str] = [] + + for uid_hash, file_info in uid_hashes.items(): + doc = existing_by_hash.get(uid_hash) + if doc is None: + files_to_upload.append(file_info.relative_path) + continue + + stored_mtime = (doc.document_metadata or {}).get("mtime") + if stored_mtime is None: + files_to_upload.append(file_info.relative_path) + continue + + if abs(file_info.mtime - stored_mtime) >= MTIME_TOLERANCE: + files_to_upload.append(file_info.relative_path) + + return {"files_to_upload": files_to_upload} + + +@router.post("/documents/folder-upload") +async def folder_upload( + files: list[UploadFile], + folder_name: str = Form(...), + search_space_id: int = Form(...), + relative_paths: str = Form(...), + root_folder_id: int | None = Form(None), + enable_summary: bool = Form(False), + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """Upload files from the desktop app for folder indexing. + + Files are written to temp storage and dispatched to a Celery task. + Works for all deployment modes (no is_self_hosted guard). + """ + import json + import tempfile + + await check_permission( + session, + user, + search_space_id, + Permission.DOCUMENTS_CREATE.value, + "You don't have permission to create documents in this search space", + ) + + if not files: + raise HTTPException(status_code=400, detail="No files provided") + + try: + rel_paths: list[str] = json.loads(relative_paths) + except (json.JSONDecodeError, TypeError) as e: + raise HTTPException( + status_code=400, detail=f"Invalid relative_paths JSON: {e}" + ) from e + + if len(rel_paths) != len(files): + raise HTTPException( + status_code=400, + detail=f"Mismatch: {len(files)} files but {len(rel_paths)} relative_paths", + ) + + for file in files: + file_size = file.size or 0 + if file_size > MAX_FILE_SIZE_BYTES: + raise HTTPException( + status_code=413, + detail=f"File '{file.filename}' ({file_size / (1024 * 1024):.1f} MB) " + f"exceeds the {MAX_FILE_SIZE_BYTES // (1024 * 1024)} MB per-file limit.", + ) + + if not root_folder_id: + watched_metadata = { + "watched": True, + "folder_path": folder_name, + } + existing_root = ( + await session.execute( + select(Folder).where( + Folder.name == folder_name, + Folder.parent_id.is_(None), + Folder.search_space_id == search_space_id, + ) + ) + ).scalar_one_or_none() + + if existing_root: + root_folder_id = existing_root.id + existing_root.folder_metadata = watched_metadata + else: + root_folder = Folder( + name=folder_name, + search_space_id=search_space_id, + created_by_id=str(user.id), + position="a0", + folder_metadata=watched_metadata, + ) + session.add(root_folder) + await session.flush() + root_folder_id = root_folder.id + + await session.commit() + + async def _read_and_save(file: UploadFile, idx: int) -> dict: + content = await file.read() + filename = file.filename or rel_paths[idx].split("/")[-1] + + def _write_temp() -> str: + with tempfile.NamedTemporaryFile( + delete=False, suffix=os.path.splitext(filename)[1] + ) as tmp: + tmp.write(content) + return tmp.name + + temp_path = await asyncio.to_thread(_write_temp) + return { + "temp_path": temp_path, + "relative_path": rel_paths[idx], + "filename": filename, + } + + file_mappings = await asyncio.gather( + *(_read_and_save(f, i) for i, f in enumerate(files)) + ) + + from app.tasks.celery_tasks.document_tasks import ( + index_uploaded_folder_files_task, + ) + + index_uploaded_folder_files_task.delay( + search_space_id=search_space_id, + user_id=str(user.id), + folder_name=folder_name, + root_folder_id=root_folder_id, + enable_summary=enable_summary, + file_mappings=list(file_mappings), + ) + + return { + "message": f"Folder upload started for {len(files)} file(s)", + "status": "processing", + "root_folder_id": root_folder_id, + "file_count": len(files), + } + + +@router.post("/documents/folder-unlink") +async def folder_unlink( + request: FolderUnlinkRequest, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """Handle file deletion events from the desktop watcher. + + For each relative path, find the matching document and delete it. + """ + from app.indexing_pipeline.document_hashing import compute_identifier_hash + from app.tasks.connector_indexers.local_folder_indexer import ( + _cleanup_empty_folder_chain, + ) + + await check_permission( + session, + user, + request.search_space_id, + Permission.DOCUMENTS_DELETE.value, + "You don't have permission to delete documents in this search space", + ) + + deleted_count = 0 + + for rel_path in request.relative_paths: + unique_id = f"{request.folder_name}:{rel_path}" + uid_hash = compute_identifier_hash( + DocumentType.LOCAL_FOLDER_FILE.value, + unique_id, + request.search_space_id, + ) + + existing = ( + await session.execute( + select(Document).where( + Document.unique_identifier_hash == uid_hash + ) + ) + ).scalar_one_or_none() + + if existing: + deleted_folder_id = existing.folder_id + await session.delete(existing) + await session.flush() + + if deleted_folder_id and request.root_folder_id: + await _cleanup_empty_folder_chain( + session, deleted_folder_id, request.root_folder_id + ) + deleted_count += 1 + + await session.commit() + return {"deleted_count": deleted_count} + + +@router.post("/documents/folder-sync-finalize") +async def folder_sync_finalize( + request: FolderSyncFinalizeRequest, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """Finalize a full folder scan by deleting orphaned documents. + + The client sends the complete list of relative paths currently in the + folder. Any document in the DB for this folder that is NOT in the list + gets deleted. + """ + from app.indexing_pipeline.document_hashing import compute_identifier_hash + from app.tasks.connector_indexers.local_folder_indexer import ( + _cleanup_empty_folders, + ) + + await check_permission( + session, + user, + request.search_space_id, + Permission.DOCUMENTS_DELETE.value, + "You don't have permission to delete documents in this search space", + ) + + seen_hashes: set[str] = set() + for rel_path in request.all_relative_paths: + unique_id = f"{request.folder_name}:{rel_path}" + uid_hash = compute_identifier_hash( + DocumentType.LOCAL_FOLDER_FILE.value, + unique_id, + request.search_space_id, + ) + seen_hashes.add(uid_hash) + + all_root_folder_ids: set[int] = set() + if request.root_folder_id: + all_root_folder_ids.add(request.root_folder_id) + + all_db_folders = ( + ( + await session.execute( + select(Folder.id).where( + Folder.search_space_id == request.search_space_id, + ) + ) + ) + .scalars() + .all() + ) + all_root_folder_ids.update(all_db_folders) + + all_folder_docs = ( + ( + await session.execute( + select(Document).where( + Document.document_type == DocumentType.LOCAL_FOLDER_FILE, + Document.search_space_id == request.search_space_id, + Document.folder_id.in_(list(all_root_folder_ids)) + if all_root_folder_ids + else True, + ) + ) + ) + .scalars() + .all() + ) + + deleted_count = 0 + for doc in all_folder_docs: + if doc.unique_identifier_hash not in seen_hashes: + await session.delete(doc) + deleted_count += 1 + + await session.flush() + + if request.root_folder_id: + existing_dirs: set[str] = set() + for rel_path in request.all_relative_paths: + parent = str(os.path.dirname(rel_path)) + if parent and parent != ".": + existing_dirs.add(parent) + + folder_mapping: dict[str, int] = {} + if request.root_folder_id: + folder_mapping[""] = request.root_folder_id + + await _cleanup_empty_folders( + session, + request.root_folder_id, + request.search_space_id, + existing_dirs, + folder_mapping, + ) + + await session.commit() + return {"deleted_count": deleted_count} diff --git a/surfsense_backend/app/tasks/celery_tasks/document_tasks.py b/surfsense_backend/app/tasks/celery_tasks/document_tasks.py index 4e9249d34..62720826f 100644 --- a/surfsense_backend/app/tasks/celery_tasks/document_tasks.py +++ b/surfsense_backend/app/tasks/celery_tasks/document_tasks.py @@ -11,7 +11,10 @@ from app.config import config from app.services.notification_service import NotificationService from app.services.task_logging_service import TaskLoggingService from app.tasks.celery_tasks import get_celery_session_maker -from app.tasks.connector_indexers.local_folder_indexer import index_local_folder +from app.tasks.connector_indexers.local_folder_indexer import ( + index_local_folder, + index_uploaded_files, +) from app.tasks.document_processors import ( add_extension_received_document, add_youtube_video_document, @@ -1411,3 +1414,132 @@ async def _index_local_folder_async( heartbeat_task.cancel() if notification_id is not None: _stop_heartbeat(notification_id) + + +# ===== Upload-based folder indexing task ===== + + +@celery_app.task(name="index_uploaded_folder_files", bind=True) +def index_uploaded_folder_files_task( + self, + search_space_id: int, + user_id: str, + folder_name: str, + root_folder_id: int, + enable_summary: bool, + file_mappings: list[dict], +): + """Celery task to index files uploaded from the desktop app.""" + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + loop.run_until_complete( + _index_uploaded_folder_files_async( + search_space_id=search_space_id, + user_id=user_id, + folder_name=folder_name, + root_folder_id=root_folder_id, + enable_summary=enable_summary, + file_mappings=file_mappings, + ) + ) + finally: + loop.close() + + +async def _index_uploaded_folder_files_async( + search_space_id: int, + user_id: str, + folder_name: str, + root_folder_id: int, + enable_summary: bool, + file_mappings: list[dict], +): + """Run upload-based folder indexing with notification + heartbeat.""" + file_count = len(file_mappings) + doc_name = f"{folder_name} ({file_count} file{'s' if file_count != 1 else ''})" + + notification = None + notification_id: int | None = None + heartbeat_task = None + + async with get_celery_session_maker()() as session: + try: + notification = ( + await NotificationService.document_processing.notify_processing_started( + session=session, + user_id=UUID(user_id), + document_type="LOCAL_FOLDER_FILE", + document_name=doc_name, + search_space_id=search_space_id, + ) + ) + notification_id = notification.id + _start_heartbeat(notification_id) + heartbeat_task = asyncio.create_task(_run_heartbeat_loop(notification_id)) + except Exception: + logger.warning( + "Failed to create notification for uploaded folder indexing", + exc_info=True, + ) + + async def _heartbeat_progress(completed_count: int) -> None: + if notification: + with contextlib.suppress(Exception): + await NotificationService.document_processing.notify_processing_progress( + session=session, + notification=notification, + stage="indexing", + stage_message=f"Syncing files ({completed_count}/{file_count})", + ) + + try: + _indexed, _failed, err = await index_uploaded_files( + session=session, + search_space_id=search_space_id, + user_id=user_id, + folder_name=folder_name, + root_folder_id=root_folder_id, + enable_summary=enable_summary, + file_mappings=file_mappings, + on_heartbeat_callback=_heartbeat_progress, + ) + + if notification: + try: + await session.refresh(notification) + if err: + await NotificationService.document_processing.notify_processing_completed( + session=session, + notification=notification, + error_message=err, + ) + else: + await NotificationService.document_processing.notify_processing_completed( + session=session, + notification=notification, + ) + except Exception: + logger.warning( + "Failed to update notification after uploaded folder indexing", + exc_info=True, + ) + + except Exception as e: + logger.exception(f"Uploaded folder indexing failed: {e}") + if notification: + try: + await session.refresh(notification) + await NotificationService.document_processing.notify_processing_completed( + session=session, + notification=notification, + error_message=str(e)[:200], + ) + except Exception: + pass + raise + finally: + if heartbeat_task: + heartbeat_task.cancel() + if notification_id is not None: + _stop_heartbeat(notification_id) diff --git a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py index 7f42f4638..7b433cf62 100644 --- a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py @@ -1081,3 +1081,294 @@ async def _index_single_file( logger.exception(f"Error indexing single file {target_file_path}: {e}") await session.rollback() return 0, 0, str(e) + + +# ======================================================================== +# Upload-based folder indexing (works for all deployment modes) +# ======================================================================== + + +async def _mirror_folder_structure_from_paths( + session: AsyncSession, + relative_paths: list[str], + folder_name: str, + search_space_id: int, + user_id: str, + root_folder_id: int | None = None, +) -> tuple[dict[str, int], int]: + """Create DB Folder rows from a list of relative file paths. + + Unlike ``_mirror_folder_structure`` this does not walk the filesystem; + it derives the directory tree from the paths provided by the client. + + Returns (mapping, root_folder_id) where mapping is + relative_dir_path -> folder_id. The empty-string key maps to root. + """ + dir_set: set[str] = set() + for rp in relative_paths: + parent = str(Path(rp).parent) + if parent == ".": + continue + parts = Path(parent).parts + for i in range(len(parts)): + dir_set.add(str(Path(*parts[: i + 1]))) + + subdirs = sorted(dir_set, key=lambda p: p.count(os.sep)) + + mapping: dict[str, int] = {} + + if root_folder_id: + existing = ( + await session.execute(select(Folder).where(Folder.id == root_folder_id)) + ).scalar_one_or_none() + if existing: + mapping[""] = existing.id + else: + root_folder_id = None + + if not root_folder_id: + root_folder = Folder( + name=folder_name, + search_space_id=search_space_id, + created_by_id=user_id, + position="a0", + ) + session.add(root_folder) + await session.flush() + mapping[""] = root_folder.id + root_folder_id = root_folder.id + + for rel_dir in subdirs: + dir_parts = Path(rel_dir).parts + dir_name = dir_parts[-1] + parent_rel = str(Path(*dir_parts[:-1])) if len(dir_parts) > 1 else "" + + parent_id = mapping.get(parent_rel, mapping[""]) + + existing_folder = ( + await session.execute( + select(Folder).where( + Folder.name == dir_name, + Folder.parent_id == parent_id, + Folder.search_space_id == search_space_id, + ) + ) + ).scalar_one_or_none() + + if existing_folder: + mapping[rel_dir] = existing_folder.id + else: + new_folder = Folder( + name=dir_name, + parent_id=parent_id, + search_space_id=search_space_id, + created_by_id=user_id, + position="a0", + ) + session.add(new_folder) + await session.flush() + mapping[rel_dir] = new_folder.id + + await session.flush() + return mapping, root_folder_id + + +UPLOAD_BATCH_CONCURRENCY = 5 + + +async def index_uploaded_files( + session: AsyncSession, + search_space_id: int, + user_id: str, + folder_name: str, + root_folder_id: int, + enable_summary: bool, + file_mappings: list[dict], + on_heartbeat_callback: HeartbeatCallbackType | None = None, +) -> tuple[int, int, str | None]: + """Index files uploaded from the desktop app via temp paths. + + Each entry in *file_mappings* is ``{temp_path, relative_path, filename}``. + This function mirrors the folder structure from the provided relative + paths, then indexes each file exactly like ``_index_single_file`` but + reads from the temp path. Temp files are cleaned up after processing. + + Returns ``(indexed_count, failed_count, error_summary_or_none)``. + """ + task_logger = TaskLoggingService(session, search_space_id) + log_entry = await task_logger.log_task_start( + task_name="local_folder_indexing", + source="uploaded_folder_indexing", + message=f"Indexing {len(file_mappings)} uploaded file(s) for {folder_name}", + metadata={"file_count": len(file_mappings)}, + ) + + try: + all_relative_paths = [m["relative_path"] for m in file_mappings] + folder_mapping, root_folder_id = await _mirror_folder_structure_from_paths( + session=session, + relative_paths=all_relative_paths, + folder_name=folder_name, + search_space_id=search_space_id, + user_id=user_id, + root_folder_id=root_folder_id, + ) + await session.flush() + + page_limit_service = PageLimitService(session) + pipeline = IndexingPipelineService(session) + llm = await get_user_long_context_llm(session, user_id, search_space_id) + + indexed_count = 0 + failed_count = 0 + errors: list[str] = [] + + for i, mapping in enumerate(file_mappings): + temp_path = mapping["temp_path"] + relative_path = mapping["relative_path"] + filename = mapping["filename"] + + try: + unique_id = f"{folder_name}:{relative_path}" + uid_hash = compute_identifier_hash( + DocumentType.LOCAL_FOLDER_FILE.value, + unique_id, + search_space_id, + ) + + try: + estimated_pages = await _check_page_limit_or_skip( + page_limit_service, user_id, temp_path + ) + except PageLimitExceededError: + logger.warning(f"Page limit exceeded, skipping: {relative_path}") + failed_count += 1 + continue + + try: + content, content_hash = await _compute_file_content_hash( + temp_path, filename, search_space_id + ) + except Exception as e: + logger.warning(f"Could not read {relative_path}: {e}") + failed_count += 1 + errors.append(f"{filename}: {e}") + continue + + if not content.strip(): + failed_count += 1 + continue + + existing = await check_document_by_unique_identifier( + session, uid_hash + ) + + if existing: + if existing.content_hash == content_hash: + meta = dict(existing.document_metadata or {}) + meta["mtime"] = datetime.now(UTC).timestamp() + existing.document_metadata = meta + if not DocumentStatus.is_state( + existing.status, DocumentStatus.READY + ): + existing.status = DocumentStatus.ready() + await session.commit() + continue + + await create_version_snapshot(session, existing) + + connector_doc = _build_connector_doc( + title=filename, + content=content, + relative_path=relative_path, + folder_name=folder_name, + search_space_id=search_space_id, + user_id=user_id, + enable_summary=enable_summary, + ) + + documents = await pipeline.prepare_for_indexing([connector_doc]) + if not documents: + failed_count += 1 + continue + + db_doc = documents[0] + + try: + db_doc.folder_id = await _resolve_folder_for_file( + session, + relative_path, + root_folder_id, + search_space_id, + user_id, + ) + await session.commit() + except IntegrityError: + await session.rollback() + await session.refresh(db_doc) + + await pipeline.index(db_doc, connector_doc, llm) + + await session.refresh(db_doc) + doc_meta = dict(db_doc.document_metadata or {}) + doc_meta["mtime"] = datetime.now(UTC).timestamp() + db_doc.document_metadata = doc_meta + await session.commit() + + if DocumentStatus.is_state(db_doc.status, DocumentStatus.READY): + indexed_count += 1 + final_pages = _compute_final_pages( + page_limit_service, estimated_pages, len(content) + ) + await page_limit_service.update_page_usage( + user_id, final_pages, allow_exceed=True + ) + else: + failed_count += 1 + + if on_heartbeat_callback and (i + 1) % 5 == 0: + await on_heartbeat_callback(i + 1) + + except Exception as e: + logger.exception( + f"Error indexing uploaded file {relative_path}: {e}" + ) + await session.rollback() + failed_count += 1 + errors.append(f"{filename}: {e}") + finally: + try: + os.unlink(temp_path) + except OSError: + pass + + error_summary = None + if errors: + error_summary = ( + f"{failed_count} file(s) failed: " + "; ".join(errors[:5]) + ) + if len(errors) > 5: + error_summary += f" ... and {len(errors) - 5} more" + + await task_logger.log_task_success( + log_entry, + f"Upload indexing complete: {indexed_count} indexed, {failed_count} failed", + {"indexed": indexed_count, "failed": failed_count}, + ) + + return indexed_count, failed_count, error_summary + + except SQLAlchemyError as e: + logger.exception(f"Database error during uploaded file indexing: {e}") + await session.rollback() + await task_logger.log_task_failure( + log_entry, f"DB error: {e}", "Database error", {} + ) + return 0, 0, f"Database error: {e}" + + except Exception as e: + logger.exception(f"Error during uploaded file indexing: {e}") + await task_logger.log_task_failure( + log_entry, f"Error: {e}", "Unexpected error", {} + ) + return 0, 0, str(e) diff --git a/surfsense_desktop/src/ipc/channels.ts b/surfsense_desktop/src/ipc/channels.ts index 39e75f046..1921dcda2 100644 --- a/surfsense_desktop/src/ipc/channels.ts +++ b/surfsense_desktop/src/ipc/channels.ts @@ -30,6 +30,7 @@ export const IPC_CHANNELS = { FOLDER_SYNC_RENDERER_READY: 'folder-sync:renderer-ready', FOLDER_SYNC_GET_PENDING_EVENTS: 'folder-sync:get-pending-events', FOLDER_SYNC_ACK_EVENTS: 'folder-sync:ack-events', + FOLDER_SYNC_LIST_FILES: 'folder-sync:list-files', BROWSE_FILES: 'browse:files', READ_LOCAL_FILES: 'browse:read-local-files', // Auth token sync across windows diff --git a/surfsense_desktop/src/ipc/handlers.ts b/surfsense_desktop/src/ipc/handlers.ts index 200fa75bd..a1d5552c9 100644 --- a/surfsense_desktop/src/ipc/handlers.ts +++ b/surfsense_desktop/src/ipc/handlers.ts @@ -19,6 +19,8 @@ import { markRendererReady, browseFiles, readLocalFiles, + listFolderFiles, + type WatchedFolderConfig, } from '../modules/folder-watcher'; import { getShortcuts, setShortcuts, type ShortcutConfig } from '../modules/shortcuts'; import { getActiveSearchSpaceId, setActiveSearchSpaceId } from '../modules/active-search-space'; @@ -91,6 +93,10 @@ export function registerIpcHandlers(): void { acknowledgeFileEvents(eventIds) ); + ipcMain.handle(IPC_CHANNELS.FOLDER_SYNC_LIST_FILES, (_event, config: WatchedFolderConfig) => + listFolderFiles(config) + ); + ipcMain.handle(IPC_CHANNELS.BROWSE_FILES, () => browseFiles()); ipcMain.handle(IPC_CHANNELS.READ_LOCAL_FILES, (_event, paths: string[]) => diff --git a/surfsense_desktop/src/modules/folder-watcher.ts b/surfsense_desktop/src/modules/folder-watcher.ts index 969dabe97..a39d7855a 100644 --- a/surfsense_desktop/src/modules/folder-watcher.ts +++ b/surfsense_desktop/src/modules/folder-watcher.ts @@ -188,6 +188,31 @@ function walkFolderMtimes(config: WatchedFolderConfig): MtimeMap { return result; } +export interface FolderFileEntry { + relativePath: string; + fullPath: string; + size: number; + mtimeMs: number; +} + +export function listFolderFiles(config: WatchedFolderConfig): FolderFileEntry[] { + const root = config.path; + const mtimeMap = walkFolderMtimes(config); + const entries: FolderFileEntry[] = []; + + for (const [relativePath, mtimeMs] of Object.entries(mtimeMap)) { + const fullPath = path.join(root, relativePath); + try { + const stat = fs.statSync(fullPath); + entries.push({ relativePath, fullPath, size: stat.size, mtimeMs }); + } catch { + // File may have been removed between walk and stat + } + } + + return entries; +} + function getMainWindow(): BrowserWindow | null { const windows = BrowserWindow.getAllWindows(); return windows.length > 0 ? windows[0] : null; diff --git a/surfsense_desktop/src/preload.ts b/surfsense_desktop/src/preload.ts index 4d9537c91..7cc63aea1 100644 --- a/surfsense_desktop/src/preload.ts +++ b/surfsense_desktop/src/preload.ts @@ -64,6 +64,7 @@ contextBridge.exposeInMainWorld('electronAPI', { signalRendererReady: () => ipcRenderer.invoke(IPC_CHANNELS.FOLDER_SYNC_RENDERER_READY), getPendingFileEvents: () => ipcRenderer.invoke(IPC_CHANNELS.FOLDER_SYNC_GET_PENDING_EVENTS), acknowledgeFileEvents: (eventIds: string[]) => ipcRenderer.invoke(IPC_CHANNELS.FOLDER_SYNC_ACK_EVENTS, eventIds), + listFolderFiles: (config: any) => ipcRenderer.invoke(IPC_CHANNELS.FOLDER_SYNC_LIST_FILES, config), // Browse files via native dialog browseFiles: () => ipcRenderer.invoke(IPC_CHANNELS.BROWSE_FILES), diff --git a/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx b/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx index c806c61d8..85c3a9897 100644 --- a/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx +++ b/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx @@ -23,7 +23,9 @@ import { FolderPickerDialog } from "@/components/documents/FolderPickerDialog"; import { FolderTreeView } from "@/components/documents/FolderTreeView"; import { VersionHistoryDialog } from "@/components/documents/version-history"; import { EXPORT_FILE_EXTENSIONS } from "@/components/shared/ExportMenuItems"; -import { FolderWatchDialog, type SelectedFolder } from "@/components/sources/FolderWatchDialog"; +import { DEFAULT_EXCLUDE_PATTERNS, FolderWatchDialog, type SelectedFolder } from "@/components/sources/FolderWatchDialog"; +import { uploadFolderScan } from "@/lib/folder-sync-upload"; +import { getSupportedExtensionsSet } from "@/lib/supported-extensions"; import { AlertDialog, AlertDialogAction, @@ -304,14 +306,17 @@ export function DocumentsSidebar({ } try { - await documentsApiService.folderIndex(searchSpaceId, { - folder_path: matched.path, - folder_name: matched.name, - search_space_id: searchSpaceId, - root_folder_id: folder.id, - file_extensions: matched.fileExtensions ?? undefined, + toast.info(`Re-scanning folder: ${matched.name}`); + await uploadFolderScan({ + folderPath: matched.path, + folderName: matched.name, + searchSpaceId, + excludePatterns: matched.excludePatterns ?? DEFAULT_EXCLUDE_PATTERNS, + fileExtensions: matched.fileExtensions ?? Array.from(getSupportedExtensionsSet()), + enableSummary: false, + rootFolderId: folder.id, }); - toast.success(`Re-scanning folder: ${matched.name}`); + toast.success(`Re-scan complete: ${matched.name}`); } catch (err) { toast.error((err as Error)?.message || "Failed to re-scan folder"); } diff --git a/surfsense_web/components/sources/FolderWatchDialog.tsx b/surfsense_web/components/sources/FolderWatchDialog.tsx index 1c66ea6b9..8ccf12afe 100644 --- a/surfsense_web/components/sources/FolderWatchDialog.tsx +++ b/surfsense_web/components/sources/FolderWatchDialog.tsx @@ -1,7 +1,7 @@ "use client"; import { X } from "lucide-react"; -import { useCallback, useEffect, useMemo, useState } from "react"; +import { useCallback, useEffect, useMemo, useRef, useState } from "react"; import { toast } from "sonner"; import { Button } from "@/components/ui/button"; import { @@ -13,8 +13,8 @@ import { } from "@/components/ui/dialog"; import { Spinner } from "@/components/ui/spinner"; import { Switch } from "@/components/ui/switch"; -import { documentsApiService } from "@/lib/apis/documents-api.service"; import { getSupportedExtensionsSet } from "@/lib/supported-extensions"; +import { type FolderSyncProgress, uploadFolderScan } from "@/lib/folder-sync-upload"; export interface SelectedFolder { path: string; @@ -29,7 +29,7 @@ interface FolderWatchDialogProps { initialFolder?: SelectedFolder | null; } -const DEFAULT_EXCLUDE_PATTERNS = [ +export const DEFAULT_EXCLUDE_PATTERNS = [ ".git", "node_modules", "__pycache__", @@ -48,6 +48,8 @@ export function FolderWatchDialog({ const [selectedFolder, setSelectedFolder] = useState(null); const [shouldSummarize, setShouldSummarize] = useState(false); const [submitting, setSubmitting] = useState(false); + const [progress, setProgress] = useState(null); + const abortRef = useRef(null); useEffect(() => { if (open && initialFolder) { @@ -68,29 +70,38 @@ export function FolderWatchDialog({ setSelectedFolder({ path: folderPath, name: folderName }); }, []); + const handleCancel = useCallback(() => { + abortRef.current?.abort(); + }, []); + const handleSubmit = useCallback(async () => { if (!selectedFolder) return; const api = window.electronAPI; if (!api) return; + const controller = new AbortController(); + abortRef.current = controller; setSubmitting(true); - try { - const result = await documentsApiService.folderIndex(searchSpaceId, { - folder_path: selectedFolder.path, - folder_name: selectedFolder.name, - search_space_id: searchSpaceId, - enable_summary: shouldSummarize, - file_extensions: supportedExtensions, - }); + setProgress(null); - const rootFolderId = (result as { root_folder_id?: number })?.root_folder_id ?? null; + try { + const rootFolderId = await uploadFolderScan({ + folderPath: selectedFolder.path, + folderName: selectedFolder.name, + searchSpaceId, + excludePatterns: DEFAULT_EXCLUDE_PATTERNS, + fileExtensions: supportedExtensions, + enableSummary: shouldSummarize, + onProgress: setProgress, + signal: controller.signal, + }); await api.addWatchedFolder({ path: selectedFolder.path, name: selectedFolder.name, excludePatterns: DEFAULT_EXCLUDE_PATTERNS, fileExtensions: supportedExtensions, - rootFolderId, + rootFolderId: rootFolderId ?? null, searchSpaceId, active: true, }); @@ -98,12 +109,19 @@ export function FolderWatchDialog({ toast.success(`Watching folder: ${selectedFolder.name}`); setSelectedFolder(null); setShouldSummarize(false); + setProgress(null); onOpenChange(false); onSuccess?.(); } catch (err) { - toast.error((err as Error)?.message || "Failed to watch folder"); + if ((err as Error)?.name === "AbortError") { + toast.info("Folder sync cancelled. Partial progress was saved."); + } else { + toast.error((err as Error)?.message || "Failed to watch folder"); + } } finally { + abortRef.current = null; setSubmitting(false); + setProgress(null); } }, [ selectedFolder, @@ -119,12 +137,31 @@ export function FolderWatchDialog({ if (!nextOpen && !submitting) { setSelectedFolder(null); setShouldSummarize(false); + setProgress(null); } onOpenChange(nextOpen); }, [onOpenChange, submitting] ); + const progressLabel = useMemo(() => { + if (!progress) return null; + switch (progress.phase) { + case "listing": + return "Scanning folder..."; + case "checking": + return `Checking ${progress.total} file(s)...`; + case "uploading": + return `Uploading ${progress.uploaded}/${progress.total} file(s)...`; + case "finalizing": + return "Finalizing..."; + case "done": + return "Done!"; + default: + return null; + } + }, [progress]); + return ( @@ -174,14 +211,39 @@ export function FolderWatchDialog({ - + + + ) : ( + )} - + )} diff --git a/surfsense_web/hooks/use-folder-sync.ts b/surfsense_web/hooks/use-folder-sync.ts index 847d0081b..7a85c31fe 100644 --- a/surfsense_web/hooks/use-folder-sync.ts +++ b/surfsense_web/hooks/use-folder-sync.ts @@ -20,12 +20,18 @@ const DEBOUNCE_MS = 2000; const MAX_WAIT_MS = 10_000; const MAX_BATCH_SIZE = 50; +interface FileEntry { + fullPath: string; + relativePath: string; + action: string; +} + interface BatchItem { folderPath: string; folderName: string; searchSpaceId: number; rootFolderId: number | null; - filePaths: string[]; + files: FileEntry[]; ackIds: string[]; } @@ -44,18 +50,40 @@ export function useFolderSync() { while (queueRef.current.length > 0) { const batch = queueRef.current.shift()!; try { - await documentsApiService.folderIndexFiles(batch.searchSpaceId, { - folder_path: batch.folderPath, - folder_name: batch.folderName, - search_space_id: batch.searchSpaceId, - target_file_paths: batch.filePaths, - root_folder_id: batch.rootFolderId, - }); + const addChangeFiles = batch.files.filter((f) => f.action === "add" || f.action === "change"); + const unlinkFiles = batch.files.filter((f) => f.action === "unlink"); + + if (addChangeFiles.length > 0 && electronAPI?.readLocalFiles) { + const fullPaths = addChangeFiles.map((f) => f.fullPath); + const fileDataArr = await electronAPI.readLocalFiles(fullPaths); + + const files: File[] = fileDataArr.map((fd) => { + const blob = new Blob([fd.data], { type: fd.mimeType || "application/octet-stream" }); + return new File([blob], fd.name, { type: blob.type }); + }); + + await documentsApiService.folderUploadFiles(files, { + folder_name: batch.folderName, + search_space_id: batch.searchSpaceId, + relative_paths: addChangeFiles.map((f) => f.relativePath), + root_folder_id: batch.rootFolderId, + }); + } + + if (unlinkFiles.length > 0) { + await documentsApiService.folderNotifyUnlinked({ + folder_name: batch.folderName, + search_space_id: batch.searchSpaceId, + root_folder_id: batch.rootFolderId, + relative_paths: unlinkFiles.map((f) => f.relativePath), + }); + } + if (electronAPI?.acknowledgeFileEvents && batch.ackIds.length > 0) { await electronAPI.acknowledgeFileEvents(batch.ackIds); } } catch (err) { - console.error("[FolderSync] Failed to trigger batch re-index:", err); + console.error("[FolderSync] Failed to process batch:", err); } } processingRef.current = false; @@ -68,10 +96,10 @@ export function useFolderSync() { if (!pending) return; pendingByFolder.current.delete(folderKey); - for (let i = 0; i < pending.filePaths.length; i += MAX_BATCH_SIZE) { + for (let i = 0; i < pending.files.length; i += MAX_BATCH_SIZE) { queueRef.current.push({ ...pending, - filePaths: pending.filePaths.slice(i, i + MAX_BATCH_SIZE), + files: pending.files.slice(i, i + MAX_BATCH_SIZE), ackIds: i === 0 ? pending.ackIds : [], }); } @@ -83,9 +111,14 @@ export function useFolderSync() { const existing = pendingByFolder.current.get(folderKey); if (existing) { - const pathSet = new Set(existing.filePaths); - pathSet.add(event.fullPath); - existing.filePaths = Array.from(pathSet); + const pathSet = new Set(existing.files.map((f) => f.fullPath)); + if (!pathSet.has(event.fullPath)) { + existing.files.push({ + fullPath: event.fullPath, + relativePath: event.relativePath, + action: event.action, + }); + } if (!existing.ackIds.includes(event.id)) { existing.ackIds.push(event.id); } @@ -95,7 +128,11 @@ export function useFolderSync() { folderName: event.folderName, searchSpaceId: event.searchSpaceId, rootFolderId: event.rootFolderId, - filePaths: [event.fullPath], + files: [{ + fullPath: event.fullPath, + relativePath: event.relativePath, + action: event.action, + }], ackIds: [event.id], }); firstEventTime.current.set(folderKey, Date.now()); diff --git a/surfsense_web/lib/apis/documents-api.service.ts b/surfsense_web/lib/apis/documents-api.service.ts index 5961522ec..3018cbc34 100644 --- a/surfsense_web/lib/apis/documents-api.service.ts +++ b/surfsense_web/lib/apis/documents-api.service.ts @@ -453,6 +453,76 @@ class DocumentsApiService { return baseApiService.post(`/api/v1/documents/folder-index-files`, undefined, { body }); }; + folderMtimeCheck = async (body: { + folder_name: string; + search_space_id: number; + files: { relative_path: string; mtime: number }[]; + }): Promise<{ files_to_upload: string[] }> => { + return baseApiService.post(`/api/v1/documents/folder-mtime-check`, undefined, { body }) as unknown as { files_to_upload: string[] }; + }; + + folderUploadFiles = async ( + files: File[], + metadata: { + folder_name: string; + search_space_id: number; + relative_paths: string[]; + root_folder_id?: number | null; + enable_summary?: boolean; + }, + signal?: AbortSignal, + ): Promise<{ message: string; status: string; root_folder_id: number; file_count: number }> => { + const formData = new FormData(); + for (const file of files) { + formData.append("files", file); + } + formData.append("folder_name", metadata.folder_name); + formData.append("search_space_id", String(metadata.search_space_id)); + formData.append("relative_paths", JSON.stringify(metadata.relative_paths)); + if (metadata.root_folder_id != null) { + formData.append("root_folder_id", String(metadata.root_folder_id)); + } + formData.append("enable_summary", String(metadata.enable_summary ?? false)); + + const totalSize = files.reduce((acc, f) => acc + f.size, 0); + const timeoutMs = Math.min(Math.max((totalSize / (1024 * 1024)) * 5000, 30_000), 600_000); + + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), timeoutMs); + + if (signal) { + signal.addEventListener("abort", () => controller.abort(), { once: true }); + } + + try { + return await baseApiService.postFormData( + `/api/v1/documents/folder-upload`, + undefined, + { body: formData, signal: controller.signal }, + ) as { message: string; status: string; root_folder_id: number; file_count: number }; + } finally { + clearTimeout(timeoutId); + } + }; + + folderNotifyUnlinked = async (body: { + folder_name: string; + search_space_id: number; + root_folder_id: number | null; + relative_paths: string[]; + }): Promise<{ deleted_count: number }> => { + return baseApiService.post(`/api/v1/documents/folder-unlink`, undefined, { body }) as unknown as { deleted_count: number }; + }; + + folderSyncFinalize = async (body: { + folder_name: string; + search_space_id: number; + root_folder_id: number | null; + all_relative_paths: string[]; + }): Promise<{ deleted_count: number }> => { + return baseApiService.post(`/api/v1/documents/folder-sync-finalize`, undefined, { body }) as unknown as { deleted_count: number }; + }; + getWatchedFolders = async (searchSpaceId: number) => { return baseApiService.get( `/api/v1/documents/watched-folders?search_space_id=${searchSpaceId}`, diff --git a/surfsense_web/lib/folder-sync-upload.ts b/surfsense_web/lib/folder-sync-upload.ts new file mode 100644 index 000000000..ef53c6b29 --- /dev/null +++ b/surfsense_web/lib/folder-sync-upload.ts @@ -0,0 +1,214 @@ +import { documentsApiService } from "@/lib/apis/documents-api.service"; + +const MAX_BATCH_SIZE_BYTES = 20 * 1024 * 1024; // 20 MB +const MAX_BATCH_FILES = 10; +const UPLOAD_CONCURRENCY = 3; + +export interface FolderSyncProgress { + phase: "listing" | "checking" | "uploading" | "finalizing" | "done"; + uploaded: number; + total: number; +} + +export interface FolderSyncParams { + folderPath: string; + folderName: string; + searchSpaceId: number; + excludePatterns: string[]; + fileExtensions: string[]; + enableSummary: boolean; + rootFolderId?: number | null; + onProgress?: (progress: FolderSyncProgress) => void; + signal?: AbortSignal; +} + +function buildBatches( + entries: FolderFileEntry[], +): FolderFileEntry[][] { + const batches: FolderFileEntry[][] = []; + let currentBatch: FolderFileEntry[] = []; + let currentSize = 0; + + for (const entry of entries) { + if (entry.size >= MAX_BATCH_SIZE_BYTES) { + if (currentBatch.length > 0) { + batches.push(currentBatch); + currentBatch = []; + currentSize = 0; + } + batches.push([entry]); + continue; + } + + if ( + currentBatch.length >= MAX_BATCH_FILES || + currentSize + entry.size > MAX_BATCH_SIZE_BYTES + ) { + batches.push(currentBatch); + currentBatch = []; + currentSize = 0; + } + + currentBatch.push(entry); + currentSize += entry.size; + } + + if (currentBatch.length > 0) { + batches.push(currentBatch); + } + + return batches; +} + +async function uploadBatchesWithConcurrency( + batches: FolderFileEntry[][], + params: { + folderName: string; + searchSpaceId: number; + rootFolderId: number | null; + enableSummary: boolean; + signal?: AbortSignal; + onBatchComplete?: (filesInBatch: number) => void; + }, +) { + const api = window.electronAPI; + if (!api) throw new Error("Electron API not available"); + + let batchIdx = 0; + const errors: string[] = []; + + async function processNext(): Promise { + while (true) { + if (params.signal?.aborted) return; + + const idx = batchIdx++; + if (idx >= batches.length) return; + + const batch = batches[idx]; + const fullPaths = batch.map((e) => e.fullPath); + + try { + const fileDataArr = await api.readLocalFiles(fullPaths); + + const files: File[] = fileDataArr.map((fd) => { + const blob = new Blob([fd.data], { type: fd.mimeType || "application/octet-stream" }); + return new File([blob], fd.name, { type: blob.type }); + }); + + await documentsApiService.folderUploadFiles( + files, + { + folder_name: params.folderName, + search_space_id: params.searchSpaceId, + relative_paths: batch.map((e) => e.relativePath), + root_folder_id: params.rootFolderId, + enable_summary: params.enableSummary, + }, + params.signal, + ); + + params.onBatchComplete?.(batch.length); + } catch (err) { + if (params.signal?.aborted) return; + const msg = (err as Error)?.message || "Upload failed"; + errors.push(`Batch ${idx}: ${msg}`); + } + } + } + + const workers = Array.from({ length: Math.min(UPLOAD_CONCURRENCY, batches.length) }, () => processNext()); + await Promise.all(workers); + + if (errors.length > 0 && !params.signal?.aborted) { + console.error("Some batches failed:", errors); + } +} + +/** + * Run a full upload-based folder scan: list files, mtime-check, upload + * changed files in parallel batches, and finalize (delete orphans). + * + * Returns the root_folder_id to pass to addWatchedFolder. + */ +export async function uploadFolderScan(params: FolderSyncParams): Promise { + const api = window.electronAPI; + if (!api) throw new Error("Electron API not available"); + + const { folderPath, folderName, searchSpaceId, excludePatterns, fileExtensions, enableSummary, signal } = params; + let rootFolderId = params.rootFolderId ?? null; + + params.onProgress?.({ phase: "listing", uploaded: 0, total: 0 }); + + if (signal?.aborted) throw new DOMException("Aborted", "AbortError"); + + const allFiles = await api.listFolderFiles({ + path: folderPath, + name: folderName, + excludePatterns, + fileExtensions, + rootFolderId: rootFolderId ?? null, + searchSpaceId, + active: true, + }); + + if (signal?.aborted) throw new DOMException("Aborted", "AbortError"); + + params.onProgress?.({ phase: "checking", uploaded: 0, total: allFiles.length }); + + const mtimeCheckResult = await documentsApiService.folderMtimeCheck({ + folder_name: folderName, + search_space_id: searchSpaceId, + files: allFiles.map((f) => ({ relative_path: f.relativePath, mtime: f.mtimeMs / 1000 })), + }); + + const filesToUpload = mtimeCheckResult.files_to_upload; + const uploadSet = new Set(filesToUpload); + const entriesToUpload = allFiles.filter((f) => uploadSet.has(f.relativePath)); + + if (signal?.aborted) throw new DOMException("Aborted", "AbortError"); + + if (entriesToUpload.length > 0) { + const batches = buildBatches(entriesToUpload); + + let uploaded = 0; + params.onProgress?.({ phase: "uploading", uploaded: 0, total: entriesToUpload.length }); + + await uploadBatchesWithConcurrency(batches, { + folderName, + searchSpaceId, + rootFolderId: rootFolderId ?? null, + enableSummary, + signal, + onBatchComplete: (count) => { + uploaded += count; + params.onProgress?.({ phase: "uploading", uploaded, total: entriesToUpload.length }); + }, + }); + + if (signal?.aborted) throw new DOMException("Aborted", "AbortError"); + + if (!rootFolderId) { + const watchedFolders = await documentsApiService.getWatchedFolders(searchSpaceId); + const folderList = watchedFolders as Array<{ id: number; name: string }> | undefined; + const matched = folderList?.find((f) => f.name === folderName); + if (matched?.id) { + rootFolderId = matched.id; + } + } + } + + if (signal?.aborted) throw new DOMException("Aborted", "AbortError"); + + params.onProgress?.({ phase: "finalizing", uploaded: entriesToUpload.length, total: entriesToUpload.length }); + + await documentsApiService.folderSyncFinalize({ + folder_name: folderName, + search_space_id: searchSpaceId, + root_folder_id: rootFolderId ?? null, + all_relative_paths: allFiles.map((f) => f.relativePath), + }); + + params.onProgress?.({ phase: "done", uploaded: entriesToUpload.length, total: entriesToUpload.length }); + + return rootFolderId; +} diff --git a/surfsense_web/types/window.d.ts b/surfsense_web/types/window.d.ts index eeef51b0b..4373cdaac 100644 --- a/surfsense_web/types/window.d.ts +++ b/surfsense_web/types/window.d.ts @@ -34,6 +34,13 @@ interface LocalFileData { size: number; } +interface FolderFileEntry { + relativePath: string; + fullPath: string; + size: number; + mtimeMs: number; +} + interface ElectronAPI { versions: { electron: string; @@ -82,6 +89,7 @@ interface ElectronAPI { signalRendererReady: () => Promise; getPendingFileEvents: () => Promise; acknowledgeFileEvents: (eventIds: string[]) => Promise<{ acknowledged: number }>; + listFolderFiles: (config: WatchedFolderConfig) => Promise; // Browse files/folders via native dialogs browseFiles: () => Promise; readLocalFiles: (paths: string[]) => Promise; From 6f9f69c3e8c59ef722d71fb4bac805f57277fb28 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 8 Apr 2026 15:49:36 +0530 Subject: [PATCH 05/18] refactor: remove local folder indexing endpoints and update related API calls --- .../app/routes/documents_routes.py | 160 ------------------ .../lib/apis/documents-api.service.ts | 29 ---- surfsense_web/lib/folder-sync-upload.ts | 24 +-- 3 files changed, 13 insertions(+), 200 deletions(-) diff --git a/surfsense_backend/app/routes/documents_routes.py b/surfsense_backend/app/routes/documents_routes.py index c28fddfe0..39458dc5f 100644 --- a/surfsense_backend/app/routes/documents_routes.py +++ b/surfsense_backend/app/routes/documents_routes.py @@ -1385,166 +1385,6 @@ async def restore_document_version( } -# ===== Local folder indexing endpoints ===== - - -class FolderIndexRequest(PydanticBaseModel): - folder_path: str - folder_name: str - search_space_id: int - exclude_patterns: list[str] | None = None - file_extensions: list[str] | None = None - root_folder_id: int | None = None - enable_summary: bool = False - - -class FolderIndexFilesRequest(PydanticBaseModel): - folder_path: str - folder_name: str - search_space_id: int - target_file_paths: list[str] - root_folder_id: int | None = None - enable_summary: bool = False - - -@router.post("/documents/folder-index") -async def folder_index( - request: FolderIndexRequest, - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - """Full-scan index of a local folder. Creates the root Folder row synchronously - and dispatches the heavy indexing work to a Celery task. - Returns the root_folder_id so the desktop can persist it. - """ - from app.config import config as app_config - - if not app_config.is_self_hosted(): - raise HTTPException( - status_code=400, - detail="Local folder indexing is only available in self-hosted mode", - ) - - await check_permission( - session, - user, - request.search_space_id, - Permission.DOCUMENTS_CREATE.value, - "You don't have permission to create documents in this search space", - ) - - watched_metadata = { - "watched": True, - "folder_path": request.folder_path, - "exclude_patterns": request.exclude_patterns, - "file_extensions": request.file_extensions, - } - - root_folder_id = request.root_folder_id - if root_folder_id: - existing = ( - await session.execute(select(Folder).where(Folder.id == root_folder_id)) - ).scalar_one_or_none() - if not existing: - root_folder_id = None - else: - existing.folder_metadata = watched_metadata - await session.commit() - - if not root_folder_id: - root_folder = Folder( - name=request.folder_name, - search_space_id=request.search_space_id, - created_by_id=str(user.id), - position="a0", - folder_metadata=watched_metadata, - ) - session.add(root_folder) - await session.flush() - root_folder_id = root_folder.id - await session.commit() - - from app.tasks.celery_tasks.document_tasks import index_local_folder_task - - index_local_folder_task.delay( - search_space_id=request.search_space_id, - user_id=str(user.id), - folder_path=request.folder_path, - folder_name=request.folder_name, - exclude_patterns=request.exclude_patterns, - file_extensions=request.file_extensions, - root_folder_id=root_folder_id, - enable_summary=request.enable_summary, - ) - - return { - "message": "Folder indexing started", - "status": "processing", - "root_folder_id": root_folder_id, - } - - -@router.post("/documents/folder-index-files") -async def folder_index_files( - request: FolderIndexFilesRequest, - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - """Index multiple files within a watched folder (batched chokidar trigger). - Validates that all target_file_paths are under folder_path. - Dispatches a single Celery task that processes them in parallel. - """ - from app.config import config as app_config - - if not app_config.is_self_hosted(): - raise HTTPException( - status_code=400, - detail="Local folder indexing is only available in self-hosted mode", - ) - - if not request.target_file_paths: - raise HTTPException( - status_code=400, detail="target_file_paths must not be empty" - ) - - await check_permission( - session, - user, - request.search_space_id, - Permission.DOCUMENTS_CREATE.value, - "You don't have permission to create documents in this search space", - ) - - from pathlib import Path - - for fp in request.target_file_paths: - try: - Path(fp).relative_to(request.folder_path) - except ValueError as err: - raise HTTPException( - status_code=400, - detail=f"target_file_path {fp} must be inside folder_path", - ) from err - - from app.tasks.celery_tasks.document_tasks import index_local_folder_task - - index_local_folder_task.delay( - search_space_id=request.search_space_id, - user_id=str(user.id), - folder_path=request.folder_path, - folder_name=request.folder_name, - target_file_paths=request.target_file_paths, - root_folder_id=request.root_folder_id, - enable_summary=request.enable_summary, - ) - - return { - "message": f"Batch indexing started for {len(request.target_file_paths)} file(s)", - "status": "processing", - "file_count": len(request.target_file_paths), - } - - # ===== Upload-based local folder indexing endpoints ===== # These work for ALL deployment modes (cloud, self-hosted remote, self-hosted local). # The desktop app reads files locally and uploads them here. diff --git a/surfsense_web/lib/apis/documents-api.service.ts b/surfsense_web/lib/apis/documents-api.service.ts index 3018cbc34..34a0b6dce 100644 --- a/surfsense_web/lib/apis/documents-api.service.ts +++ b/surfsense_web/lib/apis/documents-api.service.ts @@ -424,35 +424,6 @@ class DocumentsApiService { return baseApiService.post(`/api/v1/documents/${documentId}/versions/${versionNumber}/restore`); }; - folderIndex = async ( - searchSpaceId: number, - body: { - folder_path: string; - folder_name: string; - search_space_id: number; - exclude_patterns?: string[]; - file_extensions?: string[]; - root_folder_id?: number; - enable_summary?: boolean; - } - ) => { - return baseApiService.post(`/api/v1/documents/folder-index`, undefined, { body }); - }; - - folderIndexFiles = async ( - searchSpaceId: number, - body: { - folder_path: string; - folder_name: string; - search_space_id: number; - target_file_paths: string[]; - root_folder_id?: number | null; - enable_summary?: boolean; - } - ) => { - return baseApiService.post(`/api/v1/documents/folder-index-files`, undefined, { body }); - }; - folderMtimeCheck = async (body: { folder_name: string; search_space_id: number; diff --git a/surfsense_web/lib/folder-sync-upload.ts b/surfsense_web/lib/folder-sync-upload.ts index ef53c6b29..28f38ced4 100644 --- a/surfsense_web/lib/folder-sync-upload.ts +++ b/surfsense_web/lib/folder-sync-upload.ts @@ -70,11 +70,12 @@ async function uploadBatchesWithConcurrency( signal?: AbortSignal; onBatchComplete?: (filesInBatch: number) => void; }, -) { +): Promise { const api = window.electronAPI; if (!api) throw new Error("Electron API not available"); let batchIdx = 0; + let resolvedRootFolderId = params.rootFolderId; const errors: string[] = []; async function processNext(): Promise { @@ -95,18 +96,22 @@ async function uploadBatchesWithConcurrency( return new File([blob], fd.name, { type: blob.type }); }); - await documentsApiService.folderUploadFiles( + const result = await documentsApiService.folderUploadFiles( files, { folder_name: params.folderName, search_space_id: params.searchSpaceId, relative_paths: batch.map((e) => e.relativePath), - root_folder_id: params.rootFolderId, + root_folder_id: resolvedRootFolderId, enable_summary: params.enableSummary, }, params.signal, ); + if (result.root_folder_id && !resolvedRootFolderId) { + resolvedRootFolderId = result.root_folder_id; + } + params.onBatchComplete?.(batch.length); } catch (err) { if (params.signal?.aborted) return; @@ -122,6 +127,8 @@ async function uploadBatchesWithConcurrency( if (errors.length > 0 && !params.signal?.aborted) { console.error("Some batches failed:", errors); } + + return resolvedRootFolderId; } /** @@ -173,7 +180,7 @@ export async function uploadFolderScan(params: FolderSyncParams): Promise | undefined; - const matched = folderList?.find((f) => f.name === folderName); - if (matched?.id) { - rootFolderId = matched.id; - } + if (uploadedRootId) { + rootFolderId = uploadedRootId; } } From 5eada7bcab264a09e59501dc6170568094266011 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 8 Apr 2026 15:55:37 +0530 Subject: [PATCH 06/18] fix: update button variant in FolderWatchDialog and refine metadata type in folder.types --- surfsense_web/components/sources/FolderWatchDialog.tsx | 2 +- surfsense_web/contracts/types/folder.types.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/surfsense_web/components/sources/FolderWatchDialog.tsx b/surfsense_web/components/sources/FolderWatchDialog.tsx index 8ccf12afe..b44f42d77 100644 --- a/surfsense_web/components/sources/FolderWatchDialog.tsx +++ b/surfsense_web/components/sources/FolderWatchDialog.tsx @@ -228,7 +228,7 @@ export function FolderWatchDialog({
{submitting ? ( <> - diff --git a/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx b/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx index 85c3a9897..b8634bb3b 100644 --- a/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx +++ b/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx @@ -116,48 +116,48 @@ export function DocumentsSidebar({ setFolderWatchOpen(true); }, []); - useEffect(() => { + const refreshWatchedIds = useCallback(async () => { if (!electronAPI?.getWatchedFolders) return; const api = electronAPI; - async function loadWatchedIds() { - const folders = await api.getWatchedFolders(); + const folders = await api.getWatchedFolders(); - if (folders.length === 0) { - try { - const backendFolders = await documentsApiService.getWatchedFolders(searchSpaceId); - for (const bf of backendFolders) { - const meta = bf.metadata as Record | null; - if (!meta?.watched || !meta.folder_path) continue; - await api.addWatchedFolder({ - path: meta.folder_path as string, - name: bf.name, - rootFolderId: bf.id, - searchSpaceId: bf.search_space_id, - excludePatterns: (meta.exclude_patterns as string[]) ?? [], - fileExtensions: (meta.file_extensions as string[] | null) ?? null, - active: true, - }); - } - const recovered = await api.getWatchedFolders(); - const ids = new Set( - recovered.filter((f) => f.rootFolderId != null).map((f) => f.rootFolderId as number) - ); - setWatchedFolderIds(ids); - return; - } catch (err) { - console.error("[DocumentsSidebar] Recovery from backend failed:", err); + if (folders.length === 0) { + try { + const backendFolders = await documentsApiService.getWatchedFolders(searchSpaceId); + for (const bf of backendFolders) { + const meta = bf.metadata as Record | null; + if (!meta?.watched || !meta.folder_path) continue; + await api.addWatchedFolder({ + path: meta.folder_path as string, + name: bf.name, + rootFolderId: bf.id, + searchSpaceId: bf.search_space_id, + excludePatterns: (meta.exclude_patterns as string[]) ?? [], + fileExtensions: (meta.file_extensions as string[] | null) ?? null, + active: true, + }); } + const recovered = await api.getWatchedFolders(); + const ids = new Set( + recovered.filter((f) => f.rootFolderId != null).map((f) => f.rootFolderId as number) + ); + setWatchedFolderIds(ids); + return; + } catch (err) { + console.error("[DocumentsSidebar] Recovery from backend failed:", err); } - - const ids = new Set( - folders.filter((f) => f.rootFolderId != null).map((f) => f.rootFolderId as number) - ); - setWatchedFolderIds(ids); } - loadWatchedIds(); + const ids = new Set( + folders.filter((f) => f.rootFolderId != null).map((f) => f.rootFolderId as number) + ); + setWatchedFolderIds(ids); }, [searchSpaceId, electronAPI]); + + useEffect(() => { + refreshWatchedIds(); + }, [refreshWatchedIds]); const { mutateAsync: deleteDocumentMutation } = useAtomValue(deleteDocumentMutationAtom); const [sidebarDocs, setSidebarDocs] = useAtom(sidebarSelectedDocumentsAtom); @@ -342,8 +342,9 @@ export function DocumentsSidebar({ console.error("[DocumentsSidebar] Failed to clear watched metadata:", err); } toast.success(`Stopped watching: ${matched.name}`); + refreshWatchedIds(); }, - [electronAPI] + [electronAPI, refreshWatchedIds] ); const handleRenameFolder = useCallback(async (folder: FolderDisplay, newName: string) => { @@ -872,6 +873,7 @@ export function DocumentsSidebar({ }} searchSpaceId={searchSpaceId} initialFolder={watchInitialFolder} + onSuccess={refreshWatchedIds} /> )} From b76a6babc1d3f641e0c359c133de3bb45c293752 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 8 Apr 2026 16:20:41 +0530 Subject: [PATCH 10/18] refactor: update terminology in AllConnectorsTab and enhance styling in FolderTreeView --- .../assistant-ui/connector-popup/tabs/all-connectors-tab.tsx | 4 ++-- surfsense_web/components/documents/FolderTreeView.tsx | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/surfsense_web/components/assistant-ui/connector-popup/tabs/all-connectors-tab.tsx b/surfsense_web/components/assistant-ui/connector-popup/tabs/all-connectors-tab.tsx index bbbf6dd57..0e01f3006 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/tabs/all-connectors-tab.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/tabs/all-connectors-tab.tsx @@ -302,12 +302,12 @@ export const AllConnectorsTab: FC = ({ return (
- {/* Document/Files Connectors */} + {/* File Storage Integrations */} {hasDocumentFileConnectors && (

- Document/Files Connectors + File Storage Integrations

diff --git a/surfsense_web/components/documents/FolderTreeView.tsx b/surfsense_web/components/documents/FolderTreeView.tsx index 47cd17596..65f7fd9a2 100644 --- a/surfsense_web/components/documents/FolderTreeView.tsx +++ b/surfsense_web/components/documents/FolderTreeView.tsx @@ -283,7 +283,7 @@ export function FolderTreeView({ if (treeNodes.length === 0 && folders.length === 0 && documents.length === 0) { return ( -
+

No documents found

Use the upload button or connect a source above From 60eb1e406062cff88a0abd215d53cdea085a4e30 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 8 Apr 2026 16:28:51 +0530 Subject: [PATCH 11/18] feat: implement raw file hash computation to optimize content extraction during local folder indexing --- .../local_folder_indexer.py | 94 +++++++++++++++++-- 1 file changed, 88 insertions(+), 6 deletions(-) diff --git a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py index 7b433cf62..5c4878a04 100644 --- a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py @@ -178,6 +178,22 @@ def _content_hash(content: str, search_space_id: int) -> str: return hashlib.sha256(f"{search_space_id}:{content}".encode()).hexdigest() +def _compute_raw_file_hash(file_path: str) -> str: + """SHA-256 hash of the raw file bytes. + + Much cheaper than ETL/OCR extraction -- only performs sequential I/O. + Used as a pre-filter to skip expensive content extraction when the + underlying file hasn't changed at all. + """ + import hashlib + + h = hashlib.sha256() + with open(file_path, "rb") as f: + for chunk in iter(lambda: f.read(8192), b""): + h.update(chunk) + return h.hexdigest() + + async def _compute_file_content_hash( file_path: str, filename: str, @@ -630,6 +646,24 @@ async def index_local_folder( skipped_count += 1 continue + raw_hash = await asyncio.to_thread( + _compute_raw_file_hash, file_path_abs + ) + + stored_raw_hash = ( + existing_document.document_metadata or {} + ).get("raw_file_hash") + if stored_raw_hash and stored_raw_hash == raw_hash: + meta = dict(existing_document.document_metadata or {}) + meta["mtime"] = current_mtime + existing_document.document_metadata = meta + if not DocumentStatus.is_state( + existing_document.status, DocumentStatus.READY + ): + existing_document.status = DocumentStatus.ready() + skipped_count += 1 + continue + try: estimated_pages = await _check_page_limit_or_skip( page_limit_service, user_id, file_path_abs @@ -653,6 +687,7 @@ async def index_local_folder( if existing_document.content_hash == content_hash: meta = dict(existing_document.document_metadata or {}) meta["mtime"] = current_mtime + meta["raw_file_hash"] = raw_hash existing_document.document_metadata = meta if not DocumentStatus.is_state( existing_document.status, DocumentStatus.READY @@ -687,6 +722,10 @@ async def index_local_folder( skipped_count += 1 continue + raw_hash = await asyncio.to_thread( + _compute_raw_file_hash, file_path_abs + ) + doc = _build_connector_doc( title=file_info["name"], content=content, @@ -702,6 +741,7 @@ async def index_local_folder( "mtime": file_info["modified_at"].timestamp(), "estimated_pages": estimated_pages, "content_length": len(content), + "raw_file_hash": raw_hash, } except Exception as e: @@ -795,6 +835,7 @@ async def index_local_folder( doc_meta = dict(result.document_metadata or {}) doc_meta["mtime"] = mtime_info.get("mtime") + doc_meta["raw_file_hash"] = mtime_info.get("raw_file_hash") result.document_metadata = doc_meta est = mtime_info.get("estimated_pages", 1) @@ -988,6 +1029,26 @@ async def _index_single_file( DocumentType.LOCAL_FOLDER_FILE.value, unique_id, search_space_id ) + raw_hash = await asyncio.to_thread(_compute_raw_file_hash, str(full_path)) + + existing = await check_document_by_unique_identifier(session, uid_hash) + + if existing: + stored_raw_hash = (existing.document_metadata or {}).get( + "raw_file_hash" + ) + if stored_raw_hash and stored_raw_hash == raw_hash: + mtime = full_path.stat().st_mtime + meta = dict(existing.document_metadata or {}) + meta["mtime"] = mtime + existing.document_metadata = meta + if not DocumentStatus.is_state( + existing.status, DocumentStatus.READY + ): + existing.status = DocumentStatus.ready() + await session.commit() + return 0, 0, None + page_limit_service = PageLimitService(session) try: estimated_pages = await _check_page_limit_or_skip( @@ -1006,13 +1067,12 @@ async def _index_single_file( if not content.strip(): return 0, 1, None - existing = await check_document_by_unique_identifier(session, uid_hash) - if existing: if existing.content_hash == content_hash: mtime = full_path.stat().st_mtime meta = dict(existing.document_metadata or {}) meta["mtime"] = mtime + meta["raw_file_hash"] = raw_hash existing.document_metadata = meta await session.commit() return 0, 1, None @@ -1055,6 +1115,7 @@ async def _index_single_file( await session.refresh(db_doc) doc_meta = dict(db_doc.document_metadata or {}) doc_meta["mtime"] = mtime + doc_meta["raw_file_hash"] = raw_hash db_doc.document_metadata = doc_meta await session.commit() @@ -1236,6 +1297,29 @@ async def index_uploaded_files( search_space_id, ) + raw_hash = await asyncio.to_thread( + _compute_raw_file_hash, temp_path + ) + + existing = await check_document_by_unique_identifier( + session, uid_hash + ) + + if existing: + stored_raw_hash = (existing.document_metadata or {}).get( + "raw_file_hash" + ) + if stored_raw_hash and stored_raw_hash == raw_hash: + meta = dict(existing.document_metadata or {}) + meta["mtime"] = datetime.now(UTC).timestamp() + existing.document_metadata = meta + if not DocumentStatus.is_state( + existing.status, DocumentStatus.READY + ): + existing.status = DocumentStatus.ready() + await session.commit() + continue + try: estimated_pages = await _check_page_limit_or_skip( page_limit_service, user_id, temp_path @@ -1259,14 +1343,11 @@ async def index_uploaded_files( failed_count += 1 continue - existing = await check_document_by_unique_identifier( - session, uid_hash - ) - if existing: if existing.content_hash == content_hash: meta = dict(existing.document_metadata or {}) meta["mtime"] = datetime.now(UTC).timestamp() + meta["raw_file_hash"] = raw_hash existing.document_metadata = meta if not DocumentStatus.is_state( existing.status, DocumentStatus.READY @@ -1312,6 +1393,7 @@ async def index_uploaded_files( await session.refresh(db_doc) doc_meta = dict(db_doc.document_metadata or {}) doc_meta["mtime"] = datetime.now(UTC).timestamp() + doc_meta["raw_file_hash"] = raw_hash db_doc.document_metadata = doc_meta await session.commit() From ae98f647608c63455f0b5d1c6a1696187f1ed08a Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 8 Apr 2026 16:48:40 +0530 Subject: [PATCH 12/18] feat: enhance folder indexing with metadata management and improve folder structure handling in UI components --- .../connector_indexers/local_folder_indexer.py | 18 ++++++++++++++++++ .../components/documents/FolderNode.tsx | 1 + .../components/documents/FolderTreeView.tsx | 13 ++++++++++++- .../layout/ui/sidebar/DocumentsSidebar.tsx | 1 + .../components/sources/FolderWatchDialog.tsx | 2 +- surfsense_web/zero/schema/folders.ts | 3 ++- 6 files changed, 35 insertions(+), 3 deletions(-) diff --git a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py index 5c4878a04..c23fe55c3 100644 --- a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py @@ -1276,6 +1276,13 @@ async def index_uploaded_files( ) await session.flush() + root_folder = await session.get(Folder, root_folder_id) + if root_folder: + meta = dict(root_folder.folder_metadata or {}) + meta["indexing_in_progress"] = True + root_folder.folder_metadata = meta + await session.commit() + page_limit_service = PageLimitService(session) pipeline = IndexingPipelineService(session) llm = await get_user_long_context_llm(session, user_id, search_space_id) @@ -1454,3 +1461,14 @@ async def index_uploaded_files( log_entry, f"Error: {e}", "Unexpected error", {} ) return 0, 0, str(e) + + finally: + try: + root_folder = await session.get(Folder, root_folder_id) + if root_folder: + meta = dict(root_folder.folder_metadata or {}) + meta.pop("indexing_in_progress", None) + root_folder.folder_metadata = meta + await session.commit() + except Exception: + pass diff --git a/surfsense_web/components/documents/FolderNode.tsx b/surfsense_web/components/documents/FolderNode.tsx index 4b0103ce5..7f75f8abf 100644 --- a/surfsense_web/components/documents/FolderNode.tsx +++ b/surfsense_web/components/documents/FolderNode.tsx @@ -49,6 +49,7 @@ export interface FolderDisplay { position: string; parentId: number | null; searchSpaceId: number; + metadata?: Record | null; } interface FolderNodeProps { diff --git a/surfsense_web/components/documents/FolderTreeView.tsx b/surfsense_web/components/documents/FolderTreeView.tsx index 65f7fd9a2..6eb53da50 100644 --- a/surfsense_web/components/documents/FolderTreeView.tsx +++ b/surfsense_web/components/documents/FolderTreeView.tsx @@ -168,6 +168,12 @@ export function FolderTreeView({ return states; }, [folders, docsByFolder, foldersByParent, mentionedDocIds]); + const folderMap = useMemo(() => { + const map: Record = {}; + for (const f of folders) map[f.id] = f; + return map; + }, [folders]); + const folderProcessingStates = useMemo(() => { const states: Record = {}; @@ -178,6 +184,11 @@ export function FolderTreeView({ ); let hasFailed = directDocs.some((d) => d.status?.state === "failed"); + const folder = folderMap[folderId]; + if (folder?.metadata?.indexing_in_progress) { + hasProcessing = true; + } + for (const child of foldersByParent[folderId] ?? []) { const sub = compute(child.id); hasProcessing = hasProcessing || sub.hasProcessing; @@ -195,7 +206,7 @@ export function FolderTreeView({ if (states[f.id] === undefined) compute(f.id); } return states; - }, [folders, docsByFolder, foldersByParent]); + }, [folders, docsByFolder, foldersByParent, folderMap]); function renderLevel(parentId: number | null, depth: number): React.ReactNode[] { const key = parentId ?? "root"; diff --git a/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx b/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx index b8634bb3b..7679faae5 100644 --- a/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx +++ b/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx @@ -194,6 +194,7 @@ export function DocumentsSidebar({ position: f.position, parentId: f.parentId ?? null, searchSpaceId: f.searchSpaceId, + metadata: f.metadata as Record | null | undefined, })), [zeroFolders] ); diff --git a/surfsense_web/components/sources/FolderWatchDialog.tsx b/surfsense_web/components/sources/FolderWatchDialog.tsx index b44f42d77..bb3972a09 100644 --- a/surfsense_web/components/sources/FolderWatchDialog.tsx +++ b/surfsense_web/components/sources/FolderWatchDialog.tsx @@ -66,7 +66,7 @@ export function FolderWatchDialog({ const folderPath = await api.selectFolder(); if (!folderPath) return; - const folderName = folderPath.split("/").pop() || folderPath.split("\\").pop() || folderPath; + const folderName = folderPath.split(/[/\\]/).pop() || folderPath; setSelectedFolder({ path: folderPath, name: folderName }); }, []); diff --git a/surfsense_web/zero/schema/folders.ts b/surfsense_web/zero/schema/folders.ts index 2313506ab..c5b192942 100644 --- a/surfsense_web/zero/schema/folders.ts +++ b/surfsense_web/zero/schema/folders.ts @@ -1,4 +1,4 @@ -import { number, string, table } from "@rocicorp/zero"; +import { json, number, string, table } from "@rocicorp/zero"; export const folderTable = table("folders") .columns({ @@ -10,5 +10,6 @@ export const folderTable = table("folders") createdById: string().optional().from("created_by_id"), createdAt: number().from("created_at"), updatedAt: number().from("updated_at"), + metadata: json>().optional().from("metadata"), }) .primaryKey("id"); From cab0d1bdfee27a3d2e9586cc9b97a525c8e8e20b Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 8 Apr 2026 17:10:22 +0530 Subject: [PATCH 13/18] feat: enhance folder synchronization by integrating subtree ID retrieval and optimizing empty folder cleanup process --- .../app/routes/documents_routes.py | 57 +++++++------------ .../local_folder_indexer.py | 19 +++---- 2 files changed, 29 insertions(+), 47 deletions(-) diff --git a/surfsense_backend/app/routes/documents_routes.py b/surfsense_backend/app/routes/documents_routes.py index 39458dc5f..e71cef7e4 100644 --- a/surfsense_backend/app/routes/documents_routes.py +++ b/surfsense_backend/app/routes/documents_routes.py @@ -1675,6 +1675,7 @@ async def folder_sync_finalize( gets deleted. """ from app.indexing_pipeline.document_hashing import compute_identifier_hash + from app.services.folder_service import get_folder_subtree_ids from app.tasks.connector_indexers.local_folder_indexer import ( _cleanup_empty_folders, ) @@ -1687,6 +1688,11 @@ async def folder_sync_finalize( "You don't have permission to delete documents in this search space", ) + if not request.root_folder_id: + return {"deleted_count": 0} + + subtree_ids = await get_folder_subtree_ids(session, request.root_folder_id) + seen_hashes: set[str] = set() for rel_path in request.all_relative_paths: unique_id = f"{request.folder_name}:{rel_path}" @@ -1697,32 +1703,13 @@ async def folder_sync_finalize( ) seen_hashes.add(uid_hash) - all_root_folder_ids: set[int] = set() - if request.root_folder_id: - all_root_folder_ids.add(request.root_folder_id) - - all_db_folders = ( - ( - await session.execute( - select(Folder.id).where( - Folder.search_space_id == request.search_space_id, - ) - ) - ) - .scalars() - .all() - ) - all_root_folder_ids.update(all_db_folders) - all_folder_docs = ( ( await session.execute( select(Document).where( Document.document_type == DocumentType.LOCAL_FOLDER_FILE, Document.search_space_id == request.search_space_id, - Document.folder_id.in_(list(all_root_folder_ids)) - if all_root_folder_ids - else True, + Document.folder_id.in_(subtree_ids), ) ) ) @@ -1738,24 +1725,22 @@ async def folder_sync_finalize( await session.flush() - if request.root_folder_id: - existing_dirs: set[str] = set() - for rel_path in request.all_relative_paths: - parent = str(os.path.dirname(rel_path)) - if parent and parent != ".": - existing_dirs.add(parent) + existing_dirs: set[str] = set() + for rel_path in request.all_relative_paths: + parent = str(os.path.dirname(rel_path)) + if parent and parent != ".": + existing_dirs.add(parent) - folder_mapping: dict[str, int] = {} - if request.root_folder_id: - folder_mapping[""] = request.root_folder_id + folder_mapping: dict[str, int] = {"": request.root_folder_id} - await _cleanup_empty_folders( - session, - request.root_folder_id, - request.search_space_id, - existing_dirs, - folder_mapping, - ) + await _cleanup_empty_folders( + session, + request.root_folder_id, + request.search_space_id, + existing_dirs, + folder_mapping, + subtree_ids=subtree_ids, + ) await session.commit() return {"deleted_count": deleted_count} diff --git a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py index c23fe55c3..5cac13b1b 100644 --- a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py @@ -387,24 +387,21 @@ async def _cleanup_empty_folders( search_space_id: int, existing_dirs_on_disk: set[str], folder_mapping: dict[str, int], + subtree_ids: list[int] | None = None, ) -> None: """Delete Folder rows that are empty (no docs, no children) and no longer on disk.""" from sqlalchemy import delete as sa_delete id_to_rel: dict[int, str] = {fid: rel for rel, fid in folder_mapping.items() if rel} - all_folders = ( - ( - await session.execute( - select(Folder).where( - Folder.search_space_id == search_space_id, - Folder.id != root_folder_id, - ) - ) - ) - .scalars() - .all() + query = select(Folder).where( + Folder.search_space_id == search_space_id, + Folder.id != root_folder_id, ) + if subtree_ids is not None: + query = query.where(Folder.id.in_(subtree_ids)) + + all_folders = (await session.execute(query)).scalars().all() candidates: list[Folder] = [] for folder in all_folders: From f3aa514240335bf9f0c3009cda3e2bfc095f6c69 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 8 Apr 2026 17:25:18 +0530 Subject: [PATCH 14/18] feat: integrate subtree ID retrieval in local folder cleanup process and enhance UI component styling for folder selection --- .../app/tasks/connector_indexers/local_folder_indexer.py | 6 +++++- surfsense_web/components/sources/FolderWatchDialog.tsx | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py index 5cac13b1b..1d890c8d3 100644 --- a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py @@ -861,8 +861,12 @@ async def index_local_folder( root_fid = folder_mapping.get("") if root_fid: + from app.services.folder_service import get_folder_subtree_ids + + subtree_ids = await get_folder_subtree_ids(session, root_fid) await _cleanup_empty_folders( - session, root_fid, search_space_id, existing_dirs, folder_mapping + session, root_fid, search_space_id, existing_dirs, folder_mapping, + subtree_ids=subtree_ids, ) try: diff --git a/surfsense_web/components/sources/FolderWatchDialog.tsx b/surfsense_web/components/sources/FolderWatchDialog.tsx index bb3972a09..f6814bcfb 100644 --- a/surfsense_web/components/sources/FolderWatchDialog.tsx +++ b/surfsense_web/components/sources/FolderWatchDialog.tsx @@ -170,7 +170,7 @@ export function FolderWatchDialog({ Select a folder to sync and watch for changes. -

+
{selectedFolder ? (
From a8b83dcf3f52346fc9297d5f834e04969216fe4a Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 8 Apr 2026 17:48:50 +0530 Subject: [PATCH 15/18] feat: add folder_id support in ConnectorDocument and indexing pipeline for improved document organization --- .../indexing_pipeline/connector_document.py | 1 + .../indexing_pipeline_service.py | 5 ++ .../local_folder_indexer.py | 59 ++++++------------- 3 files changed, 25 insertions(+), 40 deletions(-) diff --git a/surfsense_backend/app/indexing_pipeline/connector_document.py b/surfsense_backend/app/indexing_pipeline/connector_document.py index 019efe287..4f5d6e2e0 100644 --- a/surfsense_backend/app/indexing_pipeline/connector_document.py +++ b/surfsense_backend/app/indexing_pipeline/connector_document.py @@ -17,6 +17,7 @@ class ConnectorDocument(BaseModel): metadata: dict = {} connector_id: int | None = None created_by_id: str + folder_id: int | None = None @field_validator("title", "source_markdown", "unique_id", "created_by_id") @classmethod diff --git a/surfsense_backend/app/indexing_pipeline/indexing_pipeline_service.py b/surfsense_backend/app/indexing_pipeline/indexing_pipeline_service.py index 0fa4006f5..22c552e5c 100644 --- a/surfsense_backend/app/indexing_pipeline/indexing_pipeline_service.py +++ b/surfsense_backend/app/indexing_pipeline/indexing_pipeline_service.py @@ -268,6 +268,8 @@ class IndexingPipelineService: ): existing.status = DocumentStatus.pending() existing.updated_at = datetime.now(UTC) + if connector_doc.folder_id is not None: + existing.folder_id = connector_doc.folder_id documents.append(existing) log_document_requeued(ctx) continue @@ -294,6 +296,8 @@ class IndexingPipelineService: existing.document_metadata = connector_doc.metadata existing.updated_at = datetime.now(UTC) existing.status = DocumentStatus.pending() + if connector_doc.folder_id is not None: + existing.folder_id = connector_doc.folder_id documents.append(existing) log_document_updated(ctx) continue @@ -317,6 +321,7 @@ class IndexingPipelineService: created_by_id=connector_doc.created_by_id, updated_at=datetime.now(UTC), status=DocumentStatus.pending(), + folder_id=connector_doc.folder_id, ) self.session.add(document) documents.append(document) diff --git a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py index 1d890c8d3..3360cd343 100644 --- a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py @@ -790,29 +790,18 @@ async def index_local_folder( compute_unique_identifier_hash, ) - pipeline = IndexingPipelineService(session) - doc_map = {compute_unique_identifier_hash(cd): cd for cd in connector_docs} - documents = await pipeline.prepare_for_indexing(connector_docs) - - # Assign folder_id immediately so docs appear in the correct - # folder while still pending/processing (visible via Zero sync). - for document in documents: - cd = doc_map.get(document.unique_identifier_hash) - if cd is None: - continue + for cd in connector_docs: rel_path = (cd.metadata or {}).get("file_path", "") parent_dir = str(Path(rel_path).parent) if rel_path else "" if parent_dir == ".": parent_dir = "" - document.folder_id = folder_mapping.get( + cd.folder_id = folder_mapping.get( parent_dir, folder_mapping.get("") ) - try: - await session.commit() - except IntegrityError: - await session.rollback() - for document in documents: - await session.refresh(document) + + pipeline = IndexingPipelineService(session) + doc_map = {compute_unique_identifier_hash(cd): cd for cd in connector_docs} + documents = await pipeline.prepare_for_indexing(connector_docs) llm = await get_user_long_context_llm(session, user_id, search_space_id) @@ -1092,6 +1081,11 @@ async def _index_single_file( enable_summary=enable_summary, ) + if root_folder_id: + connector_doc.folder_id = await _resolve_folder_for_file( + session, rel_path, root_folder_id, search_space_id, user_id + ) + pipeline = IndexingPipelineService(session) llm = await get_user_long_context_llm(session, user_id, search_space_id) documents = await pipeline.prepare_for_indexing([connector_doc]) @@ -1101,16 +1095,6 @@ async def _index_single_file( db_doc = documents[0] - if root_folder_id: - try: - db_doc.folder_id = await _resolve_folder_for_file( - session, rel_path, root_folder_id, search_space_id, user_id - ) - await session.commit() - except IntegrityError: - await session.rollback() - await session.refresh(db_doc) - await pipeline.index(db_doc, connector_doc, llm) await session.refresh(db_doc) @@ -1376,6 +1360,14 @@ async def index_uploaded_files( enable_summary=enable_summary, ) + connector_doc.folder_id = await _resolve_folder_for_file( + session, + relative_path, + root_folder_id, + search_space_id, + user_id, + ) + documents = await pipeline.prepare_for_indexing([connector_doc]) if not documents: failed_count += 1 @@ -1383,19 +1375,6 @@ async def index_uploaded_files( db_doc = documents[0] - try: - db_doc.folder_id = await _resolve_folder_for_file( - session, - relative_path, - root_folder_id, - search_space_id, - user_id, - ) - await session.commit() - except IntegrityError: - await session.rollback() - await session.refresh(db_doc) - await pipeline.index(db_doc, connector_doc, llm) await session.refresh(db_doc) From 37c52ce7eaaf78dc9390d84a837fae1cdadc6bab Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 8 Apr 2026 18:01:55 +0530 Subject: [PATCH 16/18] feat: implement indexing progress management in local folder indexing process and enhance related test coverage --- .../local_folder_indexer.py | 113 +++++++++------- .../test_local_folder_pipeline.py | 128 +++++++++++++++++- .../assistant-ui/tooltip-icon-button.tsx | 5 +- .../components/sources/FolderWatchDialog.tsx | 2 +- 4 files changed, 198 insertions(+), 50 deletions(-) diff --git a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py index 3360cd343..8805558bd 100644 --- a/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/local_folder_indexer.py @@ -344,6 +344,27 @@ async def _resolve_folder_for_file( return current_parent_id +async def _set_indexing_flag(session: AsyncSession, folder_id: int) -> None: + folder = await session.get(Folder, folder_id) + if folder: + meta = dict(folder.folder_metadata or {}) + meta["indexing_in_progress"] = True + folder.folder_metadata = meta + await session.commit() + + +async def _clear_indexing_flag(session: AsyncSession, folder_id: int) -> None: + try: + folder = await session.get(Folder, folder_id) + if folder: + meta = dict(folder.folder_metadata or {}) + meta.pop("indexing_in_progress", None) + folder.folder_metadata = meta + await session.commit() + except Exception: + pass + + async def _cleanup_empty_folder_chain( session: AsyncSession, folder_id: int, @@ -531,44 +552,50 @@ async def index_local_folder( # BATCH MODE (1..N files) # ==================================================================== if target_file_paths: - if len(target_file_paths) == 1: - indexed, skipped, err = await _index_single_file( - session=session, + if root_folder_id: + await _set_indexing_flag(session, root_folder_id) + try: + if len(target_file_paths) == 1: + indexed, skipped, err = await _index_single_file( + session=session, + search_space_id=search_space_id, + user_id=user_id, + folder_path=folder_path, + folder_name=folder_name, + target_file_path=target_file_paths[0], + enable_summary=enable_summary, + root_folder_id=root_folder_id, + task_logger=task_logger, + log_entry=log_entry, + ) + return indexed, skipped, root_folder_id, err + + indexed, failed, err = await _index_batch_files( search_space_id=search_space_id, user_id=user_id, folder_path=folder_path, folder_name=folder_name, - target_file_path=target_file_paths[0], + target_file_paths=target_file_paths, enable_summary=enable_summary, root_folder_id=root_folder_id, - task_logger=task_logger, - log_entry=log_entry, + on_progress_callback=on_heartbeat_callback, ) - return indexed, skipped, root_folder_id, err - - indexed, failed, err = await _index_batch_files( - search_space_id=search_space_id, - user_id=user_id, - folder_path=folder_path, - folder_name=folder_name, - target_file_paths=target_file_paths, - enable_summary=enable_summary, - root_folder_id=root_folder_id, - on_progress_callback=on_heartbeat_callback, - ) - if err: - await task_logger.log_task_success( - log_entry, - f"Batch indexing: {indexed} indexed, {failed} failed", - {"indexed": indexed, "failed": failed}, - ) - else: - await task_logger.log_task_success( - log_entry, - f"Batch indexing complete: {indexed} indexed", - {"indexed": indexed, "failed": failed}, - ) - return indexed, failed, root_folder_id, err + if err: + await task_logger.log_task_success( + log_entry, + f"Batch indexing: {indexed} indexed, {failed} failed", + {"indexed": indexed, "failed": failed}, + ) + else: + await task_logger.log_task_success( + log_entry, + f"Batch indexing complete: {indexed} indexed", + {"indexed": indexed, "failed": failed}, + ) + return indexed, failed, root_folder_id, err + finally: + if root_folder_id: + await _clear_indexing_flag(session, root_folder_id) # ==================================================================== # FULL-SCAN MODE @@ -588,6 +615,7 @@ async def index_local_folder( exclude_patterns=exclude_patterns, ) await session.flush() + await _set_indexing_flag(session, root_folder_id) try: files = scan_folder(folder_path, file_extensions, exclude_patterns) @@ -595,6 +623,7 @@ async def index_local_folder( await task_logger.log_task_failure( log_entry, f"Failed to scan folder: {e}", "Scan error", {} ) + await _clear_indexing_flag(session, root_folder_id) return 0, 0, root_folder_id, f"Failed to scan folder: {e}" logger.info(f"Found {len(files)} files in folder") @@ -882,6 +911,7 @@ async def index_local_folder( }, ) + await _clear_indexing_flag(session, root_folder_id) return indexed_count, skipped_count, root_folder_id, warning_message except SQLAlchemyError as e: @@ -890,6 +920,8 @@ async def index_local_folder( await task_logger.log_task_failure( log_entry, f"DB error: {e}", "Database error", {} ) + if root_folder_id: + await _clear_indexing_flag(session, root_folder_id) return 0, 0, root_folder_id, f"Database error: {e}" except Exception as e: @@ -897,6 +929,8 @@ async def index_local_folder( await task_logger.log_task_failure( log_entry, f"Error: {e}", "Unexpected error", {} ) + if root_folder_id: + await _clear_indexing_flag(session, root_folder_id) return 0, 0, root_folder_id, str(e) @@ -1261,12 +1295,7 @@ async def index_uploaded_files( ) await session.flush() - root_folder = await session.get(Folder, root_folder_id) - if root_folder: - meta = dict(root_folder.folder_metadata or {}) - meta["indexing_in_progress"] = True - root_folder.folder_metadata = meta - await session.commit() + await _set_indexing_flag(session, root_folder_id) page_limit_service = PageLimitService(session) pipeline = IndexingPipelineService(session) @@ -1443,12 +1472,4 @@ async def index_uploaded_files( return 0, 0, str(e) finally: - try: - root_folder = await session.get(Folder, root_folder_id) - if root_folder: - meta = dict(root_folder.folder_metadata or {}) - meta.pop("indexing_in_progress", None) - root_folder.folder_metadata = meta - await session.commit() - except Exception: - pass + await _clear_indexing_flag(session, root_folder_id) diff --git a/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py b/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py index 000f43aa8..1508fb26f 100644 --- a/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py +++ b/surfsense_backend/tests/integration/indexing_pipeline/test_local_folder_pipeline.py @@ -1,4 +1,4 @@ -"""Integration tests for local folder indexer — Tier 3 (I1-I5), Tier 4 (F1-F7), Tier 5 (P1), Tier 6 (B1-B2).""" +"""Integration tests for local folder indexer — Tier 3 (I1-I5), Tier 4 (F1-F7), Tier 5 (P1), Tier 6 (B1-B2), Tier 7 (IP1-IP3).""" import os from contextlib import asynccontextmanager @@ -1178,3 +1178,129 @@ class TestPageLimits: await db_session.refresh(db_user) assert db_user.pages_used > 0 assert db_user.pages_used <= db_user.pages_limit + 1 + + +# ==================================================================== +# Tier 7: Indexing Progress Flag (IP1-IP3) +# ==================================================================== + + +class TestIndexingProgressFlag: + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) + async def test_ip1_full_scan_clears_flag( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + ): + """IP1: Full-scan mode clears indexing_in_progress after completion.""" + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + (tmp_path / "note.md").write_text("# Hello\n\nContent.") + + _, _, root_folder_id, _ = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + ) + + assert root_folder_id is not None + root_folder = ( + await db_session.execute(select(Folder).where(Folder.id == root_folder_id)) + ).scalar_one() + meta = root_folder.folder_metadata or {} + assert "indexing_in_progress" not in meta + + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) + async def test_ip2_single_file_clears_flag( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + ): + """IP2: Single-file (Chokidar) mode clears indexing_in_progress after completion.""" + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + (tmp_path / "root.md").write_text("root") + _, _, root_folder_id, _ = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + ) + + (tmp_path / "new.md").write_text("new file content") + + await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + target_file_paths=[str(tmp_path / "new.md")], + root_folder_id=root_folder_id, + ) + + root_folder = ( + await db_session.execute(select(Folder).where(Folder.id == root_folder_id)) + ).scalar_one() + meta = root_folder.folder_metadata or {} + assert "indexing_in_progress" not in meta + + @pytest.mark.usefixtures(*UNIFIED_FIXTURES) + async def test_ip3_flag_set_during_indexing( + self, + db_session: AsyncSession, + db_user: User, + db_search_space: SearchSpace, + tmp_path: Path, + ): + """IP3: indexing_in_progress is True on the root folder while indexing is running.""" + from app.tasks.connector_indexers.local_folder_indexer import index_local_folder + + (tmp_path / "note.md").write_text("# Check flag\n\nDuring indexing.") + + from app.indexing_pipeline.indexing_pipeline_service import IndexingPipelineService + + original_index = IndexingPipelineService.index + flag_observed = [] + + async def patched_index(self_pipe, document, connector_doc, llm): + folder = ( + await db_session.execute( + select(Folder).where( + Folder.search_space_id == db_search_space.id, + Folder.parent_id.is_(None), + ) + ) + ).scalar_one_or_none() + if folder: + meta = folder.folder_metadata or {} + flag_observed.append(meta.get("indexing_in_progress", False)) + return await original_index(self_pipe, document, connector_doc, llm) + + IndexingPipelineService.index = patched_index + try: + _, _, root_folder_id, _ = await index_local_folder( + session=db_session, + search_space_id=db_search_space.id, + user_id=str(db_user.id), + folder_path=str(tmp_path), + folder_name="test-folder", + ) + finally: + IndexingPipelineService.index = original_index + + assert len(flag_observed) > 0, "index() should have been called at least once" + assert all(flag_observed), "indexing_in_progress should be True during indexing" + + root_folder = ( + await db_session.execute(select(Folder).where(Folder.id == root_folder_id)) + ).scalar_one() + meta = root_folder.folder_metadata or {} + assert "indexing_in_progress" not in meta diff --git a/surfsense_web/components/assistant-ui/tooltip-icon-button.tsx b/surfsense_web/components/assistant-ui/tooltip-icon-button.tsx index 3db00e990..f003c55c0 100644 --- a/surfsense_web/components/assistant-ui/tooltip-icon-button.tsx +++ b/surfsense_web/components/assistant-ui/tooltip-icon-button.tsx @@ -1,7 +1,7 @@ "use client"; import { Slottable } from "@radix-ui/react-slot"; -import { type ComponentPropsWithRef, forwardRef, type ReactNode } from "react"; +import { type ComponentPropsWithRef, forwardRef, type ReactNode, useState } from "react"; import { Button } from "@/components/ui/button"; import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"; import { useMediaQuery } from "@/hooks/use-media-query"; @@ -17,9 +17,10 @@ export const TooltipIconButton = forwardRef { const isTouchDevice = useMediaQuery("(pointer: coarse)"); const suppressTooltip = disableTooltip || isTouchDevice; + const [tooltipOpen, setTooltipOpen] = useState(false); return ( - + @@ -225,7 +225,7 @@ export function FolderWatchDialog({
)} -
+
{submitting ? ( <>
@@ -68,10 +68,7 @@ function UnlimitedSkeleton({ className }: { className?: string }) { ]; return ( -
+
{items.map((item, index) => ( {item.icon} - - {item.label} - + {item.label}
{item.notebookLm} @@ -125,10 +120,7 @@ function LLMFlexibilitySkeleton({ className }: { className?: string }) { return (
-

- {model.name} -

-

- {model.provider} -

+

{model.name}

+

{model.provider}

{selected === index && ( -
+
))} @@ -295,9 +275,7 @@ function MultiplayerSkeleton({ className }: { className?: string }) {
{collaborator.name[0]}
- - {collaborator.name} - + {collaborator.name} {collaborator.role} @@ -321,9 +299,7 @@ function FeatureCard({
{skeleton}
-

- {title} -

+

{title}

{description}

@@ -408,9 +384,7 @@ function ComparisonStrip() { transition={{ duration: 0.3, delay: 0.15 + index * 0.06 }} >
- - {row.feature} - + {row.feature} {typeof row.notebookLm === "boolean" ? ( row.notebookLm ? ( @@ -419,9 +393,7 @@ function ComparisonStrip() { ) ) : ( - - {row.notebookLm} - + {row.notebookLm} )} @@ -436,9 +408,7 @@ function ComparisonStrip() { )}
- {index !== comparisonRows.length - 1 && ( - - )} + {index !== comparisonRows.length - 1 && } ))} diff --git a/surfsense_web/components/layout/ui/shell/LayoutShell.tsx b/surfsense_web/components/layout/ui/shell/LayoutShell.tsx index 2fca68c1e..346e3fa9e 100644 --- a/surfsense_web/components/layout/ui/shell/LayoutShell.tsx +++ b/surfsense_web/components/layout/ui/shell/LayoutShell.tsx @@ -410,7 +410,7 @@ export function LayoutShell({ pageUsage={pageUsage} theme={theme} setTheme={setTheme} - className={cn( + className={cn( "flex shrink-0 transition-[border-radius] duration-200", anySlideOutOpen ? "rounded-l-xl delay-0" : "rounded-xl delay-150" )} diff --git a/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx b/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx index 7679faae5..8b3a119ae 100644 --- a/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx +++ b/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx @@ -23,9 +23,11 @@ import { FolderPickerDialog } from "@/components/documents/FolderPickerDialog"; import { FolderTreeView } from "@/components/documents/FolderTreeView"; import { VersionHistoryDialog } from "@/components/documents/version-history"; import { EXPORT_FILE_EXTENSIONS } from "@/components/shared/ExportMenuItems"; -import { DEFAULT_EXCLUDE_PATTERNS, FolderWatchDialog, type SelectedFolder } from "@/components/sources/FolderWatchDialog"; -import { uploadFolderScan } from "@/lib/folder-sync-upload"; -import { getSupportedExtensionsSet } from "@/lib/supported-extensions"; +import { + DEFAULT_EXCLUDE_PATTERNS, + FolderWatchDialog, + type SelectedFolder, +} from "@/components/sources/FolderWatchDialog"; import { AlertDialog, AlertDialogAction, @@ -48,6 +50,8 @@ import { useElectronAPI } from "@/hooks/use-platform"; import { documentsApiService } from "@/lib/apis/documents-api.service"; import { foldersApiService } from "@/lib/apis/folders-api.service"; import { authenticatedFetch } from "@/lib/auth-utils"; +import { uploadFolderScan } from "@/lib/folder-sync-upload"; +import { getSupportedExtensionsSet } from "@/lib/supported-extensions"; import { queries } from "@/zero/queries/index"; import { SidebarSlideOutPanel } from "./SidebarSlideOutPanel"; diff --git a/surfsense_web/components/layout/ui/sidebar/SidebarSlideOutPanel.tsx b/surfsense_web/components/layout/ui/sidebar/SidebarSlideOutPanel.tsx index 79d27a1ac..5195082cd 100644 --- a/surfsense_web/components/layout/ui/sidebar/SidebarSlideOutPanel.tsx +++ b/surfsense_web/components/layout/ui/sidebar/SidebarSlideOutPanel.tsx @@ -90,14 +90,14 @@ export function SidebarSlideOutPanel({ /> {/* Panel extending from sidebar's right edge, flush with the wrapper border */} - +
import("@/components/tool-ui/video-presentation").then((m) => ({ default: m.GenerateVideoPresentationToolUI })), + () => + import("@/components/tool-ui/video-presentation").then((m) => ({ + default: m.GenerateVideoPresentationToolUI, + })), { ssr: false } ); diff --git a/surfsense_web/components/settings/search-space-settings-dialog.tsx b/surfsense_web/components/settings/search-space-settings-dialog.tsx index dc0627305..34d28eb2a 100644 --- a/surfsense_web/components/settings/search-space-settings-dialog.tsx +++ b/surfsense_web/components/settings/search-space-settings-dialog.tsx @@ -1,43 +1,62 @@ "use client"; -import dynamic from "next/dynamic"; import { useAtom } from "jotai"; import { Bot, Brain, Eye, FileText, Globe, ImageIcon, MessageSquare, Shield } from "lucide-react"; +import dynamic from "next/dynamic"; import { useTranslations } from "next-intl"; import type React from "react"; import { searchSpaceSettingsDialogAtom } from "@/atoms/settings/settings-dialog.atoms"; import { SettingsDialog } from "@/components/settings/settings-dialog"; const GeneralSettingsManager = dynamic( - () => import("@/components/settings/general-settings-manager").then(m => ({ default: m.GeneralSettingsManager })), + () => + import("@/components/settings/general-settings-manager").then((m) => ({ + default: m.GeneralSettingsManager, + })), { ssr: false } ); const ModelConfigManager = dynamic( - () => import("@/components/settings/model-config-manager").then(m => ({ default: m.ModelConfigManager })), + () => + import("@/components/settings/model-config-manager").then((m) => ({ + default: m.ModelConfigManager, + })), { ssr: false } ); const LLMRoleManager = dynamic( - () => import("@/components/settings/llm-role-manager").then(m => ({ default: m.LLMRoleManager })), + () => + import("@/components/settings/llm-role-manager").then((m) => ({ default: m.LLMRoleManager })), { ssr: false } ); const ImageModelManager = dynamic( - () => import("@/components/settings/image-model-manager").then(m => ({ default: m.ImageModelManager })), + () => + import("@/components/settings/image-model-manager").then((m) => ({ + default: m.ImageModelManager, + })), { ssr: false } ); const VisionModelManager = dynamic( - () => import("@/components/settings/vision-model-manager").then(m => ({ default: m.VisionModelManager })), + () => + import("@/components/settings/vision-model-manager").then((m) => ({ + default: m.VisionModelManager, + })), { ssr: false } ); const RolesManager = dynamic( - () => import("@/components/settings/roles-manager").then(m => ({ default: m.RolesManager })), + () => import("@/components/settings/roles-manager").then((m) => ({ default: m.RolesManager })), { ssr: false } ); const PromptConfigManager = dynamic( - () => import("@/components/settings/prompt-config-manager").then(m => ({ default: m.PromptConfigManager })), + () => + import("@/components/settings/prompt-config-manager").then((m) => ({ + default: m.PromptConfigManager, + })), { ssr: false } ); const PublicChatSnapshotsManager = dynamic( - () => import("@/components/public-chat-snapshots/public-chat-snapshots-manager").then(m => ({ default: m.PublicChatSnapshotsManager })), + () => + import("@/components/public-chat-snapshots/public-chat-snapshots-manager").then((m) => ({ + default: m.PublicChatSnapshotsManager, + })), { ssr: false } ); diff --git a/surfsense_web/components/settings/user-settings-dialog.tsx b/surfsense_web/components/settings/user-settings-dialog.tsx index ee0f7e62d..e755da197 100644 --- a/surfsense_web/components/settings/user-settings-dialog.tsx +++ b/surfsense_web/components/settings/user-settings-dialog.tsx @@ -1,8 +1,8 @@ "use client"; -import dynamic from "next/dynamic"; import { useAtom } from "jotai"; import { Globe, KeyRound, Monitor, Receipt, Sparkles, User } from "lucide-react"; +import dynamic from "next/dynamic"; import { useTranslations } from "next-intl"; import { useMemo } from "react"; import { userSettingsDialogAtom } from "@/atoms/settings/settings-dialog.atoms"; @@ -10,27 +10,45 @@ import { SettingsDialog } from "@/components/settings/settings-dialog"; import { usePlatform } from "@/hooks/use-platform"; const ProfileContent = dynamic( - () => import("@/app/dashboard/[search_space_id]/user-settings/components/ProfileContent").then(m => ({ default: m.ProfileContent })), + () => + import("@/app/dashboard/[search_space_id]/user-settings/components/ProfileContent").then( + (m) => ({ default: m.ProfileContent }) + ), { ssr: false } ); const ApiKeyContent = dynamic( - () => import("@/app/dashboard/[search_space_id]/user-settings/components/ApiKeyContent").then(m => ({ default: m.ApiKeyContent })), + () => + import("@/app/dashboard/[search_space_id]/user-settings/components/ApiKeyContent").then( + (m) => ({ default: m.ApiKeyContent }) + ), { ssr: false } ); const PromptsContent = dynamic( - () => import("@/app/dashboard/[search_space_id]/user-settings/components/PromptsContent").then(m => ({ default: m.PromptsContent })), + () => + import("@/app/dashboard/[search_space_id]/user-settings/components/PromptsContent").then( + (m) => ({ default: m.PromptsContent }) + ), { ssr: false } ); const CommunityPromptsContent = dynamic( - () => import("@/app/dashboard/[search_space_id]/user-settings/components/CommunityPromptsContent").then(m => ({ default: m.CommunityPromptsContent })), + () => + import( + "@/app/dashboard/[search_space_id]/user-settings/components/CommunityPromptsContent" + ).then((m) => ({ default: m.CommunityPromptsContent })), { ssr: false } ); const PurchaseHistoryContent = dynamic( - () => import("@/app/dashboard/[search_space_id]/user-settings/components/PurchaseHistoryContent").then(m => ({ default: m.PurchaseHistoryContent })), + () => + import( + "@/app/dashboard/[search_space_id]/user-settings/components/PurchaseHistoryContent" + ).then((m) => ({ default: m.PurchaseHistoryContent })), { ssr: false } ); const DesktopContent = dynamic( - () => import("@/app/dashboard/[search_space_id]/user-settings/components/DesktopContent").then(m => ({ default: m.DesktopContent })), + () => + import("@/app/dashboard/[search_space_id]/user-settings/components/DesktopContent").then( + (m) => ({ default: m.DesktopContent }) + ), { ssr: false } ); diff --git a/surfsense_web/components/sources/DocumentUploadTab.tsx b/surfsense_web/components/sources/DocumentUploadTab.tsx index 636b2bb35..2802dbe93 100644 --- a/surfsense_web/components/sources/DocumentUploadTab.tsx +++ b/surfsense_web/components/sources/DocumentUploadTab.tsx @@ -341,36 +341,36 @@ export function DocumentUploadTab({ ) ) : ( -
{ - if (!isElectron) fileInputRef.current?.click(); - }} - onKeyDown={(e) => { - if (e.key === "Enter" || e.key === " ") { - e.preventDefault(); - if (!isElectron) fileInputRef.current?.click(); - } - }} - > - -
-

- {isElectron ? "Select files or folder" : "Tap to select files or folder"} -

-

{t("file_size_limit")}

-
e.stopPropagation()} - onKeyDown={(e) => e.stopPropagation()} - role="group" + role="button" + tabIndex={0} + className="flex flex-col items-center gap-4 py-12 px-4 cursor-pointer w-full bg-transparent border-none" + onClick={() => { + if (!isElectron) fileInputRef.current?.click(); + }} + onKeyDown={(e) => { + if (e.key === "Enter" || e.key === " ") { + e.preventDefault(); + if (!isElectron) fileInputRef.current?.click(); + } + }} > - {renderBrowseButton({ fullWidth: true })} + +
+

+ {isElectron ? "Select files or folder" : "Tap to select files or folder"} +

+

{t("file_size_limit")}

+
+
e.stopPropagation()} + onKeyDown={(e) => e.stopPropagation()} + role="group" + > + {renderBrowseButton({ fullWidth: true })} +
-
)}
diff --git a/surfsense_web/components/sources/FolderWatchDialog.tsx b/surfsense_web/components/sources/FolderWatchDialog.tsx index 4cb311a46..3590279ce 100644 --- a/surfsense_web/components/sources/FolderWatchDialog.tsx +++ b/surfsense_web/components/sources/FolderWatchDialog.tsx @@ -13,8 +13,8 @@ import { } from "@/components/ui/dialog"; import { Spinner } from "@/components/ui/spinner"; import { Switch } from "@/components/ui/switch"; -import { getSupportedExtensionsSet } from "@/lib/supported-extensions"; import { type FolderSyncProgress, uploadFolderScan } from "@/lib/folder-sync-upload"; +import { getSupportedExtensionsSet } from "@/lib/supported-extensions"; export interface SelectedFolder { path: string; @@ -166,8 +166,12 @@ export function FolderWatchDialog({ - Watch Local Folder - Select a folder to sync and watch for changes + + Watch Local Folder + + + Select a folder to sync and watch for changes +
@@ -218,7 +222,9 @@ export function FolderWatchDialog({
)} diff --git a/surfsense_web/hooks/use-folder-sync.ts b/surfsense_web/hooks/use-folder-sync.ts index 7a85c31fe..8e0d0ebdc 100644 --- a/surfsense_web/hooks/use-folder-sync.ts +++ b/surfsense_web/hooks/use-folder-sync.ts @@ -50,7 +50,9 @@ export function useFolderSync() { while (queueRef.current.length > 0) { const batch = queueRef.current.shift()!; try { - const addChangeFiles = batch.files.filter((f) => f.action === "add" || f.action === "change"); + const addChangeFiles = batch.files.filter( + (f) => f.action === "add" || f.action === "change" + ); const unlinkFiles = batch.files.filter((f) => f.action === "unlink"); if (addChangeFiles.length > 0 && electronAPI?.readLocalFiles) { @@ -128,11 +130,13 @@ export function useFolderSync() { folderName: event.folderName, searchSpaceId: event.searchSpaceId, rootFolderId: event.rootFolderId, - files: [{ - fullPath: event.fullPath, - relativePath: event.relativePath, - action: event.action, - }], + files: [ + { + fullPath: event.fullPath, + relativePath: event.relativePath, + action: event.action, + }, + ], ackIds: [event.id], }); firstEventTime.current.set(folderKey, Date.now()); diff --git a/surfsense_web/lib/apis/documents-api.service.ts b/surfsense_web/lib/apis/documents-api.service.ts index 34a0b6dce..584f2e212 100644 --- a/surfsense_web/lib/apis/documents-api.service.ts +++ b/surfsense_web/lib/apis/documents-api.service.ts @@ -429,7 +429,9 @@ class DocumentsApiService { search_space_id: number; files: { relative_path: string; mtime: number }[]; }): Promise<{ files_to_upload: string[] }> => { - return baseApiService.post(`/api/v1/documents/folder-mtime-check`, undefined, { body }) as unknown as { files_to_upload: string[] }; + return baseApiService.post(`/api/v1/documents/folder-mtime-check`, undefined, { + body, + }) as unknown as { files_to_upload: string[] }; }; folderUploadFiles = async ( @@ -441,7 +443,7 @@ class DocumentsApiService { root_folder_id?: number | null; enable_summary?: boolean; }, - signal?: AbortSignal, + signal?: AbortSignal ): Promise<{ message: string; status: string; root_folder_id: number; file_count: number }> => { const formData = new FormData(); for (const file of files) { @@ -466,11 +468,10 @@ class DocumentsApiService { } try { - return await baseApiService.postFormData( - `/api/v1/documents/folder-upload`, - undefined, - { body: formData, signal: controller.signal }, - ) as { message: string; status: string; root_folder_id: number; file_count: number }; + return (await baseApiService.postFormData(`/api/v1/documents/folder-upload`, undefined, { + body: formData, + signal: controller.signal, + })) as { message: string; status: string; root_folder_id: number; file_count: number }; } finally { clearTimeout(timeoutId); } @@ -482,7 +483,9 @@ class DocumentsApiService { root_folder_id: number | null; relative_paths: string[]; }): Promise<{ deleted_count: number }> => { - return baseApiService.post(`/api/v1/documents/folder-unlink`, undefined, { body }) as unknown as { deleted_count: number }; + return baseApiService.post(`/api/v1/documents/folder-unlink`, undefined, { + body, + }) as unknown as { deleted_count: number }; }; folderSyncFinalize = async (body: { @@ -491,7 +494,9 @@ class DocumentsApiService { root_folder_id: number | null; all_relative_paths: string[]; }): Promise<{ deleted_count: number }> => { - return baseApiService.post(`/api/v1/documents/folder-sync-finalize`, undefined, { body }) as unknown as { deleted_count: number }; + return baseApiService.post(`/api/v1/documents/folder-sync-finalize`, undefined, { + body, + }) as unknown as { deleted_count: number }; }; getWatchedFolders = async (searchSpaceId: number) => { diff --git a/surfsense_web/lib/folder-sync-upload.ts b/surfsense_web/lib/folder-sync-upload.ts index ef01d52bd..7a9810d76 100644 --- a/surfsense_web/lib/folder-sync-upload.ts +++ b/surfsense_web/lib/folder-sync-upload.ts @@ -22,9 +22,7 @@ export interface FolderSyncParams { signal?: AbortSignal; } -function buildBatches( - entries: FolderFileEntry[], -): FolderFileEntry[][] { +function buildBatches(entries: FolderFileEntry[]): FolderFileEntry[][] { const batches: FolderFileEntry[][] = []; let currentBatch: FolderFileEntry[] = []; let currentSize = 0; @@ -40,10 +38,7 @@ function buildBatches( continue; } - if ( - currentBatch.length >= MAX_BATCH_FILES || - currentSize + entry.size > MAX_BATCH_SIZE_BYTES - ) { + if (currentBatch.length >= MAX_BATCH_FILES || currentSize + entry.size > MAX_BATCH_SIZE_BYTES) { batches.push(currentBatch); currentBatch = []; currentSize = 0; @@ -69,7 +64,7 @@ async function uploadBatchesWithConcurrency( enableSummary: boolean; signal?: AbortSignal; onBatchComplete?: (filesInBatch: number) => void; - }, + } ): Promise { const api = window.electronAPI; if (!api) throw new Error("Electron API not available"); @@ -105,7 +100,7 @@ async function uploadBatchesWithConcurrency( root_folder_id: resolvedRootFolderId, enable_summary: params.enableSummary, }, - params.signal, + params.signal ); if (result.root_folder_id && !resolvedRootFolderId) { @@ -121,7 +116,9 @@ async function uploadBatchesWithConcurrency( } } - const workers = Array.from({ length: Math.min(UPLOAD_CONCURRENCY, batches.length) }, () => processNext()); + const workers = Array.from({ length: Math.min(UPLOAD_CONCURRENCY, batches.length) }, () => + processNext() + ); await Promise.all(workers); if (errors.length > 0 && !params.signal?.aborted) { @@ -141,7 +138,15 @@ export async function uploadFolderScan(params: FolderSyncParams): Promise f.relativePath), }); - params.onProgress?.({ phase: "done", uploaded: entriesToUpload.length, total: entriesToUpload.length }); + params.onProgress?.({ + phase: "done", + uploaded: entriesToUpload.length, + total: entriesToUpload.length, + }); // Seed the Electron mtime store so the reconciliation scan in // startWatcher won't re-emit events for files we just indexed.