From 464e7d45544cbbe68f9643115f98764c9a064660 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 00:10:25 +0200 Subject: [PATCH] fix(onedrive): sanitize ETL reason and retry stuck pending/processing files --- .../app/tasks/connector_indexers/onedrive_indexer.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/surfsense_backend/app/tasks/connector_indexers/onedrive_indexer.py b/surfsense_backend/app/tasks/connector_indexers/onedrive_indexer.py index f98b330d7..3fd8a79f2 100644 --- a/surfsense_backend/app/tasks/connector_indexers/onedrive_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/onedrive_indexer.py @@ -26,6 +26,7 @@ from app.connectors.onedrive.file_types import should_skip_file as skip_item from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType from app.indexing_pipeline.connector_document import ConnectorDocument from app.indexing_pipeline.document_hashing import compute_identifier_hash +from app.indexing_pipeline.exceptions import safe_exception_message from app.indexing_pipeline.indexing_pipeline_service import IndexingPipelineService from app.services.page_limit_service import PageLimitService from app.services.task_logging_service import TaskLoggingService @@ -120,7 +121,12 @@ async def _should_skip_file( logger.info(f"Rename-only update: '{old_name}' -> '{file_name}'") return True, f"File renamed: '{old_name}' -> '{file_name}'" - if not DocumentStatus.is_state(existing.status, DocumentStatus.READY): + state = DocumentStatus.get_state(existing.status) + if state in (DocumentStatus.PENDING, DocumentStatus.PROCESSING): + # Stuck placeholder/in-progress doc (e.g. worker died mid-index): re-index + # instead of skipping, otherwise it never recovers. + return False, None + if state != DocumentStatus.READY: return True, "skipped (previously failed)" return True, "unchanged" @@ -217,7 +223,7 @@ async def _download_files_parallel( continue file_id = file.get("id") if isinstance(outcome, Exception): - reason = f"Download/ETL error: {outcome}" + reason = f"Download/ETL error: {safe_exception_message(outcome)}" logger.warning( "Download/ETL exception for %s: %s", file.get("name", "Unknown"),