diff --git a/surfsense_backend/app/routes/documents_routes.py b/surfsense_backend/app/routes/documents_routes.py index b20f8cd9c..226d511cc 100644 --- a/surfsense_backend/app/routes/documents_routes.py +++ b/surfsense_backend/app/routes/documents_routes.py @@ -178,9 +178,26 @@ async def create_documents_file_upload( session, unique_identifier_hash ) if existing: - # Clean up temp file for duplicates - os.unlink(temp_path) - skipped_duplicates += 1 + if DocumentStatus.is_state(existing.status, DocumentStatus.READY): + # True duplicate — content already indexed, skip + os.unlink(temp_path) + skipped_duplicates += 1 + continue + + # Existing document is stuck (failed/pending/processing) + # Reset it to pending and re-dispatch for processing + existing.status = DocumentStatus.pending() + existing.content = "Processing..." + existing.document_metadata = { + **(existing.document_metadata or {}), + "file_size": file_size, + "upload_time": datetime.now().isoformat(), + } + existing.updated_at = get_current_timestamp() + created_documents.append(existing) + files_to_process.append( + (existing, temp_path, file.filename or "unknown") + ) continue # Create pending document (visible immediately in UI via ElectricSQL) diff --git a/surfsense_backend/app/tasks/celery_tasks/document_tasks.py b/surfsense_backend/app/tasks/celery_tasks/document_tasks.py index 7fd866f1c..dfbfea432 100644 --- a/surfsense_backend/app/tasks/celery_tasks/document_tasks.py +++ b/surfsense_backend/app/tasks/celery_tasks/document_tasks.py @@ -413,7 +413,7 @@ def process_file_upload_task( if not os.path.exists(file_path): logger.error( f"[process_file_upload] File does not exist: {file_path}. " - "The temp file may have been cleaned up before the task ran." + "File may have been removed before syncing could start." ) return @@ -654,7 +654,7 @@ def process_file_upload_with_document_task( if not os.path.exists(temp_path): logger.error( f"[process_file_upload_with_document] File does not exist: {temp_path}. " - "The temp file may have been cleaned up before the task ran." + "File may have been removed before syncing could start." ) # Mark document as failed since file is missing loop = asyncio.new_event_loop() @@ -663,7 +663,7 @@ def process_file_upload_with_document_task( loop.run_until_complete( _mark_document_failed( document_id, - "File not found - temp file may have been cleaned up", + "File not found. Please re-upload the file.", ) ) finally: