diff --git a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py index 1187edd98..53c438197 100644 --- a/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/confluence_indexer.py @@ -21,6 +21,7 @@ from .base import ( check_duplicate_document_by_hash, get_connector_by_id, logger, + mark_connector_documents_failed, update_connector_last_indexed, ) @@ -295,6 +296,23 @@ async def index_confluence_pages( heartbeat_interval=HEARTBEAT_INTERVAL_SECONDS, ) + # Placeholders for items skipped above (empty/duplicate/unbuildable) would + # otherwise stay stuck in 'pending' and undeletable. Fail them so they're + # recoverable. Leaves already-ready docs untouched. + indexed_ids = {doc.unique_id for doc in connector_docs} + stuck_placeholders = [ + (p.unique_id, "Skipped during sync: no indexable content") + for p in placeholders + if p.unique_id and p.unique_id not in indexed_ids + ] + if stuck_placeholders: + await mark_connector_documents_failed( + session, + document_type=DocumentType.CONFLUENCE_CONNECTOR, + search_space_id=search_space_id, + failures=stuck_placeholders, + ) + await update_connector_last_indexed(session, connector, update_last_indexed) logger.info(