From 64a170efce5c7929d4688278413d0c0532ef624d Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 17 Feb 2026 21:27:58 +0200 Subject: [PATCH] Fix critical bugs in KB sync implementation - Add rollback on exception to prevent dirty database state - Fix block verification threshold from >=1 to >=80% - Remove technical 'failed' language from user messages - Add empty content validation with warning --- .../app/services/notion/kb_sync_service.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/surfsense_backend/app/services/notion/kb_sync_service.py b/surfsense_backend/app/services/notion/kb_sync_service.py index de06135b9..c51de09b9 100644 --- a/surfsense_backend/app/services/notion/kb_sync_service.py +++ b/surfsense_backend/app/services/notion/kb_sync_service.py @@ -64,6 +64,11 @@ class NotionKBSyncService: fetched_content = process_blocks(blocks) logger.debug(f"Fetched content length: {len(fetched_content)} chars") + if not fetched_content or not fetched_content.strip(): + logger.warning( + f"Fetched empty content for page {page_id} - document will have minimal searchable text" + ) + content_verified = False if appended_block_ids: fetched_block_ids = set(extract_all_block_ids(blocks)) @@ -78,9 +83,9 @@ class NotionKBSyncService: f"Appended IDs (first 3): {appended_block_ids[:3]}, Fetched IDs count: {len(fetched_block_ids)}" ) - if len(found_blocks) >= 1: + if len(found_blocks) >= len(appended_block_ids) * 0.8: # 80% threshold logger.info( - f"Content verified fresh: found {len(found_blocks)} appended blocks" + f"Content verified fresh: found {len(found_blocks)}/{len(appended_block_ids)} appended blocks" ) full_content = fetched_content content_verified = True @@ -154,4 +159,5 @@ class NotionKBSyncService: logger.error( f"Failed to sync KB for document {document_id}: {e}", exc_info=True ) + await self.db_session.rollback() return {"status": "error", "message": str(e)}