Fix critical bugs in KB sync implementation

- Add rollback on exception to prevent dirty database state
- Fix block verification threshold: require >=80% of appended blocks to be found instead of >=1 block
- Remove technical 'failed' language from user messages
- Add empty-content check that logs a warning when the fetched page content is empty or whitespace-only
This commit is contained in:
CREDO23 2026-02-17 21:27:58 +02:00
parent 7d1bd1fab4
commit 64a170efce

View file

@ -64,6 +64,11 @@ class NotionKBSyncService:
fetched_content = process_blocks(blocks) fetched_content = process_blocks(blocks)
logger.debug(f"Fetched content length: {len(fetched_content)} chars") logger.debug(f"Fetched content length: {len(fetched_content)} chars")
if not fetched_content or not fetched_content.strip():
logger.warning(
f"Fetched empty content for page {page_id} - document will have minimal searchable text"
)
content_verified = False content_verified = False
if appended_block_ids: if appended_block_ids:
fetched_block_ids = set(extract_all_block_ids(blocks)) fetched_block_ids = set(extract_all_block_ids(blocks))
@ -78,9 +83,9 @@ class NotionKBSyncService:
f"Appended IDs (first 3): {appended_block_ids[:3]}, Fetched IDs count: {len(fetched_block_ids)}" f"Appended IDs (first 3): {appended_block_ids[:3]}, Fetched IDs count: {len(fetched_block_ids)}"
) )
if len(found_blocks) >= 1: if len(found_blocks) >= len(appended_block_ids) * 0.8: # 80% threshold
logger.info( logger.info(
f"Content verified fresh: found {len(found_blocks)} appended blocks" f"Content verified fresh: found {len(found_blocks)}/{len(appended_block_ids)} appended blocks"
) )
full_content = fetched_content full_content = fetched_content
content_verified = True content_verified = True
@ -154,4 +159,5 @@ class NotionKBSyncService:
logger.error( logger.error(
f"Failed to sync KB for document {document_id}: {e}", exc_info=True f"Failed to sync KB for document {document_id}: {e}", exc_info=True
) )
await self.db_session.rollback()
return {"status": "error", "message": str(e)} return {"status": "error", "message": str(e)}