mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-29 19:35:20 +02:00
fix: update file skipping logic for failed documents in Google Drive indexer
- Modified the `_should_skip_file` function so that previously failed documents are skipped during automatic sync instead of being reprocessed; they can still be retried manually via Quick Index.
- Updated the corresponding test to reflect the new behavior, ensuring that failed documents are identified and skipped, and that the skip message mentions the failure.
This commit is contained in:
parent
3ce831d01d
commit
4e0749f907
2 changed files with 6 additions and 5 deletions
|
|
@@ -150,7 +150,7 @@ async def _should_skip_file(
|
||||||
return True, f"File renamed: '{old_name}' → '{file_name}'"
|
return True, f"File renamed: '{old_name}' → '{file_name}'"
|
||||||
|
|
||||||
if not DocumentStatus.is_state(existing.status, DocumentStatus.READY):
|
if not DocumentStatus.is_state(existing.status, DocumentStatus.READY):
|
||||||
return False, None
|
return True, "skipped (previously failed)"
|
||||||
return True, "unchanged"
|
return True, "unchanged"
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -110,10 +110,10 @@ async def test_drive_legacy_doc_migrated(
|
||||||
assert row.unique_identifier_hash == native_hash
|
assert row.unique_identifier_hash == native_hash
|
||||||
|
|
||||||
|
|
||||||
async def test_should_skip_file_does_not_skip_failed_document(
|
async def test_should_skip_file_skips_failed_document(
|
||||||
db_session, db_search_space, db_user,
|
db_session, db_search_space, db_user,
|
||||||
):
|
):
|
||||||
"""A FAILED document with unchanged md5 must NOT be skipped — it needs reprocessing."""
|
"""A FAILED document with unchanged md5 must be skipped — user can manually retry via Quick Index."""
|
||||||
import importlib
|
import importlib
|
||||||
import sys
|
import sys
|
||||||
import types
|
import types
|
||||||
|
|
@@ -164,6 +164,7 @@ async def test_should_skip_file_does_not_skip_failed_document(
|
||||||
|
|
||||||
incoming_file = {"id": file_id, "name": "Failed File.pdf", "mimeType": "application/pdf", "md5Checksum": md5}
|
incoming_file = {"id": file_id, "name": "Failed File.pdf", "mimeType": "application/pdf", "md5Checksum": md5}
|
||||||
|
|
||||||
should_skip, _msg = await _should_skip_file(db_session, incoming_file, space_id)
|
should_skip, msg = await _should_skip_file(db_session, incoming_file, space_id)
|
||||||
|
|
||||||
assert not should_skip, "FAILED documents must not be skipped even when content is unchanged"
|
assert should_skip, "FAILED documents must be skipped during automatic sync"
|
||||||
|
assert "failed" in msg.lower()
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue