mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-25 16:56:22 +02:00
fix: update file skipping logic for failed documents in Google Drive indexer
- Modified the `_should_skip_file` function to skip previously failed documents during processing, improving error handling.
- Updated the corresponding test to reflect the new behavior, ensuring that failed documents are correctly identified and skipped during automatic sync.
This commit is contained in:
parent
3ce831d01d
commit
4e0749f907
2 changed files with 6 additions and 5 deletions
|
|
@@ -110,10 +110,10 @@ async def test_drive_legacy_doc_migrated(
|
|||
assert row.unique_identifier_hash == native_hash
|
||||
|
||||
|
||||
async def test_should_skip_file_does_not_skip_failed_document(
|
||||
async def test_should_skip_file_skips_failed_document(
|
||||
db_session, db_search_space, db_user,
|
||||
):
|
||||
"""A FAILED document with unchanged md5 must NOT be skipped — it needs reprocessing."""
|
||||
"""A FAILED document with unchanged md5 must be skipped — user can manually retry via Quick Index."""
|
||||
import importlib
|
||||
import sys
|
||||
import types
|
||||
|
|
@@ -164,6 +164,7 @@ async def test_should_skip_file_does_not_skip_failed_document(
|
|||
|
||||
incoming_file = {"id": file_id, "name": "Failed File.pdf", "mimeType": "application/pdf", "md5Checksum": md5}
|
||||
|
||||
should_skip, _msg = await _should_skip_file(db_session, incoming_file, space_id)
|
||||
should_skip, msg = await _should_skip_file(db_session, incoming_file, space_id)
|
||||
|
||||
assert not should_skip, "FAILED documents must not be skipped even when content is unchanged"
|
||||
assert should_skip, "FAILED documents must be skipped during automatic sync"
|
||||
assert "failed" in msg.lower()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue