mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-10 20:35:17 +02:00
test(google-drive): assert stuck pending/processing docs retry
This commit is contained in:
parent
ba687813c1
commit
77544ab768
1 changed files with 72 additions and 0 deletions
|
|
@ -177,3 +177,75 @@ async def test_should_skip_file_skips_failed_document(
|
|||
|
||||
assert should_skip, "FAILED documents must be skipped during automatic sync"
|
||||
assert "failed" in msg.lower()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("stuck_state", ["pending", "processing"])
|
||||
async def test_should_skip_file_retries_stuck_document(
|
||||
db_session,
|
||||
db_search_space,
|
||||
db_user,
|
||||
stuck_state,
|
||||
):
|
||||
"""A doc stuck in pending/processing (worker died mid-index) must re-index, not skip."""
|
||||
import importlib
|
||||
import sys
|
||||
import types
|
||||
|
||||
pkg = "app.tasks.connector_indexers"
|
||||
stub = pkg not in sys.modules
|
||||
if stub:
|
||||
mod = types.ModuleType(pkg)
|
||||
mod.__path__ = ["app/tasks/connector_indexers"]
|
||||
mod.__package__ = pkg
|
||||
sys.modules[pkg] = mod
|
||||
|
||||
try:
|
||||
gdm = importlib.import_module(
|
||||
"app.tasks.connector_indexers.google_drive_indexer"
|
||||
)
|
||||
_should_skip_file = gdm._should_skip_file
|
||||
finally:
|
||||
if stub:
|
||||
sys.modules.pop(pkg, None)
|
||||
|
||||
space_id = db_search_space.id
|
||||
file_id = f"file-{stuck_state}-drive"
|
||||
md5 = "stuck123checksum"
|
||||
|
||||
doc_hash = compute_identifier_hash(
|
||||
DocumentType.GOOGLE_DRIVE_FILE.value, file_id, space_id
|
||||
)
|
||||
status = (
|
||||
DocumentStatus.pending()
|
||||
if stuck_state == "pending"
|
||||
else DocumentStatus.processing()
|
||||
)
|
||||
stuck_doc = Document(
|
||||
title="Stuck File.pdf",
|
||||
document_type=DocumentType.GOOGLE_DRIVE_FILE,
|
||||
content="Pending...",
|
||||
content_hash=f"ch-{doc_hash[:12]}",
|
||||
unique_identifier_hash=doc_hash,
|
||||
source_markdown="",
|
||||
search_space_id=space_id,
|
||||
created_by_id=str(db_user.id),
|
||||
status=status,
|
||||
document_metadata={
|
||||
"google_drive_file_id": file_id,
|
||||
"google_drive_file_name": "Stuck File.pdf",
|
||||
"md5_checksum": md5,
|
||||
},
|
||||
)
|
||||
db_session.add(stuck_doc)
|
||||
await db_session.flush()
|
||||
|
||||
incoming_file = {
|
||||
"id": file_id,
|
||||
"name": "Stuck File.pdf",
|
||||
"mimeType": "application/pdf",
|
||||
"md5Checksum": md5,
|
||||
}
|
||||
|
||||
should_skip, _msg = await _should_skip_file(db_session, incoming_file, space_id)
|
||||
|
||||
assert not should_skip, f"{stuck_state} documents must re-index, not be skipped"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue