From 77544ab768fe96e8f8430e5c57f1c7bf9f950521 Mon Sep 17 00:00:00 2001
From: CREDO23 <bakerathierry@gmail.com>
Date: Wed, 10 Jun 2026 00:11:00 +0200
Subject: [PATCH] test(google-drive): assert stuck pending/processing docs
 retry

---
 .../indexing_pipeline/test_drive_pipeline.py  | 72 +++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/surfsense_backend/tests/integration/indexing_pipeline/test_drive_pipeline.py b/surfsense_backend/tests/integration/indexing_pipeline/test_drive_pipeline.py
index c7565f4ba..6e85421ea 100644
--- a/surfsense_backend/tests/integration/indexing_pipeline/test_drive_pipeline.py
+++ b/surfsense_backend/tests/integration/indexing_pipeline/test_drive_pipeline.py
@@ -177,3 +177,75 @@ async def test_should_skip_file_skips_failed_document(
 
     assert should_skip, "FAILED documents must be skipped during automatic sync"
     assert "failed" in msg.lower()
+
+
+@pytest.mark.parametrize("stuck_state", ["pending", "processing"])
+async def test_should_skip_file_retries_stuck_document(
+    db_session,
+    db_search_space,
+    db_user,
+    stuck_state,
+):
+    """A document left in pending/processing must be re-indexed rather than skipped."""
+    import sys
+    import types
+    from importlib import import_module
+
+    # Register a bare placeholder package (only if the real one is not loaded)
+    # for the duration of the import; it is always removed again afterwards.
+    parent_pkg = "app.tasks.connector_indexers"
+    needs_placeholder = parent_pkg not in sys.modules
+    if needs_placeholder:
+        placeholder = types.ModuleType(parent_pkg)
+        placeholder.__path__ = ["app/tasks/connector_indexers"]
+        placeholder.__package__ = parent_pkg
+        sys.modules[parent_pkg] = placeholder
+
+    try:
+        indexer = import_module("app.tasks.connector_indexers.google_drive_indexer")
+        _should_skip_file = indexer._should_skip_file
+    finally:
+        if needs_placeholder:
+            sys.modules.pop(parent_pkg, None)
+
+    scope_id = db_search_space.id
+    drive_file_id = f"file-{stuck_state}-drive"
+    checksum = "stuck123checksum"
+    file_name = "Stuck File.pdf"
+    drive_type = DocumentType.GOOGLE_DRIVE_FILE
+
+    uid_hash = compute_identifier_hash(drive_type.value, drive_file_id, scope_id)
+    if stuck_state == "pending":
+        status = DocumentStatus.pending()
+    else:
+        status = DocumentStatus.processing()
+
+    # File id, name and checksum are shared with the incoming file built below.
+    metadata = {
+        "google_drive_file_id": drive_file_id,
+        "google_drive_file_name": file_name,
+        "md5_checksum": checksum,
+    }
+    stuck_doc = Document(
+        title=file_name,
+        document_type=drive_type,
+        content="Pending...",
+        content_hash=f"ch-{uid_hash[:12]}",
+        unique_identifier_hash=uid_hash,
+        source_markdown="",
+        search_space_id=scope_id,
+        created_by_id=str(db_user.id),
+        status=status,
+        document_metadata=metadata,
+    )
+    db_session.add(stuck_doc)
+    await db_session.flush()
+
+    incoming_file = {
+        "id": drive_file_id,
+        "name": file_name,
+        "mimeType": "application/pdf",
+        "md5Checksum": checksum,
+    }
+    skipped, _reason = await _should_skip_file(db_session, incoming_file, scope_id)
+    assert not skipped, f"{stuck_state} documents must re-index, not be skipped"