diff --git a/surfsense_backend/tests/e2e/test_document_upload.py b/surfsense_backend/tests/e2e/test_document_upload.py index f3ff8b7a6..c1193a63b 100644 --- a/surfsense_backend/tests/e2e/test_document_upload.py +++ b/surfsense_backend/tests/e2e/test_document_upload.py @@ -353,16 +353,16 @@ class TestDuplicateContentDetection: assert resp2.status_code == 200 second_ids = resp2.json()["document_ids"] cleanup_doc_ids.extend(second_ids) + assert second_ids, ( + "Expected at least one document id for renamed duplicate content upload" + ) - if second_ids: - statuses = await poll_document_status( - client, headers, second_ids, search_space_id=search_space_id - ) - for did in second_ids: - assert statuses[did]["status"]["state"] == "failed" - assert "duplicate" in ( - statuses[did]["status"].get("reason", "").lower() - ) + statuses = await poll_document_status( + client, headers, second_ids, search_space_id=search_space_id + ) + for did in second_ids: + assert statuses[did]["status"]["state"] == "failed" + assert "duplicate" in statuses[did]["status"].get("reason", "").lower() # --------------------------------------------------------------------------- @@ -387,16 +387,16 @@ class TestEmptyFileUpload: doc_ids = resp.json()["document_ids"] cleanup_doc_ids.extend(doc_ids) + assert doc_ids, "Expected at least one document id for empty PDF upload" - if doc_ids: - statuses = await poll_document_status( - client, headers, doc_ids, search_space_id=search_space_id, timeout=120.0 + statuses = await poll_document_status( + client, headers, doc_ids, search_space_id=search_space_id, timeout=120.0 + ) + for did in doc_ids: + assert statuses[did]["status"]["state"] == "failed" + assert statuses[did]["status"].get("reason"), ( + "Failed document should include a reason" ) - for did in doc_ids: - assert statuses[did]["status"]["state"] == "failed" - assert statuses[did]["status"].get("reason"), ( - "Failed document should include a reason" - ) # --------------------------------------------------------------------------- diff --git a/surfsense_backend/tests/utils/helpers.py b/surfsense_backend/tests/utils/helpers.py index ad58d5141..270db7125 100644 --- a/surfsense_backend/tests/utils/helpers.py +++ b/surfsense_backend/tests/utils/helpers.py @@ -124,20 +124,31 @@ async def poll_document_status( terminal state (``ready`` or ``failed``) or *timeout* seconds elapse. Returns a mapping of ``{document_id: status_item_dict}``. + + Retries on transient transport errors until timeout. """ ids_param = ",".join(str(d) for d in document_ids) terminal_states = {"ready", "failed"} elapsed = 0.0 + items: dict[int, dict] = {} + last_transport_error: Exception | None = None while elapsed < timeout: - resp = await client.get( - "/api/v1/documents/status", - headers=headers, - params={ - "search_space_id": search_space_id, - "document_ids": ids_param, - }, - ) + try: + resp = await client.get( + "/api/v1/documents/status", + headers=headers, + params={ + "search_space_id": search_space_id, + "document_ids": ids_param, + }, + ) + except (httpx.ReadError, httpx.ConnectError, httpx.TimeoutException) as exc: + last_transport_error = exc + await asyncio.sleep(interval) + elapsed += interval + continue + assert resp.status_code == 200, ( f"Status poll failed ({resp.status_code}): {resp.text}" ) @@ -154,7 +165,8 @@ async def poll_document_status( raise TimeoutError( f"Documents {document_ids} did not reach terminal state within {timeout}s. " - f"Last status: {items}" + f"Last status: {items}. " + f"Last transport error: {last_transport_error!r}" )