mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-26 01:06:23 +02:00
fix: Improve document upload tests by adding assertions for document IDs and handling transient transport errors
This commit is contained in:
parent
25df3dff64
commit
d6e442b466
2 changed files with 38 additions and 26 deletions
|
|
@@ -353,16 +353,16 @@ class TestDuplicateContentDetection:
|
|||
assert resp2.status_code == 200
|
||||
second_ids = resp2.json()["document_ids"]
|
||||
cleanup_doc_ids.extend(second_ids)
|
||||
assert second_ids, (
|
||||
"Expected at least one document id for renamed duplicate content upload"
|
||||
)
|
||||
|
||||
if second_ids:
|
||||
statuses = await poll_document_status(
|
||||
client, headers, second_ids, search_space_id=search_space_id
|
||||
)
|
||||
for did in second_ids:
|
||||
assert statuses[did]["status"]["state"] == "failed"
|
||||
assert "duplicate" in (
|
||||
statuses[did]["status"].get("reason", "").lower()
|
||||
)
|
||||
statuses = await poll_document_status(
|
||||
client, headers, second_ids, search_space_id=search_space_id
|
||||
)
|
||||
for did in second_ids:
|
||||
assert statuses[did]["status"]["state"] == "failed"
|
||||
assert "duplicate" in statuses[did]["status"].get("reason", "").lower()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@@ -387,16 +387,16 @@ class TestEmptyFileUpload:
|
|||
|
||||
doc_ids = resp.json()["document_ids"]
|
||||
cleanup_doc_ids.extend(doc_ids)
|
||||
assert doc_ids, "Expected at least one document id for empty PDF upload"
|
||||
|
||||
if doc_ids:
|
||||
statuses = await poll_document_status(
|
||||
client, headers, doc_ids, search_space_id=search_space_id, timeout=120.0
|
||||
statuses = await poll_document_status(
|
||||
client, headers, doc_ids, search_space_id=search_space_id, timeout=120.0
|
||||
)
|
||||
for did in doc_ids:
|
||||
assert statuses[did]["status"]["state"] == "failed"
|
||||
assert statuses[did]["status"].get("reason"), (
|
||||
"Failed document should include a reason"
|
||||
)
|
||||
for did in doc_ids:
|
||||
assert statuses[did]["status"]["state"] == "failed"
|
||||
assert statuses[did]["status"].get("reason"), (
|
||||
"Failed document should include a reason"
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@@ -124,20 +124,31 @@ async def poll_document_status(
|
|||
terminal state (``ready`` or ``failed``) or *timeout* seconds elapse.
|
||||
|
||||
Returns a mapping of ``{document_id: status_item_dict}``.
|
||||
|
||||
Retries on transient transport errors until timeout.
|
||||
"""
|
||||
ids_param = ",".join(str(d) for d in document_ids)
|
||||
terminal_states = {"ready", "failed"}
|
||||
elapsed = 0.0
|
||||
items: dict[int, dict] = {}
|
||||
last_transport_error: Exception | None = None
|
||||
|
||||
while elapsed < timeout:
|
||||
resp = await client.get(
|
||||
"/api/v1/documents/status",
|
||||
headers=headers,
|
||||
params={
|
||||
"search_space_id": search_space_id,
|
||||
"document_ids": ids_param,
|
||||
},
|
||||
)
|
||||
try:
|
||||
resp = await client.get(
|
||||
"/api/v1/documents/status",
|
||||
headers=headers,
|
||||
params={
|
||||
"search_space_id": search_space_id,
|
||||
"document_ids": ids_param,
|
||||
},
|
||||
)
|
||||
except (httpx.ReadError, httpx.ConnectError, httpx.TimeoutException) as exc:
|
||||
last_transport_error = exc
|
||||
await asyncio.sleep(interval)
|
||||
elapsed += interval
|
||||
continue
|
||||
|
||||
assert resp.status_code == 200, (
|
||||
f"Status poll failed ({resp.status_code}): {resp.text}"
|
||||
)
|
||||
|
|
@@ -154,7 +165,8 @@ async def poll_document_status(
|
|||
|
||||
raise TimeoutError(
|
||||
f"Documents {document_ids} did not reach terminal state within {timeout}s. "
|
||||
f"Last status: {items}"
|
||||
f"Last status: {items}. "
|
||||
f"Last transport error: {last_transport_error!r}"
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue