refactor: Enhance test utilities for document upload by integrating search space handling

- Updated test fixtures to include search space ID retrieval for improved document upload tests.
- Refactored authentication and document upload functions to accept search space ID as a parameter.
- Removed hardcoded search space ID references to streamline test configurations.
This commit is contained in:
Anish Sarkar 2026-02-25 17:29:09 +05:30
parent 41eb68663a
commit 4ff712578d
3 changed files with 97 additions and 48 deletions

View file

@ -10,10 +10,10 @@ import pytest
from tests.utils.helpers import (
BACKEND_URL,
TEST_SEARCH_SPACE_ID,
auth_headers,
delete_document,
get_auth_token,
get_search_space_id,
)
@ -22,20 +22,24 @@ def backend_url() -> str:
return BACKEND_URL
@pytest.fixture(scope="session")
def search_space_id() -> int:
return TEST_SEARCH_SPACE_ID
@pytest.fixture(scope="session")
async def auth_token(backend_url: str) -> str:
    """Authenticate once per session, registering the user if needed.

    Opens a short-lived ``httpx.AsyncClient`` against ``backend_url`` and
    delegates to ``get_auth_token``; the client is closed before the token
    is handed to dependent fixtures.

    Args:
        backend_url: Base URL of the backend under test.

    Returns:
        The access token returned by ``get_auth_token``.
    """
    async with httpx.AsyncClient(base_url=backend_url, timeout=30.0) as client:
        return await get_auth_token(client)
@pytest.fixture(scope="session")
async def search_space_id(backend_url: str, auth_token: str) -> int:
    """Look up the search space ID used by the tests in this session.

    A dedicated ``httpx.AsyncClient`` is opened against ``backend_url`` for
    the lookup, which is delegated to ``get_search_space_id`` using the
    session's ``auth_token``.
    """
    http_client = httpx.AsyncClient(base_url=backend_url, timeout=30.0)
    async with http_client as session:
        space_id = await get_search_space_id(session, auth_token)
    return space_id
@pytest.fixture(scope="session")
def headers(auth_token: str) -> dict[str, str]:
"""Authorization headers reused across all tests in the session."""

View file

@ -20,7 +20,6 @@ import httpx
from tests.utils.helpers import (
FIXTURES_DIR,
TEST_SEARCH_SPACE_ID,
delete_document,
get_document,
poll_document_status,
@ -56,9 +55,10 @@ class TestTxtFileUpload:
self,
client: httpx.AsyncClient,
headers: dict[str, str],
search_space_id: int,
cleanup_doc_ids: list[int],
):
resp = await upload_file(client, headers, "sample.txt")
resp = await upload_file(client, headers, "sample.txt", search_space_id=search_space_id)
assert resp.status_code == 200
body = resp.json()
@ -70,14 +70,15 @@ class TestTxtFileUpload:
self,
client: httpx.AsyncClient,
headers: dict[str, str],
search_space_id: int,
cleanup_doc_ids: list[int],
):
resp = await upload_file(client, headers, "sample.txt")
resp = await upload_file(client, headers, "sample.txt", search_space_id=search_space_id)
assert resp.status_code == 200
doc_ids = resp.json()["document_ids"]
cleanup_doc_ids.extend(doc_ids)
statuses = await poll_document_status(client, headers, doc_ids)
statuses = await poll_document_status(client, headers, doc_ids, search_space_id=search_space_id)
for did in doc_ids:
assert statuses[did]["status"]["state"] == "ready"
@ -85,13 +86,14 @@ class TestTxtFileUpload:
self,
client: httpx.AsyncClient,
headers: dict[str, str],
search_space_id: int,
cleanup_doc_ids: list[int],
):
resp = await upload_file(client, headers, "sample.txt")
resp = await upload_file(client, headers, "sample.txt", search_space_id=search_space_id)
doc_ids = resp.json()["document_ids"]
cleanup_doc_ids.extend(doc_ids)
await poll_document_status(client, headers, doc_ids)
await poll_document_status(client, headers, doc_ids, search_space_id=search_space_id)
doc = await get_document(client, headers, doc_ids[0])
_assert_document_ready(doc, expected_filename="sample.txt")
@ -110,14 +112,15 @@ class TestMarkdownFileUpload:
self,
client: httpx.AsyncClient,
headers: dict[str, str],
search_space_id: int,
cleanup_doc_ids: list[int],
):
resp = await upload_file(client, headers, "sample.md")
resp = await upload_file(client, headers, "sample.md", search_space_id=search_space_id)
assert resp.status_code == 200
doc_ids = resp.json()["document_ids"]
cleanup_doc_ids.extend(doc_ids)
statuses = await poll_document_status(client, headers, doc_ids)
statuses = await poll_document_status(client, headers, doc_ids, search_space_id=search_space_id)
for did in doc_ids:
assert statuses[did]["status"]["state"] == "ready"
@ -125,13 +128,14 @@ class TestMarkdownFileUpload:
self,
client: httpx.AsyncClient,
headers: dict[str, str],
search_space_id: int,
cleanup_doc_ids: list[int],
):
resp = await upload_file(client, headers, "sample.md")
resp = await upload_file(client, headers, "sample.md", search_space_id=search_space_id)
doc_ids = resp.json()["document_ids"]
cleanup_doc_ids.extend(doc_ids)
await poll_document_status(client, headers, doc_ids)
await poll_document_status(client, headers, doc_ids, search_space_id=search_space_id)
doc = await get_document(client, headers, doc_ids[0])
_assert_document_ready(doc, expected_filename="sample.md")
@ -150,15 +154,16 @@ class TestPdfFileUpload:
self,
client: httpx.AsyncClient,
headers: dict[str, str],
search_space_id: int,
cleanup_doc_ids: list[int],
):
resp = await upload_file(client, headers, "sample.pdf")
resp = await upload_file(client, headers, "sample.pdf", search_space_id=search_space_id)
assert resp.status_code == 200
doc_ids = resp.json()["document_ids"]
cleanup_doc_ids.extend(doc_ids)
statuses = await poll_document_status(
client, headers, doc_ids, timeout=300.0
client, headers, doc_ids, search_space_id=search_space_id, timeout=300.0
)
for did in doc_ids:
assert statuses[did]["status"]["state"] == "ready"
@ -167,14 +172,15 @@ class TestPdfFileUpload:
self,
client: httpx.AsyncClient,
headers: dict[str, str],
search_space_id: int,
cleanup_doc_ids: list[int],
):
resp = await upload_file(client, headers, "sample.pdf")
resp = await upload_file(client, headers, "sample.pdf", search_space_id=search_space_id)
doc_ids = resp.json()["document_ids"]
cleanup_doc_ids.extend(doc_ids)
await poll_document_status(
client, headers, doc_ids, timeout=300.0
client, headers, doc_ids, search_space_id=search_space_id, timeout=300.0
)
doc = await get_document(client, headers, doc_ids[0])
@ -198,10 +204,11 @@ class TestMultiFileUpload:
self,
client: httpx.AsyncClient,
headers: dict[str, str],
search_space_id: int,
cleanup_doc_ids: list[int],
):
resp = await upload_multiple_files(
client, headers, ["sample.txt", "sample.md"]
client, headers, ["sample.txt", "sample.md"], search_space_id=search_space_id
)
assert resp.status_code == 200
@ -214,15 +221,16 @@ class TestMultiFileUpload:
self,
client: httpx.AsyncClient,
headers: dict[str, str],
search_space_id: int,
cleanup_doc_ids: list[int],
):
resp = await upload_multiple_files(
client, headers, ["sample.txt", "sample.md"]
client, headers, ["sample.txt", "sample.md"], search_space_id=search_space_id
)
doc_ids = resp.json()["document_ids"]
cleanup_doc_ids.extend(doc_ids)
statuses = await poll_document_status(client, headers, doc_ids)
statuses = await poll_document_status(client, headers, doc_ids, search_space_id=search_space_id)
for did in doc_ids:
assert statuses[did]["status"]["state"] == "ready"
@ -242,18 +250,19 @@ class TestDuplicateFileUpload:
self,
client: httpx.AsyncClient,
headers: dict[str, str],
search_space_id: int,
cleanup_doc_ids: list[int],
):
# First upload
resp1 = await upload_file(client, headers, "sample.txt")
resp1 = await upload_file(client, headers, "sample.txt", search_space_id=search_space_id)
assert resp1.status_code == 200
first_ids = resp1.json()["document_ids"]
cleanup_doc_ids.extend(first_ids)
await poll_document_status(client, headers, first_ids)
await poll_document_status(client, headers, first_ids, search_space_id=search_space_id)
# Second upload of the same file
resp2 = await upload_file(client, headers, "sample.txt")
resp2 = await upload_file(client, headers, "sample.txt", search_space_id=search_space_id)
assert resp2.status_code == 200
body2 = resp2.json()
@ -277,15 +286,16 @@ class TestDuplicateContentDetection:
self,
client: httpx.AsyncClient,
headers: dict[str, str],
search_space_id: int,
cleanup_doc_ids: list[int],
tmp_path: Path,
):
# First upload
resp1 = await upload_file(client, headers, "sample.txt")
resp1 = await upload_file(client, headers, "sample.txt", search_space_id=search_space_id)
assert resp1.status_code == 200
first_ids = resp1.json()["document_ids"]
cleanup_doc_ids.extend(first_ids)
await poll_document_status(client, headers, first_ids)
await poll_document_status(client, headers, first_ids, search_space_id=search_space_id)
# Copy fixture content to a differently named temp file
src = FIXTURES_DIR / "sample.txt"
@ -297,14 +307,14 @@ class TestDuplicateContentDetection:
"/api/v1/documents/fileupload",
headers=headers,
files={"files": ("renamed_sample.txt", f)},
data={"search_space_id": str(TEST_SEARCH_SPACE_ID)},
data={"search_space_id": str(search_space_id)},
)
assert resp2.status_code == 200
second_ids = resp2.json()["document_ids"]
cleanup_doc_ids.extend(second_ids)
if second_ids:
statuses = await poll_document_status(client, headers, second_ids)
statuses = await poll_document_status(client, headers, second_ids, search_space_id=search_space_id)
for did in second_ids:
assert statuses[did]["status"]["state"] == "failed"
assert "duplicate" in (
@ -324,9 +334,10 @@ class TestEmptyFileUpload:
self,
client: httpx.AsyncClient,
headers: dict[str, str],
search_space_id: int,
cleanup_doc_ids: list[int],
):
resp = await upload_file(client, headers, "empty.pdf")
resp = await upload_file(client, headers, "empty.pdf", search_space_id=search_space_id)
assert resp.status_code == 200
doc_ids = resp.json()["document_ids"]
@ -334,7 +345,7 @@ class TestEmptyFileUpload:
if doc_ids:
statuses = await poll_document_status(
client, headers, doc_ids, timeout=120.0
client, headers, doc_ids, search_space_id=search_space_id, timeout=120.0
)
for did in doc_ids:
assert statuses[did]["status"]["state"] == "failed"
@ -354,13 +365,14 @@ class TestUnauthenticatedUpload:
async def test_upload_without_auth_returns_401(
self,
client: httpx.AsyncClient,
search_space_id: int,
):
file_path = FIXTURES_DIR / "sample.txt"
with open(file_path, "rb") as f:
resp = await client.post(
"/api/v1/documents/fileupload",
files={"files": ("sample.txt", f)},
data={"search_space_id": str(TEST_SEARCH_SPACE_ID)},
data={"search_space_id": str(search_space_id)},
)
assert resp.status_code == 401
@ -377,11 +389,12 @@ class TestNoFilesUpload:
self,
client: httpx.AsyncClient,
headers: dict[str, str],
search_space_id: int,
):
resp = await client.post(
"/api/v1/documents/fileupload",
headers=headers,
data={"search_space_id": str(TEST_SEARCH_SPACE_ID)},
data={"search_space_id": str(search_space_id)},
)
assert resp.status_code in {400, 422}
@ -398,10 +411,11 @@ class TestDocumentDeletion:
self,
client: httpx.AsyncClient,
headers: dict[str, str],
search_space_id: int,
):
resp = await upload_file(client, headers, "sample.txt")
resp = await upload_file(client, headers, "sample.txt", search_space_id=search_space_id)
doc_ids = resp.json()["document_ids"]
await poll_document_status(client, headers, doc_ids)
await poll_document_status(client, headers, doc_ids, search_space_id=search_space_id)
del_resp = await delete_document(client, headers, doc_ids[0])
assert del_resp.status_code == 200
@ -425,9 +439,10 @@ class TestDeleteWhileProcessing:
self,
client: httpx.AsyncClient,
headers: dict[str, str],
search_space_id: int,
cleanup_doc_ids: list[int],
):
resp = await upload_file(client, headers, "sample.pdf")
resp = await upload_file(client, headers, "sample.pdf", search_space_id=search_space_id)
assert resp.status_code == 200
doc_ids = resp.json()["document_ids"]
cleanup_doc_ids.extend(doc_ids)
@ -438,7 +453,7 @@ class TestDeleteWhileProcessing:
# Let it finish so cleanup can work
await poll_document_status(
client, headers, doc_ids, timeout=300.0
client, headers, doc_ids, search_space_id=search_space_id, timeout=300.0
)
@ -454,9 +469,10 @@ class TestStatusPolling:
self,
client: httpx.AsyncClient,
headers: dict[str, str],
search_space_id: int,
cleanup_doc_ids: list[int],
):
resp = await upload_file(client, headers, "sample.txt")
resp = await upload_file(client, headers, "sample.txt", search_space_id=search_space_id)
doc_ids = resp.json()["document_ids"]
cleanup_doc_ids.extend(doc_ids)
@ -464,7 +480,7 @@ class TestStatusPolling:
"/api/v1/documents/status",
headers=headers,
params={
"search_space_id": TEST_SEARCH_SPACE_ID,
"search_space_id": search_space_id,
"document_ids": ",".join(str(d) for d in doc_ids),
},
)
@ -484,4 +500,4 @@ class TestStatusPolling:
"failed",
}
await poll_document_status(client, headers, doc_ids)
await poll_document_status(client, headers, doc_ids, search_space_id=search_space_id)

View file

@ -13,22 +13,51 @@ FIXTURES_DIR = Path(__file__).resolve().parent.parent / "fixtures"
BACKEND_URL = os.environ.get("TEST_BACKEND_URL", "http://localhost:8000")
TEST_EMAIL = os.environ.get("TEST_USER_EMAIL", "testuser@surfsense.com")
TEST_PASSWORD = os.environ.get("TEST_USER_PASSWORD", "testpassword123")
TEST_SEARCH_SPACE_ID = int(os.environ.get("TEST_SEARCH_SPACE_ID", "1"))
async def _request_login(client: httpx.AsyncClient) -> httpx.Response:
    """POST the test user's credentials to the JWT login endpoint."""
    return await client.post(
        "/auth/jwt/login",
        data={"username": TEST_EMAIL, "password": TEST_PASSWORD},
        headers={"Content-Type": "application/x-www-form-urlencoded"},
    )


async def get_auth_token(client: httpx.AsyncClient) -> str:
    """Log in and return a Bearer JWT token, registering the user first if needed.

    Args:
        client: HTTP client already configured with the backend base URL.

    Returns:
        The ``access_token`` field of the successful login response.

    Raises:
        AssertionError: If registration does not return 201, or if the login
            attempted after registration does not return 200.
    """
    response = await _request_login(client)
    if response.status_code == 200:
        return response.json()["access_token"]

    # Any non-200 login falls through to registration; a failure there is
    # reported with the registration response so the cause stays visible.
    reg_response = await client.post(
        "/auth/register",
        json={"email": TEST_EMAIL, "password": TEST_PASSWORD},
    )
    assert reg_response.status_code == 201, (
        f"Registration failed ({reg_response.status_code}): {reg_response.text}"
    )

    # Retry the login with the same credentials now that the user exists.
    response = await _request_login(client)
    assert response.status_code == 200, (
        f"Login after registration failed ({response.status_code}): {response.text}"
    )
    return response.json()["access_token"]
async def get_search_space_id(client: httpx.AsyncClient, token: str) -> int:
    """Return the ``id`` of the first entry listed by ``/api/v1/searchspaces``.

    The request is authenticated with ``token``. An ``AssertionError`` is
    raised when the endpoint does not answer 200 or when the returned
    collection is empty.
    """
    request_headers = auth_headers(token)
    listing = await client.get("/api/v1/searchspaces", headers=request_headers)
    assert listing.status_code == 200, (
        f"Failed to list search spaces ({listing.status_code}): {listing.text}"
    )

    search_spaces = listing.json()
    assert len(search_spaces) > 0, "No search spaces found for test user"

    first_space = search_spaces[0]
    return first_space["id"]
def auth_headers(token: str) -> dict[str, str]:
    """Build the ``Authorization`` header mapping for a Bearer token."""
    bearer_value = "Bearer {}".format(token)
    return {"Authorization": bearer_value}
@ -39,7 +68,7 @@ async def upload_file(
headers: dict[str, str],
fixture_name: str,
*,
search_space_id: int = TEST_SEARCH_SPACE_ID,
search_space_id: int,
filename_override: str | None = None,
) -> httpx.Response:
"""Upload a single fixture file and return the raw response."""
@ -59,7 +88,7 @@ async def upload_multiple_files(
headers: dict[str, str],
fixture_names: list[str],
*,
search_space_id: int = TEST_SEARCH_SPACE_ID,
search_space_id: int,
) -> httpx.Response:
"""Upload multiple fixture files in a single request."""
files = []
@ -86,7 +115,7 @@ async def poll_document_status(
headers: dict[str, str],
document_ids: list[int],
*,
search_space_id: int = TEST_SEARCH_SPACE_ID,
search_space_id: int,
timeout: float = 180.0,
interval: float = 3.0,
) -> dict[int, dict]: