feat: Add end-to-end tests for document upload pipeline and shared test utilities

- Introduced new test files for end-to-end testing of document uploads, including support for .txt, .md, and .pdf formats.
- Created shared fixtures and helper functions for authentication, document management, and cleanup.
- Added sample documents for testing purposes.
- Established a conftest.py file to provide reusable fixtures across test modules.
This commit is contained in:
Anish Sarkar 2026-02-25 16:39:45 +05:30
parent b7447b26f9
commit 41eb68663a
10 changed files with 802 additions and 0 deletions

View file

@ -0,0 +1,157 @@
"""Shared test helpers for authentication, polling, and cleanup."""
from __future__ import annotations
import asyncio
import os
from pathlib import Path
import httpx
# Sample documents live in a "fixtures" directory that is a sibling of the
# directory containing this module.
FIXTURES_DIR = Path(__file__).resolve().parent.parent.joinpath("fixtures")

# Backend location and test-user credentials; each value can be overridden
# through the corresponding environment variable.
BACKEND_URL = os.getenv("TEST_BACKEND_URL", "http://localhost:8000")
TEST_EMAIL = os.getenv("TEST_USER_EMAIL", "testuser@surfsense.com")
TEST_PASSWORD = os.getenv("TEST_USER_PASSWORD", "testpassword123")

# Search space used by default for uploads and status polls (parsed as int).
TEST_SEARCH_SPACE_ID = int(os.getenv("TEST_SEARCH_SPACE_ID", "1"))
async def get_auth_token(client: httpx.AsyncClient) -> str:
    """Authenticate the test user and return the access token.

    Posts ``TEST_EMAIL`` / ``TEST_PASSWORD`` as a URL-encoded form to
    ``/auth/jwt/login``, asserts that the response status is 200, and returns
    the ``access_token`` field of the JSON body.
    """
    credentials = {"username": TEST_EMAIL, "password": TEST_PASSWORD}
    form_headers = {"Content-Type": "application/x-www-form-urlencoded"}

    login = await client.post(
        "/auth/jwt/login",
        data=credentials,
        headers=form_headers,
    )
    assert login.status_code == 200, (
        f"Login failed ({login.status_code}): {login.text}"
    )

    payload = login.json()
    return payload["access_token"]
def auth_headers(token: str) -> dict[str, str]:
    """Build the ``Authorization`` header mapping for a Bearer *token*."""
    bearer_value = f"Bearer {token}"
    return dict(Authorization=bearer_value)
async def upload_file(
    client: httpx.AsyncClient,
    headers: dict[str, str],
    fixture_name: str,
    *,
    search_space_id: int = TEST_SEARCH_SPACE_ID,
    filename_override: str | None = None,
) -> httpx.Response:
    """Post one fixture file to the upload endpoint and return the response.

    The file is read from ``FIXTURES_DIR / fixture_name`` and sent as the
    ``files`` multipart field. It is uploaded under *filename_override* when
    that is given (and non-empty), otherwise under *fixture_name*. The
    response is returned as-is; no status check is performed here.
    """
    upload_name = filename_override if filename_override else fixture_name
    form_fields = {"search_space_id": str(search_space_id)}

    with (FIXTURES_DIR / fixture_name).open("rb") as fixture:
        response = await client.post(
            "/api/v1/documents/fileupload",
            headers=headers,
            files={"files": (upload_name, fixture)},
            data=form_fields,
        )
    return response
async def upload_multiple_files(
    client: httpx.AsyncClient,
    headers: dict[str, str],
    fixture_names: list[str],
    *,
    search_space_id: int = TEST_SEARCH_SPACE_ID,
) -> httpx.Response:
    """Upload multiple fixture files in a single request.

    Args:
        client: HTTP client used to send the request.
        headers: Headers sent with the request (e.g. the Authorization header).
        fixture_names: File names, relative to ``FIXTURES_DIR``, to upload.
            Each file is sent under its fixture name as a ``files`` part.
        search_space_id: Value sent as the ``search_space_id`` form field.

    Returns:
        The raw response of the upload request; no status check is done here.

    Raises:
        OSError: If one of the fixture files cannot be opened. Files opened
            before the failure are still closed.
    """
    # Imported locally so this helper does not rely on a module-level import.
    from contextlib import ExitStack

    # ExitStack closes every handle opened so far, both on normal exit and
    # when opening a later file or sending the request raises.
    with ExitStack() as stack:
        files = [
            ("files", (name, stack.enter_context(open(FIXTURES_DIR / name, "rb"))))
            for name in fixture_names
        ]
        return await client.post(
            "/api/v1/documents/fileupload",
            headers=headers,
            files=files,
            data={"search_space_id": str(search_space_id)},
        )
async def poll_document_status(
    client: httpx.AsyncClient,
    headers: dict[str, str],
    document_ids: list[int],
    *,
    search_space_id: int = TEST_SEARCH_SPACE_ID,
    timeout: float = 180.0,
    interval: float = 3.0,
) -> dict[int, dict]:
    """Poll the status endpoint until every document is in a terminal state.

    Repeatedly calls ``GET /api/v1/documents/status`` until each requested
    document reports state ``ready`` or ``failed``, or until *timeout*
    seconds of wall-clock time have passed. The endpoint is always queried
    at least once, even when *timeout* is zero or negative.

    Args:
        client: HTTP client used to send the requests.
        headers: Headers sent with every request.
        document_ids: IDs of the documents to wait for.
        search_space_id: Value sent as the ``search_space_id`` query parameter.
        timeout: Maximum number of seconds to keep polling.
        interval: Pause in seconds between two consecutive polls.

    Returns:
        A mapping of ``{document_id: status_item_dict}`` built from the last
        response.

    Raises:
        AssertionError: If a status request does not return HTTP 200.
        TimeoutError: If not all documents reached a terminal state in time.
    """
    ids_param = ",".join(str(d) for d in document_ids)
    terminal_states = {"ready", "failed"}

    # Measure against the event loop's monotonic clock so that the time spent
    # inside the HTTP requests counts towards the timeout as well.
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout

    while True:
        resp = await client.get(
            "/api/v1/documents/status",
            headers=headers,
            params={
                "search_space_id": search_space_id,
                "document_ids": ids_param,
            },
        )
        assert resp.status_code == 200, (
            f"Status poll failed ({resp.status_code}): {resp.text}"
        )
        items = {item["id"]: item for item in resp.json()["items"]}
        # A document missing from the response counts as "not finished yet".
        if all(
            items.get(did, {}).get("status", {}).get("state") in terminal_states
            for did in document_ids
        ):
            return items

        remaining = deadline - loop.time()
        if remaining <= 0:
            break
        # Never sleep past the deadline; the loop then performs one last poll.
        await asyncio.sleep(min(interval, remaining))

    # ``items`` is always bound here because at least one poll has completed.
    raise TimeoutError(
        f"Documents {document_ids} did not reach terminal state within {timeout}s. "
        f"Last status: {items}"
    )
async def get_document(
    client: httpx.AsyncClient,
    headers: dict[str, str],
    document_id: int,
) -> dict:
    """Return the decoded JSON body of ``GET /api/v1/documents/{document_id}``.

    Asserts that the response status is 200 before decoding the body.
    """
    url = f"/api/v1/documents/{document_id}"
    response = await client.get(url, headers=headers)
    assert response.status_code == 200, (
        f"GET document {document_id} failed ({response.status_code}): {response.text}"
    )
    return response.json()
async def delete_document(
    client: httpx.AsyncClient,
    headers: dict[str, str],
    document_id: int,
) -> httpx.Response:
    """Send ``DELETE /api/v1/documents/{document_id}`` and return the response.

    The response is returned unchecked so that callers can inspect the
    status code themselves.
    """
    endpoint = f"/api/v1/documents/{document_id}"
    response = await client.delete(endpoint, headers=headers)
    return response