diff --git a/surfsense_backend/tests/conftest.py b/surfsense_backend/tests/conftest.py index 0eb36016f..bfdb47a95 100644 --- a/surfsense_backend/tests/conftest.py +++ b/surfsense_backend/tests/conftest.py @@ -10,10 +10,10 @@ import pytest from tests.utils.helpers import ( BACKEND_URL, - TEST_SEARCH_SPACE_ID, auth_headers, delete_document, get_auth_token, + get_search_space_id, ) @@ -22,20 +22,24 @@ def backend_url() -> str: return BACKEND_URL -@pytest.fixture(scope="session") -def search_space_id() -> int: - return TEST_SEARCH_SPACE_ID - - @pytest.fixture(scope="session") async def auth_token(backend_url: str) -> str: - """Authenticate once per session and return the JWT token.""" + """Authenticate once per session, registering the user if needed.""" async with httpx.AsyncClient( base_url=backend_url, timeout=30.0 ) as client: return await get_auth_token(client) +@pytest.fixture(scope="session") +async def search_space_id(backend_url: str, auth_token: str) -> int: + """Discover the first search space belonging to the test user.""" + async with httpx.AsyncClient( + base_url=backend_url, timeout=30.0 + ) as client: + return await get_search_space_id(client, auth_token) + + @pytest.fixture(scope="session") def headers(auth_token: str) -> dict[str, str]: """Authorization headers reused across all tests in the session.""" diff --git a/surfsense_backend/tests/e2e/test_document_upload.py b/surfsense_backend/tests/e2e/test_document_upload.py index d4540f0b6..08dc3bd6e 100644 --- a/surfsense_backend/tests/e2e/test_document_upload.py +++ b/surfsense_backend/tests/e2e/test_document_upload.py @@ -20,7 +20,6 @@ import httpx from tests.utils.helpers import ( FIXTURES_DIR, - TEST_SEARCH_SPACE_ID, delete_document, get_document, poll_document_status, @@ -56,9 +55,10 @@ class TestTxtFileUpload: self, client: httpx.AsyncClient, headers: dict[str, str], + search_space_id: int, cleanup_doc_ids: list[int], ): - resp = await upload_file(client, headers, "sample.txt") + resp = await upload_file(client, headers, "sample.txt", search_space_id=search_space_id) assert resp.status_code == 200 body = resp.json() @@ -70,14 +70,15 @@ class TestTxtFileUpload: self, client: httpx.AsyncClient, headers: dict[str, str], + search_space_id: int, cleanup_doc_ids: list[int], ): - resp = await upload_file(client, headers, "sample.txt") + resp = await upload_file(client, headers, "sample.txt", search_space_id=search_space_id) assert resp.status_code == 200 doc_ids = resp.json()["document_ids"] cleanup_doc_ids.extend(doc_ids) - statuses = await poll_document_status(client, headers, doc_ids) + statuses = await poll_document_status(client, headers, doc_ids, search_space_id=search_space_id) for did in doc_ids: assert statuses[did]["status"]["state"] == "ready" @@ -85,13 +86,14 @@ class TestTxtFileUpload: self, client: httpx.AsyncClient, headers: dict[str, str], + search_space_id: int, cleanup_doc_ids: list[int], ): - resp = await upload_file(client, headers, "sample.txt") + resp = await upload_file(client, headers, "sample.txt", search_space_id=search_space_id) doc_ids = resp.json()["document_ids"] cleanup_doc_ids.extend(doc_ids) - await poll_document_status(client, headers, doc_ids) + await poll_document_status(client, headers, doc_ids, search_space_id=search_space_id) doc = await get_document(client, headers, doc_ids[0]) _assert_document_ready(doc, expected_filename="sample.txt") @@ -110,14 +112,15 @@ class TestMarkdownFileUpload: self, client: httpx.AsyncClient, headers: dict[str, str], + search_space_id: int, cleanup_doc_ids: list[int], ): - resp = await upload_file(client, headers, "sample.md") + resp = await upload_file(client, headers, "sample.md", search_space_id=search_space_id) assert resp.status_code == 200 doc_ids = resp.json()["document_ids"] cleanup_doc_ids.extend(doc_ids) - statuses = await poll_document_status(client, headers, doc_ids) + statuses = await poll_document_status(client, headers, doc_ids, search_space_id=search_space_id) for did in doc_ids: assert statuses[did]["status"]["state"] == "ready" @@ -125,13 +128,14 @@ class TestMarkdownFileUpload: self, client: httpx.AsyncClient, headers: dict[str, str], + search_space_id: int, cleanup_doc_ids: list[int], ): - resp = await upload_file(client, headers, "sample.md") + resp = await upload_file(client, headers, "sample.md", search_space_id=search_space_id) doc_ids = resp.json()["document_ids"] cleanup_doc_ids.extend(doc_ids) - await poll_document_status(client, headers, doc_ids) + await poll_document_status(client, headers, doc_ids, search_space_id=search_space_id) doc = await get_document(client, headers, doc_ids[0]) _assert_document_ready(doc, expected_filename="sample.md") @@ -150,15 +154,16 @@ class TestPdfFileUpload: self, client: httpx.AsyncClient, headers: dict[str, str], + search_space_id: int, cleanup_doc_ids: list[int], ): - resp = await upload_file(client, headers, "sample.pdf") + resp = await upload_file(client, headers, "sample.pdf", search_space_id=search_space_id) assert resp.status_code == 200 doc_ids = resp.json()["document_ids"] cleanup_doc_ids.extend(doc_ids) statuses = await poll_document_status( - client, headers, doc_ids, timeout=300.0 + client, headers, doc_ids, search_space_id=search_space_id, timeout=300.0 ) for did in doc_ids: assert statuses[did]["status"]["state"] == "ready" @@ -167,14 +172,15 @@ class TestPdfFileUpload: self, client: httpx.AsyncClient, headers: dict[str, str], + search_space_id: int, cleanup_doc_ids: list[int], ): - resp = await upload_file(client, headers, "sample.pdf") + resp = await upload_file(client, headers, "sample.pdf", search_space_id=search_space_id) doc_ids = resp.json()["document_ids"] cleanup_doc_ids.extend(doc_ids) await poll_document_status( - client, headers, doc_ids, timeout=300.0 + client, headers, doc_ids, search_space_id=search_space_id, timeout=300.0 ) doc = await get_document(client, headers, doc_ids[0]) @@ -198,10 +204,11 @@ class TestMultiFileUpload: self, client: httpx.AsyncClient, headers: dict[str, str], + search_space_id: int, cleanup_doc_ids: list[int], ): resp = await upload_multiple_files( - client, headers, ["sample.txt", "sample.md"] + client, headers, ["sample.txt", "sample.md"], search_space_id=search_space_id ) assert resp.status_code == 200 @@ -214,15 +221,16 @@ class TestMultiFileUpload: self, client: httpx.AsyncClient, headers: dict[str, str], + search_space_id: int, cleanup_doc_ids: list[int], ): resp = await upload_multiple_files( - client, headers, ["sample.txt", "sample.md"] + client, headers, ["sample.txt", "sample.md"], search_space_id=search_space_id ) doc_ids = resp.json()["document_ids"] cleanup_doc_ids.extend(doc_ids) - statuses = await poll_document_status(client, headers, doc_ids) + statuses = await poll_document_status(client, headers, doc_ids, search_space_id=search_space_id) for did in doc_ids: assert statuses[did]["status"]["state"] == "ready" @@ -242,18 +250,19 @@ class TestDuplicateFileUpload: self, client: httpx.AsyncClient, headers: dict[str, str], + search_space_id: int, cleanup_doc_ids: list[int], ): # First upload - resp1 = await upload_file(client, headers, "sample.txt") + resp1 = await upload_file(client, headers, "sample.txt", search_space_id=search_space_id) assert resp1.status_code == 200 first_ids = resp1.json()["document_ids"] cleanup_doc_ids.extend(first_ids) - await poll_document_status(client, headers, first_ids) + await poll_document_status(client, headers, first_ids, search_space_id=search_space_id) # Second upload of the same file - resp2 = await upload_file(client, headers, "sample.txt") + resp2 = await upload_file(client, headers, "sample.txt", search_space_id=search_space_id) assert resp2.status_code == 200 body2 = resp2.json() @@ -277,15 +286,16 @@ class TestDuplicateContentDetection: self, client: httpx.AsyncClient, headers: dict[str, str], + search_space_id: int, cleanup_doc_ids: list[int], tmp_path: Path, ): # First upload - resp1 = await upload_file(client, headers, "sample.txt") + resp1 = await upload_file(client, headers, "sample.txt", search_space_id=search_space_id) assert resp1.status_code == 200 first_ids = resp1.json()["document_ids"] cleanup_doc_ids.extend(first_ids) - await poll_document_status(client, headers, first_ids) + await poll_document_status(client, headers, first_ids, search_space_id=search_space_id) # Copy fixture content to a differently named temp file src = FIXTURES_DIR / "sample.txt" @@ -297,14 +307,14 @@ class TestDuplicateContentDetection: "/api/v1/documents/fileupload", headers=headers, files={"files": ("renamed_sample.txt", f)}, - data={"search_space_id": str(TEST_SEARCH_SPACE_ID)}, + data={"search_space_id": str(search_space_id)}, ) assert resp2.status_code == 200 second_ids = resp2.json()["document_ids"] cleanup_doc_ids.extend(second_ids) if second_ids: - statuses = await poll_document_status(client, headers, second_ids) + statuses = await poll_document_status(client, headers, second_ids, search_space_id=search_space_id) for did in second_ids: assert statuses[did]["status"]["state"] == "failed" assert "duplicate" in ( @@ -324,9 +334,10 @@ class TestEmptyFileUpload: self, client: httpx.AsyncClient, headers: dict[str, str], + search_space_id: int, cleanup_doc_ids: list[int], ): - resp = await upload_file(client, headers, "empty.pdf") + resp = await upload_file(client, headers, "empty.pdf", search_space_id=search_space_id) assert resp.status_code == 200 doc_ids = resp.json()["document_ids"] @@ -334,7 +345,7 @@ class TestEmptyFileUpload: if doc_ids: statuses = await poll_document_status( - client, headers, doc_ids, timeout=120.0 + client, headers, doc_ids, search_space_id=search_space_id, timeout=120.0 ) for did in doc_ids: assert statuses[did]["status"]["state"] == "failed" @@ -354,13 +365,14 @@ class TestUnauthenticatedUpload: async def test_upload_without_auth_returns_401( self, client: httpx.AsyncClient, + search_space_id: int, ): file_path = FIXTURES_DIR / "sample.txt" with open(file_path, "rb") as f: resp = await client.post( "/api/v1/documents/fileupload", files={"files": ("sample.txt", f)}, - data={"search_space_id": str(TEST_SEARCH_SPACE_ID)}, + data={"search_space_id": str(search_space_id)}, ) assert resp.status_code == 401 @@ -377,11 +389,12 @@ class TestNoFilesUpload: self, client: httpx.AsyncClient, headers: dict[str, str], + search_space_id: int, ): resp = await client.post( "/api/v1/documents/fileupload", headers=headers, - data={"search_space_id": str(TEST_SEARCH_SPACE_ID)}, + data={"search_space_id": str(search_space_id)}, ) assert resp.status_code in {400, 422} @@ -398,10 +411,11 @@ class TestDocumentDeletion: self, client: httpx.AsyncClient, headers: dict[str, str], + search_space_id: int, ): - resp = await upload_file(client, headers, "sample.txt") + resp = await upload_file(client, headers, "sample.txt", search_space_id=search_space_id) doc_ids = resp.json()["document_ids"] - await poll_document_status(client, headers, doc_ids) + await poll_document_status(client, headers, doc_ids, search_space_id=search_space_id) del_resp = await delete_document(client, headers, doc_ids[0]) assert del_resp.status_code == 200 @@ -425,9 +439,10 @@ class TestDeleteWhileProcessing: self, client: httpx.AsyncClient, headers: dict[str, str], + search_space_id: int, cleanup_doc_ids: list[int], ): - resp = await upload_file(client, headers, "sample.pdf") + resp = await upload_file(client, headers, "sample.pdf", search_space_id=search_space_id) assert resp.status_code == 200 doc_ids = resp.json()["document_ids"] cleanup_doc_ids.extend(doc_ids) @@ -438,7 +453,7 @@ class TestDeleteWhileProcessing: # Let it finish so cleanup can work await poll_document_status( - client, headers, doc_ids, timeout=300.0 + client, headers, doc_ids, search_space_id=search_space_id, timeout=300.0 ) @@ -454,9 +469,10 @@ class TestStatusPolling: self, client: httpx.AsyncClient, headers: dict[str, str], + search_space_id: int, cleanup_doc_ids: list[int], ): - resp = await upload_file(client, headers, "sample.txt") + resp = await upload_file(client, headers, "sample.txt", search_space_id=search_space_id) doc_ids = resp.json()["document_ids"] cleanup_doc_ids.extend(doc_ids) @@ -464,7 +480,7 @@ class TestStatusPolling: "/api/v1/documents/status", headers=headers, params={ - "search_space_id": TEST_SEARCH_SPACE_ID, + "search_space_id": search_space_id, "document_ids": ",".join(str(d) for d in doc_ids), }, ) @@ -484,4 +500,4 @@ class TestStatusPolling: "failed", } - await poll_document_status(client, headers, doc_ids) + await poll_document_status(client, headers, doc_ids, search_space_id=search_space_id) diff --git a/surfsense_backend/tests/utils/helpers.py b/surfsense_backend/tests/utils/helpers.py index 7c68bfac5..c1b362ec7 100644 --- a/surfsense_backend/tests/utils/helpers.py +++ b/surfsense_backend/tests/utils/helpers.py @@ -13,22 +13,51 @@ FIXTURES_DIR = Path(__file__).resolve().parent.parent / "fixtures" BACKEND_URL = os.environ.get("TEST_BACKEND_URL", "http://localhost:8000") TEST_EMAIL = os.environ.get("TEST_USER_EMAIL", "testuser@surfsense.com") TEST_PASSWORD = os.environ.get("TEST_USER_PASSWORD", "testpassword123") -TEST_SEARCH_SPACE_ID = int(os.environ.get("TEST_SEARCH_SPACE_ID", "1")) async def get_auth_token(client: httpx.AsyncClient) -> str: - """Log in and return a Bearer JWT token.""" + """Log in and return a Bearer JWT token, registering the user first if needed.""" + response = await client.post( + "/auth/jwt/login", + data={"username": TEST_EMAIL, "password": TEST_PASSWORD}, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + ) + if response.status_code == 200: + return response.json()["access_token"] + + reg_response = await client.post( + "/auth/register", + json={"email": TEST_EMAIL, "password": TEST_PASSWORD}, + ) + assert reg_response.status_code == 201, ( + f"Registration failed ({reg_response.status_code}): {reg_response.text}" + ) + response = await client.post( "/auth/jwt/login", data={"username": TEST_EMAIL, "password": TEST_PASSWORD}, headers={"Content-Type": "application/x-www-form-urlencoded"}, ) assert response.status_code == 200, ( - f"Login failed ({response.status_code}): {response.text}" + f"Login after registration failed ({response.status_code}): {response.text}" ) return response.json()["access_token"] +async def get_search_space_id(client: httpx.AsyncClient, token: str) -> int: + """Fetch the first search space owned by the test user.""" + resp = await client.get( + "/api/v1/searchspaces", + headers=auth_headers(token), + ) + assert resp.status_code == 200, ( + f"Failed to list search spaces ({resp.status_code}): {resp.text}" + ) + spaces = resp.json() + assert len(spaces) > 0, "No search spaces found for test user" + return spaces[0]["id"] + + def auth_headers(token: str) -> dict[str, str]: """Return Authorization header dict for a Bearer token.""" return {"Authorization": f"Bearer {token}"} @@ -39,7 +68,7 @@ async def upload_file( headers: dict[str, str], fixture_name: str, *, - search_space_id: int = TEST_SEARCH_SPACE_ID, + search_space_id: int, filename_override: str | None = None, ) -> httpx.Response: """Upload a single fixture file and return the raw response.""" @@ -59,7 +88,7 @@ async def upload_multiple_files( headers: dict[str, str], fixture_names: list[str], *, - search_space_id: int = TEST_SEARCH_SPACE_ID, + search_space_id: int, ) -> httpx.Response: """Upload multiple fixture files in a single request.""" files = [] @@ -86,7 +115,7 @@ async def poll_document_status( headers: dict[str, str], document_ids: list[int], *, - search_space_id: int = TEST_SEARCH_SPACE_ID, + search_space_id: int, timeout: float = 180.0, interval: float = 3.0, ) -> dict[int, dict]: