diff --git a/surfsense_backend/tests/__init__.py b/surfsense_backend/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/tests/conftest.py b/surfsense_backend/tests/conftest.py new file mode 100644 index 000000000..0eb36016f --- /dev/null +++ b/surfsense_backend/tests/conftest.py @@ -0,0 +1,73 @@ +"""Root conftest — shared fixtures available to all test modules.""" + +from __future__ import annotations + +import contextlib +from collections.abc import AsyncGenerator + +import httpx +import pytest + +from tests.utils.helpers import ( + BACKEND_URL, + TEST_SEARCH_SPACE_ID, + auth_headers, + delete_document, + get_auth_token, +) + + +@pytest.fixture(scope="session") +def backend_url() -> str: + return BACKEND_URL + + +@pytest.fixture(scope="session") +def search_space_id() -> int: + return TEST_SEARCH_SPACE_ID + + +@pytest.fixture(scope="session") +async def auth_token(backend_url: str) -> str: + """Authenticate once per session and return the JWT token.""" + async with httpx.AsyncClient( + base_url=backend_url, timeout=30.0 + ) as client: + return await get_auth_token(client) + + +@pytest.fixture(scope="session") +def headers(auth_token: str) -> dict[str, str]: + """Authorization headers reused across all tests in the session.""" + return auth_headers(auth_token) + + +@pytest.fixture +async def client(backend_url: str) -> AsyncGenerator[httpx.AsyncClient]: + """Per-test async HTTP client pointing at the running backend.""" + async with httpx.AsyncClient( + base_url=backend_url, timeout=180.0 + ) as c: + yield c + + +@pytest.fixture +def cleanup_doc_ids() -> list[int]: + """Accumulator for document IDs that should be deleted after the test.""" + return [] + + +@pytest.fixture(autouse=True) +async def _cleanup_documents( + client: httpx.AsyncClient, + headers: dict[str, str], + cleanup_doc_ids: list[int], +): + """ + Runs after every test. 
Deletes any document IDs that were appended to + the ``cleanup_doc_ids`` list during the test body. + """ + yield + for doc_id in cleanup_doc_ids: + with contextlib.suppress(Exception): + await delete_document(client, headers, doc_id) diff --git a/surfsense_backend/tests/e2e/__init__.py b/surfsense_backend/tests/e2e/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/tests/e2e/test_document_upload.py b/surfsense_backend/tests/e2e/test_document_upload.py new file mode 100644 index 000000000..d4540f0b6 --- /dev/null +++ b/surfsense_backend/tests/e2e/test_document_upload.py @@ -0,0 +1,487 @@ +""" +End-to-end tests for manual document upload. + +These tests exercise the full pipeline: + API upload → Celery task → ETL extraction → chunking → embedding → DB storage + +Prerequisites (must be running): + - FastAPI backend + - PostgreSQL + pgvector + - Redis + - Celery worker +""" + +from __future__ import annotations + +import shutil +from pathlib import Path + +import httpx + +from tests.utils.helpers import ( + FIXTURES_DIR, + TEST_SEARCH_SPACE_ID, + delete_document, + get_document, + poll_document_status, + upload_file, + upload_multiple_files, +) + +# --------------------------------------------------------------------------- +# Helpers local to this module +# --------------------------------------------------------------------------- + +def _assert_document_ready(doc: dict, *, expected_filename: str) -> None: + """Common assertions for a successfully processed document.""" + assert doc["title"] == expected_filename + assert doc["document_type"] == "FILE" + assert doc["content"], "Document content (summary) should not be empty" + assert doc["content_hash"], "content_hash should be set" + assert doc["document_metadata"].get("FILE_NAME") == expected_filename + assert doc["document_metadata"].get("ETL_SERVICE"), "ETL_SERVICE should be set" + if doc.get("status"): + assert doc["status"]["state"] == "ready" + + +# 
--------------------------------------------------------------------------- +# Test A: Upload a .txt file (direct read path — no ETL service needed) +# --------------------------------------------------------------------------- + + +class TestTxtFileUpload: + """Upload a plain-text file and verify the full pipeline.""" + + async def test_upload_txt_returns_document_id( + self, + client: httpx.AsyncClient, + headers: dict[str, str], + cleanup_doc_ids: list[int], + ): + resp = await upload_file(client, headers, "sample.txt") + assert resp.status_code == 200 + + body = resp.json() + assert body["pending_files"] >= 1 + assert len(body["document_ids"]) >= 1 + cleanup_doc_ids.extend(body["document_ids"]) + + async def test_txt_processing_reaches_ready( + self, + client: httpx.AsyncClient, + headers: dict[str, str], + cleanup_doc_ids: list[int], + ): + resp = await upload_file(client, headers, "sample.txt") + assert resp.status_code == 200 + doc_ids = resp.json()["document_ids"] + cleanup_doc_ids.extend(doc_ids) + + statuses = await poll_document_status(client, headers, doc_ids) + for did in doc_ids: + assert statuses[did]["status"]["state"] == "ready" + + async def test_txt_document_fields_populated( + self, + client: httpx.AsyncClient, + headers: dict[str, str], + cleanup_doc_ids: list[int], + ): + resp = await upload_file(client, headers, "sample.txt") + doc_ids = resp.json()["document_ids"] + cleanup_doc_ids.extend(doc_ids) + + await poll_document_status(client, headers, doc_ids) + + doc = await get_document(client, headers, doc_ids[0]) + _assert_document_ready(doc, expected_filename="sample.txt") + assert doc["document_metadata"]["ETL_SERVICE"] == "MARKDOWN" + + +# --------------------------------------------------------------------------- +# Test B: Upload a .md file (markdown direct-read path) +# --------------------------------------------------------------------------- + + +class TestMarkdownFileUpload: + """Upload a Markdown file and verify the full pipeline.""" 
+ + async def test_md_processing_reaches_ready( + self, + client: httpx.AsyncClient, + headers: dict[str, str], + cleanup_doc_ids: list[int], + ): + resp = await upload_file(client, headers, "sample.md") + assert resp.status_code == 200 + doc_ids = resp.json()["document_ids"] + cleanup_doc_ids.extend(doc_ids) + + statuses = await poll_document_status(client, headers, doc_ids) + for did in doc_ids: + assert statuses[did]["status"]["state"] == "ready" + + async def test_md_document_fields_populated( + self, + client: httpx.AsyncClient, + headers: dict[str, str], + cleanup_doc_ids: list[int], + ): + resp = await upload_file(client, headers, "sample.md") + doc_ids = resp.json()["document_ids"] + cleanup_doc_ids.extend(doc_ids) + + await poll_document_status(client, headers, doc_ids) + + doc = await get_document(client, headers, doc_ids[0]) + _assert_document_ready(doc, expected_filename="sample.md") + assert doc["document_metadata"]["ETL_SERVICE"] == "MARKDOWN" + + +# --------------------------------------------------------------------------- +# Test C: Upload a .pdf file (ETL path — Docling / Unstructured) +# --------------------------------------------------------------------------- + + +class TestPdfFileUpload: + """Upload a PDF and verify it goes through the ETL extraction pipeline.""" + + async def test_pdf_processing_reaches_ready( + self, + client: httpx.AsyncClient, + headers: dict[str, str], + cleanup_doc_ids: list[int], + ): + resp = await upload_file(client, headers, "sample.pdf") + assert resp.status_code == 200 + doc_ids = resp.json()["document_ids"] + cleanup_doc_ids.extend(doc_ids) + + statuses = await poll_document_status( + client, headers, doc_ids, timeout=300.0 + ) + for did in doc_ids: + assert statuses[did]["status"]["state"] == "ready" + + async def test_pdf_document_fields_populated( + self, + client: httpx.AsyncClient, + headers: dict[str, str], + cleanup_doc_ids: list[int], + ): + resp = await upload_file(client, headers, "sample.pdf") + 
doc_ids = resp.json()["document_ids"] + cleanup_doc_ids.extend(doc_ids) + + await poll_document_status( + client, headers, doc_ids, timeout=300.0 + ) + + doc = await get_document(client, headers, doc_ids[0]) + _assert_document_ready(doc, expected_filename="sample.pdf") + assert doc["document_metadata"]["ETL_SERVICE"] in { + "DOCLING", + "UNSTRUCTURED", + "LLAMACLOUD", + } + + +# --------------------------------------------------------------------------- +# Test D: Upload multiple files in a single request +# --------------------------------------------------------------------------- + + +class TestMultiFileUpload: + """Upload several files at once and verify all are processed.""" + + async def test_multi_upload_returns_all_ids( + self, + client: httpx.AsyncClient, + headers: dict[str, str], + cleanup_doc_ids: list[int], + ): + resp = await upload_multiple_files( + client, headers, ["sample.txt", "sample.md"] + ) + assert resp.status_code == 200 + + body = resp.json() + assert body["pending_files"] == 2 + assert len(body["document_ids"]) == 2 + cleanup_doc_ids.extend(body["document_ids"]) + + async def test_multi_upload_all_reach_ready( + self, + client: httpx.AsyncClient, + headers: dict[str, str], + cleanup_doc_ids: list[int], + ): + resp = await upload_multiple_files( + client, headers, ["sample.txt", "sample.md"] + ) + doc_ids = resp.json()["document_ids"] + cleanup_doc_ids.extend(doc_ids) + + statuses = await poll_document_status(client, headers, doc_ids) + for did in doc_ids: + assert statuses[did]["status"]["state"] == "ready" + + +# --------------------------------------------------------------------------- +# Test E: Duplicate file upload (same file uploaded twice) +# --------------------------------------------------------------------------- + + +class TestDuplicateFileUpload: + """ + Uploading the exact same file a second time should be detected as a + duplicate via ``unique_identifier_hash``. 
+ """ + + async def test_duplicate_file_is_skipped( + self, + client: httpx.AsyncClient, + headers: dict[str, str], + cleanup_doc_ids: list[int], + ): + # First upload + resp1 = await upload_file(client, headers, "sample.txt") + assert resp1.status_code == 200 + first_ids = resp1.json()["document_ids"] + cleanup_doc_ids.extend(first_ids) + + await poll_document_status(client, headers, first_ids) + + # Second upload of the same file + resp2 = await upload_file(client, headers, "sample.txt") + assert resp2.status_code == 200 + + body2 = resp2.json() + assert body2["skipped_duplicates"] >= 1 + assert len(body2["duplicate_document_ids"]) >= 1 + cleanup_doc_ids.extend(body2.get("document_ids", [])) + + +# --------------------------------------------------------------------------- +# Test F: Duplicate content detection (different name, same content) +# --------------------------------------------------------------------------- + + +class TestDuplicateContentDetection: + """ + Uploading a file with a different name but identical content should be + detected as duplicate content via ``content_hash``. 
+ """ + + async def test_same_content_different_name_detected( + self, + client: httpx.AsyncClient, + headers: dict[str, str], + cleanup_doc_ids: list[int], + tmp_path: Path, + ): + # First upload + resp1 = await upload_file(client, headers, "sample.txt") + assert resp1.status_code == 200 + first_ids = resp1.json()["document_ids"] + cleanup_doc_ids.extend(first_ids) + await poll_document_status(client, headers, first_ids) + + # Copy fixture content to a differently named temp file + src = FIXTURES_DIR / "sample.txt" + dest = tmp_path / "renamed_sample.txt" + shutil.copy2(src, dest) + + with open(dest, "rb") as f: + resp2 = await client.post( + "/api/v1/documents/fileupload", + headers=headers, + files={"files": ("renamed_sample.txt", f)}, + data={"search_space_id": str(TEST_SEARCH_SPACE_ID)}, + ) + assert resp2.status_code == 200 + second_ids = resp2.json()["document_ids"] + cleanup_doc_ids.extend(second_ids) + + if second_ids: + statuses = await poll_document_status(client, headers, second_ids) + for did in second_ids: + assert statuses[did]["status"]["state"] == "failed" + assert "duplicate" in ( + statuses[did]["status"].get("reason", "").lower() + ) + + +# --------------------------------------------------------------------------- +# Test G: Empty / corrupt file handling +# --------------------------------------------------------------------------- + + +class TestEmptyFileUpload: + """An empty file should be processed but ultimately fail gracefully.""" + + async def test_empty_pdf_fails( + self, + client: httpx.AsyncClient, + headers: dict[str, str], + cleanup_doc_ids: list[int], + ): + resp = await upload_file(client, headers, "empty.pdf") + assert resp.status_code == 200 + + doc_ids = resp.json()["document_ids"] + cleanup_doc_ids.extend(doc_ids) + + if doc_ids: + statuses = await poll_document_status( + client, headers, doc_ids, timeout=120.0 + ) + for did in doc_ids: + assert statuses[did]["status"]["state"] == "failed" + assert 
statuses[did]["status"].get("reason"), ( + "Failed document should include a reason" + ) + + +# --------------------------------------------------------------------------- +# Test H: Upload without authentication +# --------------------------------------------------------------------------- + + +class TestUnauthenticatedUpload: + """Requests without a valid JWT should be rejected.""" + + async def test_upload_without_auth_returns_401( + self, + client: httpx.AsyncClient, + ): + file_path = FIXTURES_DIR / "sample.txt" + with open(file_path, "rb") as f: + resp = await client.post( + "/api/v1/documents/fileupload", + files={"files": ("sample.txt", f)}, + data={"search_space_id": str(TEST_SEARCH_SPACE_ID)}, + ) + assert resp.status_code == 401 + + +# --------------------------------------------------------------------------- +# Test I: Upload with no files attached +# --------------------------------------------------------------------------- + + +class TestNoFilesUpload: + """Submitting the form with zero files should return a validation error.""" + + async def test_no_files_returns_error( + self, + client: httpx.AsyncClient, + headers: dict[str, str], + ): + resp = await client.post( + "/api/v1/documents/fileupload", + headers=headers, + data={"search_space_id": str(TEST_SEARCH_SPACE_ID)}, + ) + assert resp.status_code in {400, 422} + + +# --------------------------------------------------------------------------- +# Test J: Document deletion after successful upload +# --------------------------------------------------------------------------- + + +class TestDocumentDeletion: + """Upload, wait for ready, delete, then verify it's gone.""" + + async def test_delete_processed_document( + self, + client: httpx.AsyncClient, + headers: dict[str, str], + ): + resp = await upload_file(client, headers, "sample.txt") + doc_ids = resp.json()["document_ids"] + await poll_document_status(client, headers, doc_ids) + + del_resp = await delete_document(client, headers, doc_ids[0]) + 
assert del_resp.status_code == 200 + + get_resp = await client.get( + f"/api/v1/documents/{doc_ids[0]}", + headers=headers, + ) + assert get_resp.status_code == 404 + + +# --------------------------------------------------------------------------- +# Test K: Cannot delete a document while it is still processing +# --------------------------------------------------------------------------- + + +class TestDeleteWhileProcessing: + """Attempting to delete a pending/processing document should be rejected.""" + + async def test_delete_pending_document_returns_409( + self, + client: httpx.AsyncClient, + headers: dict[str, str], + cleanup_doc_ids: list[int], + ): + resp = await upload_file(client, headers, "sample.pdf") + assert resp.status_code == 200 + doc_ids = resp.json()["document_ids"] + cleanup_doc_ids.extend(doc_ids) + + # Immediately try to delete before processing finishes + del_resp = await delete_document(client, headers, doc_ids[0]) + assert del_resp.status_code == 409 + + # Let it finish so cleanup can work + await poll_document_status( + client, headers, doc_ids, timeout=300.0 + ) + + +# --------------------------------------------------------------------------- +# Test L: Status polling returns correct structure +# --------------------------------------------------------------------------- + + +class TestStatusPolling: + """Verify the status endpoint returns well-formed responses.""" + + async def test_status_endpoint_returns_items( + self, + client: httpx.AsyncClient, + headers: dict[str, str], + cleanup_doc_ids: list[int], + ): + resp = await upload_file(client, headers, "sample.txt") + doc_ids = resp.json()["document_ids"] + cleanup_doc_ids.extend(doc_ids) + + status_resp = await client.get( + "/api/v1/documents/status", + headers=headers, + params={ + "search_space_id": TEST_SEARCH_SPACE_ID, + "document_ids": ",".join(str(d) for d in doc_ids), + }, + ) + assert status_resp.status_code == 200 + + body = status_resp.json() + assert "items" in body + 
assert len(body["items"]) == len(doc_ids) + for item in body["items"]: + assert "id" in item + assert "status" in item + assert "state" in item["status"] + assert item["status"]["state"] in { + "pending", + "processing", + "ready", + "failed", + } + + await poll_document_status(client, headers, doc_ids) diff --git a/surfsense_backend/tests/fixtures/empty.pdf b/surfsense_backend/tests/fixtures/empty.pdf new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/tests/fixtures/sample.md b/surfsense_backend/tests/fixtures/sample.md new file mode 100644 index 000000000..7217540d8 --- /dev/null +++ b/surfsense_backend/tests/fixtures/sample.md @@ -0,0 +1,51 @@ +# SurfSense Test Document + +## Overview + +This is a **sample markdown document** used for end-to-end testing of the manual +document upload pipeline. It includes various markdown formatting elements. + +## Key Features + +- Document upload and processing +- Automatic chunking of content +- Embedding generation for semantic search +- Real-time status tracking via ElectricSQL + +## Technical Architecture + +### Backend Stack + +The SurfSense backend is built with: + +1. **FastAPI** for the REST API +2. **PostgreSQL** with pgvector for vector storage +3. **Celery** with Redis for background task processing +4. 
**Docling/Unstructured** for document parsing (ETL) + +### Processing Pipeline + +Documents go through a multi-stage pipeline: + +| Stage | Description | +|-------|-------------| +| Upload | File received via API endpoint | +| Parsing | Content extracted using ETL service | +| Chunking | Text split into semantic chunks | +| Embedding | Vector representations generated | +| Storage | Chunks stored with embeddings in pgvector | + +## Code Example + +```python +async def process_document(file_path: str) -> Document: + content = extract_content(file_path) + chunks = create_chunks(content) + embeddings = generate_embeddings(chunks) + return store_document(chunks, embeddings) +``` + +## Conclusion + +This document serves as a test fixture to validate the complete document processing +pipeline from upload through to chunk creation and embedding storage. diff --git a/surfsense_backend/tests/fixtures/sample.pdf b/surfsense_backend/tests/fixtures/sample.pdf new file mode 100644 index 000000000..228071def Binary files /dev/null and b/surfsense_backend/tests/fixtures/sample.pdf differ diff --git a/surfsense_backend/tests/fixtures/sample.txt b/surfsense_backend/tests/fixtures/sample.txt new file mode 100644 index 000000000..0ee6513b9 --- /dev/null +++ b/surfsense_backend/tests/fixtures/sample.txt @@ -0,0 +1,34 @@ +SurfSense Document Upload Test + +This is a sample text document used for end-to-end testing of the manual document +upload pipeline in SurfSense. The document contains multiple paragraphs to ensure +that the chunking system has enough content to work with. + +Artificial Intelligence and Machine Learning + +Artificial intelligence (AI) is a broad field of computer science concerned with +building smart machines capable of performing tasks that typically require human +intelligence. Machine learning is a subset of AI that enables systems to learn and +improve from experience without being explicitly programmed. 
+ +Natural Language Processing + +Natural language processing (NLP) is a subfield of linguistics, computer science, +and artificial intelligence concerned with the interactions between computers and +human language. Key applications include machine translation, sentiment analysis, +text summarization, and question answering systems. + +Vector Databases and Semantic Search + +Vector databases store data as high-dimensional vectors, enabling efficient +similarity search operations. When combined with embedding models, they power +semantic search systems that understand the meaning behind queries rather than +relying on exact keyword matches. This technology is fundamental to modern +retrieval-augmented generation (RAG) systems. + +Document Processing Pipelines + +Modern document processing pipelines involve several stages: extraction, transformation, +chunking, embedding generation, and storage. Each stage plays a critical role in +converting raw documents into searchable, structured knowledge that can be retrieved +and used by AI systems for accurate information retrieval and generation. 
diff --git a/surfsense_backend/tests/utils/__init__.py b/surfsense_backend/tests/utils/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/surfsense_backend/tests/utils/helpers.py b/surfsense_backend/tests/utils/helpers.py
new file mode 100644
index 000000000..7c68bfac5
--- /dev/null
+++ b/surfsense_backend/tests/utils/helpers.py
@@ -0,0 +1,160 @@
+"""Shared test helpers for authentication, polling, and cleanup."""
+
+from __future__ import annotations
+
+import asyncio
+import os
+from pathlib import Path
+
+import httpx
+
+FIXTURES_DIR = Path(__file__).resolve().parent.parent / "fixtures"
+
+BACKEND_URL = os.environ.get("TEST_BACKEND_URL", "http://localhost:8000")
+TEST_EMAIL = os.environ.get("TEST_USER_EMAIL", "testuser@surfsense.com")
+TEST_PASSWORD = os.environ.get("TEST_USER_PASSWORD", "testpassword123")
+TEST_SEARCH_SPACE_ID = int(os.environ.get("TEST_SEARCH_SPACE_ID", "1"))
+
+
+async def get_auth_token(client: httpx.AsyncClient) -> str:
+    """Log in and return a Bearer JWT token."""
+    response = await client.post(
+        "/auth/jwt/login",
+        data={"username": TEST_EMAIL, "password": TEST_PASSWORD},
+        headers={"Content-Type": "application/x-www-form-urlencoded"},
+    )
+    assert response.status_code == 200, (
+        f"Login failed ({response.status_code}): {response.text}"
+    )
+    return response.json()["access_token"]
+
+
+def auth_headers(token: str) -> dict[str, str]:
+    """Return Authorization header dict for a Bearer token."""
+    return {"Authorization": f"Bearer {token}"}
+
+
+async def upload_file(
+    client: httpx.AsyncClient,
+    headers: dict[str, str],
+    fixture_name: str,
+    *,
+    search_space_id: int = TEST_SEARCH_SPACE_ID,
+    filename_override: str | None = None,
+) -> httpx.Response:
+    """Upload a single fixture file and return the raw response."""
+    file_path = FIXTURES_DIR / fixture_name
+    upload_name = filename_override or fixture_name
+    with open(file_path, "rb") as f:
+        return await client.post(
+            "/api/v1/documents/fileupload",
+            headers=headers,
+            files={"files": (upload_name, f)},
+            data={"search_space_id": str(search_space_id)},
+        )
+
+
+async def upload_multiple_files(
+    client: httpx.AsyncClient,
+    headers: dict[str, str],
+    fixture_names: list[str],
+    *,
+    search_space_id: int = TEST_SEARCH_SPACE_ID,
+) -> httpx.Response:
+    """Upload multiple fixture files in a single request."""
+    files = []
+    open_handles = []
+    try:
+        for name in fixture_names:
+            fh = open(FIXTURES_DIR / name, "rb")  # noqa: SIM115
+            open_handles.append(fh)
+            files.append(("files", (name, fh)))
+
+        return await client.post(
+            "/api/v1/documents/fileupload",
+            headers=headers,
+            files=files,
+            data={"search_space_id": str(search_space_id)},
+        )
+    finally:
+        for fh in open_handles:
+            fh.close()
+
+
+async def poll_document_status(
+    client: httpx.AsyncClient,
+    headers: dict[str, str],
+    document_ids: list[int],
+    *,
+    search_space_id: int = TEST_SEARCH_SPACE_ID,
+    timeout: float = 180.0,
+    interval: float = 3.0,
+) -> dict[int, dict]:
+    """
+    Poll ``GET /api/v1/documents/status`` until every document reaches a
+    terminal state (``ready`` or ``failed``) or *timeout* seconds elapse.
+
+    Returns a mapping of ``{document_id: status_item_dict}``.
+    """
+    ids_param = ",".join(str(d) for d in document_ids)
+    terminal_states = {"ready", "failed"}
+    elapsed = 0.0
+    # Initialized before the loop so the TimeoutError below can safely report
+    # the last-seen statuses even if the loop body never ran (timeout <= 0).
+    items: dict[int, dict] = {}
+
+    while elapsed < timeout:
+        resp = await client.get(
+            "/api/v1/documents/status",
+            headers=headers,
+            params={
+                "search_space_id": search_space_id,
+                "document_ids": ids_param,
+            },
+        )
+        assert resp.status_code == 200, (
+            f"Status poll failed ({resp.status_code}): {resp.text}"
+        )
+
+        items = {item["id"]: item for item in resp.json()["items"]}
+        if all(
+            items.get(did, {}).get("status", {}).get("state") in terminal_states
+            for did in document_ids
+        ):
+            return items
+
+        await asyncio.sleep(interval)
+        elapsed += interval
+
+    raise TimeoutError(
+        f"Documents {document_ids} did not reach terminal state within {timeout}s. "
+        f"Last status: {items}"
+    )
+
+
+async def get_document(
+    client: httpx.AsyncClient,
+    headers: dict[str, str],
+    document_id: int,
+) -> dict:
+    """Fetch a single document by ID."""
+    resp = await client.get(
+        f"/api/v1/documents/{document_id}",
+        headers=headers,
+    )
+    assert resp.status_code == 200, (
+        f"GET document {document_id} failed ({resp.status_code}): {resp.text}"
+    )
+    return resp.json()
+
+
+async def delete_document(
+    client: httpx.AsyncClient,
+    headers: dict[str, str],
+    document_id: int,
+) -> httpx.Response:
+    """Delete a document by ID, returning the raw response."""
+    return await client.delete(
+        f"/api/v1/documents/{document_id}",
+        headers=headers,
+    )