diff --git a/surfsense_backend/tests/integration/document_upload/test_document_upload.py b/surfsense_backend/tests/integration/document_upload/test_document_upload.py
index 1a9433a8f..49ba9ce0a 100644
--- a/surfsense_backend/tests/integration/document_upload/test_document_upload.py
+++ b/surfsense_backend/tests/integration/document_upload/test_document_upload.py
@@ -1,8 +1,10 @@
 """
-Integration tests for manual document upload.
+Integration tests for manual document upload - HTTP API layer.
 
-These tests exercise the full pipeline via the HTTP API:
-  API upload → inline task dispatch → ETL extraction → chunking → embedding → DB storage
+Each test verifies a distinct user-facing behavior through the public HTTP
+endpoints. Pipeline internals (indexing, chunking, embedding) are covered by
+the ``indexing_pipeline`` test suite; this module focuses on the API contract,
+error handling, auth, and cross-cutting concerns such as duplicate detection.
 
 External boundaries mocked: LLM summarization, text embedding, text chunking,
 Redis heartbeat. Task dispatch is swapped via DI (InlineTaskDispatcher).
@@ -21,8 +23,6 @@ import pytest
 
 from tests.utils.helpers import (
     FIXTURES_DIR,
-    delete_document,
-    get_document,
     poll_document_status,
     upload_file,
     upload_multiple_files,
@@ -30,27 +30,14 @@ from tests.utils.helpers import (
 
 pytestmark = pytest.mark.integration
 
-# ---------------------------------------------------------------------------
-# Helpers local to this module
-# ---------------------------------------------------------------------------
-
-
-def _assert_document_ready(doc: dict, *, expected_filename: str) -> None:
-    """Common assertions for a successfully processed document."""
-    assert doc["title"] == expected_filename
-    assert doc["document_type"] == "FILE"
-    assert doc["content"], "Document content (summary) should not be empty"
-    assert doc["content_hash"], "content_hash should be set"
-    assert doc["document_metadata"].get("FILE_NAME") == expected_filename
-
 
 # ---------------------------------------------------------------------------
-# Test A: Upload a .txt file (direct read path)
+# Upload smoke tests (one per distinct code path: direct-read and ETL)
 # ---------------------------------------------------------------------------
 
 
 class TestTxtFileUpload:
-    """Upload a plain-text file and verify the full pipeline."""
+    """Upload a plain-text file (direct-read path) via the HTTP API."""
 
     async def test_upload_txt_returns_document_id(
         self,
@@ -89,83 +76,9 @@ class TestTxtFileUpload:
         for did in doc_ids:
             assert statuses[did]["status"]["state"] == "ready"
 
-    async def test_txt_document_fields_populated(
-        self,
-        client: httpx.AsyncClient,
-        headers: dict[str, str],
-        search_space_id: int,
-        cleanup_doc_ids: list[int],
-    ):
-        resp = await upload_file(
-            client, headers, "sample.txt", search_space_id=search_space_id
-        )
-        doc_ids = resp.json()["document_ids"]
-        cleanup_doc_ids.extend(doc_ids)
-
-        await poll_document_status(
-            client, headers, doc_ids, search_space_id=search_space_id
-        )
-
-        doc = await get_document(client, headers, doc_ids[0])
-        _assert_document_ready(doc, expected_filename="sample.txt")
-
-
-# ---------------------------------------------------------------------------
-# Test B: Upload a .md file (markdown direct-read path)
-# ---------------------------------------------------------------------------
-
-
-class TestMarkdownFileUpload:
-    """Upload a Markdown file and verify the full pipeline."""
-
-    async def test_md_processing_reaches_ready(
-        self,
-        client: httpx.AsyncClient,
-        headers: dict[str, str],
-        search_space_id: int,
-        cleanup_doc_ids: list[int],
-    ):
-        resp = await upload_file(
-            client, headers, "sample.md", search_space_id=search_space_id
-        )
-        assert resp.status_code == 200
-        doc_ids = resp.json()["document_ids"]
-        cleanup_doc_ids.extend(doc_ids)
-
-        statuses = await poll_document_status(
-            client, headers, doc_ids, search_space_id=search_space_id
-        )
-        for did in doc_ids:
-            assert statuses[did]["status"]["state"] == "ready"
-
-    async def test_md_document_fields_populated(
-        self,
-        client: httpx.AsyncClient,
-        headers: dict[str, str],
-        search_space_id: int,
-        cleanup_doc_ids: list[int],
-    ):
-        resp = await upload_file(
-            client, headers, "sample.md", search_space_id=search_space_id
-        )
-        doc_ids = resp.json()["document_ids"]
-        cleanup_doc_ids.extend(doc_ids)
-
-        await poll_document_status(
-            client, headers, doc_ids, search_space_id=search_space_id
-        )
-
-        doc = await get_document(client, headers, doc_ids[0])
-        _assert_document_ready(doc, expected_filename="sample.md")
-
-
-# ---------------------------------------------------------------------------
-# Test C: Upload a .pdf file (ETL path)
-# ---------------------------------------------------------------------------
-
 
 class TestPdfFileUpload:
-    """Upload a PDF and verify it goes through the ETL extraction pipeline."""
+    """Upload a PDF (ETL extraction path) via the HTTP API."""
 
     async def test_pdf_processing_reaches_ready(
         self,
@@ -187,26 +100,6 @@ class TestPdfFileUpload:
         for did in doc_ids:
             assert statuses[did]["status"]["state"] == "ready"
 
-    async def test_pdf_document_fields_populated(
-        self,
-        client: httpx.AsyncClient,
-        headers: dict[str, str],
-        search_space_id: int,
-        cleanup_doc_ids: list[int],
-    ):
-        resp = await upload_file(
-            client, headers, "sample.pdf", search_space_id=search_space_id
-        )
-        doc_ids = resp.json()["document_ids"]
-        cleanup_doc_ids.extend(doc_ids)
-
-        await poll_document_status(
-            client, headers, doc_ids, search_space_id=search_space_id, timeout=300.0
-        )
-
-        doc = await get_document(client, headers, doc_ids[0])
-        _assert_document_ready(doc, expected_filename="sample.pdf")
-
 
 # ---------------------------------------------------------------------------
 # Test D: Upload multiple files in a single request
@@ -214,7 +107,7 @@ class TestPdfFileUpload:
 
 
 class TestMultiFileUpload:
-    """Upload several files at once and verify all are processed."""
+    """Upload several files at once and verify the API response contract."""
 
     async def test_multi_upload_returns_all_ids(
         self,
@@ -236,28 +129,6 @@ class TestMultiFileUpload:
         assert len(body["document_ids"]) == 2
         cleanup_doc_ids.extend(body["document_ids"])
 
-    async def test_multi_upload_all_reach_ready(
-        self,
-        client: httpx.AsyncClient,
-        headers: dict[str, str],
-        search_space_id: int,
-        cleanup_doc_ids: list[int],
-    ):
-        resp = await upload_multiple_files(
-            client,
-            headers,
-            ["sample.txt", "sample.md"],
-            search_space_id=search_space_id,
-        )
-        doc_ids = resp.json()["document_ids"]
-        cleanup_doc_ids.extend(doc_ids)
-
-        statuses = await poll_document_status(
-            client, headers, doc_ids, search_space_id=search_space_id
-        )
-        for did in doc_ids:
-            assert statuses[did]["status"]["state"] == "ready"
-
 
 # ---------------------------------------------------------------------------
 # Test E: Duplicate file upload (same file uploaded twice)
@@ -433,38 +304,6 @@ class TestNoFilesUpload:
         assert resp.status_code in {400, 422}
 
 
-# ---------------------------------------------------------------------------
-# Test J: Document deletion after successful upload
-# ---------------------------------------------------------------------------
-
-
-class TestDocumentDeletion:
-    """Upload, wait for ready, delete, then verify it's gone."""
-
-    async def test_delete_processed_document(
-        self,
-        client: httpx.AsyncClient,
-        headers: dict[str, str],
-        search_space_id: int,
-    ):
-        resp = await upload_file(
-            client, headers, "sample.txt", search_space_id=search_space_id
-        )
-        doc_ids = resp.json()["document_ids"]
-        await poll_document_status(
-            client, headers, doc_ids, search_space_id=search_space_id
-        )
-
-        del_resp = await delete_document(client, headers, doc_ids[0])
-        assert del_resp.status_code == 200
-
-        get_resp = await client.get(
-            f"/api/v1/documents/{doc_ids[0]}",
-            headers=headers,
-        )
-        assert get_resp.status_code == 404
-
-
 # ---------------------------------------------------------------------------
 # Test K: Searchability after upload
 # ---------------------------------------------------------------------------