diff --git a/surfsense_backend/tests/integration/indexing_pipeline/adapters/test_file_upload_adapter.py b/surfsense_backend/tests/integration/indexing_pipeline/adapters/test_file_upload_adapter.py
index c471110fc..723c0e13b 100644
--- a/surfsense_backend/tests/integration/indexing_pipeline/adapters/test_file_upload_adapter.py
+++ b/surfsense_backend/tests/integration/indexing_pipeline/adapters/test_file_upload_adapter.py
@@ -9,6 +9,7 @@ pytestmark = pytest.mark.integration
 
 @pytest.mark.usefixtures("patched_summarize", "patched_embed_text", "patched_chunk_text")
 async def test_sets_status_ready(db_session, db_search_space, db_user, mocker):
+    """Document status is READY after successful indexing."""
     await index_uploaded_file(
         markdown_content="## Hello\n\nSome content.",
         filename="test.pdf",
@@ -29,6 +30,7 @@ async def test_sets_status_ready(db_session, db_search_space, db_user, mocker):
 
 @pytest.mark.usefixtures("patched_summarize", "patched_embed_text", "patched_chunk_text")
 async def test_content_is_summary(db_session, db_search_space, db_user, mocker):
+    """Document content is set to the LLM-generated summary."""
     await index_uploaded_file(
         markdown_content="## Hello\n\nSome content.",
         filename="test.pdf",
@@ -49,6 +51,7 @@ async def test_content_is_summary(db_session, db_search_space, db_user, mocker):
 
 @pytest.mark.usefixtures("patched_summarize", "patched_embed_text", "patched_chunk_text")
 async def test_chunks_written_to_db(db_session, db_search_space, db_user, mocker):
+    """Chunks derived from the source markdown are persisted in the DB."""
     await index_uploaded_file(
         markdown_content="## Hello\n\nSome content.",
         filename="test.pdf",
@@ -75,6 +78,7 @@ async def test_chunks_written_to_db(db_session, db_search_space, db_user, mocker
 
 @pytest.mark.usefixtures("patched_summarize_raises", "patched_embed_text", "patched_chunk_text")
 async def test_raises_on_indexing_failure(db_session, db_search_space, db_user, mocker):
+    """RuntimeError is raised when the indexing step fails so the caller can fire a failure notification."""
     with pytest.raises(RuntimeError):
         await index_uploaded_file(
             markdown_content="## Hello\n\nSome content.",
diff --git a/surfsense_backend/tests/integration/indexing_pipeline/test_index_document.py b/surfsense_backend/tests/integration/indexing_pipeline/test_index_document.py
index 89bd722ee..7c5e1e4f4 100644
--- a/surfsense_backend/tests/integration/indexing_pipeline/test_index_document.py
+++ b/surfsense_backend/tests/integration/indexing_pipeline/test_index_document.py
@@ -11,6 +11,7 @@ pytestmark = pytest.mark.integration
 async def test_sets_status_ready(
     db_session, db_search_space, make_connector_document, mocker,
 ):
+    """Document status is READY after successful indexing."""
     connector_doc = make_connector_document(search_space_id=db_search_space.id)
     service = IndexingPipelineService(session=db_session)
 
@@ -30,6 +31,7 @@ async def test_sets_status_ready(
 async def test_content_is_summary_when_should_summarize_true(
     db_session, db_search_space, make_connector_document, mocker,
 ):
+    """Document content is set to the LLM-generated summary when should_summarize=True."""
     connector_doc = make_connector_document(search_space_id=db_search_space.id)
     service = IndexingPipelineService(session=db_session)
 
@@ -49,6 +51,7 @@ async def test_content_is_summary_when_should_summarize_true(
 async def test_content_is_source_markdown_when_should_summarize_false(
     db_session, db_search_space, make_connector_document,
 ):
+    """Document content is set to source_markdown verbatim when should_summarize=False."""
     connector_doc = make_connector_document(
         search_space_id=db_search_space.id,
         should_summarize=False,
@@ -72,6 +75,7 @@ async def test_content_is_source_markdown_when_should_summarize_false(
 async def test_chunks_written_to_db(
     db_session, db_search_space, make_connector_document, mocker,
 ):
+    """Chunks derived from source_markdown are persisted in the DB."""
     connector_doc = make_connector_document(search_space_id=db_search_space.id)
     service = IndexingPipelineService(session=db_session)
 
@@ -94,6 +98,7 @@ async def test_chunks_written_to_db(
 async def test_embedding_written_to_db(
     db_session, db_search_space, make_connector_document, mocker,
 ):
+    """Document embedding vector is persisted in the DB after indexing."""
     connector_doc = make_connector_document(search_space_id=db_search_space.id)
     service = IndexingPipelineService(session=db_session)
 
@@ -114,6 +119,7 @@ async def test_embedding_written_to_db(
 async def test_updated_at_advances_after_indexing(
     db_session, db_search_space, make_connector_document, mocker,
 ):
+    """updated_at timestamp is later after indexing than it was at prepare time."""
     connector_doc = make_connector_document(search_space_id=db_search_space.id)
     service = IndexingPipelineService(session=db_session)
 
@@ -136,6 +142,7 @@ async def test_updated_at_advances_after_indexing(
 async def test_no_llm_falls_back_to_source_markdown(
     db_session, db_search_space, make_connector_document,
 ):
+    """When llm=None and no fallback_summary, content falls back to source_markdown."""
     connector_doc = make_connector_document(
         search_space_id=db_search_space.id,
         should_summarize=True,
@@ -160,6 +167,7 @@ async def test_no_llm_falls_back_to_source_markdown(
 async def test_fallback_summary_used_when_llm_unavailable(
     db_session, db_search_space, make_connector_document,
 ):
+    """fallback_summary is used as content when llm=None and should_summarize=True."""
     connector_doc = make_connector_document(
         search_space_id=db_search_space.id,
         should_summarize=True,
@@ -184,6 +192,7 @@ async def test_fallback_summary_used_when_llm_unavailable(
 async def test_reindex_replaces_old_chunks(
     db_session, db_search_space, make_connector_document, mocker,
 ):
+    """Re-indexing a document replaces its old chunks rather than appending."""
     connector_doc = make_connector_document(
         search_space_id=db_search_space.id,
         source_markdown="## v1",
@@ -215,6 +224,7 @@ async def test_reindex_replaces_old_chunks(
 async def test_llm_error_sets_status_failed(
     db_session, db_search_space, make_connector_document, mocker,
 ):
+    """Document status is FAILED when the LLM raises during indexing."""
     connector_doc = make_connector_document(search_space_id=db_search_space.id)
     service = IndexingPipelineService(session=db_session)
 
@@ -234,6 +244,7 @@ async def test_llm_error_sets_status_failed(
 async def test_llm_error_leaves_no_partial_data(
     db_session, db_search_space, make_connector_document, mocker,
 ):
+    """A failed indexing attempt leaves no partial embedding or chunks in the DB."""
     connector_doc = make_connector_document(search_space_id=db_search_space.id)
     service = IndexingPipelineService(session=db_session)
 
diff --git a/surfsense_backend/tests/integration/indexing_pipeline/test_prepare_for_indexing.py b/surfsense_backend/tests/integration/indexing_pipeline/test_prepare_for_indexing.py
index 8b66b8323..b6d257f7a 100644
--- a/surfsense_backend/tests/integration/indexing_pipeline/test_prepare_for_indexing.py
+++ b/surfsense_backend/tests/integration/indexing_pipeline/test_prepare_for_indexing.py
@@ -11,6 +11,7 @@ pytestmark = pytest.mark.integration
 async def test_new_document_is_persisted_with_pending_status(
     db_session, db_search_space, make_connector_document
 ):
+    """A new document is created in the DB with PENDING status and correct markdown."""
     doc = make_connector_document(search_space_id=db_search_space.id)
     service = IndexingPipelineService(session=db_session)
 
@@ -31,6 +32,7 @@ async def test_new_document_is_persisted_with_pending_status(
 async def test_unchanged_ready_document_is_skipped(
     db_session, db_search_space, make_connector_document, mocker,
 ):
+    """A READY document with unchanged content is not returned for re-indexing."""
     doc = make_connector_document(search_space_id=db_search_space.id)
     service = IndexingPipelineService(session=db_session)
 
@@ -48,6 +50,7 @@ async def test_unchanged_ready_document_is_skipped(
 async def test_title_only_change_updates_title_in_db(
     db_session, db_search_space, make_connector_document, mocker,
 ):
+    """A title-only change updates the DB title without re-queuing the document."""
     original = make_connector_document(search_space_id=db_search_space.id, title="Original Title")
     service = IndexingPipelineService(session=db_session)
 
@@ -69,6 +72,7 @@ async def test_title_only_change_updates_title_in_db(
 async def test_changed_content_is_returned_for_reprocessing(
     db_session, db_search_space, make_connector_document
 ):
+    """A document with changed content is returned for re-indexing with updated markdown."""
     original = make_connector_document(search_space_id=db_search_space.id, source_markdown="## v1")
     service = IndexingPipelineService(session=db_session)
 
@@ -91,6 +95,7 @@ async def test_changed_content_is_returned_for_reprocessing(
 async def test_all_documents_in_batch_are_persisted(
     db_session, db_search_space, make_connector_document
 ):
+    """All documents in a batch are persisted and returned."""
     docs = [
         make_connector_document(search_space_id=db_search_space.id, unique_id="id-1", title="Doc 1", source_markdown="## Content 1"),
         make_connector_document(search_space_id=db_search_space.id, unique_id="id-2", title="Doc 2", source_markdown="## Content 2"),
@@ -111,6 +116,7 @@ async def test_all_documents_in_batch_are_persisted(
 async def test_duplicate_in_batch_is_persisted_once(
     db_session, db_search_space, make_connector_document
 ):
+    """The same document passed twice in a batch is only persisted once."""
     doc = make_connector_document(search_space_id=db_search_space.id)
     service = IndexingPipelineService(session=db_session)
 
@@ -127,6 +133,7 @@ async def test_duplicate_in_batch_is_persisted_once(
 async def test_created_by_id_is_persisted(
     db_session, db_user, db_search_space, make_connector_document
 ):
+    """created_by_id from the connector document is persisted on the DB row."""
     doc = make_connector_document(
         search_space_id=db_search_space.id,
         created_by_id=str(db_user.id),
@@ -145,6 +152,7 @@ async def test_created_by_id_is_persisted(
 async def test_metadata_is_updated_when_content_changes(
     db_session, db_search_space, make_connector_document
 ):
+    """document_metadata is overwritten with the latest metadata when content changes."""
     original = make_connector_document(
         search_space_id=db_search_space.id,
         source_markdown="## v1",
@@ -171,6 +179,7 @@ async def test_metadata_is_updated_when_content_changes(
 async def test_updated_at_advances_when_title_only_changes(
     db_session, db_search_space, make_connector_document
 ):
+    """updated_at advances even when only the title changes."""
     original = make_connector_document(search_space_id=db_search_space.id, title="Old Title")
     service = IndexingPipelineService(session=db_session)
 
@@ -192,6 +201,7 @@ async def test_updated_at_advances_when_title_only_changes(
 async def test_updated_at_advances_when_content_changes(
     db_session, db_search_space, make_connector_document
 ):
+    """updated_at advances when document content changes."""
     original = make_connector_document(search_space_id=db_search_space.id, source_markdown="## v1")
     service = IndexingPipelineService(session=db_session)
 
@@ -213,6 +223,7 @@ async def test_updated_at_advances_when_content_changes(
 async def test_same_content_from_different_source_skipped_in_single_batch(
     db_session, db_search_space, make_connector_document
 ):
+    """Two documents from different sources with identical content in the same batch result in only one being persisted."""
     first = make_connector_document(
         search_space_id=db_search_space.id,
         unique_id="source-a",
@@ -238,6 +249,7 @@ async def test_same_content_from_different_source_skipped_in_single_batch(
 async def test_same_content_from_different_source_is_skipped(
     db_session, db_search_space, make_connector_document
 ):
+    """A document from a different source with content identical to an already-indexed document is skipped."""
     first = make_connector_document(
         search_space_id=db_search_space.id,
         unique_id="source-a",
@@ -265,6 +277,7 @@ async def test_same_content_from_different_source_is_skipped(
 async def test_failed_document_with_unchanged_content_is_requeued(
     db_session, db_search_space, make_connector_document, mocker,
 ):
+    """A FAILED document with unchanged content is re-queued as PENDING on the next run."""
     doc = make_connector_document(search_space_id=db_search_space.id)
     service = IndexingPipelineService(session=db_session)
 
@@ -289,6 +302,7 @@ async def test_failed_document_with_unchanged_content_is_requeued(
 async def test_title_and_content_change_updates_both_and_returns_document(
     db_session, db_search_space, make_connector_document
 ):
+    """When both title and content change, both are updated and the document is returned for re-indexing."""
     original = make_connector_document(
         search_space_id=db_search_space.id,
         title="Original Title",
diff --git a/surfsense_backend/tests/unit/indexing_pipeline/test_connector_document.py b/surfsense_backend/tests/unit/indexing_pipeline/test_connector_document.py
index fef691964..228777626 100644
--- a/surfsense_backend/tests/unit/indexing_pipeline/test_connector_document.py
+++ b/surfsense_backend/tests/unit/indexing_pipeline/test_connector_document.py
@@ -6,6 +6,7 @@ from app.indexing_pipeline.connector_document import ConnectorDocument
 
 
 def test_valid_document_created_with_required_fields():
+    """All optional fields default correctly when only required fields are supplied."""
     doc = ConnectorDocument(
         title="Task",
         source_markdown="## Task\n\nSome content.",
@@ -23,6 +24,7 @@ def test_valid_document_created_with_required_fields():
 
 
 def test_omitting_created_by_id_raises():
+    """Omitting created_by_id raises a validation error."""
     with pytest.raises(ValidationError):
         ConnectorDocument(
             title="Task",
@@ -35,6 +37,7 @@ def test_omitting_created_by_id_raises():
 
 
 def test_empty_source_markdown_raises():
+    """Empty source_markdown raises a validation error."""
     with pytest.raises(ValidationError):
         ConnectorDocument(
             title="Task",
@@ -46,6 +49,7 @@ def test_empty_source_markdown_raises():
 
 
 def test_whitespace_only_source_markdown_raises():
+    """Whitespace-only source_markdown raises a validation error."""
     with pytest.raises(ValidationError):
         ConnectorDocument(
             title="Task",
@@ -57,6 +61,7 @@ def test_whitespace_only_source_markdown_raises():
 
 
 def test_empty_title_raises():
+    """Empty title raises a validation error."""
     with pytest.raises(ValidationError):
         ConnectorDocument(
             title="",
@@ -68,6 +73,7 @@ def test_empty_title_raises():
 
 
 def test_empty_created_by_id_raises():
+    """Empty created_by_id raises a validation error."""
     with pytest.raises(ValidationError):
         ConnectorDocument(
             title="Task",
@@ -81,6 +87,7 @@ def test_empty_created_by_id_raises():
 
 
 def test_zero_search_space_id_raises():
+    """search_space_id of zero raises a validation error."""
     with pytest.raises(ValidationError):
         ConnectorDocument(
             title="Task",
@@ -94,6 +101,7 @@ def test_zero_search_space_id_raises():
 
 
 def test_empty_unique_id_raises():
+    """Empty unique_id raises a validation error."""
     with pytest.raises(ValidationError):
         ConnectorDocument(
             title="Task",
diff --git a/surfsense_backend/tests/unit/indexing_pipeline/test_document_chunker.py b/surfsense_backend/tests/unit/indexing_pipeline/test_document_chunker.py
index 78d0641c1..9c52d503d 100644
--- a/surfsense_backend/tests/unit/indexing_pipeline/test_document_chunker.py
+++ b/surfsense_backend/tests/unit/indexing_pipeline/test_document_chunker.py
@@ -7,6 +7,7 @@ pytestmark = pytest.mark.unit
 
 @pytest.mark.usefixtures("patched_chunker_instance", "patched_code_chunker_instance")
 def test_uses_code_chunker_when_flag_is_true():
+    """Code chunker is selected when use_code_chunker=True."""
     result = chunk_text("def foo(): pass", use_code_chunker=True)
 
     assert result == ["code chunk"]
@@ -14,6 +15,7 @@ def test_uses_code_chunker_when_flag_is_true():
 
 @pytest.mark.usefixtures("patched_chunker_instance", "patched_code_chunker_instance")
 def test_uses_default_chunker_when_flag_is_false():
+    """Default prose chunker is selected when use_code_chunker=False."""
     result = chunk_text("Some prose text.", use_code_chunker=False)
 
     assert result == ["prose chunk"]
diff --git a/surfsense_backend/tests/unit/indexing_pipeline/test_document_hashing.py b/surfsense_backend/tests/unit/indexing_pipeline/test_document_hashing.py
index c8e2e97e9..6b7a47f51 100644
--- a/surfsense_backend/tests/unit/indexing_pipeline/test_document_hashing.py
+++ b/surfsense_backend/tests/unit/indexing_pipeline/test_document_hashing.py
@@ -7,36 +7,42 @@ pytestmark = pytest.mark.unit
 
 
 def test_different_unique_id_produces_different_hash(make_connector_document):
+    """Two documents with different unique_ids produce different identifier hashes."""
     doc_a = make_connector_document(unique_id="id-001")
     doc_b = make_connector_document(unique_id="id-002")
     assert compute_unique_identifier_hash(doc_a) != compute_unique_identifier_hash(doc_b)
 
 
 def test_different_search_space_produces_different_identifier_hash(make_connector_document):
+    """Same document in different search spaces produces different identifier hashes."""
     doc_a = make_connector_document(search_space_id=1)
     doc_b = make_connector_document(search_space_id=2)
     assert compute_unique_identifier_hash(doc_a) != compute_unique_identifier_hash(doc_b)
 
 
 def test_different_document_type_produces_different_identifier_hash(make_connector_document):
+    """Same unique_id with different document types produces different identifier hashes."""
     doc_a = make_connector_document(document_type=DocumentType.CLICKUP_CONNECTOR)
     doc_b = make_connector_document(document_type=DocumentType.NOTION_CONNECTOR)
     assert compute_unique_identifier_hash(doc_a) != compute_unique_identifier_hash(doc_b)
 
 
 def test_same_content_same_space_produces_same_content_hash(make_connector_document):
+    """Identical content in the same search space always produces the same content hash."""
     doc_a = make_connector_document(source_markdown="Hello world", search_space_id=1)
     doc_b = make_connector_document(source_markdown="Hello world", search_space_id=1)
     assert compute_content_hash(doc_a) == compute_content_hash(doc_b)
 
 
 def test_same_content_different_space_produces_different_content_hash(make_connector_document):
+    """Identical content in different search spaces produces different content hashes."""
     doc_a = make_connector_document(source_markdown="Hello world", search_space_id=1)
     doc_b = make_connector_document(source_markdown="Hello world", search_space_id=2)
     assert compute_content_hash(doc_a) != compute_content_hash(doc_b)
 
 
 def test_different_content_produces_different_content_hash(make_connector_document):
+    """Different source markdown produces different content hashes."""
     doc_a = make_connector_document(source_markdown="Original content")
     doc_b = make_connector_document(source_markdown="Updated content")
     assert compute_content_hash(doc_a) != compute_content_hash(doc_b)
diff --git a/surfsense_backend/tests/unit/indexing_pipeline/test_document_summarizer.py b/surfsense_backend/tests/unit/indexing_pipeline/test_document_summarizer.py
index 2f713d13d..a3a8ecfc2 100644
--- a/surfsense_backend/tests/unit/indexing_pipeline/test_document_summarizer.py
+++ b/surfsense_backend/tests/unit/indexing_pipeline/test_document_summarizer.py
@@ -8,6 +8,7 @@ pytestmark = pytest.mark.unit
 
 @pytest.mark.usefixtures("patched_summarizer_chain")
 async def test_without_metadata_returns_raw_summary():
+    """Summarizer returns the LLM output directly when no metadata is provided."""
     result = await summarize_document("# Content", llm=MagicMock(model="gpt-4"))
 
     assert result == "The summary."
@@ -15,6 +16,7 @@ async def test_without_metadata_returns_raw_summary():
 
 @pytest.mark.usefixtures("patched_summarizer_chain")
 async def test_with_metadata_includes_metadata_values_in_output():
+    """Non-empty metadata values are included in the summary output."""
     result = await summarize_document(
         "# Content",
         llm=MagicMock(model="gpt-4"),
@@ -27,6 +29,7 @@ async def test_with_metadata_includes_metadata_values_in_output():
 
 @pytest.mark.usefixtures("patched_summarizer_chain")
 async def test_with_metadata_omits_empty_fields_from_output():
+    """Empty metadata fields are omitted from the summary output."""
     result = await summarize_document(
         "# Content",
         llm=MagicMock(model="gpt-4"),