Add docstrings to all indexing pipeline tests

This commit is contained in:
CREDO23 2026-02-25 20:30:31 +02:00
parent 4293910e8e
commit 0de74f4bf7
7 changed files with 48 additions and 0 deletions

View file

@ -6,6 +6,7 @@ from app.indexing_pipeline.connector_document import ConnectorDocument
def test_valid_document_created_with_required_fields():
"""All optional fields default correctly when only required fields are supplied."""
doc = ConnectorDocument(
title="Task",
source_markdown="## Task\n\nSome content.",
@ -23,6 +24,7 @@ def test_valid_document_created_with_required_fields():
def test_omitting_created_by_id_raises():
"""Omitting created_by_id raises a validation error."""
with pytest.raises(ValidationError):
ConnectorDocument(
title="Task",
@ -35,6 +37,7 @@ def test_omitting_created_by_id_raises():
def test_empty_source_markdown_raises():
"""Empty source_markdown raises a validation error."""
with pytest.raises(ValidationError):
ConnectorDocument(
title="Task",
@ -46,6 +49,7 @@ def test_empty_source_markdown_raises():
def test_whitespace_only_source_markdown_raises():
"""Whitespace-only source_markdown raises a validation error."""
with pytest.raises(ValidationError):
ConnectorDocument(
title="Task",
@ -57,6 +61,7 @@ def test_whitespace_only_source_markdown_raises():
def test_empty_title_raises():
"""Empty title raises a validation error."""
with pytest.raises(ValidationError):
ConnectorDocument(
title="",
@ -68,6 +73,7 @@ def test_empty_title_raises():
def test_empty_created_by_id_raises():
"""Empty created_by_id raises a validation error."""
with pytest.raises(ValidationError):
ConnectorDocument(
title="Task",
@ -81,6 +87,7 @@ def test_empty_created_by_id_raises():
def test_zero_search_space_id_raises():
"""search_space_id of zero raises a validation error."""
with pytest.raises(ValidationError):
ConnectorDocument(
title="Task",
@ -94,6 +101,7 @@ def test_zero_search_space_id_raises():
def test_empty_unique_id_raises():
"""Empty unique_id raises a validation error."""
with pytest.raises(ValidationError):
ConnectorDocument(
title="Task",

View file

@ -7,6 +7,7 @@ pytestmark = pytest.mark.unit
@pytest.mark.usefixtures("patched_chunker_instance", "patched_code_chunker_instance")
def test_uses_code_chunker_when_flag_is_true():
    """chunk_text routes input through the code chunker when use_code_chunker is True."""
    chunks = chunk_text("def foo(): pass", use_code_chunker=True)
    assert chunks == ["code chunk"]
@ -14,6 +15,7 @@ def test_uses_code_chunker_when_flag_is_true():
@pytest.mark.usefixtures("patched_chunker_instance", "patched_code_chunker_instance")
def test_uses_default_chunker_when_flag_is_false():
    """chunk_text routes input through the default prose chunker when use_code_chunker is False."""
    chunks = chunk_text("Some prose text.", use_code_chunker=False)
    assert chunks == ["prose chunk"]

View file

@ -7,36 +7,42 @@ pytestmark = pytest.mark.unit
def test_different_unique_id_produces_different_hash(make_connector_document):
    """Documents that differ only in unique_id must yield distinct identifier hashes."""
    first = make_connector_document(unique_id="id-001")
    second = make_connector_document(unique_id="id-002")
    hash_first = compute_unique_identifier_hash(first)
    hash_second = compute_unique_identifier_hash(second)
    assert hash_first != hash_second
def test_different_search_space_produces_different_identifier_hash(make_connector_document):
    """The same document placed in two search spaces must yield distinct identifier hashes."""
    in_space_one = make_connector_document(search_space_id=1)
    in_space_two = make_connector_document(search_space_id=2)
    hash_one = compute_unique_identifier_hash(in_space_one)
    hash_two = compute_unique_identifier_hash(in_space_two)
    assert hash_one != hash_two
def test_different_document_type_produces_different_identifier_hash(make_connector_document):
    """Identical unique_ids under different document types must yield distinct identifier hashes."""
    clickup_doc = make_connector_document(document_type=DocumentType.CLICKUP_CONNECTOR)
    notion_doc = make_connector_document(document_type=DocumentType.NOTION_CONNECTOR)
    clickup_hash = compute_unique_identifier_hash(clickup_doc)
    notion_hash = compute_unique_identifier_hash(notion_doc)
    assert clickup_hash != notion_hash
def test_same_content_same_space_produces_same_content_hash(make_connector_document):
    """Content hashing is deterministic: same markdown + same search space gives the same hash."""
    first = make_connector_document(source_markdown="Hello world", search_space_id=1)
    second = make_connector_document(source_markdown="Hello world", search_space_id=1)
    assert compute_content_hash(first) == compute_content_hash(second)
def test_same_content_different_space_produces_different_content_hash(make_connector_document):
    """Content hashes are scoped per search space, so identical markdown hashes differently across spaces."""
    in_space_one = make_connector_document(source_markdown="Hello world", search_space_id=1)
    in_space_two = make_connector_document(source_markdown="Hello world", search_space_id=2)
    assert compute_content_hash(in_space_one) != compute_content_hash(in_space_two)
def test_different_content_produces_different_content_hash(make_connector_document):
    """Changing the source markdown must change the content hash."""
    original = make_connector_document(source_markdown="Original content")
    updated = make_connector_document(source_markdown="Updated content")
    original_hash = compute_content_hash(original)
    updated_hash = compute_content_hash(updated)
    assert original_hash != updated_hash

View file

@ -8,6 +8,7 @@ pytestmark = pytest.mark.unit
@pytest.mark.usefixtures("patched_summarizer_chain")
async def test_without_metadata_returns_raw_summary():
    """With no metadata supplied, the summarizer's LLM output is returned unchanged."""
    fake_llm = MagicMock(model="gpt-4")
    summary = await summarize_document("# Content", llm=fake_llm)
    assert summary == "The summary."
@ -15,6 +16,7 @@ async def test_without_metadata_returns_raw_summary():
@pytest.mark.usefixtures("patched_summarizer_chain")
async def test_with_metadata_includes_metadata_values_in_output():
"""Non-empty metadata values are prepended to the summary output."""
result = await summarize_document(
"# Content",
llm=MagicMock(model="gpt-4"),
@ -27,6 +29,7 @@ async def test_with_metadata_includes_metadata_values_in_output():
@pytest.mark.usefixtures("patched_summarizer_chain")
async def test_with_metadata_omits_empty_fields_from_output():
"""Empty metadata fields are omitted from the summary output."""
result = await summarize_document(
"# Content",
llm=MagicMock(model="gpt-4"),