mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-27 01:36:30 +02:00
add docstrings to all indexing pipeline tests
This commit is contained in:
parent
4293910e8e
commit
0de74f4bf7
7 changed files with 48 additions and 0 deletions
|
|
@ -6,6 +6,7 @@ from app.indexing_pipeline.connector_document import ConnectorDocument
|
|||
|
||||
|
||||
def test_valid_document_created_with_required_fields():
|
||||
"""All optional fields default correctly when only required fields are supplied."""
|
||||
doc = ConnectorDocument(
|
||||
title="Task",
|
||||
source_markdown="## Task\n\nSome content.",
|
||||
|
|
@ -23,6 +24,7 @@ def test_valid_document_created_with_required_fields():
|
|||
|
||||
|
||||
def test_omitting_created_by_id_raises():
|
||||
"""Omitting created_by_id raises a validation error."""
|
||||
with pytest.raises(ValidationError):
|
||||
ConnectorDocument(
|
||||
title="Task",
|
||||
|
|
@ -35,6 +37,7 @@ def test_omitting_created_by_id_raises():
|
|||
|
||||
|
||||
def test_empty_source_markdown_raises():
|
||||
"""Empty source_markdown raises a validation error."""
|
||||
with pytest.raises(ValidationError):
|
||||
ConnectorDocument(
|
||||
title="Task",
|
||||
|
|
@ -46,6 +49,7 @@ def test_empty_source_markdown_raises():
|
|||
|
||||
|
||||
def test_whitespace_only_source_markdown_raises():
|
||||
"""Whitespace-only source_markdown raises a validation error."""
|
||||
with pytest.raises(ValidationError):
|
||||
ConnectorDocument(
|
||||
title="Task",
|
||||
|
|
@ -57,6 +61,7 @@ def test_whitespace_only_source_markdown_raises():
|
|||
|
||||
|
||||
def test_empty_title_raises():
|
||||
"""Empty title raises a validation error."""
|
||||
with pytest.raises(ValidationError):
|
||||
ConnectorDocument(
|
||||
title="",
|
||||
|
|
@ -68,6 +73,7 @@ def test_empty_title_raises():
|
|||
|
||||
|
||||
def test_empty_created_by_id_raises():
|
||||
"""Empty created_by_id raises a validation error."""
|
||||
with pytest.raises(ValidationError):
|
||||
ConnectorDocument(
|
||||
title="Task",
|
||||
|
|
@ -81,6 +87,7 @@ def test_empty_created_by_id_raises():
|
|||
|
||||
|
||||
def test_zero_search_space_id_raises():
|
||||
"""search_space_id of zero raises a validation error."""
|
||||
with pytest.raises(ValidationError):
|
||||
ConnectorDocument(
|
||||
title="Task",
|
||||
|
|
@ -94,6 +101,7 @@ def test_zero_search_space_id_raises():
|
|||
|
||||
|
||||
def test_empty_unique_id_raises():
|
||||
"""Empty unique_id raises a validation error."""
|
||||
with pytest.raises(ValidationError):
|
||||
ConnectorDocument(
|
||||
title="Task",
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ pytestmark = pytest.mark.unit
|
|||
|
||||
@pytest.mark.usefixtures("patched_chunker_instance", "patched_code_chunker_instance")
def test_uses_code_chunker_when_flag_is_true():
    """Passing use_code_chunker=True routes the text to the code chunker."""
    expected_chunks = ["code chunk"]

    chunks = chunk_text("def foo(): pass", use_code_chunker=True)

    assert chunks == expected_chunks
|
||||
|
|
@ -14,6 +15,7 @@ def test_uses_code_chunker_when_flag_is_true():
|
|||
|
||||
@pytest.mark.usefixtures("patched_chunker_instance", "patched_code_chunker_instance")
def test_uses_default_chunker_when_flag_is_false():
    """Passing use_code_chunker=False routes the text to the default prose chunker."""
    expected_chunks = ["prose chunk"]

    chunks = chunk_text("Some prose text.", use_code_chunker=False)

    assert chunks == expected_chunks
|
||||
|
|
|
|||
|
|
@ -7,36 +7,42 @@ pytestmark = pytest.mark.unit
|
|||
|
||||
|
||||
def test_different_unique_id_produces_different_hash(make_connector_document):
    """Documents that differ only in unique_id get distinct identifier hashes."""
    first = make_connector_document(unique_id="id-001")
    second = make_connector_document(unique_id="id-002")

    first_hash = compute_unique_identifier_hash(first)
    second_hash = compute_unique_identifier_hash(second)

    assert first_hash != second_hash
|
||||
|
||||
|
||||
def test_different_search_space_produces_different_identifier_hash(make_connector_document):
    """Documents that differ only in search_space_id get distinct identifier hashes."""
    in_space_one = make_connector_document(search_space_id=1)
    in_space_two = make_connector_document(search_space_id=2)

    hash_one = compute_unique_identifier_hash(in_space_one)
    hash_two = compute_unique_identifier_hash(in_space_two)

    assert hash_one != hash_two
|
||||
|
||||
|
||||
def test_different_document_type_produces_different_identifier_hash(make_connector_document):
    """Documents that differ only in document_type get distinct identifier hashes."""
    clickup_doc = make_connector_document(document_type=DocumentType.CLICKUP_CONNECTOR)
    notion_doc = make_connector_document(document_type=DocumentType.NOTION_CONNECTOR)

    clickup_hash = compute_unique_identifier_hash(clickup_doc)
    notion_hash = compute_unique_identifier_hash(notion_doc)

    assert clickup_hash != notion_hash
|
||||
|
||||
|
||||
def test_same_content_same_space_produces_same_content_hash(make_connector_document):
    """Matching markdown in the same search space yields equal content hashes."""
    shared_kwargs = {"source_markdown": "Hello world", "search_space_id": 1}
    first = make_connector_document(**shared_kwargs)
    second = make_connector_document(**shared_kwargs)

    first_hash = compute_content_hash(first)
    second_hash = compute_content_hash(second)

    assert first_hash == second_hash
|
||||
|
||||
|
||||
def test_same_content_different_space_produces_different_content_hash(make_connector_document):
    """Matching markdown in two different search spaces yields distinct content hashes."""
    markdown = "Hello world"
    in_space_one = make_connector_document(source_markdown=markdown, search_space_id=1)
    in_space_two = make_connector_document(source_markdown=markdown, search_space_id=2)

    hash_one = compute_content_hash(in_space_one)
    hash_two = compute_content_hash(in_space_two)

    assert hash_one != hash_two
|
||||
|
||||
|
||||
def test_different_content_produces_different_content_hash(make_connector_document):
    """Documents whose source markdown differs get distinct content hashes."""
    original = make_connector_document(source_markdown="Original content")
    updated = make_connector_document(source_markdown="Updated content")

    original_hash = compute_content_hash(original)
    updated_hash = compute_content_hash(updated)

    assert original_hash != updated_hash
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ pytestmark = pytest.mark.unit
|
|||
|
||||
@pytest.mark.usefixtures("patched_summarizer_chain")
async def test_without_metadata_returns_raw_summary():
    """When no metadata is passed, the summarizer's result is the LLM output unchanged."""
    fake_llm = MagicMock(model="gpt-4")

    summary = await summarize_document("# Content", llm=fake_llm)

    assert summary == "The summary."
|
||||
|
|
@ -15,6 +16,7 @@ async def test_without_metadata_returns_raw_summary():
|
|||
|
||||
@pytest.mark.usefixtures("patched_summarizer_chain")
|
||||
async def test_with_metadata_includes_metadata_values_in_output():
|
||||
"""Non-empty metadata values are prepended to the summary output."""
|
||||
result = await summarize_document(
|
||||
"# Content",
|
||||
llm=MagicMock(model="gpt-4"),
|
||||
|
|
@ -27,6 +29,7 @@ async def test_with_metadata_includes_metadata_values_in_output():
|
|||
|
||||
@pytest.mark.usefixtures("patched_summarizer_chain")
|
||||
async def test_with_metadata_omits_empty_fields_from_output():
|
||||
"""Empty metadata fields are omitted from the summary output."""
|
||||
result = await summarize_document(
|
||||
"# Content",
|
||||
llm=MagicMock(model="gpt-4"),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue