refactor(tests): Update tests to remove summary references and adjust for embedding errors

This commit is contained in:
Anish Sarkar 2026-06-04 01:51:21 +05:30
parent e4d7b01b09
commit e588782a9b
17 changed files with 69 additions and 148 deletions

View file

@ -1,7 +1,7 @@
import importlib
import sys
import uuid
from unittest.mock import AsyncMock, MagicMock
from unittest.mock import MagicMock
import pytest
import pytest_asyncio
@ -123,18 +123,6 @@ async def db_search_space(db_session: AsyncSession, db_user: User) -> SearchSpac
return space
@pytest.fixture
def patched_summarize(monkeypatch) -> AsyncMock:
    """Provide an awaitable mock that resolves to a canned summary string.

    The ``monkeypatch`` fixture is requested but not used in this body: the
    mock is handed back to the test without being installed on any module.
    """
    summarizer = AsyncMock()
    # Awaiting the mock yields this fixed text.
    summarizer.return_value = "Mocked summary."
    return summarizer
@pytest.fixture
def patched_summarize_raises(monkeypatch) -> AsyncMock:
    """Provide an awaitable mock that raises ``RuntimeError`` when awaited.

    The ``monkeypatch`` fixture is requested but not used in this body: the
    mock is handed back to the test without being installed on any module.
    """
    llm_failure = RuntimeError("LLM unavailable")
    # An exception instance as side_effect is raised on every call.
    failing_summarizer = AsyncMock(side_effect=llm_failure)
    return failing_summarizer
@pytest.fixture
def patched_embed_texts(monkeypatch) -> MagicMock:
mock = MagicMock(side_effect=lambda texts: [[0.1] * _EMBEDDING_DIM for _ in texts])
@ -145,6 +133,16 @@ def patched_embed_texts(monkeypatch) -> MagicMock:
return mock
@pytest.fixture
def patched_embed_texts_raises(monkeypatch) -> MagicMock:
    """Swap the pipeline service's ``embed_texts`` for a mock that always raises.

    The mock is installed through ``monkeypatch`` at the dotted import path
    below and raises ``RuntimeError("Embedding unavailable")`` on every call.
    It is returned so a test can inspect how it was called.
    """
    embedding_failure = RuntimeError("Embedding unavailable")
    failing_embedder = MagicMock(side_effect=embedding_failure)
    # Dotted-string target: monkeypatch resolves the module and attribute itself.
    patch_target = "app.indexing_pipeline.indexing_pipeline_service.embed_texts"
    monkeypatch.setattr(patch_target, failing_embedder)
    return failing_embedder
@pytest.fixture
def patched_chunk_text(monkeypatch) -> MagicMock:
mock = MagicMock(return_value=["Test chunk content."])

View file

@ -32,8 +32,8 @@ async def test_sets_status_ready(db_session, db_search_space, db_user, mocker):
@pytest.mark.usefixtures(
"patched_embed_texts", "patched_chunk_text"
)
async def test_content_is_summary(db_session, db_search_space, db_user, mocker):
"""Document content is set to the LLM-generated summary."""
async def test_content_is_source_markdown(db_session, db_search_space, db_user, mocker):
"""Document content is set to the extracted source markdown."""
adapter = UploadDocumentAdapter(db_session)
await adapter.index(
markdown_content="## Hello\n\nSome content.",
@ -48,7 +48,7 @@ async def test_content_is_summary(db_session, db_search_space, db_user, mocker):
)
document = result.scalars().first()
assert document.content == "Mocked summary."
assert document.content == "## Hello\n\nSome content."
@pytest.mark.usefixtures(
@ -79,9 +79,7 @@ async def test_chunks_written_to_db(db_session, db_search_space, db_user, mocker
assert chunks[0].content == "Test chunk content."
@pytest.mark.usefixtures(
"patched_summarize_raises", "patched_embed_texts", "patched_chunk_text"
)
@pytest.mark.usefixtures("patched_embed_texts_raises", "patched_chunk_text")
async def test_raises_on_indexing_failure(db_session, db_search_space, db_user, mocker):
"""RuntimeError is raised when the indexing step fails so the caller can fire a failure notification."""
adapter = UploadDocumentAdapter(db_session)
@ -92,7 +90,7 @@ async def test_raises_on_indexing_failure(db_session, db_search_space, db_user,
etl_service="UNSTRUCTURED",
search_space_id=db_search_space.id,
user_id=str(db_user.id),
)
)
# ---------------------------------------------------------------------------
@ -104,7 +102,7 @@ async def test_raises_on_indexing_failure(db_session, db_search_space, db_user,
"patched_embed_texts", "patched_chunk_text"
)
async def test_reindex_updates_content(db_session, db_search_space, db_user, mocker):
"""Document content is updated to the new summary after reindexing."""
"""Document content is updated to the new source markdown after reindexing."""
adapter = UploadDocumentAdapter(db_session)
await adapter.index(
markdown_content="## Original\n\nOriginal content.",
@ -125,7 +123,7 @@ async def test_reindex_updates_content(db_session, db_search_space, db_user, moc
await adapter.reindex(document=document)
await db_session.refresh(document)
assert document.content == "Mocked summary."
assert document.content == "## Edited\n\nNew content after user edit."
@pytest.mark.usefixtures(
@ -256,7 +254,9 @@ async def test_reindex_clears_reindexing_flag(
@pytest.mark.usefixtures("patched_embed_texts", "patched_chunk_text")
async def test_reindex_raises_on_failure(db_session, db_search_space, db_user, mocker):
async def test_reindex_raises_on_failure(
db_session, db_search_space, db_user, patched_embed_texts, mocker
):
"""RuntimeError is raised when reindexing fails so the caller can handle it."""
adapter = UploadDocumentAdapter(db_session)
@ -276,6 +276,8 @@ async def test_reindex_raises_on_failure(db_session, db_search_space, db_user, m
document.source_markdown = "## Edited\n\nNew content after user edit."
await db_session.flush()
patched_embed_texts.side_effect = RuntimeError("Embedding unavailable")
with pytest.raises(RuntimeError, match=r"Embedding failed|Reindexing failed"):
await adapter.reindex(document=document)

View file

@ -259,16 +259,14 @@ async def test_reindex_replaces_old_chunks(
assert len(chunks) == 1
@pytest.mark.usefixtures(
"patched_summarize_raises", "patched_embed_texts", "patched_chunk_text"
)
async def test_llm_error_sets_status_failed(
@pytest.mark.usefixtures("patched_embed_texts_raises", "patched_chunk_text")
async def test_embedding_error_sets_status_failed(
db_session,
db_search_space,
make_connector_document,
mocker,
):
"""Document status is FAILED when the LLM raises during indexing."""
"""Document status is FAILED when embedding raises during indexing."""
connector_doc = make_connector_document(search_space_id=db_search_space.id)
service = IndexingPipelineService(session=db_session)
@ -286,10 +284,8 @@ async def test_llm_error_sets_status_failed(
assert DocumentStatus.is_state(reloaded.status, DocumentStatus.FAILED)
@pytest.mark.usefixtures(
"patched_summarize_raises", "patched_embed_texts", "patched_chunk_text"
)
async def test_llm_error_leaves_no_partial_data(
@pytest.mark.usefixtures("patched_embed_texts_raises", "patched_chunk_text")
async def test_embedding_error_leaves_no_partial_data(
db_session,
db_search_space,
make_connector_document,

View file

@ -1271,7 +1271,7 @@ class TestIndexingProgressFlag:
original_index = IndexingPipelineService.index
flag_observed = []
async def patched_index(self_pipe, document, connector_doc, llm):
async def patched_index(self_pipe, document, connector_doc):
folder = (
await db_session.execute(
select(Folder).where(
@ -1283,7 +1283,7 @@ class TestIndexingProgressFlag:
if folder:
meta = folder.folder_metadata or {}
flag_observed.append(meta.get("indexing_in_progress", False))
return await original_index(self_pipe, document, connector_doc, llm)
return await original_index(self_pipe, document, connector_doc)
IndexingPipelineService.index = patched_index
try:

View file

@ -338,9 +338,7 @@ async def test_same_content_from_different_source_is_skipped(
assert len(result.scalars().all()) == 1
@pytest.mark.usefixtures(
"patched_summarize_raises", "patched_embed_texts", "patched_chunk_text"
)
@pytest.mark.usefixtures("patched_embed_texts_raises", "patched_chunk_text")
async def test_failed_document_with_unchanged_content_is_requeued(
db_session,
db_search_space,
@ -351,7 +349,7 @@ async def test_failed_document_with_unchanged_content_is_requeued(
doc = make_connector_document(search_space_id=db_search_space.id)
service = IndexingPipelineService(session=db_session)
# First run: document is created and indexing crashes → status = failed
# First run: document is created and indexing crashes, so status becomes failed.
prepared = await service.prepare_for_indexing([doc])
document_id = prepared[0].id
await service.index(prepared[0], doc)