fix: bump updated_at on title-only change, fall back to source markdown when no LLM is given, delete stale chunks on re-index

This commit is contained in:
CREDO23 2026-02-25 08:40:13 +02:00
parent af22fa7c88
commit 0363cb9c17
3 changed files with 93 additions and 10 deletions

View file

@@ -28,7 +28,7 @@ async def test_sets_status_ready(
@pytest.mark.usefixtures("patched_summarize", "patched_embed_text", "patched_chunk_text")
async def test_content_is_summary_when_should_summarize_true(
db_session, db_search_space, make_connector_document,
db_session, db_search_space, make_connector_document, mocker,
):
connector_doc = make_connector_document(search_space_id=db_search_space.id)
service = IndexingPipelineService(session=db_session)
@@ -37,7 +37,7 @@ async def test_content_is_summary_when_should_summarize_true(
document = prepared[0]
document_id = document.id
await service.index(document, connector_doc, llm=None)
await service.index(document, connector_doc, llm=mocker.Mock())
result = await db_session.execute(select(Document).filter(Document.id == document_id))
reloaded = result.scalars().first()
@@ -132,11 +132,15 @@ async def test_updated_at_advances_after_indexing(
assert updated_at_ready > updated_at_pending
@pytest.mark.usefixtures("patched_summarize_raises", "patched_chunk_text")
async def test_llm_error_sets_status_failed(
@pytest.mark.usefixtures("patched_embed_text", "patched_chunk_text")
async def test_no_llm_falls_back_to_source_markdown(
db_session, db_search_space, make_connector_document,
):
connector_doc = make_connector_document(search_space_id=db_search_space.id)
connector_doc = make_connector_document(
search_space_id=db_search_space.id,
should_summarize=True,
source_markdown="## Fallback content",
)
service = IndexingPipelineService(session=db_session)
prepared = await service.prepare_for_indexing([connector_doc])
@@ -148,12 +152,44 @@ async def test_llm_error_sets_status_failed(
result = await db_session.execute(select(Document).filter(Document.id == document_id))
reloaded = result.scalars().first()
assert DocumentStatus.is_state(reloaded.status, DocumentStatus.FAILED)
assert DocumentStatus.is_state(reloaded.status, DocumentStatus.READY)
assert reloaded.content == "## Fallback content"
@pytest.mark.usefixtures("patched_summarize", "patched_embed_text", "patched_chunk_text")
async def test_reindex_replaces_old_chunks(
    db_session, db_search_space, make_connector_document,
):
    """Re-indexing the same connector document must replace, not append, its chunks.

    Indexes a v1 payload, then prepares and indexes a v2 payload for the same
    search space, and asserts only the single fresh chunk remains for the
    document — i.e. stale chunks from the first pass were deleted.
    """
    first_version = make_connector_document(
        search_space_id=db_search_space.id,
        source_markdown="## v1",
    )
    pipeline = IndexingPipelineService(session=db_session)

    prepared_first = await pipeline.prepare_for_indexing([first_version])
    doc = prepared_first[0]
    doc_id = doc.id
    await pipeline.index(doc, first_version, llm=None)

    # Same logical document, new content — should overwrite the v1 chunks.
    second_version = make_connector_document(
        search_space_id=db_search_space.id,
        source_markdown="## v2",
    )
    prepared_second = await pipeline.prepare_for_indexing([second_version])
    await pipeline.index(prepared_second[0], second_version, llm=None)

    rows = await db_session.execute(
        select(Chunk).filter(Chunk.document_id == doc_id)
    )
    remaining_chunks = rows.scalars().all()
    assert len(remaining_chunks) == 1
@pytest.mark.usefixtures("patched_summarize_raises", "patched_chunk_text")
async def test_llm_error_leaves_no_partial_data(
db_session, db_search_space, make_connector_document,
async def test_llm_error_sets_status_failed(
db_session, db_search_space, make_connector_document, mocker,
):
connector_doc = make_connector_document(search_space_id=db_search_space.id)
service = IndexingPipelineService(session=db_session)
@@ -162,7 +198,26 @@ async def test_llm_error_leaves_no_partial_data(
document = prepared[0]
document_id = document.id
await service.index(document, connector_doc, llm=None)
await service.index(document, connector_doc, llm=mocker.Mock())
result = await db_session.execute(select(Document).filter(Document.id == document_id))
reloaded = result.scalars().first()
assert DocumentStatus.is_state(reloaded.status, DocumentStatus.FAILED)
@pytest.mark.usefixtures("patched_summarize_raises", "patched_chunk_text")
async def test_llm_error_leaves_no_partial_data(
db_session, db_search_space, make_connector_document, mocker,
):
connector_doc = make_connector_document(search_space_id=db_search_space.id)
service = IndexingPipelineService(session=db_session)
prepared = await service.prepare_for_indexing([connector_doc])
document = prepared[0]
document_id = document.id
await service.index(document, connector_doc, llm=mocker.Mock())
result = await db_session.execute(select(Document).filter(Document.id == document_id))
reloaded = result.scalars().first()

View file

@@ -159,6 +159,27 @@ async def test_metadata_is_updated_when_content_changes(
assert reloaded.document_metadata == {"status": "done"}
async def test_updated_at_advances_when_title_only_changes(
    db_session, db_search_space, make_connector_document
):
    """Preparing the same document again with only a new title must bump updated_at."""

    async def _current_updated_at(doc_id):
        # Reload the row through the session to read its current timestamp.
        rows = await db_session.execute(select(Document).filter(Document.id == doc_id))
        return rows.scalars().first().updated_at

    old_version = make_connector_document(search_space_id=db_search_space.id, title="Old Title")
    pipeline = IndexingPipelineService(session=db_session)

    prepared = await pipeline.prepare_for_indexing([old_version])
    doc_id = prepared[0].id
    stamp_before = await _current_updated_at(doc_id)

    # Re-prepare with an identical payload except for the title.
    new_version = make_connector_document(search_space_id=db_search_space.id, title="New Title")
    await pipeline.prepare_for_indexing([new_version])

    stamp_after = await _current_updated_at(doc_id)
    assert stamp_after > stamp_before
async def test_updated_at_advances_when_content_changes(
db_session, db_search_space, make_connector_document
):