mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-26 01:06:23 +02:00
test: enhance reindexing tests for UploadDocumentAdapter
This commit is contained in:
parent
37f76a8533
commit
ce82807f16
1 changed files with 96 additions and 6 deletions
|
|
@ -89,7 +89,7 @@ async def test_chunks_written_to_db(db_session, db_search_space, db_user, mocker
|
|||
async def test_raises_on_indexing_failure(db_session, db_search_space, db_user, mocker):
|
||||
"""RuntimeError is raised when the indexing step fails so the caller can fire a failure notification."""
|
||||
adapter = UploadDocumentAdapter(db_session)
|
||||
with pytest.raises(RuntimeError):
|
||||
with pytest.raises(RuntimeError, match=r"Embedding failed|Indexing failed"):
|
||||
await adapter.index(
|
||||
markdown_content="## Hello\n\nSome content.",
|
||||
filename="test.pdf",
|
||||
|
|
@ -106,6 +106,67 @@ async def test_raises_on_indexing_failure(db_session, db_search_space, db_user,
|
|||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_text", "patched_chunk_text"
|
||||
)
|
||||
async def test_reindex_updates_content(db_session, db_search_space, db_user, mocker):
|
||||
"""Document content is updated to the new summary after reindexing."""
|
||||
adapter = UploadDocumentAdapter(db_session)
|
||||
await adapter.index(
|
||||
markdown_content="## Original\n\nOriginal content.",
|
||||
filename="test.pdf",
|
||||
etl_service="UNSTRUCTURED",
|
||||
search_space_id=db_search_space.id,
|
||||
user_id=str(db_user.id),
|
||||
llm=mocker.Mock(),
|
||||
)
|
||||
|
||||
result = await db_session.execute(
|
||||
select(Document).filter(Document.search_space_id == db_search_space.id)
|
||||
)
|
||||
document = result.scalars().first()
|
||||
|
||||
document.source_markdown = "## Edited\n\nNew content after user edit."
|
||||
await db_session.flush()
|
||||
|
||||
await adapter.reindex(document=document, llm=mocker.Mock())
|
||||
|
||||
await db_session.refresh(document)
|
||||
assert document.content == "Mocked summary."
|
||||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_text", "patched_chunk_text"
|
||||
)
|
||||
async def test_reindex_updates_content_hash(
|
||||
db_session, db_search_space, db_user, mocker
|
||||
):
|
||||
"""Content hash is recomputed after reindexing with new source markdown."""
|
||||
adapter = UploadDocumentAdapter(db_session)
|
||||
await adapter.index(
|
||||
markdown_content="## Original\n\nOriginal content.",
|
||||
filename="test.pdf",
|
||||
etl_service="UNSTRUCTURED",
|
||||
search_space_id=db_search_space.id,
|
||||
user_id=str(db_user.id),
|
||||
llm=mocker.Mock(),
|
||||
)
|
||||
|
||||
result = await db_session.execute(
|
||||
select(Document).filter(Document.search_space_id == db_search_space.id)
|
||||
)
|
||||
document = result.scalars().first()
|
||||
original_hash = document.content_hash
|
||||
|
||||
document.source_markdown = "## Edited\n\nNew content after user edit."
|
||||
await db_session.flush()
|
||||
|
||||
await adapter.reindex(document=document, llm=mocker.Mock())
|
||||
|
||||
await db_session.refresh(document)
|
||||
assert document.content_hash != original_hash
|
||||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_text", "patched_chunk_text"
|
||||
)
|
||||
|
|
@ -135,11 +196,14 @@ async def test_reindex_sets_status_ready(db_session, db_search_space, db_user, m
|
|||
assert DocumentStatus.is_state(document.status, DocumentStatus.READY)
|
||||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
"patched_summarize", "patched_embed_text", "patched_chunk_text"
|
||||
)
|
||||
@pytest.mark.usefixtures("patched_summarize", "patched_embed_text")
|
||||
async def test_reindex_replaces_chunks(db_session, db_search_space, db_user, mocker):
|
||||
"""Reindexing replaces old chunks rather than appending new ones."""
|
||||
"""Reindexing replaces old chunks with new content rather than appending."""
|
||||
mocker.patch(
|
||||
"app.indexing_pipeline.indexing_pipeline_service.chunk_text",
|
||||
side_effect=[["Original chunk."], ["Updated chunk."]],
|
||||
)
|
||||
|
||||
adapter = UploadDocumentAdapter(db_session)
|
||||
await adapter.index(
|
||||
markdown_content="## Original\n\nOriginal content.",
|
||||
|
|
@ -167,6 +231,7 @@ async def test_reindex_replaces_chunks(db_session, db_search_space, db_user, moc
|
|||
chunks = chunks_result.scalars().all()
|
||||
|
||||
assert len(chunks) == 1
|
||||
assert chunks[0].content == "Updated chunk."
|
||||
|
||||
|
||||
@pytest.mark.usefixtures(
|
||||
|
|
@ -234,5 +299,30 @@ async def test_reindex_raises_on_failure(
|
|||
side_effect=RuntimeError("LLM unavailable"),
|
||||
)
|
||||
|
||||
with pytest.raises(RuntimeError):
|
||||
with pytest.raises(RuntimeError, match=r"Embedding failed|Reindexing failed"):
|
||||
await adapter.reindex(document=document, llm=mocker.Mock())
|
||||
|
||||
|
||||
async def test_reindex_raises_on_empty_source_markdown(
|
||||
db_session, db_search_space, db_user, mocker
|
||||
):
|
||||
"""Reindexing a document with no source_markdown raises immediately."""
|
||||
from app.db import DocumentType
|
||||
|
||||
document = Document(
|
||||
title="empty.pdf",
|
||||
document_type=DocumentType.FILE,
|
||||
content="placeholder",
|
||||
content_hash="abc123",
|
||||
unique_identifier_hash="def456",
|
||||
source_markdown="",
|
||||
search_space_id=db_search_space.id,
|
||||
created_by_id=str(db_user.id),
|
||||
)
|
||||
db_session.add(document)
|
||||
await db_session.flush()
|
||||
|
||||
adapter = UploadDocumentAdapter(db_session)
|
||||
|
||||
with pytest.raises(RuntimeError, match="no source_markdown"):
|
||||
await adapter.reindex(document=document, llm=mocker.Mock())
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue