mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-17 18:35:19 +02:00
test: enhance index batch parallel tests to include hybrid chunker
Updated the test for the indexing pipeline to verify that chunking is dispatched via asyncio.to_thread through either the standard chunker (chunk_text) or the hybrid chunker (chunk_text_hybrid), ensuring non-blocking behavior. This change reflects the routing of non-code documents through the hybrid chunker, while maintaining the contract that chunking and embedding run off the event loop.
This commit is contained in:
parent
c187b04e82
commit
9fb9778bd0
1 changed file with 11 additions and 2 deletions
|
|
@@ -37,7 +37,7 @@ def _make_orm_doc(connector_doc, doc_id):
|
|||
async def test_index_calls_embed_and_chunk_via_to_thread(
|
||||
pipeline, make_connector_document, monkeypatch
|
||||
):
|
||||
"""index() runs embed_texts and chunk_text via asyncio.to_thread, not blocking the loop."""
|
||||
"""index() runs embed_texts and the chunker via asyncio.to_thread, not blocking the loop."""
|
||||
to_thread_calls = []
|
||||
original_to_thread = asyncio.to_thread
|
||||
|
||||
|
|
@@ -57,6 +57,12 @@ async def test_index_calls_embed_and_chunk_via_to_thread(
|
|||
"app.indexing_pipeline.indexing_pipeline_service.chunk_text",
|
||||
mock_chunk,
|
||||
)
|
||||
mock_chunk_hybrid = MagicMock(return_value=["chunk1"])
|
||||
mock_chunk_hybrid.__name__ = "chunk_text_hybrid"
|
||||
monkeypatch.setattr(
|
||||
"app.indexing_pipeline.indexing_pipeline_service.chunk_text_hybrid",
|
||||
mock_chunk_hybrid,
|
||||
)
|
||||
mock_embed = MagicMock(
|
||||
side_effect=lambda texts: [[0.1] * _EMBEDDING_DIM for _ in texts]
|
||||
)
|
||||
|
|
@@ -77,7 +83,10 @@ async def test_index_calls_embed_and_chunk_via_to_thread(
|
|||
|
||||
await pipeline.index(document, connector_doc, llm=MagicMock())
|
||||
|
||||
assert "chunk_text" in to_thread_calls
|
||||
# Non-code documents now route through the table-aware hybrid chunker
|
||||
# (see commit 2f3a33c9). Either chunker entry point satisfies the
|
||||
# "chunking runs off the event loop" contract this test guards.
|
||||
assert {"chunk_text", "chunk_text_hybrid"} & set(to_thread_calls)
|
||||
assert "embed_texts" in to_thread_calls
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue