From 9fb9778bd0dc57813a7d5899ca7ff84a45077b8f Mon Sep 17 00:00:00 2001 From: "DESKTOP-RTLN3BA\\$punk" Date: Fri, 15 May 2026 18:02:04 -0700 Subject: [PATCH] test: enhance index batch parallel tests to include hybrid chunker Updated the test for the indexing pipeline to verify that chunking is called via asyncio.to_thread through either the standard or the hybrid chunker, ensuring non-blocking behavior. This change reflects the routing of non-code documents through the hybrid chunker, maintaining the event loop contract. --- .../indexing_pipeline/test_index_batch_parallel.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/surfsense_backend/tests/unit/indexing_pipeline/test_index_batch_parallel.py b/surfsense_backend/tests/unit/indexing_pipeline/test_index_batch_parallel.py index ae55bc089..59b6cec9c 100644 --- a/surfsense_backend/tests/unit/indexing_pipeline/test_index_batch_parallel.py +++ b/surfsense_backend/tests/unit/indexing_pipeline/test_index_batch_parallel.py @@ -37,7 +37,7 @@ def _make_orm_doc(connector_doc, doc_id): async def test_index_calls_embed_and_chunk_via_to_thread( pipeline, make_connector_document, monkeypatch ): - """index() runs embed_texts and chunk_text via asyncio.to_thread, not blocking the loop.""" + """index() runs embed_texts and the chunker via asyncio.to_thread, not blocking the loop.""" to_thread_calls = [] original_to_thread = asyncio.to_thread @@ -57,6 +57,12 @@ async def test_index_calls_embed_and_chunk_via_to_thread( "app.indexing_pipeline.indexing_pipeline_service.chunk_text", mock_chunk, ) + mock_chunk_hybrid = MagicMock(return_value=["chunk1"]) + mock_chunk_hybrid.__name__ = "chunk_text_hybrid" + monkeypatch.setattr( + "app.indexing_pipeline.indexing_pipeline_service.chunk_text_hybrid", + mock_chunk_hybrid, + ) mock_embed = MagicMock( side_effect=lambda texts: [[0.1] * _EMBEDDING_DIM for _ in texts] ) @@ -77,7 +83,10 @@ async def test_index_calls_embed_and_chunk_via_to_thread( await pipeline.index(document, 
connector_doc, llm=MagicMock()) - assert "chunk_text" in to_thread_calls + # Non-code documents now route through the table-aware hybrid chunker + # (see commit 2f3a33c9). Either chunker entry point satisfies the + # "chunking runs off the event loop" contract this test guards. + assert {"chunk_text", "chunk_text_hybrid"} & set(to_thread_calls) assert "embed_texts" in to_thread_calls