diff --git a/surfsense_backend/tests/integration/indexing_pipeline/test_index_editions.py b/surfsense_backend/tests/integration/indexing_pipeline/test_index_editions.py index 68d5ec0af..f86ee8e4f 100644 --- a/surfsense_backend/tests/integration/indexing_pipeline/test_index_editions.py +++ b/surfsense_backend/tests/integration/indexing_pipeline/test_index_editions.py @@ -18,16 +18,22 @@ _V1 = "Intro paragraph.\n\nBody paragraph.\n\nOutro paragraph." @pytest.fixture def paragraph_chunker(monkeypatch): - """One chunk per markdown paragraph, so edits map to chunk-level diffs.""" + """One slice per markdown paragraph, so edits map to chunk-level diffs.""" + from app.indexing_pipeline.document_chunker import ChunkSlice - def _split(markdown, **_kwargs): - return [p for p in markdown.split("\n\n") if p.strip()] + def _split(markdown, *_args, **_kwargs): + slices = [] + cursor = 0 + for para in markdown.split("\n\n"): + start = markdown.index(para, cursor) + cursor = start + len(para) + if para.strip(): + slices.append(ChunkSlice(para, start, cursor)) + return slices monkeypatch.setattr( - "app.indexing_pipeline.cache.cached_indexing.chunk_text", _split - ) - monkeypatch.setattr( - "app.indexing_pipeline.cache.cached_indexing.chunk_text_hybrid", _split + "app.indexing_pipeline.cache.cached_indexing.chunk_markdown_with_spans", + _split, )