test: span-aware paragraph chunker fixture

This commit is contained in:
CREDO23 2026-06-18 20:06:33 +02:00
parent a7cf9bd946
commit 03012c3077

View file

@ -18,16 +18,22 @@ _V1 = "Intro paragraph.\n\nBody paragraph.\n\nOutro paragraph."
@pytest.fixture
def paragraph_chunker(monkeypatch):
    """Patch the indexing chunkers to emit one slice per markdown paragraph.

    With one slice per paragraph, an edit to a single paragraph maps to a
    chunk-level diff. Both ``chunk_text`` and ``chunk_markdown_with_spans``
    in ``app.indexing_pipeline.cache.cached_indexing`` are replaced by the
    same stub for the duration of the test.
    """
    # Imported locally so the project type is only needed when the fixture
    # is actually requested.
    from app.indexing_pipeline.document_chunker import ChunkSlice

    separator = "\n\n"

    def _split(markdown, *_args, **_kwargs):
        """Return a ``ChunkSlice`` for every non-blank paragraph.

        Extra positional and keyword arguments are accepted and ignored so
        the stub can stand in for either patched chunker.
        """
        slices = []
        start = 0
        for para in markdown.split(separator):
            end = start + len(para)
            if para.strip():
                # NOTE(review): positional order presumably (text, start,
                # end) -- confirm against the ChunkSlice definition.
                slices.append(ChunkSlice(para, start, end))
            # Offsets are derived arithmetically rather than by substring
            # search: the next paragraph begins right after the separator.
            start = end + len(separator)
        return slices

    monkeypatch.setattr(
        "app.indexing_pipeline.cache.cached_indexing.chunk_text", _split
    )
    monkeypatch.setattr(
        "app.indexing_pipeline.cache.cached_indexing.chunk_markdown_with_spans",
        _split,
    )