mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-26 01:06:23 +02:00
feat: add integration tests for batch processing of local folder indexing, covering multiple file scenarios and error handling
This commit is contained in:
parent
1fa8e1cc83
commit
2b9d79d44c
1 changed file with 129 additions and 1 deletion
|
|
@@ -1,6 +1,7 @@
|
|||
"""Integration tests for local folder indexer — Tier 3 (I1-I5), Tier 4 (F1-F7), Tier 5 (P1)."""
|
||||
"""Integration tests for local folder indexer — Tier 3 (I1-I5), Tier 4 (F1-F7), Tier 5 (P1), Tier 6 (B1-B2)."""
|
||||
|
||||
import os
|
||||
from contextlib import asynccontextmanager
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
|
@@ -24,6 +25,34 @@ UNIFIED_FIXTURES = (
|
|||
)
|
||||
|
||||
|
||||
class _FakeSessionMaker:
    """Session-factory stand-in that always hands back one existing session.

    Calling an instance returns an async context manager. Entering it yields
    the wrapped ``AsyncSession``; leaving it does not close the session.
    Used to route batch-mode DB operations through the test's
    savepoint-wrapped session.
    """

    def __init__(self, session: "AsyncSession"):
        self._session = session

    @asynccontextmanager
    async def _borrow(self):
        # Yield only: nothing runs on exit, so the session stays open.
        yield self._session

    def __call__(self):
        # A fresh context manager per call, mirroring ``async with factory()``.
        return self._borrow()
|
||||
|
||||
|
||||
@pytest.fixture
def patched_batch_sessions(monkeypatch, db_session):
    """Make ``_index_batch_files`` use the test session and run sequentially.

    Two attributes of the local folder indexer module are replaced through
    ``monkeypatch``: ``get_celery_session_maker`` now returns a
    ``_FakeSessionMaker`` wrapping ``db_session``, and ``BATCH_CONCURRENCY``
    is set to 1.
    """
    overrides = {
        "app.tasks.connector_indexers.local_folder_indexer.get_celery_session_maker": (
            lambda: _FakeSessionMaker(db_session)
        ),
        "app.tasks.connector_indexers.local_folder_indexer.BATCH_CONCURRENCY": 1,
    }
    # Dict order is insertion order, so the patches apply in the listed order.
    for target, replacement in overrides.items():
        monkeypatch.setattr(target, replacement)
|
||||
|
||||
|
||||
# ====================================================================
|
||||
# Tier 3: Full Indexer Integration (I1-I5)
|
||||
# ====================================================================
|
||||
|
|
@@ -597,6 +626,105 @@ class TestFolderMirroring:
|
|||
assert notes_after is None
|
||||
|
||||
|
||||
# ====================================================================
|
||||
# Tier 6: Batch Mode (B1-B2)
|
||||
# ====================================================================
|
||||
|
||||
|
||||
class TestBatchMode:
    """Tier 6 (B1-B2): ``index_local_folder`` called with several ``target_file_paths``."""

    @staticmethod
    async def _local_folder_docs(session, search_space_id):
        """Return all LOCAL_FOLDER_FILE documents stored for ``search_space_id``."""
        result = await session.execute(
            select(Document).where(
                Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
                Document.search_space_id == search_space_id,
            )
        )
        return result.scalars().all()

    @pytest.mark.usefixtures(*UNIFIED_FIXTURES)
    async def test_b1_batch_indexes_multiple_files(
        self,
        db_session: AsyncSession,
        db_user: User,
        db_search_space: SearchSpace,
        tmp_path: Path,
        patched_batch_sessions,
    ):
        """B1: Batch with 3 files indexes all of them."""
        from app.tasks.connector_indexers.local_folder_indexer import index_local_folder

        (tmp_path / "a.md").write_text("File A content")
        (tmp_path / "b.md").write_text("File B content")
        (tmp_path / "c.md").write_text("File C content")

        # The third tuple element (root folder id) is not checked by this test.
        count, failed, _, err = await index_local_folder(
            session=db_session,
            search_space_id=db_search_space.id,
            user_id=str(db_user.id),
            folder_path=str(tmp_path),
            folder_name="test-folder",
            target_file_paths=[
                str(tmp_path / "a.md"),
                str(tmp_path / "b.md"),
                str(tmp_path / "c.md"),
            ],
        )

        assert count == 3
        assert failed == 0
        assert err is None

        docs = await self._local_folder_docs(db_session, db_search_space.id)
        assert len(docs) == 3
        assert {d.title for d in docs} == {"a.md", "b.md", "c.md"}
        assert all(
            DocumentStatus.is_state(d.status, DocumentStatus.READY) for d in docs
        )

    @pytest.mark.usefixtures(*UNIFIED_FIXTURES)
    async def test_b2_partial_failure(
        self,
        db_session: AsyncSession,
        db_user: User,
        db_search_space: SearchSpace,
        tmp_path: Path,
        patched_batch_sessions,
    ):
        """B2: One unreadable file fails gracefully; the other two still get indexed."""
        from app.tasks.connector_indexers.local_folder_indexer import index_local_folder

        (tmp_path / "good1.md").write_text("Good file one")
        (tmp_path / "good2.md").write_text("Good file two")
        # NOTE(review): the NUL-prefixed payload is what is expected to make
        # this file fail; which indexing stage rejects it is not visible here.
        (tmp_path / "bad.md").write_bytes(b"\x00binary garbage")

        # The bad file sits between the good ones in the requested order.
        count, failed, _, err = await index_local_folder(
            session=db_session,
            search_space_id=db_search_space.id,
            user_id=str(db_user.id),
            folder_path=str(tmp_path),
            folder_name="test-folder",
            target_file_paths=[
                str(tmp_path / "good1.md"),
                str(tmp_path / "bad.md"),
                str(tmp_path / "good2.md"),
            ],
        )

        assert count == 2
        assert failed == 1
        assert err is not None

        docs = await self._local_folder_docs(db_session, db_search_space.id)
        assert len(docs) == 2
        assert {d.title for d in docs} == {"good1.md", "good2.md"}
|
||||
|
||||
|
||||
# ====================================================================
|
||||
# Tier 5: Pipeline Integration (P1)
|
||||
# ====================================================================
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue