mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-07-02 22:01:05 +02:00
feat: add integration tests for batch processing of local folder indexing, covering multiple file scenarios and error handling
This commit is contained in:
parent
1fa8e1cc83
commit
2b9d79d44c
1 changed file with 129 additions and 1 deletion
|
|
@ -1,6 +1,7 @@
|
||||||
"""Integration tests for local folder indexer — Tier 3 (I1-I5), Tier 4 (F1-F7), Tier 5 (P1)."""
|
"""Integration tests for local folder indexer — Tier 3 (I1-I5), Tier 4 (F1-F7), Tier 5 (P1), Tier 6 (B1-B2)."""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
from contextlib import asynccontextmanager
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
@ -24,6 +25,34 @@ UNIFIED_FIXTURES = (
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeSessionMaker:
    """Session-factory stand-in that always hands back one existing session.

    Calling an instance returns an async context manager; entering it yields
    the wrapped ``AsyncSession`` and leaving it does nothing, so the session is
    not closed. Used to route batch-mode DB operations through the test's
    savepoint-wrapped session.
    """

    def __init__(self, session: "AsyncSession"):
        self._session = session

    @asynccontextmanager
    async def _borrow(self):
        # No cleanup after the yield: the session's lifetime is managed by
        # whoever created it, not by this wrapper.
        yield self._session

    def __call__(self):
        return self._borrow()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def patched_batch_sessions(monkeypatch, db_session):
    """Point the local folder indexer's batch path at the test session.

    Replaces ``get_celery_session_maker`` in the indexer module with a callable
    returning a ``_FakeSessionMaker`` around ``db_session``, and sets the
    module's ``BATCH_CONCURRENCY`` to 1 so that ``_index_batch_files`` uses the
    test session and runs sequentially.
    """

    def _use_test_session():
        return _FakeSessionMaker(db_session)

    replacements = (
        (
            "app.tasks.connector_indexers.local_folder_indexer.get_celery_session_maker",
            _use_test_session,
        ),
        (
            "app.tasks.connector_indexers.local_folder_indexer.BATCH_CONCURRENCY",
            1,
        ),
    )
    for dotted_target, stand_in in replacements:
        monkeypatch.setattr(dotted_target, stand_in)
|
||||||
|
|
||||||
|
|
||||||
# ====================================================================
|
# ====================================================================
|
||||||
# Tier 3: Full Indexer Integration (I1-I5)
|
# Tier 3: Full Indexer Integration (I1-I5)
|
||||||
# ====================================================================
|
# ====================================================================
|
||||||
|
|
@ -597,6 +626,105 @@ class TestFolderMirroring:
|
||||||
assert notes_after is None
|
assert notes_after is None
|
||||||
|
|
||||||
|
|
||||||
|
# ====================================================================
|
||||||
|
# Tier 6: Batch Mode (B1-B2)
|
||||||
|
# ====================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestBatchMode:
    """Tier 6 (B1-B2): ``index_local_folder`` called with ``target_file_paths``.

    Both tests depend on the ``patched_batch_sessions`` fixture and then check
    the returned counters as well as the ``Document`` rows that were stored.
    """

    @staticmethod
    async def _local_folder_docs(session: AsyncSession, search_space_id):
        """Return all ``LOCAL_FOLDER_FILE`` documents in *search_space_id*.

        Shared by B1 and B2 so both tests verify persisted rows with the
        exact same query.
        """
        result = await session.execute(
            select(Document).where(
                Document.document_type == DocumentType.LOCAL_FOLDER_FILE,
                Document.search_space_id == search_space_id,
            )
        )
        return result.scalars().all()

    @pytest.mark.usefixtures(*UNIFIED_FIXTURES)
    async def test_b1_batch_indexes_multiple_files(
        self,
        db_session: AsyncSession,
        db_user: User,
        db_search_space: SearchSpace,
        tmp_path: Path,
        patched_batch_sessions,
    ):
        """B1: Batch with 3 files indexes all of them."""
        from app.tasks.connector_indexers.local_folder_indexer import index_local_folder

        (tmp_path / "a.md").write_text("File A content")
        (tmp_path / "b.md").write_text("File B content")
        (tmp_path / "c.md").write_text("File C content")

        # The third tuple element (root folder id) is not asserted in this test.
        count, failed, _, err = await index_local_folder(
            session=db_session,
            search_space_id=db_search_space.id,
            user_id=str(db_user.id),
            folder_path=str(tmp_path),
            folder_name="test-folder",
            target_file_paths=[
                str(tmp_path / "a.md"),
                str(tmp_path / "b.md"),
                str(tmp_path / "c.md"),
            ],
        )

        assert count == 3
        assert failed == 0
        assert err is None

        docs = await self._local_folder_docs(db_session, db_search_space.id)
        assert len(docs) == 3
        assert {d.title for d in docs} == {"a.md", "b.md", "c.md"}
        assert all(
            DocumentStatus.is_state(d.status, DocumentStatus.READY) for d in docs
        )

    @pytest.mark.usefixtures(*UNIFIED_FIXTURES)
    async def test_b2_partial_failure(
        self,
        db_session: AsyncSession,
        db_user: User,
        db_search_space: SearchSpace,
        tmp_path: Path,
        patched_batch_sessions,
    ):
        """B2: One unreadable file fails gracefully; the other two still get indexed."""
        from app.tasks.connector_indexers.local_folder_indexer import index_local_folder

        (tmp_path / "good1.md").write_text("Good file one")
        (tmp_path / "good2.md").write_text("Good file two")
        # Raw bytes starting with NUL: this is the file expected to fail.
        (tmp_path / "bad.md").write_bytes(b"\x00binary garbage")

        count, failed, _, err = await index_local_folder(
            session=db_session,
            search_space_id=db_search_space.id,
            user_id=str(db_user.id),
            folder_path=str(tmp_path),
            folder_name="test-folder",
            target_file_paths=[
                str(tmp_path / "good1.md"),
                str(tmp_path / "bad.md"),
                str(tmp_path / "good2.md"),
            ],
        )

        assert count == 2
        assert failed == 1
        assert err is not None

        docs = await self._local_folder_docs(db_session, db_search_space.id)
        assert len(docs) == 2
        assert {d.title for d in docs} == {"good1.md", "good2.md"}
|
||||||
|
|
||||||
|
|
||||||
# ====================================================================
|
# ====================================================================
|
||||||
# Tier 5: Pipeline Integration (P1)
|
# Tier 5: Pipeline Integration (P1)
|
||||||
# ====================================================================
|
# ====================================================================
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue