mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-30 19:36:25 +02:00
test: add unit tests for Dropbox integration, covering delta sync methods, file type filtering, and re-authentication behavior
This commit is contained in:
parent
b5a15b7681
commit
caca491774
7 changed files with 843 additions and 0 deletions
|
|
@ -8,6 +8,10 @@ import pytest
|
|||
from app.db import DocumentType
|
||||
from app.tasks.connector_indexers.dropbox_indexer import (
|
||||
_download_files_parallel,
|
||||
_index_full_scan,
|
||||
_index_selected_files,
|
||||
_index_with_delta_sync,
|
||||
index_dropbox_files,
|
||||
)
|
||||
|
||||
pytestmark = pytest.mark.unit
|
||||
|
|
@ -234,3 +238,544 @@ async def test_heartbeat_fires_during_parallel_downloads(
|
|||
assert len(docs) == 3
|
||||
assert failed == 0
|
||||
assert len(heartbeat_calls) >= 1, "Heartbeat should have fired at least once"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# D1-D2: _index_full_scan tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _folder_dict(name: str) -> dict:
|
||||
return {".tag": "folder", "name": name}
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def full_scan_mocks(mock_dropbox_client, monkeypatch):
|
||||
"""Wire up mocks for _index_full_scan in isolation."""
|
||||
import app.tasks.connector_indexers.dropbox_indexer as _mod
|
||||
|
||||
mock_session = AsyncMock()
|
||||
mock_task_logger = MagicMock()
|
||||
mock_task_logger.log_task_progress = AsyncMock()
|
||||
mock_log_entry = MagicMock()
|
||||
|
||||
skip_results: dict[str, tuple[bool, str | None]] = {}
|
||||
|
||||
async def _fake_skip(session, file, search_space_id):
|
||||
from app.connectors.dropbox.file_types import should_skip_file as _skip
|
||||
if _skip(file):
|
||||
return True, "folder/non-downloadable"
|
||||
return skip_results.get(file.get("id", ""), (False, None))
|
||||
|
||||
monkeypatch.setattr(_mod, "_should_skip_file", _fake_skip)
|
||||
|
||||
download_and_index_mock = AsyncMock(return_value=(0, 0))
|
||||
monkeypatch.setattr(_mod, "_download_and_index", download_and_index_mock)
|
||||
|
||||
return {
|
||||
"dropbox_client": mock_dropbox_client,
|
||||
"session": mock_session,
|
||||
"task_logger": mock_task_logger,
|
||||
"log_entry": mock_log_entry,
|
||||
"skip_results": skip_results,
|
||||
"download_and_index_mock": download_and_index_mock,
|
||||
}
|
||||
|
||||
|
||||
async def _run_full_scan(mocks, monkeypatch, page_files, *, max_files=500):
|
||||
import app.tasks.connector_indexers.dropbox_indexer as _mod
|
||||
|
||||
monkeypatch.setattr(
|
||||
_mod,
|
||||
"get_files_in_folder",
|
||||
AsyncMock(return_value=(page_files, None)),
|
||||
)
|
||||
return await _index_full_scan(
|
||||
mocks["dropbox_client"],
|
||||
mocks["session"],
|
||||
_CONNECTOR_ID,
|
||||
_SEARCH_SPACE_ID,
|
||||
_USER_ID,
|
||||
"",
|
||||
"Root",
|
||||
mocks["task_logger"],
|
||||
mocks["log_entry"],
|
||||
max_files,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
|
||||
async def test_full_scan_three_phase_counts(full_scan_mocks, monkeypatch):
|
||||
"""Skipped files excluded, renames counted as indexed, new files downloaded."""
|
||||
page_files = [
|
||||
_folder_dict("SubFolder"),
|
||||
_make_file_dict("skip1", "unchanged.txt"),
|
||||
_make_file_dict("rename1", "renamed.txt"),
|
||||
_make_file_dict("new1", "new1.txt"),
|
||||
_make_file_dict("new2", "new2.txt"),
|
||||
]
|
||||
|
||||
full_scan_mocks["skip_results"]["skip1"] = (True, "unchanged")
|
||||
full_scan_mocks["skip_results"]["rename1"] = (
|
||||
True,
|
||||
"File renamed: 'old' -> 'renamed.txt'",
|
||||
)
|
||||
|
||||
full_scan_mocks["download_and_index_mock"].return_value = (2, 0)
|
||||
|
||||
indexed, skipped = await _run_full_scan(
|
||||
full_scan_mocks, monkeypatch, page_files
|
||||
)
|
||||
|
||||
assert indexed == 3 # 1 renamed + 2 from batch
|
||||
assert skipped == 2 # 1 folder + 1 unchanged
|
||||
|
||||
call_args = full_scan_mocks["download_and_index_mock"].call_args
|
||||
call_files = call_args[0][2]
|
||||
assert len(call_files) == 2
|
||||
assert {f["id"] for f in call_files} == {"new1", "new2"}
|
||||
|
||||
|
||||
async def test_full_scan_respects_max_files(full_scan_mocks, monkeypatch):
|
||||
"""Only max_files non-folder items are considered."""
|
||||
page_files = [_make_file_dict(f"f{i}", f"file{i}.txt") for i in range(10)]
|
||||
|
||||
full_scan_mocks["download_and_index_mock"].return_value = (3, 0)
|
||||
|
||||
await _run_full_scan(full_scan_mocks, monkeypatch, page_files, max_files=3)
|
||||
|
||||
call_files = full_scan_mocks["download_and_index_mock"].call_args[0][2]
|
||||
assert len(call_files) == 3
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# D3-D5: _index_selected_files tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def selected_files_mocks(mock_dropbox_client, monkeypatch):
|
||||
"""Wire up mocks for _index_selected_files tests."""
|
||||
import app.tasks.connector_indexers.dropbox_indexer as _mod
|
||||
|
||||
mock_session = AsyncMock()
|
||||
|
||||
get_file_results: dict[str, tuple[dict | None, str | None]] = {}
|
||||
|
||||
async def _fake_get_file(client, path):
|
||||
return get_file_results.get(path, (None, f"Not configured: {path}"))
|
||||
|
||||
monkeypatch.setattr(_mod, "get_file_by_path", _fake_get_file)
|
||||
|
||||
skip_results: dict[str, tuple[bool, str | None]] = {}
|
||||
|
||||
async def _fake_skip(session, file, search_space_id):
|
||||
return skip_results.get(file["id"], (False, None))
|
||||
|
||||
monkeypatch.setattr(_mod, "_should_skip_file", _fake_skip)
|
||||
|
||||
download_and_index_mock = AsyncMock(return_value=(0, 0))
|
||||
monkeypatch.setattr(_mod, "_download_and_index", download_and_index_mock)
|
||||
|
||||
return {
|
||||
"dropbox_client": mock_dropbox_client,
|
||||
"session": mock_session,
|
||||
"get_file_results": get_file_results,
|
||||
"skip_results": skip_results,
|
||||
"download_and_index_mock": download_and_index_mock,
|
||||
}
|
||||
|
||||
|
||||
async def _run_selected(mocks, file_tuples):
|
||||
return await _index_selected_files(
|
||||
mocks["dropbox_client"],
|
||||
mocks["session"],
|
||||
file_tuples,
|
||||
connector_id=_CONNECTOR_ID,
|
||||
search_space_id=_SEARCH_SPACE_ID,
|
||||
user_id=_USER_ID,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
|
||||
async def test_selected_files_single_file_indexed(selected_files_mocks):
|
||||
selected_files_mocks["get_file_results"]["/report.pdf"] = (
|
||||
_make_file_dict("f1", "report.pdf"),
|
||||
None,
|
||||
)
|
||||
selected_files_mocks["download_and_index_mock"].return_value = (1, 0)
|
||||
|
||||
indexed, skipped, errors = await _run_selected(
|
||||
selected_files_mocks,
|
||||
[("/report.pdf", "report.pdf")],
|
||||
)
|
||||
|
||||
assert indexed == 1
|
||||
assert skipped == 0
|
||||
assert errors == []
|
||||
|
||||
|
||||
async def test_selected_files_fetch_failure_isolation(selected_files_mocks):
|
||||
selected_files_mocks["get_file_results"]["/first.txt"] = (
|
||||
_make_file_dict("f1", "first.txt"),
|
||||
None,
|
||||
)
|
||||
selected_files_mocks["get_file_results"]["/mid.txt"] = (None, "HTTP 404")
|
||||
selected_files_mocks["get_file_results"]["/third.txt"] = (
|
||||
_make_file_dict("f3", "third.txt"),
|
||||
None,
|
||||
)
|
||||
selected_files_mocks["download_and_index_mock"].return_value = (2, 0)
|
||||
|
||||
indexed, skipped, errors = await _run_selected(
|
||||
selected_files_mocks,
|
||||
[("/first.txt", "first.txt"), ("/mid.txt", "mid.txt"), ("/third.txt", "third.txt")],
|
||||
)
|
||||
|
||||
assert indexed == 2
|
||||
assert skipped == 0
|
||||
assert len(errors) == 1
|
||||
assert "mid.txt" in errors[0]
|
||||
|
||||
|
||||
async def test_selected_files_skip_rename_counting(selected_files_mocks):
|
||||
for path, fid, fname in [
|
||||
("/unchanged.txt", "s1", "unchanged.txt"),
|
||||
("/renamed.txt", "r1", "renamed.txt"),
|
||||
("/new1.txt", "n1", "new1.txt"),
|
||||
("/new2.txt", "n2", "new2.txt"),
|
||||
]:
|
||||
selected_files_mocks["get_file_results"][path] = (
|
||||
_make_file_dict(fid, fname),
|
||||
None,
|
||||
)
|
||||
|
||||
selected_files_mocks["skip_results"]["s1"] = (True, "unchanged")
|
||||
selected_files_mocks["skip_results"]["r1"] = (
|
||||
True,
|
||||
"File renamed: 'old' -> 'renamed.txt'",
|
||||
)
|
||||
selected_files_mocks["download_and_index_mock"].return_value = (2, 0)
|
||||
|
||||
indexed, skipped, errors = await _run_selected(
|
||||
selected_files_mocks,
|
||||
[
|
||||
("/unchanged.txt", "unchanged.txt"),
|
||||
("/renamed.txt", "renamed.txt"),
|
||||
("/new1.txt", "new1.txt"),
|
||||
("/new2.txt", "new2.txt"),
|
||||
],
|
||||
)
|
||||
|
||||
assert indexed == 3 # 1 renamed + 2 batch
|
||||
assert skipped == 1
|
||||
assert errors == []
|
||||
|
||||
mock = selected_files_mocks["download_and_index_mock"]
|
||||
call_files = mock.call_args[0][2]
|
||||
assert len(call_files) == 2
|
||||
assert {f["id"] for f in call_files} == {"n1", "n2"}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# E1-E4: _index_with_delta_sync tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def test_delta_sync_deletions_call_remove_document(monkeypatch):
|
||||
"""E1: deleted entries are processed via _remove_document."""
|
||||
import app.tasks.connector_indexers.dropbox_indexer as _mod
|
||||
|
||||
entries = [
|
||||
{".tag": "deleted", "name": "gone.txt", "path_lower": "/gone.txt", "id": "id:del1"},
|
||||
{".tag": "deleted", "name": "also_gone.pdf", "path_lower": "/also_gone.pdf", "id": "id:del2"},
|
||||
]
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.get_changes = AsyncMock(return_value=(entries, "new-cursor", None))
|
||||
|
||||
remove_calls: list[str] = []
|
||||
|
||||
async def _fake_remove(session, file_id, search_space_id):
|
||||
remove_calls.append(file_id)
|
||||
|
||||
monkeypatch.setattr(_mod, "_remove_document", _fake_remove)
|
||||
monkeypatch.setattr(_mod, "_download_and_index", AsyncMock(return_value=(0, 0)))
|
||||
|
||||
mock_task_logger = MagicMock()
|
||||
mock_task_logger.log_task_progress = AsyncMock()
|
||||
|
||||
indexed, skipped, cursor = await _index_with_delta_sync(
|
||||
mock_client,
|
||||
AsyncMock(),
|
||||
_CONNECTOR_ID,
|
||||
_SEARCH_SPACE_ID,
|
||||
_USER_ID,
|
||||
"old-cursor",
|
||||
mock_task_logger,
|
||||
MagicMock(),
|
||||
max_files=500,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
assert sorted(remove_calls) == ["id:del1", "id:del2"]
|
||||
assert cursor == "new-cursor"
|
||||
|
||||
|
||||
async def test_delta_sync_upserts_filtered_and_downloaded(monkeypatch):
|
||||
"""E2: modified/new file entries go through skip filter then download+index."""
|
||||
import app.tasks.connector_indexers.dropbox_indexer as _mod
|
||||
|
||||
entries = [
|
||||
_make_file_dict("mod1", "modified1.txt"),
|
||||
_make_file_dict("mod2", "modified2.txt"),
|
||||
]
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.get_changes = AsyncMock(return_value=(entries, "cursor-v2", None))
|
||||
|
||||
monkeypatch.setattr(_mod, "_should_skip_file", AsyncMock(return_value=(False, None)))
|
||||
|
||||
download_mock = AsyncMock(return_value=(2, 0))
|
||||
monkeypatch.setattr(_mod, "_download_and_index", download_mock)
|
||||
|
||||
mock_task_logger = MagicMock()
|
||||
mock_task_logger.log_task_progress = AsyncMock()
|
||||
|
||||
indexed, skipped, cursor = await _index_with_delta_sync(
|
||||
mock_client,
|
||||
AsyncMock(),
|
||||
_CONNECTOR_ID,
|
||||
_SEARCH_SPACE_ID,
|
||||
_USER_ID,
|
||||
"cursor-v1",
|
||||
mock_task_logger,
|
||||
MagicMock(),
|
||||
max_files=500,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
assert indexed == 2
|
||||
assert skipped == 0
|
||||
assert cursor == "cursor-v2"
|
||||
|
||||
downloaded_files = download_mock.call_args[0][2]
|
||||
assert len(downloaded_files) == 2
|
||||
assert {f["id"] for f in downloaded_files} == {"mod1", "mod2"}
|
||||
|
||||
|
||||
async def test_delta_sync_mix_deletions_and_upserts(monkeypatch):
|
||||
"""E3: deletions processed, then remaining upserts filtered and indexed."""
|
||||
import app.tasks.connector_indexers.dropbox_indexer as _mod
|
||||
|
||||
entries = [
|
||||
{".tag": "deleted", "name": "removed.txt", "path_lower": "/removed.txt", "id": "id:del1"},
|
||||
{".tag": "deleted", "name": "trashed.pdf", "path_lower": "/trashed.pdf", "id": "id:del2"},
|
||||
_make_file_dict("mod1", "updated.txt"),
|
||||
_make_file_dict("new1", "brandnew.docx"),
|
||||
]
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.get_changes = AsyncMock(return_value=(entries, "final-cursor", None))
|
||||
|
||||
remove_calls: list[str] = []
|
||||
|
||||
async def _fake_remove(session, file_id, search_space_id):
|
||||
remove_calls.append(file_id)
|
||||
|
||||
monkeypatch.setattr(_mod, "_remove_document", _fake_remove)
|
||||
monkeypatch.setattr(_mod, "_should_skip_file", AsyncMock(return_value=(False, None)))
|
||||
|
||||
download_mock = AsyncMock(return_value=(2, 0))
|
||||
monkeypatch.setattr(_mod, "_download_and_index", download_mock)
|
||||
|
||||
mock_task_logger = MagicMock()
|
||||
mock_task_logger.log_task_progress = AsyncMock()
|
||||
|
||||
indexed, skipped, cursor = await _index_with_delta_sync(
|
||||
mock_client,
|
||||
AsyncMock(),
|
||||
_CONNECTOR_ID,
|
||||
_SEARCH_SPACE_ID,
|
||||
_USER_ID,
|
||||
"old-cursor",
|
||||
mock_task_logger,
|
||||
MagicMock(),
|
||||
max_files=500,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
assert sorted(remove_calls) == ["id:del1", "id:del2"]
|
||||
assert indexed == 2
|
||||
assert skipped == 0
|
||||
assert cursor == "final-cursor"
|
||||
|
||||
downloaded_files = download_mock.call_args[0][2]
|
||||
assert {f["id"] for f in downloaded_files} == {"mod1", "new1"}
|
||||
|
||||
|
||||
async def test_delta_sync_returns_new_cursor(monkeypatch):
|
||||
"""E4: the new cursor from the API response is returned."""
|
||||
import app.tasks.connector_indexers.dropbox_indexer as _mod
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.get_changes = AsyncMock(return_value=([], "brand-new-cursor-xyz", None))
|
||||
|
||||
monkeypatch.setattr(_mod, "_download_and_index", AsyncMock(return_value=(0, 0)))
|
||||
|
||||
mock_task_logger = MagicMock()
|
||||
mock_task_logger.log_task_progress = AsyncMock()
|
||||
|
||||
indexed, skipped, cursor = await _index_with_delta_sync(
|
||||
mock_client,
|
||||
AsyncMock(),
|
||||
_CONNECTOR_ID,
|
||||
_SEARCH_SPACE_ID,
|
||||
_USER_ID,
|
||||
"old-cursor",
|
||||
mock_task_logger,
|
||||
MagicMock(),
|
||||
max_files=500,
|
||||
enable_summary=True,
|
||||
)
|
||||
|
||||
assert cursor == "brand-new-cursor-xyz"
|
||||
assert indexed == 0
|
||||
assert skipped == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# F1-F3: index_dropbox_files orchestrator tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def orchestrator_mocks(monkeypatch):
|
||||
"""Wire up mocks for index_dropbox_files orchestrator tests."""
|
||||
import app.tasks.connector_indexers.dropbox_indexer as _mod
|
||||
|
||||
mock_connector = MagicMock()
|
||||
mock_connector.config = {"_token_encrypted": False}
|
||||
mock_connector.last_indexed_at = None
|
||||
mock_connector.enable_summary = True
|
||||
|
||||
monkeypatch.setattr(
|
||||
_mod,
|
||||
"get_connector_by_id",
|
||||
AsyncMock(return_value=mock_connector),
|
||||
)
|
||||
|
||||
mock_task_logger = MagicMock()
|
||||
mock_task_logger.log_task_start = AsyncMock(return_value=MagicMock())
|
||||
mock_task_logger.log_task_progress = AsyncMock()
|
||||
mock_task_logger.log_task_success = AsyncMock()
|
||||
mock_task_logger.log_task_failure = AsyncMock()
|
||||
monkeypatch.setattr(
|
||||
_mod, "TaskLoggingService", MagicMock(return_value=mock_task_logger)
|
||||
)
|
||||
|
||||
monkeypatch.setattr(_mod, "update_connector_last_indexed", AsyncMock())
|
||||
|
||||
full_scan_mock = AsyncMock(return_value=(5, 2))
|
||||
monkeypatch.setattr(_mod, "_index_full_scan", full_scan_mock)
|
||||
|
||||
delta_sync_mock = AsyncMock(return_value=(3, 1, "delta-cursor-new"))
|
||||
monkeypatch.setattr(_mod, "_index_with_delta_sync", delta_sync_mock)
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.get_latest_cursor = AsyncMock(return_value=("latest-cursor-abc", None))
|
||||
monkeypatch.setattr(
|
||||
_mod, "DropboxClient", MagicMock(return_value=mock_client)
|
||||
)
|
||||
|
||||
return {
|
||||
"connector": mock_connector,
|
||||
"full_scan_mock": full_scan_mock,
|
||||
"delta_sync_mock": delta_sync_mock,
|
||||
"mock_client": mock_client,
|
||||
}
|
||||
|
||||
|
||||
async def test_orchestrator_uses_delta_sync_when_cursor_and_last_indexed(
|
||||
orchestrator_mocks,
|
||||
):
|
||||
"""F1: with cursor + last_indexed_at + use_delta_sync, calls delta sync."""
|
||||
from datetime import UTC, datetime
|
||||
|
||||
connector = orchestrator_mocks["connector"]
|
||||
connector.config = {
|
||||
"_token_encrypted": False,
|
||||
"folder_cursors": {"/docs": "saved-cursor-123"},
|
||||
}
|
||||
connector.last_indexed_at = datetime(2026, 1, 1, tzinfo=UTC)
|
||||
|
||||
mock_session = AsyncMock()
|
||||
mock_session.commit = AsyncMock()
|
||||
|
||||
indexed, skipped, error = await index_dropbox_files(
|
||||
mock_session,
|
||||
_CONNECTOR_ID,
|
||||
_SEARCH_SPACE_ID,
|
||||
_USER_ID,
|
||||
{
|
||||
"folders": [{"path": "/docs", "name": "Docs"}],
|
||||
"files": [],
|
||||
"indexing_options": {"use_delta_sync": True},
|
||||
},
|
||||
)
|
||||
|
||||
assert error is None
|
||||
orchestrator_mocks["delta_sync_mock"].assert_called_once()
|
||||
orchestrator_mocks["full_scan_mock"].assert_not_called()
|
||||
|
||||
|
||||
async def test_orchestrator_falls_back_to_full_scan_without_cursor(
|
||||
orchestrator_mocks,
|
||||
):
|
||||
"""F2: without cursor, falls back to full scan."""
|
||||
connector = orchestrator_mocks["connector"]
|
||||
connector.config = {"_token_encrypted": False}
|
||||
connector.last_indexed_at = None
|
||||
|
||||
mock_session = AsyncMock()
|
||||
mock_session.commit = AsyncMock()
|
||||
|
||||
indexed, skipped, error = await index_dropbox_files(
|
||||
mock_session,
|
||||
_CONNECTOR_ID,
|
||||
_SEARCH_SPACE_ID,
|
||||
_USER_ID,
|
||||
{
|
||||
"folders": [{"path": "/docs", "name": "Docs"}],
|
||||
"files": [],
|
||||
"indexing_options": {"use_delta_sync": True},
|
||||
},
|
||||
)
|
||||
|
||||
assert error is None
|
||||
orchestrator_mocks["full_scan_mock"].assert_called_once()
|
||||
orchestrator_mocks["delta_sync_mock"].assert_not_called()
|
||||
|
||||
|
||||
async def test_orchestrator_persists_cursor_after_sync(orchestrator_mocks):
|
||||
"""F3: after sync, persists new cursor to connector config."""
|
||||
connector = orchestrator_mocks["connector"]
|
||||
connector.config = {"_token_encrypted": False}
|
||||
connector.last_indexed_at = None
|
||||
|
||||
mock_session = AsyncMock()
|
||||
mock_session.commit = AsyncMock()
|
||||
|
||||
await index_dropbox_files(
|
||||
mock_session,
|
||||
_CONNECTOR_ID,
|
||||
_SEARCH_SPACE_ID,
|
||||
_USER_ID,
|
||||
{
|
||||
"folders": [{"path": "/docs", "name": "Docs"}],
|
||||
"files": [],
|
||||
},
|
||||
)
|
||||
|
||||
assert "folder_cursors" in connector.config
|
||||
assert connector.config["folder_cursors"]["/docs"] == "latest-cursor-abc"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue