mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-28 02:23:53 +02:00
feat: implement parallel file downloading and indexing in Google Drive indexer
- Added `_download_files_parallel` function to download files from Google Drive concurrently, improving efficiency in document processing.
- Introduced `_download_and_index` function to handle the parallel download and indexing phases, streamlining the overall workflow.
- Updated the `_index_full_scan` and `_index_with_delta_sync` methods to use the new parallel downloading functionality, improving performance.
- Added unit tests to validate the new parallel downloading and indexing logic, ensuring robustness and correct error handling during document processing.
This commit is contained in:
parent
bd6e335cb3
commit
c016962064
4 changed files with 652 additions and 35 deletions
34
surfsense_backend/tests/unit/connector_indexers/conftest.py
Normal file
34
surfsense_backend/tests/unit/connector_indexers/conftest.py
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
"""Pre-register the connector_indexers package to bypass a circular import
|
||||
in its ``__init__.py`` (airtable_indexer -> routes -> connector_indexers).
|
||||
|
||||
This lets tests import individual indexer modules (e.g.
|
||||
``google_drive_indexer``) without triggering the full package init.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import types
|
||||
from pathlib import Path
|
||||
|
||||
# Backend root directory. This conftest lives at
# <backend>/tests/unit/connector_indexers/conftest.py, so parents[0] is the
# file's own directory and parents[3] is three levels above it.
_BACKEND = Path(__file__).resolve().parents[3]
|
||||
|
||||
|
||||
def _stub_package(dotted: str, fs_dir: Path) -> None:
    """Register an empty stand-in package named ``dotted`` in ``sys.modules``.

    If ``dotted`` is not already present in ``sys.modules``, a bare
    ``types.ModuleType`` is created for it with ``__path__`` pointing at
    ``fs_dir`` and ``__package__`` set to ``dotted``. An existing entry is
    left untouched, so the package's real ``__init__.py`` is never executed
    by this helper.

    Whether or not a stub was created, the module registered under
    ``dotted`` is then bound as an attribute on its parent package, provided
    the parent is itself already present in ``sys.modules``.

    Args:
        dotted: Fully qualified package name, e.g. ``"app.tasks"``.
        fs_dir: Directory on disk that holds the package's submodules.
    """
    if dotted not in sys.modules:
        stub = types.ModuleType(dotted)
        # A module that carries ``__path__`` is treated as a package, which
        # lets the import system search ``fs_dir`` for real submodules.
        stub.__path__ = [str(fs_dir)]
        stub.__package__ = dotted
        sys.modules[dotted] = stub

    parent_dotted, _, leaf = dotted.rpartition(".")
    if not parent_dotted:
        # Top-level name: there is no parent package to attach to.
        return

    parent = sys.modules.get(parent_dotted)
    if parent is not None:
        # Mirror what a normal import does: expose the child on its parent.
        setattr(parent, leaf, sys.modules[dotted])
|
||||
|
||||
|
||||
# Register the parent package before the child: ``_stub_package`` only binds
# a package onto its parent when that parent is already in ``sys.modules``.
_stub_package("app.tasks", _BACKEND / "app" / "tasks")
_stub_package(
    "app.tasks.connector_indexers",
    _BACKEND / "app" / "tasks" / "connector_indexers",
)
|
||||
Loading…
Add table
Add a link
Reference in a new issue