mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-19 18:45:15 +02:00
Merge pull request #1390 from AnishSarkar22/fix/backend-tests
fix: unit and integration tests
This commit is contained in:
commit
3c27fe688a
8 changed files with 197 additions and 82 deletions
|
|
@ -350,6 +350,25 @@ def _drive_list_files(args: dict[str, Any]) -> dict[str, Any]:
|
||||||
folder id and serve the matching fixture list.
|
folder id and serve the matching fixture list.
|
||||||
"""
|
"""
|
||||||
q = args.get("q", "")
|
q = args.get("q", "")
|
||||||
|
if "in owners" in q:
|
||||||
|
return {
|
||||||
|
"data": {
|
||||||
|
"files": [
|
||||||
|
{
|
||||||
|
"id": "fake-file-owner-probe",
|
||||||
|
"name": "owner-probe",
|
||||||
|
"owners": [
|
||||||
|
{
|
||||||
|
"me": True,
|
||||||
|
"emailAddress": "e2e-fake@surfsense.example",
|
||||||
|
}
|
||||||
|
],
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"nextPageToken": None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
folder_id = "root"
|
folder_id = "root"
|
||||||
if "in parents" in q:
|
if "in parents" in q:
|
||||||
# q looks like: '<folder_id>' in parents and trashed = false ...
|
# q looks like: '<folder_id>' in parents and trashed = false ...
|
||||||
|
|
|
||||||
|
|
@ -1,13 +1,11 @@
|
||||||
"""Composio route integration fixtures.
|
"""Composio route integration fixtures.
|
||||||
|
|
||||||
The sys.modules hijack happens at module import time, before importing
|
The `composio` sys.modules hijack lives in the parent integration conftest
|
||||||
app.app, so production `from composio import Composio` bindings resolve to
|
so it runs before any sibling suite imports `app.routes`.
|
||||||
the strict E2E fake in this pytest process too.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import sys
|
|
||||||
from collections.abc import AsyncGenerator
|
from collections.abc import AsyncGenerator
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
|
|
@ -16,19 +14,15 @@ import pytest_asyncio
|
||||||
from httpx import ASGITransport
|
from httpx import ASGITransport
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from tests.e2e.fakes import composio_module as _fake_composio
|
from app.app import app, limiter
|
||||||
|
from app.config import config
|
||||||
sys.modules["composio"] = _fake_composio
|
from app.db import (
|
||||||
|
|
||||||
from app.app import app, limiter # noqa: E402
|
|
||||||
from app.config import config # noqa: E402
|
|
||||||
from app.db import ( # noqa: E402
|
|
||||||
SearchSourceConnector,
|
SearchSourceConnector,
|
||||||
SearchSourceConnectorType,
|
SearchSourceConnectorType,
|
||||||
User,
|
User,
|
||||||
get_async_session,
|
get_async_session,
|
||||||
)
|
)
|
||||||
from app.users import current_active_user # noqa: E402
|
from app.users import current_active_user
|
||||||
|
|
||||||
pytestmark = pytest.mark.integration
|
pytestmark = pytest.mark.integration
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,5 @@
|
||||||
|
import importlib
|
||||||
|
import sys
|
||||||
import uuid
|
import uuid
|
||||||
from unittest.mock import AsyncMock, MagicMock
|
from unittest.mock import AsyncMock, MagicMock
|
||||||
|
|
||||||
|
|
@ -7,17 +9,27 @@ from sqlalchemy import text
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
|
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
|
||||||
from sqlalchemy.pool import NullPool
|
from sqlalchemy.pool import NullPool
|
||||||
|
|
||||||
from app.config import config as app_config
|
# Hijack `composio` before any `from app.*` import; the `from composio import
|
||||||
from app.db import (
|
# Composio` in app.services.composio_service binds once at first import.
|
||||||
Base,
|
from tests.e2e.fakes import composio_module as _fake_composio
|
||||||
DocumentType,
|
|
||||||
SearchSourceConnector,
|
sys.modules["composio"] = _fake_composio
|
||||||
SearchSourceConnectorType,
|
|
||||||
SearchSpace,
|
app_config = importlib.import_module("app.config").config
|
||||||
User,
|
app_db = importlib.import_module("app.db")
|
||||||
)
|
Base = app_db.Base
|
||||||
from app.indexing_pipeline.connector_document import ConnectorDocument
|
DocumentType = app_db.DocumentType
|
||||||
from tests.conftest import TEST_DATABASE_URL
|
SearchSourceConnector = app_db.SearchSourceConnector
|
||||||
|
SearchSourceConnectorType = app_db.SearchSourceConnectorType
|
||||||
|
SearchSpace = app_db.SearchSpace
|
||||||
|
User = app_db.User
|
||||||
|
ConnectorDocument = importlib.import_module(
|
||||||
|
"app.indexing_pipeline.connector_document"
|
||||||
|
).ConnectorDocument
|
||||||
|
create_default_roles_and_membership = importlib.import_module(
|
||||||
|
"app.routes.search_spaces_routes"
|
||||||
|
).create_default_roles_and_membership
|
||||||
|
TEST_DATABASE_URL = importlib.import_module("tests.conftest").TEST_DATABASE_URL
|
||||||
|
|
||||||
_EMBEDDING_DIM = app_config.embedding_model_instance.dimension
|
_EMBEDDING_DIM = app_config.embedding_model_instance.dimension
|
||||||
|
|
||||||
|
|
@ -105,6 +117,9 @@ async def db_search_space(db_session: AsyncSession, db_user: User) -> SearchSpac
|
||||||
)
|
)
|
||||||
db_session.add(space)
|
db_session.add(space)
|
||||||
await db_session.flush()
|
await db_session.flush()
|
||||||
|
# Mirror POST /searchspaces so routes guarded by check_permission find a membership.
|
||||||
|
await create_default_roles_and_membership(db_session, space.id, db_user.id)
|
||||||
|
await db_session.flush()
|
||||||
return space
|
return space
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -145,6 +160,10 @@ def patched_chunk_text(monkeypatch) -> MagicMock:
|
||||||
"app.indexing_pipeline.indexing_pipeline_service.chunk_text",
|
"app.indexing_pipeline.indexing_pipeline_service.chunk_text",
|
||||||
mock,
|
mock,
|
||||||
)
|
)
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"app.indexing_pipeline.indexing_pipeline_service.chunk_text_hybrid",
|
||||||
|
mock,
|
||||||
|
)
|
||||||
return mock
|
return mock
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -204,6 +204,7 @@ class TestStripeCheckoutSessionCreation:
|
||||||
assert (
|
assert (
|
||||||
fake_client.last_params["success_url"]
|
fake_client.last_params["success_url"]
|
||||||
== f"http://localhost:3000/dashboard/{search_space_id}/purchase-success"
|
== f"http://localhost:3000/dashboard/{search_space_id}/purchase-success"
|
||||||
|
"?session_id={CHECKOUT_SESSION_ID}"
|
||||||
)
|
)
|
||||||
assert (
|
assert (
|
||||||
fake_client.last_params["cancel_url"]
|
fake_client.last_params["cancel_url"]
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ mocked at their system boundaries.
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from unittest.mock import AsyncMock, MagicMock, patch
|
from unittest.mock import ANY, AsyncMock, MagicMock, patch
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import pytest_asyncio
|
import pytest_asyncio
|
||||||
|
|
@ -25,6 +25,7 @@ pytestmark = pytest.mark.integration
|
||||||
|
|
||||||
_COMPOSIO_ACCOUNT_ID = "composio-calendar-test-789"
|
_COMPOSIO_ACCOUNT_ID = "composio-calendar-test-789"
|
||||||
_INDEXER_MODULE = "app.tasks.connector_indexers.google_calendar_indexer"
|
_INDEXER_MODULE = "app.tasks.connector_indexers.google_calendar_indexer"
|
||||||
|
_GET_ACCESS_TOKEN = "app.services.composio_service.ComposioService.get_access_token"
|
||||||
|
|
||||||
|
|
||||||
@pytest_asyncio.fixture
|
@pytest_asyncio.fixture
|
||||||
|
|
@ -69,32 +70,29 @@ async def native_calendar(async_engine):
|
||||||
await cleanup_space(async_engine, data["search_space_id"])
|
await cleanup_space(async_engine, data["search_space_id"])
|
||||||
|
|
||||||
|
|
||||||
|
@patch(_GET_ACCESS_TOKEN)
|
||||||
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
|
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
|
||||||
@patch(f"{_INDEXER_MODULE}.GoogleCalendarConnector")
|
@patch(f"{_INDEXER_MODULE}.GoogleCalendarConnector")
|
||||||
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
|
@patch(f"{_INDEXER_MODULE}.ComposioService")
|
||||||
async def test_composio_calendar_uses_composio_credentials(
|
async def test_composio_calendar_uses_composio_service(
|
||||||
mock_build_creds,
|
mock_composio_service_cls,
|
||||||
mock_cal_cls,
|
mock_cal_cls,
|
||||||
mock_tl_cls,
|
mock_tl_cls,
|
||||||
|
mock_get_access_token,
|
||||||
async_engine,
|
async_engine,
|
||||||
composio_calendar,
|
composio_calendar,
|
||||||
):
|
):
|
||||||
"""Calendar indexer calls build_composio_credentials for a Composio connector."""
|
"""Calendar indexer uses Composio tools directly for a Composio connector."""
|
||||||
from app.tasks.connector_indexers.google_calendar_indexer import (
|
from app.tasks.connector_indexers.google_calendar_indexer import (
|
||||||
index_google_calendar_events,
|
index_google_calendar_events,
|
||||||
)
|
)
|
||||||
|
|
||||||
data = composio_calendar
|
data = composio_calendar
|
||||||
mock_creds = MagicMock(name="composio-creds")
|
mock_composio_service = MagicMock()
|
||||||
mock_build_creds.return_value = mock_creds
|
mock_composio_service.get_calendar_events = AsyncMock(return_value=([], None))
|
||||||
|
mock_composio_service_cls.return_value = mock_composio_service
|
||||||
mock_tl_cls.return_value = mock_task_logger()
|
mock_tl_cls.return_value = mock_task_logger()
|
||||||
|
|
||||||
mock_cal_instance = MagicMock()
|
|
||||||
mock_cal_instance.get_all_primary_calendar_events = AsyncMock(
|
|
||||||
return_value=([], None)
|
|
||||||
)
|
|
||||||
mock_cal_cls.return_value = mock_cal_instance
|
|
||||||
|
|
||||||
maker = make_session_factory(async_engine)
|
maker = make_session_factory(async_engine)
|
||||||
async with maker() as session:
|
async with maker() as session:
|
||||||
await index_google_calendar_events(
|
await index_google_calendar_events(
|
||||||
|
|
@ -104,17 +102,25 @@ async def test_composio_calendar_uses_composio_credentials(
|
||||||
user_id=data["user_id"],
|
user_id=data["user_id"],
|
||||||
)
|
)
|
||||||
|
|
||||||
mock_build_creds.assert_called_once_with(_COMPOSIO_ACCOUNT_ID)
|
mock_composio_service_cls.assert_called_once()
|
||||||
mock_cal_cls.assert_called_once()
|
mock_composio_service.get_calendar_events.assert_called_once_with(
|
||||||
_, kwargs = mock_cal_cls.call_args
|
connected_account_id=_COMPOSIO_ACCOUNT_ID,
|
||||||
assert kwargs.get("credentials") is mock_creds
|
entity_id=f"surfsense_{data['user_id']}",
|
||||||
|
time_min=ANY,
|
||||||
|
time_max=ANY,
|
||||||
|
max_results=250,
|
||||||
|
)
|
||||||
|
mock_cal_cls.assert_not_called()
|
||||||
|
mock_get_access_token.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
|
@patch(_GET_ACCESS_TOKEN)
|
||||||
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
|
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
|
||||||
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
|
@patch(f"{_INDEXER_MODULE}.ComposioService")
|
||||||
async def test_composio_calendar_without_account_id_returns_error(
|
async def test_composio_calendar_without_account_id_returns_error(
|
||||||
mock_build_creds,
|
mock_composio_service_cls,
|
||||||
mock_tl_cls,
|
mock_tl_cls,
|
||||||
|
mock_get_access_token,
|
||||||
async_engine,
|
async_engine,
|
||||||
composio_calendar_no_id,
|
composio_calendar_no_id,
|
||||||
):
|
):
|
||||||
|
|
@ -138,20 +144,23 @@ async def test_composio_calendar_without_account_id_returns_error(
|
||||||
assert count == 0
|
assert count == 0
|
||||||
assert error is not None
|
assert error is not None
|
||||||
assert "composio" in error.lower()
|
assert "composio" in error.lower()
|
||||||
mock_build_creds.assert_not_called()
|
mock_composio_service_cls.assert_not_called()
|
||||||
|
mock_get_access_token.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
|
@patch(_GET_ACCESS_TOKEN)
|
||||||
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
|
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
|
||||||
|
@patch(f"{_INDEXER_MODULE}.ComposioService")
|
||||||
@patch(f"{_INDEXER_MODULE}.GoogleCalendarConnector")
|
@patch(f"{_INDEXER_MODULE}.GoogleCalendarConnector")
|
||||||
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
|
async def test_native_calendar_uses_google_calendar_connector(
|
||||||
async def test_native_calendar_does_not_use_composio_credentials(
|
|
||||||
mock_build_creds,
|
|
||||||
mock_cal_cls,
|
mock_cal_cls,
|
||||||
|
mock_composio_service_cls,
|
||||||
mock_tl_cls,
|
mock_tl_cls,
|
||||||
|
mock_get_access_token,
|
||||||
async_engine,
|
async_engine,
|
||||||
native_calendar,
|
native_calendar,
|
||||||
):
|
):
|
||||||
"""Calendar indexer does NOT call build_composio_credentials for a native connector."""
|
"""Native Calendar connector uses GoogleCalendarConnector with no Composio path."""
|
||||||
from app.tasks.connector_indexers.google_calendar_indexer import (
|
from app.tasks.connector_indexers.google_calendar_indexer import (
|
||||||
index_google_calendar_events,
|
index_google_calendar_events,
|
||||||
)
|
)
|
||||||
|
|
@ -174,4 +183,6 @@ async def test_native_calendar_does_not_use_composio_credentials(
|
||||||
user_id=data["user_id"],
|
user_id=data["user_id"],
|
||||||
)
|
)
|
||||||
|
|
||||||
mock_build_creds.assert_not_called()
|
mock_cal_cls.assert_called_once()
|
||||||
|
mock_composio_service_cls.assert_not_called()
|
||||||
|
mock_get_access_token.assert_not_called()
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ mocked at their system boundaries.
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from unittest.mock import AsyncMock, MagicMock, patch
|
from unittest.mock import ANY, AsyncMock, MagicMock, patch
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import pytest_asyncio
|
import pytest_asyncio
|
||||||
|
|
@ -25,6 +25,7 @@ pytestmark = pytest.mark.integration
|
||||||
|
|
||||||
_COMPOSIO_ACCOUNT_ID = "composio-gmail-test-456"
|
_COMPOSIO_ACCOUNT_ID = "composio-gmail-test-456"
|
||||||
_INDEXER_MODULE = "app.tasks.connector_indexers.google_gmail_indexer"
|
_INDEXER_MODULE = "app.tasks.connector_indexers.google_gmail_indexer"
|
||||||
|
_GET_ACCESS_TOKEN = "app.services.composio_service.ComposioService.get_access_token"
|
||||||
|
|
||||||
|
|
||||||
@pytest_asyncio.fixture
|
@pytest_asyncio.fixture
|
||||||
|
|
@ -69,30 +70,32 @@ async def native_gmail(async_engine):
|
||||||
await cleanup_space(async_engine, data["search_space_id"])
|
await cleanup_space(async_engine, data["search_space_id"])
|
||||||
|
|
||||||
|
|
||||||
|
@patch(_GET_ACCESS_TOKEN)
|
||||||
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
|
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
|
||||||
@patch(f"{_INDEXER_MODULE}.GoogleGmailConnector")
|
@patch(f"{_INDEXER_MODULE}.GoogleGmailConnector")
|
||||||
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
|
@patch(f"{_INDEXER_MODULE}.ComposioService")
|
||||||
async def test_composio_gmail_uses_composio_credentials(
|
async def test_composio_gmail_uses_composio_service(
|
||||||
mock_build_creds,
|
mock_composio_service_cls,
|
||||||
mock_gmail_cls,
|
mock_gmail_cls,
|
||||||
mock_tl_cls,
|
mock_tl_cls,
|
||||||
|
mock_get_access_token,
|
||||||
async_engine,
|
async_engine,
|
||||||
composio_gmail,
|
composio_gmail,
|
||||||
):
|
):
|
||||||
"""Gmail indexer calls build_composio_credentials for a Composio connector."""
|
"""Gmail indexer uses Composio tools directly for a Composio connector."""
|
||||||
from app.tasks.connector_indexers.google_gmail_indexer import (
|
from app.tasks.connector_indexers.google_gmail_indexer import (
|
||||||
index_google_gmail_messages,
|
index_google_gmail_messages,
|
||||||
)
|
)
|
||||||
|
|
||||||
data = composio_gmail
|
data = composio_gmail
|
||||||
mock_creds = MagicMock(name="composio-creds")
|
mock_composio_service = MagicMock()
|
||||||
mock_build_creds.return_value = mock_creds
|
mock_composio_service.get_gmail_messages = AsyncMock(
|
||||||
|
return_value=([], None, None, None)
|
||||||
|
)
|
||||||
|
mock_composio_service.get_gmail_message_detail = AsyncMock(return_value=({}, None))
|
||||||
|
mock_composio_service_cls.return_value = mock_composio_service
|
||||||
mock_tl_cls.return_value = mock_task_logger()
|
mock_tl_cls.return_value = mock_task_logger()
|
||||||
|
|
||||||
mock_gmail_instance = MagicMock()
|
|
||||||
mock_gmail_instance.get_recent_messages = AsyncMock(return_value=([], None))
|
|
||||||
mock_gmail_cls.return_value = mock_gmail_instance
|
|
||||||
|
|
||||||
maker = make_session_factory(async_engine)
|
maker = make_session_factory(async_engine)
|
||||||
async with maker() as session:
|
async with maker() as session:
|
||||||
await index_google_gmail_messages(
|
await index_google_gmail_messages(
|
||||||
|
|
@ -102,17 +105,25 @@ async def test_composio_gmail_uses_composio_credentials(
|
||||||
user_id=data["user_id"],
|
user_id=data["user_id"],
|
||||||
)
|
)
|
||||||
|
|
||||||
mock_build_creds.assert_called_once_with(_COMPOSIO_ACCOUNT_ID)
|
mock_composio_service_cls.assert_called_once()
|
||||||
mock_gmail_cls.assert_called_once()
|
mock_composio_service.get_gmail_messages.assert_called_once_with(
|
||||||
args, _ = mock_gmail_cls.call_args
|
connected_account_id=_COMPOSIO_ACCOUNT_ID,
|
||||||
assert args[0] is mock_creds
|
entity_id=f"surfsense_{data['user_id']}",
|
||||||
|
query=ANY,
|
||||||
|
max_results=ANY,
|
||||||
|
page_token=None,
|
||||||
|
)
|
||||||
|
mock_gmail_cls.assert_not_called()
|
||||||
|
mock_get_access_token.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
|
@patch(_GET_ACCESS_TOKEN)
|
||||||
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
|
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
|
||||||
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
|
@patch(f"{_INDEXER_MODULE}.ComposioService")
|
||||||
async def test_composio_gmail_without_account_id_returns_error(
|
async def test_composio_gmail_without_account_id_returns_error(
|
||||||
mock_build_creds,
|
mock_composio_service_cls,
|
||||||
mock_tl_cls,
|
mock_tl_cls,
|
||||||
|
mock_get_access_token,
|
||||||
async_engine,
|
async_engine,
|
||||||
composio_gmail_no_id,
|
composio_gmail_no_id,
|
||||||
):
|
):
|
||||||
|
|
@ -136,20 +147,23 @@ async def test_composio_gmail_without_account_id_returns_error(
|
||||||
assert count == 0
|
assert count == 0
|
||||||
assert error is not None
|
assert error is not None
|
||||||
assert "composio" in error.lower()
|
assert "composio" in error.lower()
|
||||||
mock_build_creds.assert_not_called()
|
mock_composio_service_cls.assert_not_called()
|
||||||
|
mock_get_access_token.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
|
@patch(_GET_ACCESS_TOKEN)
|
||||||
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
|
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
|
||||||
|
@patch(f"{_INDEXER_MODULE}.ComposioService")
|
||||||
@patch(f"{_INDEXER_MODULE}.GoogleGmailConnector")
|
@patch(f"{_INDEXER_MODULE}.GoogleGmailConnector")
|
||||||
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
|
async def test_native_gmail_uses_google_gmail_connector(
|
||||||
async def test_native_gmail_does_not_use_composio_credentials(
|
|
||||||
mock_build_creds,
|
|
||||||
mock_gmail_cls,
|
mock_gmail_cls,
|
||||||
|
mock_composio_service_cls,
|
||||||
mock_tl_cls,
|
mock_tl_cls,
|
||||||
|
mock_get_access_token,
|
||||||
async_engine,
|
async_engine,
|
||||||
native_gmail,
|
native_gmail,
|
||||||
):
|
):
|
||||||
"""Gmail indexer does NOT call build_composio_credentials for a native connector."""
|
"""Native Gmail connector uses GoogleGmailConnector with no Composio path."""
|
||||||
from app.tasks.connector_indexers.google_gmail_indexer import (
|
from app.tasks.connector_indexers.google_gmail_indexer import (
|
||||||
index_google_gmail_messages,
|
index_google_gmail_messages,
|
||||||
)
|
)
|
||||||
|
|
@ -170,4 +184,6 @@ async def test_native_gmail_does_not_use_composio_credentials(
|
||||||
user_id=data["user_id"],
|
user_id=data["user_id"],
|
||||||
)
|
)
|
||||||
|
|
||||||
mock_build_creds.assert_not_called()
|
mock_gmail_cls.assert_called_once()
|
||||||
|
mock_composio_service_cls.assert_not_called()
|
||||||
|
mock_get_access_token.assert_not_called()
|
||||||
|
|
|
||||||
|
|
@ -200,7 +200,7 @@ async def test_reindex_sets_status_ready(db_session, db_search_space, db_user, m
|
||||||
async def test_reindex_replaces_chunks(db_session, db_search_space, db_user, mocker):
|
async def test_reindex_replaces_chunks(db_session, db_search_space, db_user, mocker):
|
||||||
"""Reindexing replaces old chunks with new content rather than appending."""
|
"""Reindexing replaces old chunks with new content rather than appending."""
|
||||||
mocker.patch(
|
mocker.patch(
|
||||||
"app.indexing_pipeline.indexing_pipeline_service.chunk_text",
|
"app.indexing_pipeline.indexing_pipeline_service.chunk_text_hybrid",
|
||||||
side_effect=[["Original chunk."], ["Updated chunk."]],
|
side_effect=[["Original chunk."], ["Updated chunk."]],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -37,7 +37,12 @@ def _make_orm_doc(connector_doc, doc_id):
|
||||||
async def test_index_calls_embed_and_chunk_via_to_thread(
|
async def test_index_calls_embed_and_chunk_via_to_thread(
|
||||||
pipeline, make_connector_document, monkeypatch
|
pipeline, make_connector_document, monkeypatch
|
||||||
):
|
):
|
||||||
"""index() runs embed_texts and the chunker via asyncio.to_thread, not blocking the loop."""
|
"""index() runs the chunker and embed_texts via asyncio.to_thread, not blocking the loop.
|
||||||
|
|
||||||
|
Routing between ``chunk_text`` (code path) and ``chunk_text_hybrid`` (default
|
||||||
|
path, see issue #1334) is verified separately in
|
||||||
|
``test_non_code_documents_use_hybrid_chunker``.
|
||||||
|
"""
|
||||||
to_thread_calls = []
|
to_thread_calls = []
|
||||||
original_to_thread = asyncio.to_thread
|
original_to_thread = asyncio.to_thread
|
||||||
|
|
||||||
|
|
@ -51,12 +56,6 @@ async def test_index_calls_embed_and_chunk_via_to_thread(
|
||||||
"app.indexing_pipeline.indexing_pipeline_service.summarize_document",
|
"app.indexing_pipeline.indexing_pipeline_service.summarize_document",
|
||||||
AsyncMock(return_value="Summary."),
|
AsyncMock(return_value="Summary."),
|
||||||
)
|
)
|
||||||
mock_chunk = MagicMock(return_value=["chunk1"])
|
|
||||||
mock_chunk.__name__ = "chunk_text"
|
|
||||||
monkeypatch.setattr(
|
|
||||||
"app.indexing_pipeline.indexing_pipeline_service.chunk_text",
|
|
||||||
mock_chunk,
|
|
||||||
)
|
|
||||||
mock_chunk_hybrid = MagicMock(return_value=["chunk1"])
|
mock_chunk_hybrid = MagicMock(return_value=["chunk1"])
|
||||||
mock_chunk_hybrid.__name__ = "chunk_text_hybrid"
|
mock_chunk_hybrid.__name__ = "chunk_text_hybrid"
|
||||||
monkeypatch.setattr(
|
monkeypatch.setattr(
|
||||||
|
|
@ -71,6 +70,11 @@ async def test_index_calls_embed_and_chunk_via_to_thread(
|
||||||
"app.indexing_pipeline.indexing_pipeline_service.embed_texts",
|
"app.indexing_pipeline.indexing_pipeline_service.embed_texts",
|
||||||
mock_embed,
|
mock_embed,
|
||||||
)
|
)
|
||||||
|
# Bypass set_committed_value, which requires a real ORM instance (not MagicMock).
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"app.indexing_pipeline.indexing_pipeline_service.attach_chunks_to_document",
|
||||||
|
MagicMock(),
|
||||||
|
)
|
||||||
|
|
||||||
connector_doc = make_connector_document(
|
connector_doc = make_connector_document(
|
||||||
document_type=DocumentType.GOOGLE_GMAIL_CONNECTOR,
|
document_type=DocumentType.GOOGLE_GMAIL_CONNECTOR,
|
||||||
|
|
@ -83,11 +87,62 @@ async def test_index_calls_embed_and_chunk_via_to_thread(
|
||||||
|
|
||||||
await pipeline.index(document, connector_doc, llm=MagicMock())
|
await pipeline.index(document, connector_doc, llm=MagicMock())
|
||||||
|
|
||||||
# Non-code documents now route through the table-aware hybrid chunker
|
# Either chunker entry point satisfies the "chunking runs off the event
|
||||||
# (see commit 2f3a33c9). Either chunker entry point satisfies the
|
# loop" contract this test guards. Routing between the two is verified
|
||||||
# "chunking runs off the event loop" contract this test guards.
|
# in test_non_code_documents_use_hybrid_chunker.
|
||||||
assert {"chunk_text", "chunk_text_hybrid"} & set(to_thread_calls)
|
assert {"chunk_text", "chunk_text_hybrid"} & set(to_thread_calls)
|
||||||
assert "embed_texts" in to_thread_calls
|
assert "embed_texts" in to_thread_calls
|
||||||
|
assert document.status == DocumentStatus.ready()
|
||||||
|
|
||||||
|
|
||||||
|
async def test_non_code_documents_use_hybrid_chunker(
|
||||||
|
pipeline, make_connector_document, monkeypatch
|
||||||
|
):
|
||||||
|
"""Non-code documents route through ``chunk_text_hybrid`` (issue #1334).
|
||||||
|
|
||||||
|
The hybrid chunker preserves Markdown table integrity by avoiding splits
|
||||||
|
mid-row. Only documents flagged with ``should_use_code_chunker=True``
|
||||||
|
should take the ``chunk_text`` path.
|
||||||
|
"""
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"app.indexing_pipeline.indexing_pipeline_service.summarize_document",
|
||||||
|
AsyncMock(return_value="Summary."),
|
||||||
|
)
|
||||||
|
mock_chunk_hybrid = MagicMock(return_value=["chunk1"])
|
||||||
|
mock_chunk_hybrid.__name__ = "chunk_text_hybrid"
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"app.indexing_pipeline.indexing_pipeline_service.chunk_text_hybrid",
|
||||||
|
mock_chunk_hybrid,
|
||||||
|
)
|
||||||
|
mock_chunk_code = MagicMock(return_value=["chunk1"])
|
||||||
|
mock_chunk_code.__name__ = "chunk_text"
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"app.indexing_pipeline.indexing_pipeline_service.chunk_text",
|
||||||
|
mock_chunk_code,
|
||||||
|
)
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"app.indexing_pipeline.indexing_pipeline_service.embed_texts",
|
||||||
|
MagicMock(side_effect=lambda texts: [[0.1] * _EMBEDDING_DIM for _ in texts]),
|
||||||
|
)
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"app.indexing_pipeline.indexing_pipeline_service.attach_chunks_to_document",
|
||||||
|
MagicMock(),
|
||||||
|
)
|
||||||
|
|
||||||
|
connector_doc = make_connector_document(
|
||||||
|
document_type=DocumentType.GOOGLE_GMAIL_CONNECTOR,
|
||||||
|
unique_id="msg-1",
|
||||||
|
search_space_id=1,
|
||||||
|
should_use_code_chunker=False,
|
||||||
|
)
|
||||||
|
document = MagicMock(spec=Document)
|
||||||
|
document.id = 1
|
||||||
|
document.status = DocumentStatus.pending()
|
||||||
|
|
||||||
|
await pipeline.index(document, connector_doc, llm=MagicMock())
|
||||||
|
|
||||||
|
mock_chunk_hybrid.assert_called_once()
|
||||||
|
mock_chunk_code.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
def _mock_session_factory(orm_docs_by_id):
|
def _mock_session_factory(orm_docs_by_id):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue