diff --git a/surfsense_backend/tests/integration/google_unification/__init__.py b/surfsense_backend/tests/integration/google_unification/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/tests/integration/google_unification/conftest.py b/surfsense_backend/tests/integration/google_unification/conftest.py new file mode 100644 index 000000000..768bab84b --- /dev/null +++ b/surfsense_backend/tests/integration/google_unification/conftest.py @@ -0,0 +1,328 @@ +"""Shared fixtures for Google unification integration tests.""" + +from __future__ import annotations + +import uuid +from contextlib import asynccontextmanager +from datetime import UTC, datetime +from unittest.mock import MagicMock + +import pytest +import pytest_asyncio +from sqlalchemy import text +from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker + +from app.config import config as app_config +from app.db import ( + Chunk, + Document, + DocumentType, + SearchSourceConnector, + SearchSourceConnectorType, + SearchSpace, + User, +) + +EMBEDDING_DIM = app_config.embedding_model_instance.dimension +DUMMY_EMBEDDING = [0.1] * EMBEDDING_DIM + + +def make_document( + *, + title: str, + document_type: DocumentType, + content: str, + search_space_id: int, + created_by_id: str, +) -> Document: + """Build a Document instance with unique hashes and a dummy embedding.""" + uid = uuid.uuid4().hex[:12] + return Document( + title=title, + document_type=document_type, + content=content, + content_hash=f"content-{uid}", + unique_identifier_hash=f"uid-{uid}", + source_markdown=content, + search_space_id=search_space_id, + created_by_id=created_by_id, + embedding=DUMMY_EMBEDDING, + updated_at=datetime.now(UTC), + status={"state": "ready"}, + ) + + +def make_chunk(*, content: str, document_id: int) -> Chunk: + return Chunk( + content=content, + document_id=document_id, + embedding=DUMMY_EMBEDDING, + ) + + +# 
# ---------------------------------------------------------------------------
# Savepoint-based fixture (used by retriever tests that receive db_session)
# ---------------------------------------------------------------------------


@pytest_asyncio.fixture
async def seed_google_docs(
    db_session: AsyncSession, db_user: "User", db_search_space: "SearchSpace"
):
    """Insert a native Drive doc, a legacy Composio Drive doc, and a FILE doc.

    Returns a dict with keys ``native_doc``, ``legacy_doc``, ``file_doc``,
    plus ``search_space`` and ``user``.  Data is only flushed (not committed),
    so it is visible solely to tests sharing ``db_session``'s savepoint.
    """
    user_id = str(db_user.id)
    space_id = db_search_space.id

    native_doc = make_document(
        title="Native Drive Document",
        document_type=DocumentType.GOOGLE_DRIVE_FILE,
        content="quarterly report from native google drive connector",
        search_space_id=space_id,
        created_by_id=user_id,
    )
    legacy_doc = make_document(
        title="Legacy Composio Drive Document",
        document_type=DocumentType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
        content="quarterly report from composio google drive connector",
        search_space_id=space_id,
        created_by_id=user_id,
    )
    file_doc = make_document(
        title="Uploaded PDF",
        document_type=DocumentType.FILE,
        content="unrelated uploaded file about quarterly reports",
        search_space_id=space_id,
        created_by_id=user_id,
    )

    db_session.add_all([native_doc, legacy_doc, file_doc])
    # Flush first so the documents get primary keys for the chunk FKs below.
    await db_session.flush()

    chunks = [
        make_chunk(content=doc.content, document_id=doc.id)
        for doc in (native_doc, legacy_doc, file_doc)
    ]
    db_session.add_all(chunks)
    await db_session.flush()

    return {
        "native_doc": native_doc,
        "legacy_doc": legacy_doc,
        "file_doc": file_doc,
        "search_space": db_search_space,
        "user": db_user,
    }


# ---------------------------------------------------------------------------
# Committed-data fixture (used by service / browse tests that create their
# own sessions internally and therefore cannot see savepoint-scoped data)
# ---------------------------------------------------------------------------


@pytest_asyncio.fixture
async def committed_google_data(async_engine):
    """Insert native, legacy, and FILE docs via a committed transaction.

    Yields ``{"search_space_id": int, "user_id": str}``.
    Cleans up by deleting the search space (cascades to documents / chunks).
    """
    space_id = None

    async with async_engine.begin() as conn:
        # NOTE(review): the session is never explicitly closed; the
        # ``engine.begin()`` block commits the underlying connection on exit,
        # which is what makes the rows visible to independently created
        # sessions.  Confirm no pooled-connection state leaks between tests.
        session = AsyncSession(bind=conn, expire_on_commit=False)

        user = User(
            id=uuid.uuid4(),
            email=f"google-test-{uuid.uuid4().hex[:6]}@surfsense.net",
            hashed_password="hashed",
            is_active=True,
            is_superuser=False,
            is_verified=True,
        )
        session.add(user)
        await session.flush()

        space = SearchSpace(
            name=f"Google Test {uuid.uuid4().hex[:6]}", user_id=user.id
        )
        session.add(space)
        await session.flush()
        space_id = space.id
        user_id = str(user.id)

        native_doc = make_document(
            title="Native Drive Doc",
            document_type=DocumentType.GOOGLE_DRIVE_FILE,
            content="quarterly budget from native google drive",
            search_space_id=space_id,
            created_by_id=user_id,
        )
        legacy_doc = make_document(
            title="Legacy Composio Drive Doc",
            document_type=DocumentType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
            content="quarterly budget from composio google drive",
            search_space_id=space_id,
            created_by_id=user_id,
        )
        file_doc = make_document(
            title="Plain File",
            document_type=DocumentType.FILE,
            content="quarterly budget uploaded as file",
            search_space_id=space_id,
            created_by_id=user_id,
        )
        session.add_all([native_doc, legacy_doc, file_doc])
        await session.flush()

        for doc in [native_doc, legacy_doc, file_doc]:
            session.add(
                Chunk(
                    content=doc.content,
                    document_id=doc.id,
                    embedding=DUMMY_EMBEDDING,
                )
            )
        await session.flush()

    yield {"search_space_id": space_id, "user_id": user_id}

    async with async_engine.begin() as conn:
        await conn.execute(
            text("DELETE FROM searchspaces WHERE id = :sid"), {"sid": space_id}
        )


# ---------------------------------------------------------------------------
# Monkeypatch fixtures for system boundaries
# ---------------------------------------------------------------------------


@pytest.fixture
def patched_session_factory(async_engine, monkeypatch):
    """Replace ``async_session_maker`` in connector_service with one bound to the test engine."""
    test_maker = async_sessionmaker(async_engine, expire_on_commit=False)
    monkeypatch.setattr("app.services.connector_service.async_session_maker", test_maker)
    return test_maker


@pytest.fixture
def patched_embed(monkeypatch):
    """Mock the embedding model (system boundary) to return a fixed vector."""
    mock = MagicMock(return_value=DUMMY_EMBEDDING)
    monkeypatch.setattr("app.config.config.embedding_model_instance.embed", mock)
    return mock


@pytest.fixture
def patched_shielded_session(async_engine, monkeypatch):
    """Replace ``shielded_async_session`` in the knowledge_base module
    with one that yields sessions from the test engine."""
    test_maker = async_sessionmaker(async_engine, expire_on_commit=False)

    @asynccontextmanager
    async def _test_shielded():
        async with test_maker() as session:
            yield session

    monkeypatch.setattr(
        "app.agents.new_chat.tools.knowledge_base.shielded_async_session",
        _test_shielded,
    )


# ---------------------------------------------------------------------------
# Indexer test helpers
# ---------------------------------------------------------------------------


def make_session_factory(async_engine):
    """Create a session factory bound to the test engine."""
    return async_sessionmaker(async_engine, expire_on_commit=False)
def mock_task_logger():
    """Return a fully-mocked TaskLoggingService whose logging methods are async."""
    from unittest.mock import AsyncMock, MagicMock

    mock = AsyncMock()
    # log_task_start yields a task-log object that callers pass back into the
    # progress/success/failure calls, so give it a concrete return value.
    mock.log_task_start = AsyncMock(return_value=MagicMock())
    mock.log_task_progress = AsyncMock()
    mock.log_task_failure = AsyncMock()
    mock.log_task_success = AsyncMock()
    return mock


async def seed_connector(
    async_engine,
    *,
    connector_type: "SearchSourceConnectorType",
    config: dict,
    name_prefix: str = "test",
):
    """Seed a connector (plus owning user and search space) with committed data.

    Returns ``{"connector_id", "search_space_id", "user_id"}``.
    Pair with :func:`cleanup_space` on the returned ``search_space_id`` to
    remove the seeded rows afterwards.
    """
    space_id = None

    async with async_engine.begin() as conn:
        session = AsyncSession(bind=conn, expire_on_commit=False)

        user = User(
            id=uuid.uuid4(),
            email=f"{name_prefix}-{uuid.uuid4().hex[:6]}@surfsense.net",
            hashed_password="hashed",
            is_active=True,
            is_superuser=False,
            is_verified=True,
        )
        session.add(user)
        await session.flush()

        space = SearchSpace(
            name=f"{name_prefix} {uuid.uuid4().hex[:6]}", user_id=user.id
        )
        session.add(space)
        await session.flush()
        space_id = space.id

        connector = SearchSourceConnector(
            name=f"{name_prefix} connector",
            connector_type=connector_type,
            is_indexable=True,
            config=config,
            search_space_id=space_id,
            user_id=user.id,
        )
        session.add(connector)
        await session.flush()
        connector_id = connector.id
        user_id = str(user.id)

    return {
        "connector_id": connector_id,
        "search_space_id": space_id,
        "user_id": user_id,
    }


async def cleanup_space(async_engine, space_id: int):
    """Delete a search space (cascades to connectors/documents)."""
    async with async_engine.begin() as conn:
        await conn.execute(
            text("DELETE FROM searchspaces WHERE id = :sid"), {"sid": space_id}
        )
"""Integration test: _browse_recent_documents returns docs of multiple types.

Exercises the browse path (degenerate-query fallback) with a real PostgreSQL
database. Verifies that passing a list of document types correctly returns
documents of all listed types -- the same ``.in_()`` SQL path used by hybrid
search but through the browse/recency-ordered code path.
"""

from __future__ import annotations

import pytest

pytestmark = pytest.mark.integration


async def test_browse_recent_documents_with_list_type_returns_both(
    committed_google_data, patched_shielded_session
):
    """_browse_recent_documents returns docs of all types when given a list."""
    from app.agents.new_chat.tools.knowledge_base import _browse_recent_documents

    space_id = committed_google_data["search_space_id"]

    results = await _browse_recent_documents(
        search_space_id=space_id,
        document_type=["GOOGLE_DRIVE_FILE", "COMPOSIO_GOOGLE_DRIVE_CONNECTOR"],
        top_k=10,
        start_date=None,
        end_date=None,
    )

    # Collect the distinct, non-empty document types that came back.
    returned_types = {
        dtype
        for doc in results
        if (dtype := doc.get("document", {}).get("document_type"))
    }

    assert "GOOGLE_DRIVE_FILE" in returned_types, (
        "Native Drive docs should appear in browse results"
    )
    assert "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" in returned_types, (
        "Legacy Composio Drive docs should appear in browse results"
    )
b/surfsense_backend/tests/integration/google_unification/test_calendar_indexer_credentials.py @@ -0,0 +1,143 @@ +"""Integration tests: Calendar indexer credential resolution for Composio vs native connectors. + +Exercises ``index_google_calendar_events`` with a real PostgreSQL database +containing seeded connector records. Google API and Composio SDK are +mocked at their system boundaries. +""" + +from __future__ import annotations + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +import pytest_asyncio + +from app.db import SearchSourceConnectorType + +from .conftest import cleanup_space, make_session_factory, mock_task_logger, seed_connector + +pytestmark = pytest.mark.integration + +_COMPOSIO_ACCOUNT_ID = "composio-calendar-test-789" +_INDEXER_MODULE = "app.tasks.connector_indexers.google_calendar_indexer" + + +@pytest_asyncio.fixture +async def composio_calendar(async_engine): + data = await seed_connector( + async_engine, + connector_type=SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR, + config={"composio_connected_account_id": _COMPOSIO_ACCOUNT_ID}, + name_prefix="cal-composio", + ) + yield data + await cleanup_space(async_engine, data["search_space_id"]) + + +@pytest_asyncio.fixture +async def composio_calendar_no_id(async_engine): + data = await seed_connector( + async_engine, + connector_type=SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR, + config={}, + name_prefix="cal-noid", + ) + yield data + await cleanup_space(async_engine, data["search_space_id"]) + + +@pytest_asyncio.fixture +async def native_calendar(async_engine): + data = await seed_connector( + async_engine, + connector_type=SearchSourceConnectorType.GOOGLE_CALENDAR_CONNECTOR, + config={ + "token": "fake", "refresh_token": "fake", + "client_id": "fake", "client_secret": "fake", + "token_uri": "https://oauth2.googleapis.com/token", + }, + name_prefix="cal-native", + ) + yield data + await cleanup_space(async_engine, data["search_space_id"]) + + 
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.GoogleCalendarConnector")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_composio_calendar_uses_composio_credentials(
    mock_build_creds, mock_cal_cls, mock_tl_cls, async_engine, composio_calendar,
):
    """Calendar indexer calls build_composio_credentials for a Composio connector."""
    from app.tasks.connector_indexers.google_calendar_indexer import index_google_calendar_events

    data = composio_calendar
    mock_creds = MagicMock(name="composio-creds")
    mock_build_creds.return_value = mock_creds
    mock_tl_cls.return_value = mock_task_logger()

    mock_cal_instance = MagicMock()
    mock_cal_instance.get_all_primary_calendar_events = AsyncMock(return_value=([], None))
    mock_cal_cls.return_value = mock_cal_instance

    maker = make_session_factory(async_engine)
    async with maker() as session:
        await index_google_calendar_events(
            session=session, connector_id=data["connector_id"],
            search_space_id=data["search_space_id"], user_id=data["user_id"],
        )

    mock_build_creds.assert_called_once_with(_COMPOSIO_ACCOUNT_ID)
    mock_cal_cls.assert_called_once()
    _, kwargs = mock_cal_cls.call_args
    assert kwargs.get("credentials") is mock_creds


@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_composio_calendar_without_account_id_returns_error(
    mock_build_creds, mock_tl_cls, async_engine, composio_calendar_no_id,
):
    """Calendar indexer returns error when Composio connector lacks connected_account_id."""
    from app.tasks.connector_indexers.google_calendar_indexer import index_google_calendar_events

    data = composio_calendar_no_id
    mock_tl_cls.return_value = mock_task_logger()

    maker = make_session_factory(async_engine)
    async with maker() as session:
        count, error = await index_google_calendar_events(
            session=session, connector_id=data["connector_id"],
            search_space_id=data["search_space_id"], user_id=data["user_id"],
        )

    assert count == 0
    assert error is not None
    assert "composio" in error.lower()
    mock_build_creds.assert_not_called()


@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.GoogleCalendarConnector")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_native_calendar_does_not_use_composio_credentials(
    mock_build_creds, mock_cal_cls, mock_tl_cls, async_engine, native_calendar,
):
    """Calendar indexer does NOT call build_composio_credentials for a native connector."""
    from app.tasks.connector_indexers.google_calendar_indexer import index_google_calendar_events

    data = native_calendar
    mock_tl_cls.return_value = mock_task_logger()

    mock_cal_instance = MagicMock()
    mock_cal_instance.get_all_primary_calendar_events = AsyncMock(return_value=([], None))
    mock_cal_cls.return_value = mock_cal_instance

    maker = make_session_factory(async_engine)
    async with maker() as session:
        await index_google_calendar_events(
            session=session, connector_id=data["connector_id"],
            search_space_id=data["search_space_id"], user_id=data["user_id"],
        )

    mock_build_creds.assert_not_called()
+""" + +from __future__ import annotations + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +import pytest_asyncio + +from app.db import SearchSourceConnectorType + +from .conftest import cleanup_space, make_session_factory, mock_task_logger, seed_connector + +pytestmark = pytest.mark.integration + +_COMPOSIO_ACCOUNT_ID = "composio-test-account-123" +_INDEXER_MODULE = "app.tasks.connector_indexers.google_drive_indexer" + + +@pytest_asyncio.fixture +async def committed_drive_connector(async_engine): + data = await seed_connector( + async_engine, + connector_type=SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR, + config={"composio_connected_account_id": _COMPOSIO_ACCOUNT_ID}, + name_prefix="drive-composio", + ) + yield data + await cleanup_space(async_engine, data["search_space_id"]) + + +@pytest_asyncio.fixture +async def committed_native_drive_connector(async_engine): + data = await seed_connector( + async_engine, + connector_type=SearchSourceConnectorType.GOOGLE_DRIVE_CONNECTOR, + config={ + "token": "fake-token", + "refresh_token": "fake-refresh", + "client_id": "fake-client-id", + "client_secret": "fake-secret", + "token_uri": "https://oauth2.googleapis.com/token", + }, + name_prefix="drive-native", + ) + yield data + await cleanup_space(async_engine, data["search_space_id"]) + + +@pytest_asyncio.fixture +async def committed_composio_no_account_id(async_engine): + data = await seed_connector( + async_engine, + connector_type=SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR, + config={}, + name_prefix="drive-noid", + ) + yield data + await cleanup_space(async_engine, data["search_space_id"]) + + +@patch(f"{_INDEXER_MODULE}.TaskLoggingService") +@patch(f"{_INDEXER_MODULE}.GoogleDriveClient") +@patch(f"{_INDEXER_MODULE}.build_composio_credentials") +async def test_composio_connector_uses_composio_credentials( + mock_build_creds, + mock_client_cls, + mock_task_logger_cls, + async_engine, + committed_drive_connector, +): + 
"""Drive indexer calls build_composio_credentials for a Composio connector + and passes the result to GoogleDriveClient.""" + from app.tasks.connector_indexers.google_drive_indexer import ( + index_google_drive_files, + ) + + data = committed_drive_connector + mock_creds = MagicMock(name="composio-credentials") + mock_build_creds.return_value = mock_creds + mock_task_logger_cls.return_value = mock_task_logger() + + maker = make_session_factory(async_engine) + async with maker() as session: + await index_google_drive_files( + session=session, + connector_id=data["connector_id"], + search_space_id=data["search_space_id"], + user_id=data["user_id"], + folder_id="test-folder-id", + ) + + mock_build_creds.assert_called_once_with(_COMPOSIO_ACCOUNT_ID) + mock_client_cls.assert_called_once() + _, kwargs = mock_client_cls.call_args + assert kwargs.get("credentials") is mock_creds + + +@patch(f"{_INDEXER_MODULE}.TaskLoggingService") +@patch(f"{_INDEXER_MODULE}.build_composio_credentials") +async def test_composio_connector_without_account_id_returns_error( + mock_build_creds, + mock_task_logger_cls, + async_engine, + committed_composio_no_account_id, +): + """Drive indexer returns an error when Composio connector lacks connected_account_id.""" + from app.tasks.connector_indexers.google_drive_indexer import ( + index_google_drive_files, + ) + + data = committed_composio_no_account_id + mock_task_logger_cls.return_value = mock_task_logger() + + maker = make_session_factory(async_engine) + async with maker() as session: + count, error = await index_google_drive_files( + session=session, + connector_id=data["connector_id"], + search_space_id=data["search_space_id"], + user_id=data["user_id"], + folder_id="test-folder-id", + ) + + assert count == 0 + assert error is not None + assert "composio_connected_account_id" in error.lower() or "composio" in error.lower() + mock_build_creds.assert_not_called() + + +@patch(f"{_INDEXER_MODULE}.TaskLoggingService") 
+@patch(f"{_INDEXER_MODULE}.GoogleDriveClient") +@patch(f"{_INDEXER_MODULE}.build_composio_credentials") +async def test_native_connector_does_not_use_composio_credentials( + mock_build_creds, + mock_client_cls, + mock_task_logger_cls, + async_engine, + committed_native_drive_connector, +): + """Drive indexer does NOT call build_composio_credentials for a native connector.""" + from app.tasks.connector_indexers.google_drive_indexer import ( + index_google_drive_files, + ) + + data = committed_native_drive_connector + mock_task_logger_cls.return_value = mock_task_logger() + + maker = make_session_factory(async_engine) + async with maker() as session: + await index_google_drive_files( + session=session, + connector_id=data["connector_id"], + search_space_id=data["search_space_id"], + user_id=data["user_id"], + folder_id="test-folder-id", + ) + + mock_build_creds.assert_not_called() + mock_client_cls.assert_called_once() + _, kwargs = mock_client_cls.call_args + assert kwargs.get("credentials") is None diff --git a/surfsense_backend/tests/integration/google_unification/test_gmail_indexer_credentials.py b/surfsense_backend/tests/integration/google_unification/test_gmail_indexer_credentials.py new file mode 100644 index 000000000..8fbf1f320 --- /dev/null +++ b/surfsense_backend/tests/integration/google_unification/test_gmail_indexer_credentials.py @@ -0,0 +1,143 @@ +"""Integration tests: Gmail indexer credential resolution for Composio vs native connectors. + +Exercises ``index_google_gmail_messages`` with a real PostgreSQL database +containing seeded connector records. Google API and Composio SDK are +mocked at their system boundaries. 
+""" + +from __future__ import annotations + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +import pytest_asyncio + +from app.db import SearchSourceConnectorType + +from .conftest import cleanup_space, make_session_factory, mock_task_logger, seed_connector + +pytestmark = pytest.mark.integration + +_COMPOSIO_ACCOUNT_ID = "composio-gmail-test-456" +_INDEXER_MODULE = "app.tasks.connector_indexers.google_gmail_indexer" + + +@pytest_asyncio.fixture +async def composio_gmail(async_engine): + data = await seed_connector( + async_engine, + connector_type=SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR, + config={"composio_connected_account_id": _COMPOSIO_ACCOUNT_ID}, + name_prefix="gmail-composio", + ) + yield data + await cleanup_space(async_engine, data["search_space_id"]) + + +@pytest_asyncio.fixture +async def composio_gmail_no_id(async_engine): + data = await seed_connector( + async_engine, + connector_type=SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR, + config={}, + name_prefix="gmail-noid", + ) + yield data + await cleanup_space(async_engine, data["search_space_id"]) + + +@pytest_asyncio.fixture +async def native_gmail(async_engine): + data = await seed_connector( + async_engine, + connector_type=SearchSourceConnectorType.GOOGLE_GMAIL_CONNECTOR, + config={ + "token": "fake", "refresh_token": "fake", + "client_id": "fake", "client_secret": "fake", + "token_uri": "https://oauth2.googleapis.com/token", + }, + name_prefix="gmail-native", + ) + yield data + await cleanup_space(async_engine, data["search_space_id"]) + + +@patch(f"{_INDEXER_MODULE}.TaskLoggingService") +@patch(f"{_INDEXER_MODULE}.GoogleGmailConnector") +@patch(f"{_INDEXER_MODULE}.build_composio_credentials") +async def test_composio_gmail_uses_composio_credentials( + mock_build_creds, mock_gmail_cls, mock_tl_cls, async_engine, composio_gmail, +): + """Gmail indexer calls build_composio_credentials for a Composio connector.""" + from 
app.tasks.connector_indexers.google_gmail_indexer import index_google_gmail_messages + + data = composio_gmail + mock_creds = MagicMock(name="composio-creds") + mock_build_creds.return_value = mock_creds + mock_tl_cls.return_value = mock_task_logger() + + mock_gmail_instance = MagicMock() + mock_gmail_instance.get_recent_messages = AsyncMock(return_value=([], None)) + mock_gmail_cls.return_value = mock_gmail_instance + + maker = make_session_factory(async_engine) + async with maker() as session: + await index_google_gmail_messages( + session=session, connector_id=data["connector_id"], + search_space_id=data["search_space_id"], user_id=data["user_id"], + ) + + mock_build_creds.assert_called_once_with(_COMPOSIO_ACCOUNT_ID) + mock_gmail_cls.assert_called_once() + args, _ = mock_gmail_cls.call_args + assert args[0] is mock_creds + + +@patch(f"{_INDEXER_MODULE}.TaskLoggingService") +@patch(f"{_INDEXER_MODULE}.build_composio_credentials") +async def test_composio_gmail_without_account_id_returns_error( + mock_build_creds, mock_tl_cls, async_engine, composio_gmail_no_id, +): + """Gmail indexer returns error when Composio connector lacks connected_account_id.""" + from app.tasks.connector_indexers.google_gmail_indexer import index_google_gmail_messages + + data = composio_gmail_no_id + mock_tl_cls.return_value = mock_task_logger() + + maker = make_session_factory(async_engine) + async with maker() as session: + count, error = await index_google_gmail_messages( + session=session, connector_id=data["connector_id"], + search_space_id=data["search_space_id"], user_id=data["user_id"], + ) + + assert count == 0 + assert error is not None + assert "composio" in error.lower() + mock_build_creds.assert_not_called() + + +@patch(f"{_INDEXER_MODULE}.TaskLoggingService") +@patch(f"{_INDEXER_MODULE}.GoogleGmailConnector") +@patch(f"{_INDEXER_MODULE}.build_composio_credentials") +async def test_native_gmail_does_not_use_composio_credentials( + mock_build_creds, mock_gmail_cls, 
mock_tl_cls, async_engine, native_gmail, +): + """Gmail indexer does NOT call build_composio_credentials for a native connector.""" + from app.tasks.connector_indexers.google_gmail_indexer import index_google_gmail_messages + + data = native_gmail + mock_tl_cls.return_value = mock_task_logger() + + mock_gmail_instance = MagicMock() + mock_gmail_instance.get_recent_messages = AsyncMock(return_value=([], None)) + mock_gmail_cls.return_value = mock_gmail_instance + + maker = make_session_factory(async_engine) + async with maker() as session: + await index_google_gmail_messages( + session=session, connector_id=data["connector_id"], + search_space_id=data["search_space_id"], user_id=data["user_id"], + ) + + mock_build_creds.assert_not_called() diff --git a/surfsense_backend/tests/integration/google_unification/test_hybrid_search_type_filtering.py b/surfsense_backend/tests/integration/google_unification/test_hybrid_search_type_filtering.py new file mode 100644 index 000000000..bd84d5bb3 --- /dev/null +++ b/surfsense_backend/tests/integration/google_unification/test_hybrid_search_type_filtering.py @@ -0,0 +1,76 @@ +"""Integration tests: hybrid search correctly filters by document type lists. + +These tests exercise the public ``hybrid_search`` method on +``ChucksHybridSearchRetriever`` with a real PostgreSQL database. +They verify that the ``.in_()`` SQL path works for list-of-types filtering, +which is the foundation of the Google unification changes. 
"""Integration tests: hybrid search correctly filters by document type lists.

These tests exercise the public ``hybrid_search`` method on
``ChucksHybridSearchRetriever`` with a real PostgreSQL database.
They verify that the ``.in_()`` SQL path works for list-of-types filtering,
which is the foundation of the Google unification changes.
"""

import pytest

from app.config import config as app_config
from app.retriever.chunks_hybrid_search import ChucksHybridSearchRetriever

from .conftest import DUMMY_EMBEDDING

pytestmark = pytest.mark.integration


async def test_list_of_types_returns_both_matching_doc_types(
    db_session, seed_google_docs
):
    """Searching with a list of document types returns documents of ALL listed types."""
    space_id = seed_google_docs["search_space"].id

    retriever = ChucksHybridSearchRetriever(db_session)
    results = await retriever.hybrid_search(
        query_text="quarterly report",
        top_k=10,
        search_space_id=space_id,
        document_type=["GOOGLE_DRIVE_FILE", "COMPOSIO_GOOGLE_DRIVE_CONNECTOR"],
        query_embedding=DUMMY_EMBEDDING,
    )

    returned_types = {
        r["document"]["document_type"] for r in results if r.get("document")
    }
    assert "GOOGLE_DRIVE_FILE" in returned_types
    assert "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" in returned_types
    assert "FILE" not in returned_types


async def test_single_string_type_returns_only_that_type(
    db_session, seed_google_docs
):
    """Searching with a single string type returns only documents of that exact type."""
    space_id = seed_google_docs["search_space"].id

    retriever = ChucksHybridSearchRetriever(db_session)
    results = await retriever.hybrid_search(
        query_text="quarterly report",
        top_k=10,
        search_space_id=space_id,
        document_type="GOOGLE_DRIVE_FILE",
        query_embedding=DUMMY_EMBEDDING,
    )

    returned_types = {
        r["document"]["document_type"] for r in results if r.get("document")
    }
    assert returned_types == {"GOOGLE_DRIVE_FILE"}


async def test_all_invalid_types_returns_empty(db_session, seed_google_docs):
    """Searching with a list of nonexistent types returns an empty list, no exceptions."""
    space_id = seed_google_docs["search_space"].id

    retriever = ChucksHybridSearchRetriever(db_session)
    results = await retriever.hybrid_search(
        query_text="quarterly report",
        top_k=10,
        search_space_id=space_id,
        document_type=["NONEXISTENT_TYPE"],
        query_embedding=DUMMY_EMBEDDING,
    )

    assert results == []
"""Integration tests: ConnectorService search transparently includes legacy Composio docs.

These tests exercise ``ConnectorService.search_google_drive`` and
``ConnectorService.search_files`` through a real PostgreSQL database.
They verify that the legacy-type alias expansion works end-to-end:
searching for native Google Drive docs also returns old Composio-typed docs.
"""

from __future__ import annotations

import pytest

from app.services.connector_service import ConnectorService

pytestmark = pytest.mark.integration


async def test_search_google_drive_includes_legacy_composio_docs(
    async_engine, committed_google_data, patched_session_factory, patched_embed
):
    """search_google_drive returns both GOOGLE_DRIVE_FILE and COMPOSIO_GOOGLE_DRIVE_CONNECTOR docs."""
    space_id = committed_google_data["search_space_id"]

    async with patched_session_factory() as session:
        service = ConnectorService(session, search_space_id=space_id)
        result_object, raw_docs = await service.search_google_drive(
            user_query="quarterly budget",
            search_space_id=space_id,
            top_k=10,
        )

    returned_types = {
        dtype
        for doc in raw_docs
        if (dtype := doc.get("document", {}).get("document_type"))
    }

    assert "GOOGLE_DRIVE_FILE" in returned_types, (
        "Native Drive docs should appear in search_google_drive results"
    )
    assert "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" in returned_types, (
        "Legacy Composio Drive docs should appear in search_google_drive results"
    )
    assert "FILE" not in returned_types, (
        "Plain FILE docs should NOT appear in search_google_drive results"
    )


async def test_search_files_does_not_include_google_types(
    async_engine, committed_google_data, patched_session_factory, patched_embed
):
    """search_files returns only FILE docs, not Google Drive docs."""
    space_id = committed_google_data["search_space_id"]

    async with patched_session_factory() as session:
        service = ConnectorService(session, search_space_id=space_id)
        result_object, raw_docs = await service.search_files(
            user_query="quarterly budget",
            search_space_id=space_id,
            top_k=10,
        )

    returned_types = {
        dtype
        for doc in raw_docs
        if (dtype := doc.get("document", {}).get("document_type"))
    }

    # The result set may legitimately be empty; only check exclusions when
    # something came back.
    if returned_types:
        assert "GOOGLE_DRIVE_FILE" not in returned_types
        assert "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" not in returned_types
+""" + +from datetime import datetime, timezone +from unittest.mock import MagicMock, patch + +import pytest + +from google.oauth2.credentials import Credentials + +pytestmark = pytest.mark.unit + + +@patch("app.services.composio_service.ComposioService") +def test_returns_credentials_with_token_and_expiry(MockComposioService): + """build_composio_credentials returns a Credentials object with the Composio access token.""" + mock_service = MagicMock() + mock_service.get_access_token.return_value = "fake-access-token" + MockComposioService.return_value = mock_service + + from app.utils.google_credentials import build_composio_credentials + + creds = build_composio_credentials("test-account-id") + + assert isinstance(creds, Credentials) + assert creds.token == "fake-access-token" + assert creds.expiry is not None + assert creds.expiry > datetime.now(timezone.utc).replace(tzinfo=None) + + +@patch("app.services.composio_service.ComposioService") +def test_refresh_handler_fetches_fresh_token(MockComposioService): + """The refresh_handler on the returned Credentials fetches a new token from Composio.""" + mock_service = MagicMock() + mock_service.get_access_token.side_effect = [ + "initial-token", + "refreshed-token", + ] + MockComposioService.return_value = mock_service + + from app.utils.google_credentials import build_composio_credentials + + creds = build_composio_credentials("test-account-id") + assert creds.token == "initial-token" + + refresh_handler = creds._refresh_handler + assert callable(refresh_handler) + + new_token, new_expiry = refresh_handler(request=None, scopes=None) + + assert new_token == "refreshed-token" + assert new_expiry > datetime.now(timezone.utc).replace(tzinfo=None) + assert mock_service.get_access_token.call_count == 2 diff --git a/surfsense_backend/tests/unit/google_unification/test_connector_credential_acceptance.py b/surfsense_backend/tests/unit/google_unification/test_connector_credential_acceptance.py new file mode 100644 index 
000000000..689aea40f --- /dev/null +++ b/surfsense_backend/tests/unit/google_unification/test_connector_credential_acceptance.py @@ -0,0 +1,226 @@ +"""Unit tests: Gmail, Calendar, and Drive connectors accept Composio-sourced credentials. + +These tests exercise the REAL connector code with Composio-style credentials +(token + expiry + refresh_handler, but NO refresh_token / client_id / client_secret). +Only the Google API boundary (``googleapiclient.discovery.build``) is mocked. + +This verifies Phase 2b: the relaxed validation in ``_get_credentials()`` correctly +allows Composio credentials through without raising ValueError or persisting to DB. +""" + +from __future__ import annotations + +from datetime import datetime, timedelta, timezone +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from google.oauth2.credentials import Credentials + +pytestmark = pytest.mark.unit + + +def _utcnow_naive() -> datetime: + """Return current UTC time as a naive datetime (matches google-auth convention).""" + return datetime.now(timezone.utc).replace(tzinfo=None) + + +def _composio_credentials(*, expired: bool = False) -> Credentials: + """Create a Credentials object that mimics build_composio_credentials output.""" + if expired: + expiry = _utcnow_naive() - timedelta(hours=1) + else: + expiry = _utcnow_naive() + timedelta(hours=1) + + def refresh_handler(request, scopes): + return "refreshed-token", _utcnow_naive() + timedelta(hours=1) + + return Credentials( + token="composio-access-token", + expiry=expiry, + refresh_handler=refresh_handler, + ) + + +# --------------------------------------------------------------------------- +# Gmail +# --------------------------------------------------------------------------- + + +@patch("app.connectors.google_gmail_connector.build") +async def test_gmail_accepts_valid_composio_credentials(mock_build): + """GoogleGmailConnector.get_user_profile succeeds with Composio credentials + that have no client_id, 
client_secret, or refresh_token.""" + from app.connectors.google_gmail_connector import GoogleGmailConnector + + creds = _composio_credentials(expired=False) + + mock_service = MagicMock() + mock_service.users.return_value.getProfile.return_value.execute.return_value = { + "emailAddress": "test@example.com", + "messagesTotal": 42, + "threadsTotal": 10, + "historyId": "12345", + } + mock_build.return_value = mock_service + + connector = GoogleGmailConnector( + creds, session=MagicMock(), user_id="test-user", + ) + + profile, error = await connector.get_user_profile() + + assert error is None + assert profile["email_address"] == "test@example.com" + mock_build.assert_called_once_with("gmail", "v1", credentials=creds) + + +@patch("app.connectors.google_gmail_connector.Request") +@patch("app.connectors.google_gmail_connector.build") +async def test_gmail_refreshes_expired_composio_credentials(mock_build, mock_request_cls): + """GoogleGmailConnector handles expired Composio credentials via refresh_handler + without attempting DB persistence.""" + from app.connectors.google_gmail_connector import GoogleGmailConnector + + creds = _composio_credentials(expired=True) + assert creds.expired + + mock_service = MagicMock() + mock_service.users.return_value.getProfile.return_value.execute.return_value = { + "emailAddress": "test@example.com", + "messagesTotal": 42, + "threadsTotal": 10, + "historyId": "12345", + } + mock_build.return_value = mock_service + + mock_session = AsyncMock() + connector = GoogleGmailConnector( + creds, session=mock_session, user_id="test-user", + ) + + profile, error = await connector.get_user_profile() + + assert error is None + assert profile["email_address"] == "test@example.com" + assert creds.token == "refreshed-token" + assert not creds.expired + mock_session.execute.assert_not_called() + mock_session.commit.assert_not_called() + + +# --------------------------------------------------------------------------- +# Calendar +# 
--------------------------------------------------------------------------- + + +@patch("app.connectors.google_calendar_connector.build") +async def test_calendar_accepts_valid_composio_credentials(mock_build): + """GoogleCalendarConnector.get_calendars succeeds with Composio credentials + that have no client_id, client_secret, or refresh_token.""" + from app.connectors.google_calendar_connector import GoogleCalendarConnector + + creds = _composio_credentials(expired=False) + + mock_service = MagicMock() + mock_service.calendarList.return_value.list.return_value.execute.return_value = { + "items": [{"id": "primary", "summary": "My Calendar", "primary": True}], + } + mock_build.return_value = mock_service + + connector = GoogleCalendarConnector( + creds, session=MagicMock(), user_id="test-user", + ) + + calendars, error = await connector.get_calendars() + + assert error is None + assert len(calendars) == 1 + assert calendars[0]["summary"] == "My Calendar" + mock_build.assert_called_once_with("calendar", "v3", credentials=creds) + + +@patch("app.connectors.google_calendar_connector.Request") +@patch("app.connectors.google_calendar_connector.build") +async def test_calendar_refreshes_expired_composio_credentials(mock_build, mock_request_cls): + """GoogleCalendarConnector handles expired Composio credentials via refresh_handler + without attempting DB persistence.""" + from app.connectors.google_calendar_connector import GoogleCalendarConnector + + creds = _composio_credentials(expired=True) + assert creds.expired + + mock_service = MagicMock() + mock_service.calendarList.return_value.list.return_value.execute.return_value = { + "items": [{"id": "primary", "summary": "My Calendar", "primary": True}], + } + mock_build.return_value = mock_service + + mock_session = AsyncMock() + connector = GoogleCalendarConnector( + creds, session=mock_session, user_id="test-user", + ) + + calendars, error = await connector.get_calendars() + + assert error is None + assert 
len(calendars) == 1 + assert creds.token == "refreshed-token" + assert not creds.expired + mock_session.execute.assert_not_called() + mock_session.commit.assert_not_called() + + +# --------------------------------------------------------------------------- +# Drive +# --------------------------------------------------------------------------- + + +@patch("app.connectors.google_drive.client.build") +async def test_drive_client_uses_prebuilt_composio_credentials(mock_build): + """GoogleDriveClient with pre-built Composio credentials uses them directly, + bypassing DB credential loading via get_valid_credentials.""" + from app.connectors.google_drive.client import GoogleDriveClient + + creds = _composio_credentials(expired=False) + + mock_service = MagicMock() + mock_service.files.return_value.list.return_value.execute.return_value = { + "files": [], + "nextPageToken": None, + } + mock_build.return_value = mock_service + + client = GoogleDriveClient( + session=MagicMock(), connector_id=999, credentials=creds, + ) + + files, next_token, error = await client.list_files() + + assert error is None + assert files == [] + mock_build.assert_called_once_with("drive", "v3", credentials=creds) + + +@patch("app.connectors.google_drive.client.get_valid_credentials") +@patch("app.connectors.google_drive.client.build") +async def test_drive_client_prebuilt_creds_skip_db_loading(mock_build, mock_get_valid): + """GoogleDriveClient does NOT call get_valid_credentials when pre-built + credentials are provided.""" + from app.connectors.google_drive.client import GoogleDriveClient + + creds = _composio_credentials(expired=False) + + mock_service = MagicMock() + mock_service.files.return_value.list.return_value.execute.return_value = { + "files": [], + "nextPageToken": None, + } + mock_build.return_value = mock_service + + client = GoogleDriveClient( + session=MagicMock(), connector_id=999, credentials=creds, + ) + + await client.list_files() + + mock_get_valid.assert_not_called() diff 
--git a/surfsense_backend/tests/unit/google_unification/test_schedule_checker_routing.py b/surfsense_backend/tests/unit/google_unification/test_schedule_checker_routing.py new file mode 100644 index 000000000..38ef4d553 --- /dev/null +++ b/surfsense_backend/tests/unit/google_unification/test_schedule_checker_routing.py @@ -0,0 +1,53 @@ +"""Unit tests: connector type acceptance sets include both native and Composio types. + +The indexer ``ACCEPTED_*_CONNECTOR_TYPES`` sets and the shared +``COMPOSIO_GOOGLE_CONNECTOR_TYPES`` set are the constants that control +whether a connector is accepted by an indexer and which credential path +is used. These tests verify those sets are correctly defined so that +both native and Composio connectors are handled by the unified pipeline. +""" + +import pytest + +from app.db import SearchSourceConnectorType + +pytestmark = pytest.mark.unit + + +def test_drive_indexer_accepts_both_native_and_composio(): + """ACCEPTED_DRIVE_CONNECTOR_TYPES should include both native and Composio Drive types.""" + from app.tasks.connector_indexers.google_drive_indexer import ( + ACCEPTED_DRIVE_CONNECTOR_TYPES, + ) + + assert SearchSourceConnectorType.GOOGLE_DRIVE_CONNECTOR in ACCEPTED_DRIVE_CONNECTOR_TYPES + assert SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR in ACCEPTED_DRIVE_CONNECTOR_TYPES + + +def test_gmail_indexer_accepts_both_native_and_composio(): + """ACCEPTED_GMAIL_CONNECTOR_TYPES should include both native and Composio Gmail types.""" + from app.tasks.connector_indexers.google_gmail_indexer import ( + ACCEPTED_GMAIL_CONNECTOR_TYPES, + ) + + assert SearchSourceConnectorType.GOOGLE_GMAIL_CONNECTOR in ACCEPTED_GMAIL_CONNECTOR_TYPES + assert SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR in ACCEPTED_GMAIL_CONNECTOR_TYPES + + +def test_calendar_indexer_accepts_both_native_and_composio(): + """ACCEPTED_CALENDAR_CONNECTOR_TYPES should include both native and Composio Calendar types.""" + from 
app.tasks.connector_indexers.google_calendar_indexer import ( + ACCEPTED_CALENDAR_CONNECTOR_TYPES, + ) + + assert SearchSourceConnectorType.GOOGLE_CALENDAR_CONNECTOR in ACCEPTED_CALENDAR_CONNECTOR_TYPES + assert SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR in ACCEPTED_CALENDAR_CONNECTOR_TYPES + + +def test_composio_connector_types_set_covers_all_google_services(): + """COMPOSIO_GOOGLE_CONNECTOR_TYPES should contain all three Composio Google types.""" + from app.utils.google_credentials import COMPOSIO_GOOGLE_CONNECTOR_TYPES + + assert SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR in COMPOSIO_GOOGLE_CONNECTOR_TYPES + assert SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR in COMPOSIO_GOOGLE_CONNECTOR_TYPES + assert SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR in COMPOSIO_GOOGLE_CONNECTOR_TYPES