feat: add integration and unit tests for Google unification connectors

- Introduced comprehensive integration tests for Google Drive, Gmail, and Calendar indexers, ensuring proper credential handling for both Composio and native connectors.
- Added unit tests to validate the acceptance of Composio-sourced credentials across various connector types.
- Implemented fixtures to seed test data and facilitate testing of hybrid search functionality, ensuring accurate document type filtering.
This commit is contained in:
Anish Sarkar 2026-03-19 17:51:15 +05:30
parent 83152e8e7e
commit 36f4709225
12 changed files with 1310 additions and 0 deletions

View file

@ -0,0 +1,328 @@
"""Shared fixtures for Google unification integration tests."""
from __future__ import annotations
import uuid
from contextlib import asynccontextmanager
from datetime import UTC, datetime
from unittest.mock import MagicMock
import pytest
import pytest_asyncio
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
from app.config import config as app_config
from app.db import (
Chunk,
Document,
DocumentType,
SearchSourceConnector,
SearchSourceConnectorType,
SearchSpace,
User,
)
EMBEDDING_DIM = app_config.embedding_model_instance.dimension
DUMMY_EMBEDDING = [0.1] * EMBEDDING_DIM
def make_document(
*,
title: str,
document_type: DocumentType,
content: str,
search_space_id: int,
created_by_id: str,
) -> Document:
"""Build a Document instance with unique hashes and a dummy embedding."""
uid = uuid.uuid4().hex[:12]
return Document(
title=title,
document_type=document_type,
content=content,
content_hash=f"content-{uid}",
unique_identifier_hash=f"uid-{uid}",
source_markdown=content,
search_space_id=search_space_id,
created_by_id=created_by_id,
embedding=DUMMY_EMBEDDING,
updated_at=datetime.now(UTC),
status={"state": "ready"},
)
def make_chunk(*, content: str, document_id: int) -> Chunk:
return Chunk(
content=content,
document_id=document_id,
embedding=DUMMY_EMBEDDING,
)
# ---------------------------------------------------------------------------
# Savepoint-based fixture (used by retriever tests that receive db_session)
# ---------------------------------------------------------------------------
@pytest_asyncio.fixture
async def seed_google_docs(
db_session: AsyncSession, db_user: "User", db_search_space: "SearchSpace"
):
"""Insert a native Drive doc, a legacy Composio Drive doc, and a FILE doc.
Returns a dict with keys ``native_doc``, ``legacy_doc``, ``file_doc``,
plus ``search_space`` and ``user``.
"""
user_id = str(db_user.id)
space_id = db_search_space.id
native_doc = make_document(
title="Native Drive Document",
document_type=DocumentType.GOOGLE_DRIVE_FILE,
content="quarterly report from native google drive connector",
search_space_id=space_id,
created_by_id=user_id,
)
legacy_doc = make_document(
title="Legacy Composio Drive Document",
document_type=DocumentType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
content="quarterly report from composio google drive connector",
search_space_id=space_id,
created_by_id=user_id,
)
file_doc = make_document(
title="Uploaded PDF",
document_type=DocumentType.FILE,
content="unrelated uploaded file about quarterly reports",
search_space_id=space_id,
created_by_id=user_id,
)
db_session.add_all([native_doc, legacy_doc, file_doc])
await db_session.flush()
native_chunk = make_chunk(
content="quarterly report from native google drive connector",
document_id=native_doc.id,
)
legacy_chunk = make_chunk(
content="quarterly report from composio google drive connector",
document_id=legacy_doc.id,
)
file_chunk = make_chunk(
content="unrelated uploaded file about quarterly reports",
document_id=file_doc.id,
)
db_session.add_all([native_chunk, legacy_chunk, file_chunk])
await db_session.flush()
return {
"native_doc": native_doc,
"legacy_doc": legacy_doc,
"file_doc": file_doc,
"search_space": db_search_space,
"user": db_user,
}
# ---------------------------------------------------------------------------
# Committed-data fixture (used by service / browse tests that create their
# own sessions internally and therefore cannot see savepoint-scoped data)
# ---------------------------------------------------------------------------
@pytest_asyncio.fixture
async def committed_google_data(async_engine):
"""Insert native, legacy, and FILE docs via a committed transaction.
Yields ``{"search_space_id": int, "user_id": str}``.
Cleans up by deleting the search space (cascades to documents / chunks).
"""
space_id = None
async with async_engine.begin() as conn:
session = AsyncSession(bind=conn, expire_on_commit=False)
user = User(
id=uuid.uuid4(),
email=f"google-test-{uuid.uuid4().hex[:6]}@surfsense.net",
hashed_password="hashed",
is_active=True,
is_superuser=False,
is_verified=True,
)
session.add(user)
await session.flush()
space = SearchSpace(
name=f"Google Test {uuid.uuid4().hex[:6]}", user_id=user.id
)
session.add(space)
await session.flush()
space_id = space.id
user_id = str(user.id)
native_doc = make_document(
title="Native Drive Doc",
document_type=DocumentType.GOOGLE_DRIVE_FILE,
content="quarterly budget from native google drive",
search_space_id=space_id,
created_by_id=user_id,
)
legacy_doc = make_document(
title="Legacy Composio Drive Doc",
document_type=DocumentType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
content="quarterly budget from composio google drive",
search_space_id=space_id,
created_by_id=user_id,
)
file_doc = make_document(
title="Plain File",
document_type=DocumentType.FILE,
content="quarterly budget uploaded as file",
search_space_id=space_id,
created_by_id=user_id,
)
session.add_all([native_doc, legacy_doc, file_doc])
await session.flush()
for doc in [native_doc, legacy_doc, file_doc]:
session.add(
Chunk(
content=doc.content,
document_id=doc.id,
embedding=DUMMY_EMBEDDING,
)
)
await session.flush()
yield {"search_space_id": space_id, "user_id": user_id}
async with async_engine.begin() as conn:
await conn.execute(
text("DELETE FROM searchspaces WHERE id = :sid"), {"sid": space_id}
)
# ---------------------------------------------------------------------------
# Monkeypatch fixtures for system boundaries
# ---------------------------------------------------------------------------
@pytest.fixture
def patched_session_factory(async_engine, monkeypatch):
"""Replace ``async_session_maker`` in connector_service with one bound to the test engine."""
test_maker = async_sessionmaker(async_engine, expire_on_commit=False)
monkeypatch.setattr("app.services.connector_service.async_session_maker", test_maker)
return test_maker
@pytest.fixture
def patched_embed(monkeypatch):
"""Mock the embedding model (system boundary) to return a fixed vector."""
mock = MagicMock(return_value=DUMMY_EMBEDDING)
monkeypatch.setattr("app.config.config.embedding_model_instance.embed", mock)
return mock
@pytest.fixture
def patched_shielded_session(async_engine, monkeypatch):
"""Replace ``shielded_async_session`` in the knowledge_base module
with one that yields sessions from the test engine."""
test_maker = async_sessionmaker(async_engine, expire_on_commit=False)
@asynccontextmanager
async def _test_shielded():
async with test_maker() as session:
yield session
monkeypatch.setattr(
"app.agents.new_chat.tools.knowledge_base.shielded_async_session",
_test_shielded,
)
# ---------------------------------------------------------------------------
# Indexer test helpers
# ---------------------------------------------------------------------------
def make_session_factory(async_engine):
"""Create a session factory bound to the test engine."""
return async_sessionmaker(async_engine, expire_on_commit=False)
def mock_task_logger():
"""Return a fully-mocked TaskLoggingService with async methods."""
from unittest.mock import AsyncMock, MagicMock
mock = AsyncMock()
mock.log_task_start = AsyncMock(return_value=MagicMock())
mock.log_task_progress = AsyncMock()
mock.log_task_failure = AsyncMock()
mock.log_task_success = AsyncMock()
return mock
async def seed_connector(
async_engine,
*,
connector_type: "SearchSourceConnectorType",
config: dict,
name_prefix: str = "test",
):
"""Seed a connector with committed data. Returns dict and cleanup function.
Yields ``{"connector_id", "search_space_id", "user_id"}``.
"""
space_id = None
async with async_engine.begin() as conn:
session = AsyncSession(bind=conn, expire_on_commit=False)
user = User(
id=uuid.uuid4(),
email=f"{name_prefix}-{uuid.uuid4().hex[:6]}@surfsense.net",
hashed_password="hashed",
is_active=True,
is_superuser=False,
is_verified=True,
)
session.add(user)
await session.flush()
space = SearchSpace(
name=f"{name_prefix} {uuid.uuid4().hex[:6]}", user_id=user.id
)
session.add(space)
await session.flush()
space_id = space.id
connector = SearchSourceConnector(
name=f"{name_prefix} connector",
connector_type=connector_type,
is_indexable=True,
config=config,
search_space_id=space_id,
user_id=user.id,
)
session.add(connector)
await session.flush()
connector_id = connector.id
user_id = str(user.id)
return {
"connector_id": connector_id,
"search_space_id": space_id,
"user_id": user_id,
}
async def cleanup_space(async_engine, space_id: int):
"""Delete a search space (cascades to connectors/documents)."""
async with async_engine.begin() as conn:
await conn.execute(
text("DELETE FROM searchspaces WHERE id = :sid"), {"sid": space_id}
)

View file

@ -0,0 +1,44 @@
"""Integration test: _browse_recent_documents returns docs of multiple types.
Exercises the browse path (degenerate-query fallback) with a real PostgreSQL
database. Verifies that passing a list of document types correctly returns
documents of all listed types -- the same ``.in_()`` SQL path used by hybrid
search but through the browse/recency-ordered code path.
"""
from __future__ import annotations
import pytest
pytestmark = pytest.mark.integration
async def test_browse_recent_documents_with_list_type_returns_both(
committed_google_data, patched_shielded_session
):
"""_browse_recent_documents returns docs of all types when given a list."""
from app.agents.new_chat.tools.knowledge_base import _browse_recent_documents
space_id = committed_google_data["search_space_id"]
results = await _browse_recent_documents(
search_space_id=space_id,
document_type=["GOOGLE_DRIVE_FILE", "COMPOSIO_GOOGLE_DRIVE_CONNECTOR"],
top_k=10,
start_date=None,
end_date=None,
)
returned_types = set()
for doc in results:
doc_info = doc.get("document", {})
dtype = doc_info.get("document_type")
if dtype:
returned_types.add(dtype)
assert "GOOGLE_DRIVE_FILE" in returned_types, (
"Native Drive docs should appear in browse results"
)
assert "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" in returned_types, (
"Legacy Composio Drive docs should appear in browse results"
)

View file

@ -0,0 +1,143 @@
"""Integration tests: Calendar indexer credential resolution for Composio vs native connectors.
Exercises ``index_google_calendar_events`` with a real PostgreSQL database
containing seeded connector records. Google API and Composio SDK are
mocked at their system boundaries.
"""
from __future__ import annotations
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
import pytest_asyncio
from app.db import SearchSourceConnectorType
from .conftest import cleanup_space, make_session_factory, mock_task_logger, seed_connector
pytestmark = pytest.mark.integration
_COMPOSIO_ACCOUNT_ID = "composio-calendar-test-789"
_INDEXER_MODULE = "app.tasks.connector_indexers.google_calendar_indexer"
@pytest_asyncio.fixture
async def composio_calendar(async_engine):
data = await seed_connector(
async_engine,
connector_type=SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR,
config={"composio_connected_account_id": _COMPOSIO_ACCOUNT_ID},
name_prefix="cal-composio",
)
yield data
await cleanup_space(async_engine, data["search_space_id"])
@pytest_asyncio.fixture
async def composio_calendar_no_id(async_engine):
data = await seed_connector(
async_engine,
connector_type=SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR,
config={},
name_prefix="cal-noid",
)
yield data
await cleanup_space(async_engine, data["search_space_id"])
@pytest_asyncio.fixture
async def native_calendar(async_engine):
data = await seed_connector(
async_engine,
connector_type=SearchSourceConnectorType.GOOGLE_CALENDAR_CONNECTOR,
config={
"token": "fake", "refresh_token": "fake",
"client_id": "fake", "client_secret": "fake",
"token_uri": "https://oauth2.googleapis.com/token",
},
name_prefix="cal-native",
)
yield data
await cleanup_space(async_engine, data["search_space_id"])
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.GoogleCalendarConnector")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_composio_calendar_uses_composio_credentials(
mock_build_creds, mock_cal_cls, mock_tl_cls, async_engine, composio_calendar,
):
"""Calendar indexer calls build_composio_credentials for a Composio connector."""
from app.tasks.connector_indexers.google_calendar_indexer import index_google_calendar_events
data = composio_calendar
mock_creds = MagicMock(name="composio-creds")
mock_build_creds.return_value = mock_creds
mock_tl_cls.return_value = mock_task_logger()
mock_cal_instance = MagicMock()
mock_cal_instance.get_all_primary_calendar_events = AsyncMock(return_value=([], None))
mock_cal_cls.return_value = mock_cal_instance
maker = make_session_factory(async_engine)
async with maker() as session:
await index_google_calendar_events(
session=session, connector_id=data["connector_id"],
search_space_id=data["search_space_id"], user_id=data["user_id"],
)
mock_build_creds.assert_called_once_with(_COMPOSIO_ACCOUNT_ID)
mock_cal_cls.assert_called_once()
_, kwargs = mock_cal_cls.call_args
assert kwargs.get("credentials") is mock_creds
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_composio_calendar_without_account_id_returns_error(
mock_build_creds, mock_tl_cls, async_engine, composio_calendar_no_id,
):
"""Calendar indexer returns error when Composio connector lacks connected_account_id."""
from app.tasks.connector_indexers.google_calendar_indexer import index_google_calendar_events
data = composio_calendar_no_id
mock_tl_cls.return_value = mock_task_logger()
maker = make_session_factory(async_engine)
async with maker() as session:
count, error = await index_google_calendar_events(
session=session, connector_id=data["connector_id"],
search_space_id=data["search_space_id"], user_id=data["user_id"],
)
assert count == 0
assert error is not None
assert "composio" in error.lower()
mock_build_creds.assert_not_called()
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.GoogleCalendarConnector")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_native_calendar_does_not_use_composio_credentials(
mock_build_creds, mock_cal_cls, mock_tl_cls, async_engine, native_calendar,
):
"""Calendar indexer does NOT call build_composio_credentials for a native connector."""
from app.tasks.connector_indexers.google_calendar_indexer import index_google_calendar_events
data = native_calendar
mock_tl_cls.return_value = mock_task_logger()
mock_cal_instance = MagicMock()
mock_cal_instance.get_all_primary_calendar_events = AsyncMock(return_value=([], None))
mock_cal_cls.return_value = mock_cal_instance
maker = make_session_factory(async_engine)
async with maker() as session:
await index_google_calendar_events(
session=session, connector_id=data["connector_id"],
search_space_id=data["search_space_id"], user_id=data["user_id"],
)
mock_build_creds.assert_not_called()

View file

@ -0,0 +1,167 @@
"""Integration tests: Drive indexer credential resolution for Composio vs native connectors.
Exercises ``index_google_drive_files`` with a real PostgreSQL database
containing seeded connector records. Google API and Composio SDK are
mocked at their system boundaries.
"""
from __future__ import annotations
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
import pytest_asyncio
from app.db import SearchSourceConnectorType
from .conftest import cleanup_space, make_session_factory, mock_task_logger, seed_connector
pytestmark = pytest.mark.integration
_COMPOSIO_ACCOUNT_ID = "composio-test-account-123"
_INDEXER_MODULE = "app.tasks.connector_indexers.google_drive_indexer"
@pytest_asyncio.fixture
async def committed_drive_connector(async_engine):
data = await seed_connector(
async_engine,
connector_type=SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
config={"composio_connected_account_id": _COMPOSIO_ACCOUNT_ID},
name_prefix="drive-composio",
)
yield data
await cleanup_space(async_engine, data["search_space_id"])
@pytest_asyncio.fixture
async def committed_native_drive_connector(async_engine):
data = await seed_connector(
async_engine,
connector_type=SearchSourceConnectorType.GOOGLE_DRIVE_CONNECTOR,
config={
"token": "fake-token",
"refresh_token": "fake-refresh",
"client_id": "fake-client-id",
"client_secret": "fake-secret",
"token_uri": "https://oauth2.googleapis.com/token",
},
name_prefix="drive-native",
)
yield data
await cleanup_space(async_engine, data["search_space_id"])
@pytest_asyncio.fixture
async def committed_composio_no_account_id(async_engine):
data = await seed_connector(
async_engine,
connector_type=SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
config={},
name_prefix="drive-noid",
)
yield data
await cleanup_space(async_engine, data["search_space_id"])
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.GoogleDriveClient")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_composio_connector_uses_composio_credentials(
mock_build_creds,
mock_client_cls,
mock_task_logger_cls,
async_engine,
committed_drive_connector,
):
"""Drive indexer calls build_composio_credentials for a Composio connector
and passes the result to GoogleDriveClient."""
from app.tasks.connector_indexers.google_drive_indexer import (
index_google_drive_files,
)
data = committed_drive_connector
mock_creds = MagicMock(name="composio-credentials")
mock_build_creds.return_value = mock_creds
mock_task_logger_cls.return_value = mock_task_logger()
maker = make_session_factory(async_engine)
async with maker() as session:
await index_google_drive_files(
session=session,
connector_id=data["connector_id"],
search_space_id=data["search_space_id"],
user_id=data["user_id"],
folder_id="test-folder-id",
)
mock_build_creds.assert_called_once_with(_COMPOSIO_ACCOUNT_ID)
mock_client_cls.assert_called_once()
_, kwargs = mock_client_cls.call_args
assert kwargs.get("credentials") is mock_creds
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_composio_connector_without_account_id_returns_error(
mock_build_creds,
mock_task_logger_cls,
async_engine,
committed_composio_no_account_id,
):
"""Drive indexer returns an error when Composio connector lacks connected_account_id."""
from app.tasks.connector_indexers.google_drive_indexer import (
index_google_drive_files,
)
data = committed_composio_no_account_id
mock_task_logger_cls.return_value = mock_task_logger()
maker = make_session_factory(async_engine)
async with maker() as session:
count, error = await index_google_drive_files(
session=session,
connector_id=data["connector_id"],
search_space_id=data["search_space_id"],
user_id=data["user_id"],
folder_id="test-folder-id",
)
assert count == 0
assert error is not None
assert "composio_connected_account_id" in error.lower() or "composio" in error.lower()
mock_build_creds.assert_not_called()
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.GoogleDriveClient")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_native_connector_does_not_use_composio_credentials(
mock_build_creds,
mock_client_cls,
mock_task_logger_cls,
async_engine,
committed_native_drive_connector,
):
"""Drive indexer does NOT call build_composio_credentials for a native connector."""
from app.tasks.connector_indexers.google_drive_indexer import (
index_google_drive_files,
)
data = committed_native_drive_connector
mock_task_logger_cls.return_value = mock_task_logger()
maker = make_session_factory(async_engine)
async with maker() as session:
await index_google_drive_files(
session=session,
connector_id=data["connector_id"],
search_space_id=data["search_space_id"],
user_id=data["user_id"],
folder_id="test-folder-id",
)
mock_build_creds.assert_not_called()
mock_client_cls.assert_called_once()
_, kwargs = mock_client_cls.call_args
assert kwargs.get("credentials") is None

View file

@ -0,0 +1,143 @@
"""Integration tests: Gmail indexer credential resolution for Composio vs native connectors.
Exercises ``index_google_gmail_messages`` with a real PostgreSQL database
containing seeded connector records. Google API and Composio SDK are
mocked at their system boundaries.
"""
from __future__ import annotations
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
import pytest_asyncio
from app.db import SearchSourceConnectorType
from .conftest import cleanup_space, make_session_factory, mock_task_logger, seed_connector
pytestmark = pytest.mark.integration
_COMPOSIO_ACCOUNT_ID = "composio-gmail-test-456"
_INDEXER_MODULE = "app.tasks.connector_indexers.google_gmail_indexer"
@pytest_asyncio.fixture
async def composio_gmail(async_engine):
data = await seed_connector(
async_engine,
connector_type=SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR,
config={"composio_connected_account_id": _COMPOSIO_ACCOUNT_ID},
name_prefix="gmail-composio",
)
yield data
await cleanup_space(async_engine, data["search_space_id"])
@pytest_asyncio.fixture
async def composio_gmail_no_id(async_engine):
data = await seed_connector(
async_engine,
connector_type=SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR,
config={},
name_prefix="gmail-noid",
)
yield data
await cleanup_space(async_engine, data["search_space_id"])
@pytest_asyncio.fixture
async def native_gmail(async_engine):
data = await seed_connector(
async_engine,
connector_type=SearchSourceConnectorType.GOOGLE_GMAIL_CONNECTOR,
config={
"token": "fake", "refresh_token": "fake",
"client_id": "fake", "client_secret": "fake",
"token_uri": "https://oauth2.googleapis.com/token",
},
name_prefix="gmail-native",
)
yield data
await cleanup_space(async_engine, data["search_space_id"])
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.GoogleGmailConnector")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_composio_gmail_uses_composio_credentials(
mock_build_creds, mock_gmail_cls, mock_tl_cls, async_engine, composio_gmail,
):
"""Gmail indexer calls build_composio_credentials for a Composio connector."""
from app.tasks.connector_indexers.google_gmail_indexer import index_google_gmail_messages
data = composio_gmail
mock_creds = MagicMock(name="composio-creds")
mock_build_creds.return_value = mock_creds
mock_tl_cls.return_value = mock_task_logger()
mock_gmail_instance = MagicMock()
mock_gmail_instance.get_recent_messages = AsyncMock(return_value=([], None))
mock_gmail_cls.return_value = mock_gmail_instance
maker = make_session_factory(async_engine)
async with maker() as session:
await index_google_gmail_messages(
session=session, connector_id=data["connector_id"],
search_space_id=data["search_space_id"], user_id=data["user_id"],
)
mock_build_creds.assert_called_once_with(_COMPOSIO_ACCOUNT_ID)
mock_gmail_cls.assert_called_once()
args, _ = mock_gmail_cls.call_args
assert args[0] is mock_creds
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_composio_gmail_without_account_id_returns_error(
mock_build_creds, mock_tl_cls, async_engine, composio_gmail_no_id,
):
"""Gmail indexer returns error when Composio connector lacks connected_account_id."""
from app.tasks.connector_indexers.google_gmail_indexer import index_google_gmail_messages
data = composio_gmail_no_id
mock_tl_cls.return_value = mock_task_logger()
maker = make_session_factory(async_engine)
async with maker() as session:
count, error = await index_google_gmail_messages(
session=session, connector_id=data["connector_id"],
search_space_id=data["search_space_id"], user_id=data["user_id"],
)
assert count == 0
assert error is not None
assert "composio" in error.lower()
mock_build_creds.assert_not_called()
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.GoogleGmailConnector")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_native_gmail_does_not_use_composio_credentials(
mock_build_creds, mock_gmail_cls, mock_tl_cls, async_engine, native_gmail,
):
"""Gmail indexer does NOT call build_composio_credentials for a native connector."""
from app.tasks.connector_indexers.google_gmail_indexer import index_google_gmail_messages
data = native_gmail
mock_tl_cls.return_value = mock_task_logger()
mock_gmail_instance = MagicMock()
mock_gmail_instance.get_recent_messages = AsyncMock(return_value=([], None))
mock_gmail_cls.return_value = mock_gmail_instance
maker = make_session_factory(async_engine)
async with maker() as session:
await index_google_gmail_messages(
session=session, connector_id=data["connector_id"],
search_space_id=data["search_space_id"], user_id=data["user_id"],
)
mock_build_creds.assert_not_called()

View file

@ -0,0 +1,76 @@
"""Integration tests: hybrid search correctly filters by document type lists.
These tests exercise the public ``hybrid_search`` method on
``ChucksHybridSearchRetriever`` with a real PostgreSQL database.
They verify that the ``.in_()`` SQL path works for list-of-types filtering,
which is the foundation of the Google unification changes.
"""
import pytest
from app.config import config as app_config
from app.retriever.chunks_hybrid_search import ChucksHybridSearchRetriever
from .conftest import DUMMY_EMBEDDING
pytestmark = pytest.mark.integration
async def test_list_of_types_returns_both_matching_doc_types(
db_session, seed_google_docs
):
"""Searching with a list of document types returns documents of ALL listed types."""
space_id = seed_google_docs["search_space"].id
retriever = ChucksHybridSearchRetriever(db_session)
results = await retriever.hybrid_search(
query_text="quarterly report",
top_k=10,
search_space_id=space_id,
document_type=["GOOGLE_DRIVE_FILE", "COMPOSIO_GOOGLE_DRIVE_CONNECTOR"],
query_embedding=DUMMY_EMBEDDING,
)
returned_types = {
r["document"]["document_type"] for r in results if r.get("document")
}
assert "GOOGLE_DRIVE_FILE" in returned_types
assert "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" in returned_types
assert "FILE" not in returned_types
async def test_single_string_type_returns_only_that_type(
db_session, seed_google_docs
):
"""Searching with a single string type returns only documents of that exact type."""
space_id = seed_google_docs["search_space"].id
retriever = ChucksHybridSearchRetriever(db_session)
results = await retriever.hybrid_search(
query_text="quarterly report",
top_k=10,
search_space_id=space_id,
document_type="GOOGLE_DRIVE_FILE",
query_embedding=DUMMY_EMBEDDING,
)
returned_types = {
r["document"]["document_type"] for r in results if r.get("document")
}
assert returned_types == {"GOOGLE_DRIVE_FILE"}
async def test_all_invalid_types_returns_empty(db_session, seed_google_docs):
"""Searching with a list of nonexistent types returns an empty list, no exceptions."""
space_id = seed_google_docs["search_space"].id
retriever = ChucksHybridSearchRetriever(db_session)
results = await retriever.hybrid_search(
query_text="quarterly report",
top_k=10,
search_space_id=space_id,
document_type=["NONEXISTENT_TYPE"],
query_embedding=DUMMY_EMBEDDING,
)
assert results == []

View file

@ -0,0 +1,73 @@
"""Integration tests: ConnectorService search transparently includes legacy Composio docs.
These tests exercise ``ConnectorService.search_google_drive`` and
``ConnectorService.search_files`` through a real PostgreSQL database.
They verify that the legacy-type alias expansion works end-to-end:
searching for native Google Drive docs also returns old Composio-typed docs.
"""
from __future__ import annotations
import pytest
from app.services.connector_service import ConnectorService
pytestmark = pytest.mark.integration
async def test_search_google_drive_includes_legacy_composio_docs(
async_engine, committed_google_data, patched_session_factory, patched_embed
):
"""search_google_drive returns both GOOGLE_DRIVE_FILE and COMPOSIO_GOOGLE_DRIVE_CONNECTOR docs."""
space_id = committed_google_data["search_space_id"]
async with patched_session_factory() as session:
service = ConnectorService(session, search_space_id=space_id)
result_object, raw_docs = await service.search_google_drive(
user_query="quarterly budget",
search_space_id=space_id,
top_k=10,
)
returned_types = set()
for doc in raw_docs:
doc_info = doc.get("document", {})
dtype = doc_info.get("document_type")
if dtype:
returned_types.add(dtype)
assert "GOOGLE_DRIVE_FILE" in returned_types, (
"Native Drive docs should appear in search_google_drive results"
)
assert "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" in returned_types, (
"Legacy Composio Drive docs should appear in search_google_drive results"
)
assert "FILE" not in returned_types, (
"Plain FILE docs should NOT appear in search_google_drive results"
)
async def test_search_files_does_not_include_google_types(
async_engine, committed_google_data, patched_session_factory, patched_embed
):
"""search_files returns only FILE docs, not Google Drive docs."""
space_id = committed_google_data["search_space_id"]
async with patched_session_factory() as session:
service = ConnectorService(session, search_space_id=space_id)
result_object, raw_docs = await service.search_files(
user_query="quarterly budget",
search_space_id=space_id,
top_k=10,
)
returned_types = set()
for doc in raw_docs:
doc_info = doc.get("document", {})
dtype = doc_info.get("document_type")
if dtype:
returned_types.add(dtype)
if returned_types:
assert "GOOGLE_DRIVE_FILE" not in returned_types
assert "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" not in returned_types

View file

@ -0,0 +1,57 @@
"""Unit tests: build_composio_credentials returns valid Google Credentials.
Mocks the Composio SDK (external system boundary) and verifies that the
returned ``google.oauth2.credentials.Credentials`` object is correctly
configured with a token and a working refresh handler.
"""
from datetime import datetime, timezone
from unittest.mock import MagicMock, patch
import pytest
from google.oauth2.credentials import Credentials
pytestmark = pytest.mark.unit
@patch("app.services.composio_service.ComposioService")
def test_returns_credentials_with_token_and_expiry(MockComposioService):
"""build_composio_credentials returns a Credentials object with the Composio access token."""
mock_service = MagicMock()
mock_service.get_access_token.return_value = "fake-access-token"
MockComposioService.return_value = mock_service
from app.utils.google_credentials import build_composio_credentials
creds = build_composio_credentials("test-account-id")
assert isinstance(creds, Credentials)
assert creds.token == "fake-access-token"
assert creds.expiry is not None
assert creds.expiry > datetime.now(timezone.utc).replace(tzinfo=None)
@patch("app.services.composio_service.ComposioService")
def test_refresh_handler_fetches_fresh_token(MockComposioService):
"""The refresh_handler on the returned Credentials fetches a new token from Composio."""
mock_service = MagicMock()
mock_service.get_access_token.side_effect = [
"initial-token",
"refreshed-token",
]
MockComposioService.return_value = mock_service
from app.utils.google_credentials import build_composio_credentials
creds = build_composio_credentials("test-account-id")
assert creds.token == "initial-token"
refresh_handler = creds._refresh_handler
assert callable(refresh_handler)
new_token, new_expiry = refresh_handler(request=None, scopes=None)
assert new_token == "refreshed-token"
assert new_expiry > datetime.now(timezone.utc).replace(tzinfo=None)
assert mock_service.get_access_token.call_count == 2

View file

@ -0,0 +1,226 @@
"""Unit tests: Gmail, Calendar, and Drive connectors accept Composio-sourced credentials.
These tests exercise the REAL connector code with Composio-style credentials
(token + expiry + refresh_handler, but NO refresh_token / client_id / client_secret).
Only the Google API boundary (``googleapiclient.discovery.build``) is mocked.
This verifies Phase 2b: the relaxed validation in ``_get_credentials()`` correctly
allows Composio credentials through without raising ValueError or persisting to DB.
"""
from __future__ import annotations
from datetime import datetime, timedelta, timezone
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from google.oauth2.credentials import Credentials
pytestmark = pytest.mark.unit
def _utcnow_naive() -> datetime:
"""Return current UTC time as a naive datetime (matches google-auth convention)."""
return datetime.now(timezone.utc).replace(tzinfo=None)
def _composio_credentials(*, expired: bool = False) -> Credentials:
"""Create a Credentials object that mimics build_composio_credentials output."""
if expired:
expiry = _utcnow_naive() - timedelta(hours=1)
else:
expiry = _utcnow_naive() + timedelta(hours=1)
def refresh_handler(request, scopes):
return "refreshed-token", _utcnow_naive() + timedelta(hours=1)
return Credentials(
token="composio-access-token",
expiry=expiry,
refresh_handler=refresh_handler,
)
# ---------------------------------------------------------------------------
# Gmail
# ---------------------------------------------------------------------------
@patch("app.connectors.google_gmail_connector.build")
async def test_gmail_accepts_valid_composio_credentials(mock_build):
"""GoogleGmailConnector.get_user_profile succeeds with Composio credentials
that have no client_id, client_secret, or refresh_token."""
from app.connectors.google_gmail_connector import GoogleGmailConnector
creds = _composio_credentials(expired=False)
mock_service = MagicMock()
mock_service.users.return_value.getProfile.return_value.execute.return_value = {
"emailAddress": "test@example.com",
"messagesTotal": 42,
"threadsTotal": 10,
"historyId": "12345",
}
mock_build.return_value = mock_service
connector = GoogleGmailConnector(
creds, session=MagicMock(), user_id="test-user",
)
profile, error = await connector.get_user_profile()
assert error is None
assert profile["email_address"] == "test@example.com"
mock_build.assert_called_once_with("gmail", "v1", credentials=creds)
@patch("app.connectors.google_gmail_connector.Request")
@patch("app.connectors.google_gmail_connector.build")
async def test_gmail_refreshes_expired_composio_credentials(mock_build, mock_request_cls):
"""GoogleGmailConnector handles expired Composio credentials via refresh_handler
without attempting DB persistence."""
from app.connectors.google_gmail_connector import GoogleGmailConnector
creds = _composio_credentials(expired=True)
assert creds.expired
mock_service = MagicMock()
mock_service.users.return_value.getProfile.return_value.execute.return_value = {
"emailAddress": "test@example.com",
"messagesTotal": 42,
"threadsTotal": 10,
"historyId": "12345",
}
mock_build.return_value = mock_service
mock_session = AsyncMock()
connector = GoogleGmailConnector(
creds, session=mock_session, user_id="test-user",
)
profile, error = await connector.get_user_profile()
assert error is None
assert profile["email_address"] == "test@example.com"
assert creds.token == "refreshed-token"
assert not creds.expired
mock_session.execute.assert_not_called()
mock_session.commit.assert_not_called()
# ---------------------------------------------------------------------------
# Calendar
# ---------------------------------------------------------------------------
@patch("app.connectors.google_calendar_connector.build")
async def test_calendar_accepts_valid_composio_credentials(mock_build):
"""GoogleCalendarConnector.get_calendars succeeds with Composio credentials
that have no client_id, client_secret, or refresh_token."""
from app.connectors.google_calendar_connector import GoogleCalendarConnector
creds = _composio_credentials(expired=False)
mock_service = MagicMock()
mock_service.calendarList.return_value.list.return_value.execute.return_value = {
"items": [{"id": "primary", "summary": "My Calendar", "primary": True}],
}
mock_build.return_value = mock_service
connector = GoogleCalendarConnector(
creds, session=MagicMock(), user_id="test-user",
)
calendars, error = await connector.get_calendars()
assert error is None
assert len(calendars) == 1
assert calendars[0]["summary"] == "My Calendar"
mock_build.assert_called_once_with("calendar", "v3", credentials=creds)
@patch("app.connectors.google_calendar_connector.Request")
@patch("app.connectors.google_calendar_connector.build")
async def test_calendar_refreshes_expired_composio_credentials(mock_build, mock_request_cls):
"""GoogleCalendarConnector handles expired Composio credentials via refresh_handler
without attempting DB persistence."""
from app.connectors.google_calendar_connector import GoogleCalendarConnector
creds = _composio_credentials(expired=True)
assert creds.expired
mock_service = MagicMock()
mock_service.calendarList.return_value.list.return_value.execute.return_value = {
"items": [{"id": "primary", "summary": "My Calendar", "primary": True}],
}
mock_build.return_value = mock_service
mock_session = AsyncMock()
connector = GoogleCalendarConnector(
creds, session=mock_session, user_id="test-user",
)
calendars, error = await connector.get_calendars()
assert error is None
assert len(calendars) == 1
assert creds.token == "refreshed-token"
assert not creds.expired
mock_session.execute.assert_not_called()
mock_session.commit.assert_not_called()
# ---------------------------------------------------------------------------
# Drive
# ---------------------------------------------------------------------------
@patch("app.connectors.google_drive.client.build")
async def test_drive_client_uses_prebuilt_composio_credentials(mock_build):
"""GoogleDriveClient with pre-built Composio credentials uses them directly,
bypassing DB credential loading via get_valid_credentials."""
from app.connectors.google_drive.client import GoogleDriveClient
creds = _composio_credentials(expired=False)
mock_service = MagicMock()
mock_service.files.return_value.list.return_value.execute.return_value = {
"files": [],
"nextPageToken": None,
}
mock_build.return_value = mock_service
client = GoogleDriveClient(
session=MagicMock(), connector_id=999, credentials=creds,
)
files, next_token, error = await client.list_files()
assert error is None
assert files == []
mock_build.assert_called_once_with("drive", "v3", credentials=creds)
@patch("app.connectors.google_drive.client.get_valid_credentials")
@patch("app.connectors.google_drive.client.build")
async def test_drive_client_prebuilt_creds_skip_db_loading(mock_build, mock_get_valid):
"""GoogleDriveClient does NOT call get_valid_credentials when pre-built
credentials are provided."""
from app.connectors.google_drive.client import GoogleDriveClient
creds = _composio_credentials(expired=False)
mock_service = MagicMock()
mock_service.files.return_value.list.return_value.execute.return_value = {
"files": [],
"nextPageToken": None,
}
mock_build.return_value = mock_service
client = GoogleDriveClient(
session=MagicMock(), connector_id=999, credentials=creds,
)
await client.list_files()
mock_get_valid.assert_not_called()

View file

@ -0,0 +1,53 @@
"""Unit tests: connector type acceptance sets include both native and Composio types.
The indexer ``ACCEPTED_*_CONNECTOR_TYPES`` sets and the shared
``COMPOSIO_GOOGLE_CONNECTOR_TYPES`` set are the constants that control
whether a connector is accepted by an indexer and which credential path
is used. These tests verify those sets are correctly defined so that
both native and Composio connectors are handled by the unified pipeline.
"""
import pytest
from app.db import SearchSourceConnectorType
pytestmark = pytest.mark.unit
def test_drive_indexer_accepts_both_native_and_composio():
"""ACCEPTED_DRIVE_CONNECTOR_TYPES should include both native and Composio Drive types."""
from app.tasks.connector_indexers.google_drive_indexer import (
ACCEPTED_DRIVE_CONNECTOR_TYPES,
)
assert SearchSourceConnectorType.GOOGLE_DRIVE_CONNECTOR in ACCEPTED_DRIVE_CONNECTOR_TYPES
assert SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR in ACCEPTED_DRIVE_CONNECTOR_TYPES
def test_gmail_indexer_accepts_both_native_and_composio():
"""ACCEPTED_GMAIL_CONNECTOR_TYPES should include both native and Composio Gmail types."""
from app.tasks.connector_indexers.google_gmail_indexer import (
ACCEPTED_GMAIL_CONNECTOR_TYPES,
)
assert SearchSourceConnectorType.GOOGLE_GMAIL_CONNECTOR in ACCEPTED_GMAIL_CONNECTOR_TYPES
assert SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR in ACCEPTED_GMAIL_CONNECTOR_TYPES
def test_calendar_indexer_accepts_both_native_and_composio():
"""ACCEPTED_CALENDAR_CONNECTOR_TYPES should include both native and Composio Calendar types."""
from app.tasks.connector_indexers.google_calendar_indexer import (
ACCEPTED_CALENDAR_CONNECTOR_TYPES,
)
assert SearchSourceConnectorType.GOOGLE_CALENDAR_CONNECTOR in ACCEPTED_CALENDAR_CONNECTOR_TYPES
assert SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR in ACCEPTED_CALENDAR_CONNECTOR_TYPES
def test_composio_connector_types_set_covers_all_google_services():
"""COMPOSIO_GOOGLE_CONNECTOR_TYPES should contain all three Composio Google types."""
from app.utils.google_credentials import COMPOSIO_GOOGLE_CONNECTOR_TYPES
assert SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR in COMPOSIO_GOOGLE_CONNECTOR_TYPES
assert SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR in COMPOSIO_GOOGLE_CONNECTOR_TYPES
assert SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR in COMPOSIO_GOOGLE_CONNECTOR_TYPES