feat: add integration and unit tests for Google unification connectors

- Introduced comprehensive integration tests for Google Drive, Gmail, and Calendar indexers, ensuring proper credential handling for both Composio and native connectors.
- Added unit tests to validate the acceptance of Composio-sourced credentials across various connector types.
- Implemented fixtures to seed test data and facilitate testing of hybrid search functionality, ensuring accurate document type filtering.
This commit is contained in:
Anish Sarkar 2026-03-19 17:51:15 +05:30
parent 83152e8e7e
commit 36f4709225
12 changed files with 1310 additions and 0 deletions

View file

@@ -0,0 +1,328 @@
"""Shared fixtures for Google unification integration tests."""
from __future__ import annotations
import uuid
from contextlib import asynccontextmanager
from datetime import UTC, datetime
from unittest.mock import MagicMock
import pytest
import pytest_asyncio
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
from app.config import config as app_config
from app.db import (
Chunk,
Document,
DocumentType,
SearchSourceConnector,
SearchSourceConnectorType,
SearchSpace,
User,
)
EMBEDDING_DIM = app_config.embedding_model_instance.dimension
DUMMY_EMBEDDING = [0.1] * EMBEDDING_DIM
def make_document(
    *,
    title: str,
    document_type: DocumentType,
    content: str,
    search_space_id: int,
    created_by_id: str,
) -> Document:
    """Construct a ``Document`` whose hashes are unique on every call.

    A fresh UUID fragment salts the content/identifier hashes so repeated
    calls never collide; the embedding is the shared dummy vector.
    """
    token = uuid.uuid4().hex[:12]
    fields = dict(
        title=title,
        document_type=document_type,
        content=content,
        content_hash=f"content-{token}",
        unique_identifier_hash=f"uid-{token}",
        source_markdown=content,
        search_space_id=search_space_id,
        created_by_id=created_by_id,
        embedding=DUMMY_EMBEDDING,
        updated_at=datetime.now(UTC),
        status={"state": "ready"},
    )
    return Document(**fields)
def make_chunk(*, content: str, document_id: int) -> Chunk:
    """Build a ``Chunk`` for *document_id* carrying the shared dummy embedding."""
    return Chunk(content=content, document_id=document_id, embedding=DUMMY_EMBEDDING)
# ---------------------------------------------------------------------------
# Savepoint-based fixture (used by retriever tests that receive db_session)
# ---------------------------------------------------------------------------
@pytest_asyncio.fixture
async def seed_google_docs(
    db_session: AsyncSession, db_user: "User", db_search_space: "SearchSpace"
):
    """Seed one native Drive doc, one legacy Composio Drive doc, and one FILE doc.

    Returns a dict with keys ``native_doc``, ``legacy_doc``, ``file_doc``,
    plus ``search_space`` and ``user``.
    """
    owner_id = str(db_user.id)
    space_id = db_search_space.id
    # (result key, title, document type, content) -- one row per seeded doc.
    specs = [
        (
            "native_doc",
            "Native Drive Document",
            DocumentType.GOOGLE_DRIVE_FILE,
            "quarterly report from native google drive connector",
        ),
        (
            "legacy_doc",
            "Legacy Composio Drive Document",
            DocumentType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
            "quarterly report from composio google drive connector",
        ),
        (
            "file_doc",
            "Uploaded PDF",
            DocumentType.FILE,
            "unrelated uploaded file about quarterly reports",
        ),
    ]
    docs = {}
    for key, title, dtype, body in specs:
        docs[key] = make_document(
            title=title,
            document_type=dtype,
            content=body,
            search_space_id=space_id,
            created_by_id=owner_id,
        )
    db_session.add_all(list(docs.values()))
    await db_session.flush()
    # Each doc gets exactly one chunk with identical text.
    db_session.add_all(
        [make_chunk(content=doc.content, document_id=doc.id) for doc in docs.values()]
    )
    await db_session.flush()
    docs["search_space"] = db_search_space
    docs["user"] = db_user
    return docs
# ---------------------------------------------------------------------------
# Committed-data fixture (used by service / browse tests that create their
# own sessions internally and therefore cannot see savepoint-scoped data)
# ---------------------------------------------------------------------------
@pytest_asyncio.fixture
async def committed_google_data(async_engine):
    """Insert native, legacy, and FILE docs inside a committed transaction.

    Yields ``{"search_space_id": int, "user_id": str}``.  Teardown deletes the
    search space, which cascades to its documents and chunks.
    """
    async with async_engine.begin() as conn:
        session = AsyncSession(bind=conn, expire_on_commit=False)
        owner = User(
            id=uuid.uuid4(),
            email=f"google-test-{uuid.uuid4().hex[:6]}@surfsense.net",
            hashed_password="hashed",
            is_active=True,
            is_superuser=False,
            is_verified=True,
        )
        session.add(owner)
        await session.flush()
        space = SearchSpace(
            name=f"Google Test {uuid.uuid4().hex[:6]}", user_id=owner.id
        )
        session.add(space)
        await session.flush()
        space_id = space.id
        owner_id = str(owner.id)
        # (title, document type, content) per seeded doc.
        doc_specs = [
            (
                "Native Drive Doc",
                DocumentType.GOOGLE_DRIVE_FILE,
                "quarterly budget from native google drive",
            ),
            (
                "Legacy Composio Drive Doc",
                DocumentType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
                "quarterly budget from composio google drive",
            ),
            ("Plain File", DocumentType.FILE, "quarterly budget uploaded as file"),
        ]
        docs = [
            make_document(
                title=title,
                document_type=dtype,
                content=body,
                search_space_id=space_id,
                created_by_id=owner_id,
            )
            for title, dtype, body in doc_specs
        ]
        session.add_all(docs)
        await session.flush()
        for doc in docs:
            session.add(
                Chunk(
                    content=doc.content,
                    document_id=doc.id,
                    embedding=DUMMY_EMBEDDING,
                )
            )
        await session.flush()
    yield {"search_space_id": space_id, "user_id": owner_id}
    async with async_engine.begin() as conn:
        await conn.execute(
            text("DELETE FROM searchspaces WHERE id = :sid"), {"sid": space_id}
        )
# ---------------------------------------------------------------------------
# Monkeypatch fixtures for system boundaries
# ---------------------------------------------------------------------------
@pytest.fixture
def patched_session_factory(async_engine, monkeypatch):
    """Point ``connector_service.async_session_maker`` at the test engine."""
    factory = async_sessionmaker(async_engine, expire_on_commit=False)
    monkeypatch.setattr(
        "app.services.connector_service.async_session_maker", factory
    )
    return factory
@pytest.fixture
def patched_embed(monkeypatch):
    """Stub the embedding model (system boundary) with a fixed dummy vector."""
    embed_stub = MagicMock(return_value=DUMMY_EMBEDDING)
    monkeypatch.setattr(
        "app.config.config.embedding_model_instance.embed", embed_stub
    )
    return embed_stub
@pytest.fixture
def patched_shielded_session(async_engine, monkeypatch):
    """Swap ``shielded_async_session`` in knowledge_base for a test-engine
    context manager that yields sessions bound to the test engine."""
    factory = async_sessionmaker(async_engine, expire_on_commit=False)

    @asynccontextmanager
    async def _shielded_for_tests():
        async with factory() as session:
            yield session

    monkeypatch.setattr(
        "app.agents.new_chat.tools.knowledge_base.shielded_async_session",
        _shielded_for_tests,
    )
# ---------------------------------------------------------------------------
# Indexer test helpers
# ---------------------------------------------------------------------------
def make_session_factory(async_engine):
    """Build an ``async_sessionmaker`` bound to the test engine."""
    factory = async_sessionmaker(async_engine, expire_on_commit=False)
    return factory
def mock_task_logger():
    """Return a fully-mocked TaskLoggingService whose methods are awaitable."""
    from unittest.mock import AsyncMock, MagicMock

    service = AsyncMock()
    # log_task_start returns a task-log object; the rest just need to be awaitable.
    service.log_task_start = AsyncMock(return_value=MagicMock())
    for method in ("log_task_progress", "log_task_failure", "log_task_success"):
        setattr(service, method, AsyncMock())
    return service
async def seed_connector(
    async_engine,
    *,
    connector_type: "SearchSourceConnectorType",
    config: dict,
    name_prefix: str = "test",
):
    """Commit a user, search space, and connector; return their identifiers.

    Returns ``{"connector_id", "search_space_id", "user_id"}``.  Callers are
    responsible for invoking :func:`cleanup_space` with the returned
    ``search_space_id`` afterwards.
    """
    async with async_engine.begin() as conn:
        session = AsyncSession(bind=conn, expire_on_commit=False)
        owner = User(
            id=uuid.uuid4(),
            email=f"{name_prefix}-{uuid.uuid4().hex[:6]}@surfsense.net",
            hashed_password="hashed",
            is_active=True,
            is_superuser=False,
            is_verified=True,
        )
        session.add(owner)
        await session.flush()
        space = SearchSpace(
            name=f"{name_prefix} {uuid.uuid4().hex[:6]}", user_id=owner.id
        )
        session.add(space)
        await session.flush()
        connector = SearchSourceConnector(
            name=f"{name_prefix} connector",
            connector_type=connector_type,
            is_indexable=True,
            config=config,
            search_space_id=space.id,
            user_id=owner.id,
        )
        session.add(connector)
        await session.flush()
        seeded = {
            "connector_id": connector.id,
            "search_space_id": space.id,
            "user_id": str(owner.id),
        }
    return seeded
async def cleanup_space(async_engine, space_id: int):
    """Delete a search space; the DB cascades to connectors/documents/chunks."""
    stmt = text("DELETE FROM searchspaces WHERE id = :sid")
    async with async_engine.begin() as conn:
        await conn.execute(stmt, {"sid": space_id})

View file

@@ -0,0 +1,44 @@
"""Integration test: _browse_recent_documents returns docs of multiple types.
Exercises the browse path (degenerate-query fallback) with a real PostgreSQL
database. Verifies that passing a list of document types correctly returns
documents of all listed types -- the same ``.in_()`` SQL path used by hybrid
search but through the browse/recency-ordered code path.
"""
from __future__ import annotations
import pytest
pytestmark = pytest.mark.integration
async def test_browse_recent_documents_with_list_type_returns_both(
    committed_google_data, patched_shielded_session
):
    """_browse_recent_documents returns docs of all types when given a list."""
    from app.agents.new_chat.tools.knowledge_base import _browse_recent_documents

    browsed = await _browse_recent_documents(
        search_space_id=committed_google_data["search_space_id"],
        document_type=["GOOGLE_DRIVE_FILE", "COMPOSIO_GOOGLE_DRIVE_CONNECTOR"],
        top_k=10,
        start_date=None,
        end_date=None,
    )
    seen_types = {
        dtype
        for entry in browsed
        if (dtype := entry.get("document", {}).get("document_type"))
    }
    assert "GOOGLE_DRIVE_FILE" in seen_types, (
        "Native Drive docs should appear in browse results"
    )
    assert "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" in seen_types, (
        "Legacy Composio Drive docs should appear in browse results"
    )

View file

@@ -0,0 +1,143 @@
"""Integration tests: Calendar indexer credential resolution for Composio vs native connectors.
Exercises ``index_google_calendar_events`` with a real PostgreSQL database
containing seeded connector records. Google API and Composio SDK are
mocked at their system boundaries.
"""
from __future__ import annotations
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
import pytest_asyncio
from app.db import SearchSourceConnectorType
from .conftest import cleanup_space, make_session_factory, mock_task_logger, seed_connector
pytestmark = pytest.mark.integration
_COMPOSIO_ACCOUNT_ID = "composio-calendar-test-789"
_INDEXER_MODULE = "app.tasks.connector_indexers.google_calendar_indexer"
@pytest_asyncio.fixture
async def composio_calendar(async_engine):
    """Composio Calendar connector seeded with a connected-account id."""
    seeded = await seed_connector(
        async_engine,
        connector_type=SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR,
        config={"composio_connected_account_id": _COMPOSIO_ACCOUNT_ID},
        name_prefix="cal-composio",
    )
    yield seeded
    await cleanup_space(async_engine, seeded["search_space_id"])
@pytest_asyncio.fixture
async def composio_calendar_no_id(async_engine):
    """Composio Calendar connector whose config lacks a connected-account id."""
    seeded = await seed_connector(
        async_engine,
        connector_type=SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR,
        config={},
        name_prefix="cal-noid",
    )
    yield seeded
    await cleanup_space(async_engine, seeded["search_space_id"])
@pytest_asyncio.fixture
async def native_calendar(async_engine):
    """Native Google Calendar connector seeded with fake OAuth credentials."""
    oauth_config = {
        "token": "fake",
        "refresh_token": "fake",
        "client_id": "fake",
        "client_secret": "fake",
        "token_uri": "https://oauth2.googleapis.com/token",
    }
    seeded = await seed_connector(
        async_engine,
        connector_type=SearchSourceConnectorType.GOOGLE_CALENDAR_CONNECTOR,
        config=oauth_config,
        name_prefix="cal-native",
    )
    yield seeded
    await cleanup_space(async_engine, seeded["search_space_id"])
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.GoogleCalendarConnector")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_composio_calendar_uses_composio_credentials(
    mock_build_creds, mock_cal_cls, mock_tl_cls, async_engine, composio_calendar,
):
    """Calendar indexer calls build_composio_credentials for a Composio connector."""
    from app.tasks.connector_indexers.google_calendar_indexer import index_google_calendar_events

    fake_creds = MagicMock(name="composio-creds")
    mock_build_creds.return_value = fake_creds
    mock_tl_cls.return_value = mock_task_logger()
    calendar_stub = MagicMock()
    calendar_stub.get_all_primary_calendar_events = AsyncMock(return_value=([], None))
    mock_cal_cls.return_value = calendar_stub

    seeded = composio_calendar
    session_factory = make_session_factory(async_engine)
    async with session_factory() as session:
        await index_google_calendar_events(
            session=session,
            connector_id=seeded["connector_id"],
            search_space_id=seeded["search_space_id"],
            user_id=seeded["user_id"],
        )

    mock_build_creds.assert_called_once_with(_COMPOSIO_ACCOUNT_ID)
    mock_cal_cls.assert_called_once()
    # Calendar connector receives the resolved credentials by keyword.
    assert mock_cal_cls.call_args.kwargs.get("credentials") is fake_creds
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_composio_calendar_without_account_id_returns_error(
    mock_build_creds, mock_tl_cls, async_engine, composio_calendar_no_id,
):
    """Calendar indexer returns error when Composio connector lacks connected_account_id."""
    from app.tasks.connector_indexers.google_calendar_indexer import index_google_calendar_events

    mock_tl_cls.return_value = mock_task_logger()
    seeded = composio_calendar_no_id
    session_factory = make_session_factory(async_engine)
    async with session_factory() as session:
        indexed_count, error = await index_google_calendar_events(
            session=session,
            connector_id=seeded["connector_id"],
            search_space_id=seeded["search_space_id"],
            user_id=seeded["user_id"],
        )

    assert indexed_count == 0
    assert error is not None
    assert "composio" in error.lower()
    mock_build_creds.assert_not_called()
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.GoogleCalendarConnector")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_native_calendar_does_not_use_composio_credentials(
    mock_build_creds, mock_cal_cls, mock_tl_cls, async_engine, native_calendar,
):
    """Calendar indexer does NOT call build_composio_credentials for a native connector."""
    from app.tasks.connector_indexers.google_calendar_indexer import index_google_calendar_events

    mock_tl_cls.return_value = mock_task_logger()
    calendar_stub = MagicMock()
    calendar_stub.get_all_primary_calendar_events = AsyncMock(return_value=([], None))
    mock_cal_cls.return_value = calendar_stub

    seeded = native_calendar
    session_factory = make_session_factory(async_engine)
    async with session_factory() as session:
        await index_google_calendar_events(
            session=session,
            connector_id=seeded["connector_id"],
            search_space_id=seeded["search_space_id"],
            user_id=seeded["user_id"],
        )

    mock_build_creds.assert_not_called()

View file

@@ -0,0 +1,167 @@
"""Integration tests: Drive indexer credential resolution for Composio vs native connectors.
Exercises ``index_google_drive_files`` with a real PostgreSQL database
containing seeded connector records. Google API and Composio SDK are
mocked at their system boundaries.
"""
from __future__ import annotations
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
import pytest_asyncio
from app.db import SearchSourceConnectorType
from .conftest import cleanup_space, make_session_factory, mock_task_logger, seed_connector
pytestmark = pytest.mark.integration
_COMPOSIO_ACCOUNT_ID = "composio-test-account-123"
_INDEXER_MODULE = "app.tasks.connector_indexers.google_drive_indexer"
@pytest_asyncio.fixture
async def committed_drive_connector(async_engine):
    """Composio Drive connector seeded with a connected-account id."""
    seeded = await seed_connector(
        async_engine,
        connector_type=SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
        config={"composio_connected_account_id": _COMPOSIO_ACCOUNT_ID},
        name_prefix="drive-composio",
    )
    yield seeded
    await cleanup_space(async_engine, seeded["search_space_id"])
@pytest_asyncio.fixture
async def committed_native_drive_connector(async_engine):
    """Native Google Drive connector seeded with fake OAuth credentials."""
    oauth_config = {
        "token": "fake-token",
        "refresh_token": "fake-refresh",
        "client_id": "fake-client-id",
        "client_secret": "fake-secret",
        "token_uri": "https://oauth2.googleapis.com/token",
    }
    seeded = await seed_connector(
        async_engine,
        connector_type=SearchSourceConnectorType.GOOGLE_DRIVE_CONNECTOR,
        config=oauth_config,
        name_prefix="drive-native",
    )
    yield seeded
    await cleanup_space(async_engine, seeded["search_space_id"])
@pytest_asyncio.fixture
async def committed_composio_no_account_id(async_engine):
    """Composio Drive connector whose config lacks a connected-account id."""
    seeded = await seed_connector(
        async_engine,
        connector_type=SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
        config={},
        name_prefix="drive-noid",
    )
    yield seeded
    await cleanup_space(async_engine, seeded["search_space_id"])
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.GoogleDriveClient")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_composio_connector_uses_composio_credentials(
    mock_build_creds,
    mock_client_cls,
    mock_task_logger_cls,
    async_engine,
    committed_drive_connector,
):
    """Drive indexer calls build_composio_credentials for a Composio connector
    and passes the result to GoogleDriveClient."""
    from app.tasks.connector_indexers.google_drive_indexer import (
        index_google_drive_files,
    )

    fake_creds = MagicMock(name="composio-credentials")
    mock_build_creds.return_value = fake_creds
    mock_task_logger_cls.return_value = mock_task_logger()

    seeded = committed_drive_connector
    session_factory = make_session_factory(async_engine)
    async with session_factory() as session:
        await index_google_drive_files(
            session=session,
            connector_id=seeded["connector_id"],
            search_space_id=seeded["search_space_id"],
            user_id=seeded["user_id"],
            folder_id="test-folder-id",
        )

    mock_build_creds.assert_called_once_with(_COMPOSIO_ACCOUNT_ID)
    mock_client_cls.assert_called_once()
    # Drive client receives the resolved credentials by keyword.
    assert mock_client_cls.call_args.kwargs.get("credentials") is fake_creds
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_composio_connector_without_account_id_returns_error(
    mock_build_creds,
    mock_task_logger_cls,
    async_engine,
    committed_composio_no_account_id,
):
    """Drive indexer returns an error when Composio connector lacks connected_account_id."""
    from app.tasks.connector_indexers.google_drive_indexer import (
        index_google_drive_files,
    )

    mock_task_logger_cls.return_value = mock_task_logger()
    seeded = committed_composio_no_account_id
    session_factory = make_session_factory(async_engine)
    async with session_factory() as session:
        indexed_count, error = await index_google_drive_files(
            session=session,
            connector_id=seeded["connector_id"],
            search_space_id=seeded["search_space_id"],
            user_id=seeded["user_id"],
            folder_id="test-folder-id",
        )

    assert indexed_count == 0
    assert error is not None
    # "composio_connected_account_id" itself contains "composio", so the
    # original two-clause membership check reduces to this single test.
    assert "composio" in error.lower()
    mock_build_creds.assert_not_called()
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.GoogleDriveClient")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_native_connector_does_not_use_composio_credentials(
    mock_build_creds,
    mock_client_cls,
    mock_task_logger_cls,
    async_engine,
    committed_native_drive_connector,
):
    """Drive indexer does NOT call build_composio_credentials for a native connector."""
    from app.tasks.connector_indexers.google_drive_indexer import (
        index_google_drive_files,
    )

    mock_task_logger_cls.return_value = mock_task_logger()
    seeded = committed_native_drive_connector
    session_factory = make_session_factory(async_engine)
    async with session_factory() as session:
        await index_google_drive_files(
            session=session,
            connector_id=seeded["connector_id"],
            search_space_id=seeded["search_space_id"],
            user_id=seeded["user_id"],
            folder_id="test-folder-id",
        )

    mock_build_creds.assert_not_called()
    mock_client_cls.assert_called_once()
    # Native path constructs the client without Composio credentials.
    assert mock_client_cls.call_args.kwargs.get("credentials") is None

View file

@@ -0,0 +1,143 @@
"""Integration tests: Gmail indexer credential resolution for Composio vs native connectors.
Exercises ``index_google_gmail_messages`` with a real PostgreSQL database
containing seeded connector records. Google API and Composio SDK are
mocked at their system boundaries.
"""
from __future__ import annotations
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
import pytest_asyncio
from app.db import SearchSourceConnectorType
from .conftest import cleanup_space, make_session_factory, mock_task_logger, seed_connector
pytestmark = pytest.mark.integration
_COMPOSIO_ACCOUNT_ID = "composio-gmail-test-456"
_INDEXER_MODULE = "app.tasks.connector_indexers.google_gmail_indexer"
@pytest_asyncio.fixture
async def composio_gmail(async_engine):
    """Composio Gmail connector seeded with a connected-account id."""
    seeded = await seed_connector(
        async_engine,
        connector_type=SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR,
        config={"composio_connected_account_id": _COMPOSIO_ACCOUNT_ID},
        name_prefix="gmail-composio",
    )
    yield seeded
    await cleanup_space(async_engine, seeded["search_space_id"])
@pytest_asyncio.fixture
async def composio_gmail_no_id(async_engine):
    """Composio Gmail connector whose config lacks a connected-account id."""
    seeded = await seed_connector(
        async_engine,
        connector_type=SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR,
        config={},
        name_prefix="gmail-noid",
    )
    yield seeded
    await cleanup_space(async_engine, seeded["search_space_id"])
@pytest_asyncio.fixture
async def native_gmail(async_engine):
    """Native Gmail connector seeded with fake OAuth credentials."""
    oauth_config = {
        "token": "fake",
        "refresh_token": "fake",
        "client_id": "fake",
        "client_secret": "fake",
        "token_uri": "https://oauth2.googleapis.com/token",
    }
    seeded = await seed_connector(
        async_engine,
        connector_type=SearchSourceConnectorType.GOOGLE_GMAIL_CONNECTOR,
        config=oauth_config,
        name_prefix="gmail-native",
    )
    yield seeded
    await cleanup_space(async_engine, seeded["search_space_id"])
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.GoogleGmailConnector")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_composio_gmail_uses_composio_credentials(
    mock_build_creds, mock_gmail_cls, mock_tl_cls, async_engine, composio_gmail,
):
    """Gmail indexer calls build_composio_credentials for a Composio connector."""
    from app.tasks.connector_indexers.google_gmail_indexer import index_google_gmail_messages

    fake_creds = MagicMock(name="composio-creds")
    mock_build_creds.return_value = fake_creds
    mock_tl_cls.return_value = mock_task_logger()
    gmail_stub = MagicMock()
    gmail_stub.get_recent_messages = AsyncMock(return_value=([], None))
    mock_gmail_cls.return_value = gmail_stub

    seeded = composio_gmail
    session_factory = make_session_factory(async_engine)
    async with session_factory() as session:
        await index_google_gmail_messages(
            session=session,
            connector_id=seeded["connector_id"],
            search_space_id=seeded["search_space_id"],
            user_id=seeded["user_id"],
        )

    mock_build_creds.assert_called_once_with(_COMPOSIO_ACCOUNT_ID)
    mock_gmail_cls.assert_called_once()
    # Gmail connector takes credentials as its first positional argument.
    assert mock_gmail_cls.call_args.args[0] is fake_creds
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_composio_gmail_without_account_id_returns_error(
    mock_build_creds, mock_tl_cls, async_engine, composio_gmail_no_id,
):
    """Gmail indexer returns error when Composio connector lacks connected_account_id."""
    from app.tasks.connector_indexers.google_gmail_indexer import index_google_gmail_messages

    mock_tl_cls.return_value = mock_task_logger()
    seeded = composio_gmail_no_id
    session_factory = make_session_factory(async_engine)
    async with session_factory() as session:
        indexed_count, error = await index_google_gmail_messages(
            session=session,
            connector_id=seeded["connector_id"],
            search_space_id=seeded["search_space_id"],
            user_id=seeded["user_id"],
        )

    assert indexed_count == 0
    assert error is not None
    assert "composio" in error.lower()
    mock_build_creds.assert_not_called()
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.GoogleGmailConnector")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_native_gmail_does_not_use_composio_credentials(
    mock_build_creds, mock_gmail_cls, mock_tl_cls, async_engine, native_gmail,
):
    """Gmail indexer does NOT call build_composio_credentials for a native connector."""
    from app.tasks.connector_indexers.google_gmail_indexer import index_google_gmail_messages

    mock_tl_cls.return_value = mock_task_logger()
    gmail_stub = MagicMock()
    gmail_stub.get_recent_messages = AsyncMock(return_value=([], None))
    mock_gmail_cls.return_value = gmail_stub

    seeded = native_gmail
    session_factory = make_session_factory(async_engine)
    async with session_factory() as session:
        await index_google_gmail_messages(
            session=session,
            connector_id=seeded["connector_id"],
            search_space_id=seeded["search_space_id"],
            user_id=seeded["user_id"],
        )

    mock_build_creds.assert_not_called()

View file

@@ -0,0 +1,76 @@
"""Integration tests: hybrid search correctly filters by document type lists.
These tests exercise the public ``hybrid_search`` method on
``ChucksHybridSearchRetriever`` with a real PostgreSQL database.
They verify that the ``.in_()`` SQL path works for list-of-types filtering,
which is the foundation of the Google unification changes.
"""
import pytest
from app.config import config as app_config
from app.retriever.chunks_hybrid_search import ChucksHybridSearchRetriever
from .conftest import DUMMY_EMBEDDING
pytestmark = pytest.mark.integration
async def test_list_of_types_returns_both_matching_doc_types(
    db_session, seed_google_docs
):
    """Searching with a list of document types returns documents of ALL listed types."""
    retriever = ChucksHybridSearchRetriever(db_session)
    hits = await retriever.hybrid_search(
        query_text="quarterly report",
        top_k=10,
        search_space_id=seed_google_docs["search_space"].id,
        document_type=["GOOGLE_DRIVE_FILE", "COMPOSIO_GOOGLE_DRIVE_CONNECTOR"],
        query_embedding=DUMMY_EMBEDDING,
    )
    hit_types = {
        hit["document"]["document_type"] for hit in hits if hit.get("document")
    }
    assert "GOOGLE_DRIVE_FILE" in hit_types
    assert "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" in hit_types
    # FILE docs were seeded too but are not in the requested type list.
    assert "FILE" not in hit_types
async def test_single_string_type_returns_only_that_type(
    db_session, seed_google_docs
):
    """Searching with a single string type returns only documents of that exact type."""
    retriever = ChucksHybridSearchRetriever(db_session)
    hits = await retriever.hybrid_search(
        query_text="quarterly report",
        top_k=10,
        search_space_id=seed_google_docs["search_space"].id,
        document_type="GOOGLE_DRIVE_FILE",
        query_embedding=DUMMY_EMBEDDING,
    )
    hit_types = {
        hit["document"]["document_type"] for hit in hits if hit.get("document")
    }
    assert hit_types == {"GOOGLE_DRIVE_FILE"}
async def test_all_invalid_types_returns_empty(db_session, seed_google_docs):
    """A list of nonexistent types yields an empty result list without raising."""
    retriever = ChucksHybridSearchRetriever(db_session)
    hits = await retriever.hybrid_search(
        query_text="quarterly report",
        top_k=10,
        search_space_id=seed_google_docs["search_space"].id,
        document_type=["NONEXISTENT_TYPE"],
        query_embedding=DUMMY_EMBEDDING,
    )
    assert hits == []

View file

@@ -0,0 +1,73 @@
"""Integration tests: ConnectorService search transparently includes legacy Composio docs.
These tests exercise ``ConnectorService.search_google_drive`` and
``ConnectorService.search_files`` through a real PostgreSQL database.
They verify that the legacy-type alias expansion works end-to-end:
searching for native Google Drive docs also returns old Composio-typed docs.
"""
from __future__ import annotations
import pytest
from app.services.connector_service import ConnectorService
pytestmark = pytest.mark.integration
async def test_search_google_drive_includes_legacy_composio_docs(
    async_engine, committed_google_data, patched_session_factory, patched_embed
):
    """search_google_drive returns both GOOGLE_DRIVE_FILE and COMPOSIO_GOOGLE_DRIVE_CONNECTOR docs."""
    space_id = committed_google_data["search_space_id"]
    async with patched_session_factory() as session:
        service = ConnectorService(session, search_space_id=space_id)
        result_object, raw_docs = await service.search_google_drive(
            user_query="quarterly budget",
            search_space_id=space_id,
            top_k=10,
        )
    returned_types = {
        dtype
        for doc in raw_docs
        if (dtype := doc.get("document", {}).get("document_type"))
    }
    assert "GOOGLE_DRIVE_FILE" in returned_types, (
        "Native Drive docs should appear in search_google_drive results"
    )
    assert "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" in returned_types, (
        "Legacy Composio Drive docs should appear in search_google_drive results"
    )
    assert "FILE" not in returned_types, (
        "Plain FILE docs should NOT appear in search_google_drive results"
    )
async def test_search_files_does_not_include_google_types(
    async_engine, committed_google_data, patched_session_factory, patched_embed
):
    """search_files returns only FILE docs, never Google Drive typed docs.

    The seeded FILE doc ("quarterly budget uploaded as file") must actually be
    returned: without that positive assertion the negative checks below would
    pass vacuously whenever the search came back empty (the original version
    guarded them behind ``if returned_types:``, which made the test unable to
    fail at all on an empty result set).
    """
    space_id = committed_google_data["search_space_id"]
    async with patched_session_factory() as session:
        service = ConnectorService(session, search_space_id=space_id)
        result_object, raw_docs = await service.search_files(
            user_query="quarterly budget",
            search_space_id=space_id,
            top_k=10,
        )
    returned_types = {
        dtype
        for doc in raw_docs
        if (dtype := doc.get("document", {}).get("document_type"))
    }
    assert "FILE" in returned_types, (
        "Plain FILE docs should appear in search_files results"
    )
    assert "GOOGLE_DRIVE_FILE" not in returned_types
    assert "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" not in returned_types