mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-25 16:56:22 +02:00
feat: add integration and unit tests for Google unification connectors
- Introduced comprehensive integration tests for Google Drive, Gmail, and Calendar indexers, ensuring proper credential handling for both Composio and native connectors. - Added unit tests to validate the acceptance of Composio-sourced credentials across various connector types. - Implemented fixtures to seed test data and facilitate testing of hybrid search functionality, ensuring accurate document type filtering.
This commit is contained in:
parent
83152e8e7e
commit
36f4709225
12 changed files with 1310 additions and 0 deletions
|
|
@ -0,0 +1,328 @@
|
|||
"""Shared fixtures for Google unification integration tests."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from contextlib import asynccontextmanager
|
||||
from datetime import UTC, datetime
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
||||
|
||||
from app.config import config as app_config
|
||||
from app.db import (
|
||||
Chunk,
|
||||
Document,
|
||||
DocumentType,
|
||||
SearchSourceConnector,
|
||||
SearchSourceConnectorType,
|
||||
SearchSpace,
|
||||
User,
|
||||
)
|
||||
|
||||
EMBEDDING_DIM = app_config.embedding_model_instance.dimension
|
||||
DUMMY_EMBEDDING = [0.1] * EMBEDDING_DIM
|
||||
|
||||
|
||||
def make_document(
    *,
    title: str,
    document_type: DocumentType,
    content: str,
    search_space_id: int,
    created_by_id: str,
) -> Document:
    """Construct a ready-to-insert Document with unique hashes and a dummy embedding."""
    # Short random token keeps the hash columns unique across seeded rows.
    token = uuid.uuid4().hex[:12]
    return Document(
        title=title,
        document_type=document_type,
        content=content,
        source_markdown=content,
        content_hash=f"content-{token}",
        unique_identifier_hash=f"uid-{token}",
        search_space_id=search_space_id,
        created_by_id=created_by_id,
        embedding=DUMMY_EMBEDDING,
        updated_at=datetime.now(UTC),
        status={"state": "ready"},
    )
|
||||
|
||||
|
||||
def make_chunk(*, content: str, document_id: int) -> Chunk:
    """Build a Chunk for *document_id* carrying the shared dummy embedding."""
    return Chunk(content=content, document_id=document_id, embedding=DUMMY_EMBEDDING)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Savepoint-based fixture (used by retriever tests that receive db_session)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def seed_google_docs(
    db_session: AsyncSession, db_user: "User", db_search_space: "SearchSpace"
):
    """Insert a native Drive doc, a legacy Composio Drive doc, and a FILE doc.

    Returns a dict with keys ``native_doc``, ``legacy_doc``, ``file_doc``,
    plus ``search_space`` and ``user``.
    """
    owner_id = str(db_user.id)
    space_id = db_search_space.id

    # (result key, title, document type, body text) for each seeded document.
    specs = [
        (
            "native_doc",
            "Native Drive Document",
            DocumentType.GOOGLE_DRIVE_FILE,
            "quarterly report from native google drive connector",
        ),
        (
            "legacy_doc",
            "Legacy Composio Drive Document",
            DocumentType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
            "quarterly report from composio google drive connector",
        ),
        (
            "file_doc",
            "Uploaded PDF",
            DocumentType.FILE,
            "unrelated uploaded file about quarterly reports",
        ),
    ]

    docs = {
        key: make_document(
            title=title,
            document_type=doc_type,
            content=body,
            search_space_id=space_id,
            created_by_id=owner_id,
        )
        for key, title, doc_type, body in specs
    }
    db_session.add_all(list(docs.values()))
    await db_session.flush()

    # One chunk per document, mirroring the document body verbatim.
    db_session.add_all(
        [make_chunk(content=doc.content, document_id=doc.id) for doc in docs.values()]
    )
    await db_session.flush()

    return {**docs, "search_space": db_search_space, "user": db_user}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Committed-data fixture (used by service / browse tests that create their
|
||||
# own sessions internally and therefore cannot see savepoint-scoped data)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def committed_google_data(async_engine):
    """Insert native, legacy, and FILE docs via a committed transaction.

    Yields ``{"search_space_id": int, "user_id": str}``.
    Cleans up by deleting the search space (cascades to documents / chunks).
    """
    space_id = None

    async with async_engine.begin() as conn:
        session = AsyncSession(bind=conn, expire_on_commit=False)

        owner = User(
            id=uuid.uuid4(),
            email=f"google-test-{uuid.uuid4().hex[:6]}@surfsense.net",
            hashed_password="hashed",
            is_active=True,
            is_superuser=False,
            is_verified=True,
        )
        session.add(owner)
        await session.flush()

        space = SearchSpace(
            name=f"Google Test {uuid.uuid4().hex[:6]}", user_id=owner.id
        )
        session.add(space)
        await session.flush()
        space_id = space.id
        user_id = str(owner.id)

        # (title, document type, body text) for the three seeded documents.
        doc_specs = [
            (
                "Native Drive Doc",
                DocumentType.GOOGLE_DRIVE_FILE,
                "quarterly budget from native google drive",
            ),
            (
                "Legacy Composio Drive Doc",
                DocumentType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
                "quarterly budget from composio google drive",
            ),
            (
                "Plain File",
                DocumentType.FILE,
                "quarterly budget uploaded as file",
            ),
        ]
        docs = [
            make_document(
                title=title,
                document_type=doc_type,
                content=body,
                search_space_id=space_id,
                created_by_id=user_id,
            )
            for title, doc_type, body in doc_specs
        ]
        session.add_all(docs)
        await session.flush()

        # Mirror each document body into a single chunk.
        for doc in docs:
            session.add(
                Chunk(
                    content=doc.content,
                    document_id=doc.id,
                    embedding=DUMMY_EMBEDDING,
                )
            )
        await session.flush()

    yield {"search_space_id": space_id, "user_id": user_id}

    # Teardown: deleting the space cascades to its documents and chunks.
    async with async_engine.begin() as conn:
        await conn.execute(
            text("DELETE FROM searchspaces WHERE id = :sid"), {"sid": space_id}
        )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Monkeypatch fixtures for system boundaries
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
def patched_session_factory(async_engine, monkeypatch):
    """Point connector_service's ``async_session_maker`` at the test engine."""
    factory = async_sessionmaker(async_engine, expire_on_commit=False)
    monkeypatch.setattr(
        "app.services.connector_service.async_session_maker", factory
    )
    return factory
|
||||
|
||||
|
||||
@pytest.fixture
def patched_embed(monkeypatch):
    """Stub the embedding model (system boundary) with a fixed dummy vector."""
    embed_stub = MagicMock(return_value=DUMMY_EMBEDDING)
    monkeypatch.setattr(
        "app.config.config.embedding_model_instance.embed", embed_stub
    )
    return embed_stub
|
||||
|
||||
|
||||
@pytest.fixture
def patched_shielded_session(async_engine, monkeypatch):
    """Swap knowledge_base's ``shielded_async_session`` for one that hands out
    sessions bound to the test engine."""
    factory = async_sessionmaker(async_engine, expire_on_commit=False)

    @asynccontextmanager
    async def _from_test_engine():
        async with factory() as session:
            yield session

    monkeypatch.setattr(
        "app.agents.new_chat.tools.knowledge_base.shielded_async_session",
        _from_test_engine,
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Indexer test helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def make_session_factory(async_engine):
    """Return an ``async_sessionmaker`` bound to the test engine."""
    return async_sessionmaker(bind=async_engine, expire_on_commit=False)
|
||||
|
||||
|
||||
def mock_task_logger():
    """Return a fully-mocked TaskLoggingService whose methods are awaitable."""
    from unittest.mock import AsyncMock, MagicMock

    logger = AsyncMock()
    # log_task_start hands back a (mock) log object; the remaining methods
    # only need to be awaitable.
    logger.log_task_start = AsyncMock(return_value=MagicMock())
    for method_name in ("log_task_progress", "log_task_failure", "log_task_success"):
        setattr(logger, method_name, AsyncMock())
    return logger
|
||||
|
||||
|
||||
async def seed_connector(
    async_engine,
    *,
    connector_type: "SearchSourceConnectorType",
    config: dict,
    name_prefix: str = "test",
):
    """Seed a user, search space, and connector in a committed transaction.

    Returns ``{"connector_id": int, "search_space_id": int, "user_id": str}``.

    Note: this is a plain coroutine, not a fixture — it does not yield and
    performs no cleanup. Callers must remove the space themselves (see
    :func:`cleanup_space`).
    """
    space_id = None

    async with async_engine.begin() as conn:
        session = AsyncSession(bind=conn, expire_on_commit=False)

        user = User(
            id=uuid.uuid4(),
            email=f"{name_prefix}-{uuid.uuid4().hex[:6]}@surfsense.net",
            hashed_password="hashed",
            is_active=True,
            is_superuser=False,
            is_verified=True,
        )
        session.add(user)
        await session.flush()

        space = SearchSpace(
            name=f"{name_prefix} {uuid.uuid4().hex[:6]}", user_id=user.id
        )
        session.add(space)
        await session.flush()
        space_id = space.id

        connector = SearchSourceConnector(
            name=f"{name_prefix} connector",
            connector_type=connector_type,
            is_indexable=True,
            config=config,
            search_space_id=space_id,
            user_id=user.id,
        )
        session.add(connector)
        await session.flush()
        connector_id = connector.id
        user_id = str(user.id)

    # ``engine.begin()`` commits on exit, so the rows are visible to any
    # session the code under test opens on its own.
    return {
        "connector_id": connector_id,
        "search_space_id": space_id,
        "user_id": user_id,
    }
|
||||
|
||||
|
||||
async def cleanup_space(async_engine, space_id: int):
    """Delete a search space (cascades to connectors/documents)."""
    params = {"sid": space_id}
    async with async_engine.begin() as conn:
        await conn.execute(text("DELETE FROM searchspaces WHERE id = :sid"), params)
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
"""Integration test: _browse_recent_documents returns docs of multiple types.
|
||||
|
||||
Exercises the browse path (degenerate-query fallback) with a real PostgreSQL
|
||||
database. Verifies that passing a list of document types correctly returns
|
||||
documents of all listed types -- the same ``.in_()`` SQL path used by hybrid
|
||||
search but through the browse/recency-ordered code path.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
pytestmark = pytest.mark.integration
|
||||
|
||||
|
||||
async def test_browse_recent_documents_with_list_type_returns_both(
    committed_google_data, patched_shielded_session
):
    """_browse_recent_documents returns docs of all types when given a list."""
    from app.agents.new_chat.tools.knowledge_base import _browse_recent_documents

    results = await _browse_recent_documents(
        search_space_id=committed_google_data["search_space_id"],
        document_type=["GOOGLE_DRIVE_FILE", "COMPOSIO_GOOGLE_DRIVE_CONNECTOR"],
        top_k=10,
        start_date=None,
        end_date=None,
    )

    # Collect the distinct (truthy) document types present in the results.
    returned_types = {
        dtype
        for entry in results
        if (dtype := entry.get("document", {}).get("document_type"))
    }

    assert "GOOGLE_DRIVE_FILE" in returned_types, (
        "Native Drive docs should appear in browse results"
    )
    assert "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" in returned_types, (
        "Legacy Composio Drive docs should appear in browse results"
    )
|
||||
|
|
@ -0,0 +1,143 @@
|
|||
"""Integration tests: Calendar indexer credential resolution for Composio vs native connectors.
|
||||
|
||||
Exercises ``index_google_calendar_events`` with a real PostgreSQL database
|
||||
containing seeded connector records. Google API and Composio SDK are
|
||||
mocked at their system boundaries.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
|
||||
from app.db import SearchSourceConnectorType
|
||||
|
||||
from .conftest import cleanup_space, make_session_factory, mock_task_logger, seed_connector
|
||||
|
||||
pytestmark = pytest.mark.integration
|
||||
|
||||
_COMPOSIO_ACCOUNT_ID = "composio-calendar-test-789"
|
||||
_INDEXER_MODULE = "app.tasks.connector_indexers.google_calendar_indexer"
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def composio_calendar(async_engine):
    """Committed Composio Calendar connector carrying a connected-account id."""
    seeded = await seed_connector(
        async_engine,
        connector_type=SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR,
        name_prefix="cal-composio",
        config={"composio_connected_account_id": _COMPOSIO_ACCOUNT_ID},
    )
    yield seeded
    await cleanup_space(async_engine, seeded["search_space_id"])
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def composio_calendar_no_id(async_engine):
    """Committed Composio Calendar connector whose config lacks an account id."""
    seeded = await seed_connector(
        async_engine,
        connector_type=SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR,
        name_prefix="cal-noid",
        config={},
    )
    yield seeded
    await cleanup_space(async_engine, seeded["search_space_id"])
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def native_calendar(async_engine):
    """Committed native Google Calendar connector with fake OAuth fields."""
    seeded = await seed_connector(
        async_engine,
        connector_type=SearchSourceConnectorType.GOOGLE_CALENDAR_CONNECTOR,
        name_prefix="cal-native",
        config={
            "token": "fake",
            "refresh_token": "fake",
            "client_id": "fake",
            "client_secret": "fake",
            "token_uri": "https://oauth2.googleapis.com/token",
        },
    )
    yield seeded
    await cleanup_space(async_engine, seeded["search_space_id"])
|
||||
|
||||
|
||||
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.GoogleCalendarConnector")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_composio_calendar_uses_composio_credentials(
    mock_build_creds, mock_cal_cls, mock_tl_cls, async_engine, composio_calendar,
):
    """Calendar indexer calls build_composio_credentials for a Composio connector."""
    from app.tasks.connector_indexers.google_calendar_indexer import index_google_calendar_events

    composio_creds = MagicMock(name="composio-creds")
    mock_build_creds.return_value = composio_creds
    mock_tl_cls.return_value = mock_task_logger()

    # Calendar API boundary: no events, no error.
    calendar_stub = MagicMock()
    calendar_stub.get_all_primary_calendar_events = AsyncMock(return_value=([], None))
    mock_cal_cls.return_value = calendar_stub

    session_maker = make_session_factory(async_engine)
    async with session_maker() as session:
        await index_google_calendar_events(
            session=session,
            connector_id=composio_calendar["connector_id"],
            search_space_id=composio_calendar["search_space_id"],
            user_id=composio_calendar["user_id"],
        )

    mock_build_creds.assert_called_once_with(_COMPOSIO_ACCOUNT_ID)
    mock_cal_cls.assert_called_once()
    _, call_kwargs = mock_cal_cls.call_args
    assert call_kwargs.get("credentials") is composio_creds
|
||||
|
||||
|
||||
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_composio_calendar_without_account_id_returns_error(
    mock_build_creds, mock_tl_cls, async_engine, composio_calendar_no_id,
):
    """Calendar indexer returns error when Composio connector lacks connected_account_id."""
    from app.tasks.connector_indexers.google_calendar_indexer import index_google_calendar_events

    mock_tl_cls.return_value = mock_task_logger()

    session_maker = make_session_factory(async_engine)
    async with session_maker() as session:
        indexed_count, error = await index_google_calendar_events(
            session=session,
            connector_id=composio_calendar_no_id["connector_id"],
            search_space_id=composio_calendar_no_id["search_space_id"],
            user_id=composio_calendar_no_id["user_id"],
        )

    assert indexed_count == 0
    assert error is not None
    assert "composio" in error.lower()
    mock_build_creds.assert_not_called()
|
||||
|
||||
|
||||
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.GoogleCalendarConnector")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_native_calendar_does_not_use_composio_credentials(
    mock_build_creds, mock_cal_cls, mock_tl_cls, async_engine, native_calendar,
):
    """Calendar indexer does NOT call build_composio_credentials for a native connector."""
    from app.tasks.connector_indexers.google_calendar_indexer import index_google_calendar_events

    mock_tl_cls.return_value = mock_task_logger()

    # Calendar API boundary: no events, no error.
    calendar_stub = MagicMock()
    calendar_stub.get_all_primary_calendar_events = AsyncMock(return_value=([], None))
    mock_cal_cls.return_value = calendar_stub

    session_maker = make_session_factory(async_engine)
    async with session_maker() as session:
        await index_google_calendar_events(
            session=session,
            connector_id=native_calendar["connector_id"],
            search_space_id=native_calendar["search_space_id"],
            user_id=native_calendar["user_id"],
        )

    mock_build_creds.assert_not_called()
|
||||
|
|
@ -0,0 +1,167 @@
|
|||
"""Integration tests: Drive indexer credential resolution for Composio vs native connectors.
|
||||
|
||||
Exercises ``index_google_drive_files`` with a real PostgreSQL database
|
||||
containing seeded connector records. Google API and Composio SDK are
|
||||
mocked at their system boundaries.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
|
||||
from app.db import SearchSourceConnectorType
|
||||
|
||||
from .conftest import cleanup_space, make_session_factory, mock_task_logger, seed_connector
|
||||
|
||||
pytestmark = pytest.mark.integration
|
||||
|
||||
_COMPOSIO_ACCOUNT_ID = "composio-test-account-123"
|
||||
_INDEXER_MODULE = "app.tasks.connector_indexers.google_drive_indexer"
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def committed_drive_connector(async_engine):
    """Committed Composio Drive connector carrying a connected-account id."""
    seeded = await seed_connector(
        async_engine,
        connector_type=SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
        name_prefix="drive-composio",
        config={"composio_connected_account_id": _COMPOSIO_ACCOUNT_ID},
    )
    yield seeded
    await cleanup_space(async_engine, seeded["search_space_id"])
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def committed_native_drive_connector(async_engine):
    """Committed native Google Drive connector with fake OAuth fields."""
    seeded = await seed_connector(
        async_engine,
        connector_type=SearchSourceConnectorType.GOOGLE_DRIVE_CONNECTOR,
        name_prefix="drive-native",
        config={
            "token": "fake-token",
            "refresh_token": "fake-refresh",
            "client_id": "fake-client-id",
            "client_secret": "fake-secret",
            "token_uri": "https://oauth2.googleapis.com/token",
        },
    )
    yield seeded
    await cleanup_space(async_engine, seeded["search_space_id"])
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def committed_composio_no_account_id(async_engine):
    """Committed Composio Drive connector whose config lacks an account id."""
    seeded = await seed_connector(
        async_engine,
        connector_type=SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
        name_prefix="drive-noid",
        config={},
    )
    yield seeded
    await cleanup_space(async_engine, seeded["search_space_id"])
|
||||
|
||||
|
||||
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.GoogleDriveClient")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_composio_connector_uses_composio_credentials(
    mock_build_creds,
    mock_client_cls,
    mock_task_logger_cls,
    async_engine,
    committed_drive_connector,
):
    """Drive indexer calls build_composio_credentials for a Composio connector
    and passes the result to GoogleDriveClient."""
    from app.tasks.connector_indexers.google_drive_indexer import (
        index_google_drive_files,
    )

    composio_creds = MagicMock(name="composio-credentials")
    mock_build_creds.return_value = composio_creds
    mock_task_logger_cls.return_value = mock_task_logger()

    session_maker = make_session_factory(async_engine)
    async with session_maker() as session:
        await index_google_drive_files(
            session=session,
            connector_id=committed_drive_connector["connector_id"],
            search_space_id=committed_drive_connector["search_space_id"],
            user_id=committed_drive_connector["user_id"],
            folder_id="test-folder-id",
        )

    mock_build_creds.assert_called_once_with(_COMPOSIO_ACCOUNT_ID)
    mock_client_cls.assert_called_once()
    _, call_kwargs = mock_client_cls.call_args
    assert call_kwargs.get("credentials") is composio_creds
|
||||
|
||||
|
||||
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_composio_connector_without_account_id_returns_error(
    mock_build_creds,
    mock_task_logger_cls,
    async_engine,
    committed_composio_no_account_id,
):
    """Drive indexer returns an error when Composio connector lacks connected_account_id."""
    from app.tasks.connector_indexers.google_drive_indexer import (
        index_google_drive_files,
    )

    mock_task_logger_cls.return_value = mock_task_logger()

    session_maker = make_session_factory(async_engine)
    async with session_maker() as session:
        indexed_count, error = await index_google_drive_files(
            session=session,
            connector_id=committed_composio_no_account_id["connector_id"],
            search_space_id=committed_composio_no_account_id["search_space_id"],
            user_id=committed_composio_no_account_id["user_id"],
            folder_id="test-folder-id",
        )

    assert indexed_count == 0
    assert error is not None
    # "composio_connected_account_id" contains "composio", so one substring
    # check covers both phrasings of the error message.
    assert "composio" in error.lower()
    mock_build_creds.assert_not_called()
|
||||
|
||||
|
||||
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.GoogleDriveClient")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_native_connector_does_not_use_composio_credentials(
    mock_build_creds,
    mock_client_cls,
    mock_task_logger_cls,
    async_engine,
    committed_native_drive_connector,
):
    """Drive indexer does NOT call build_composio_credentials for a native connector."""
    from app.tasks.connector_indexers.google_drive_indexer import (
        index_google_drive_files,
    )

    mock_task_logger_cls.return_value = mock_task_logger()

    session_maker = make_session_factory(async_engine)
    async with session_maker() as session:
        await index_google_drive_files(
            session=session,
            connector_id=committed_native_drive_connector["connector_id"],
            search_space_id=committed_native_drive_connector["search_space_id"],
            user_id=committed_native_drive_connector["user_id"],
            folder_id="test-folder-id",
        )

    mock_build_creds.assert_not_called()
    mock_client_cls.assert_called_once()
    # Native connectors build their own OAuth credentials internally, so the
    # mocked build_composio_credentials result must not be handed to the client.
    _, call_kwargs = mock_client_cls.call_args
    assert call_kwargs.get("credentials") is None
|
||||
|
|
@ -0,0 +1,143 @@
|
|||
"""Integration tests: Gmail indexer credential resolution for Composio vs native connectors.
|
||||
|
||||
Exercises ``index_google_gmail_messages`` with a real PostgreSQL database
|
||||
containing seeded connector records. Google API and Composio SDK are
|
||||
mocked at their system boundaries.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
|
||||
from app.db import SearchSourceConnectorType
|
||||
|
||||
from .conftest import cleanup_space, make_session_factory, mock_task_logger, seed_connector
|
||||
|
||||
pytestmark = pytest.mark.integration
|
||||
|
||||
_COMPOSIO_ACCOUNT_ID = "composio-gmail-test-456"
|
||||
_INDEXER_MODULE = "app.tasks.connector_indexers.google_gmail_indexer"
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def composio_gmail(async_engine):
    """Committed Composio Gmail connector carrying a connected-account id."""
    seeded = await seed_connector(
        async_engine,
        connector_type=SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR,
        name_prefix="gmail-composio",
        config={"composio_connected_account_id": _COMPOSIO_ACCOUNT_ID},
    )
    yield seeded
    await cleanup_space(async_engine, seeded["search_space_id"])
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def composio_gmail_no_id(async_engine):
    """Committed Composio Gmail connector whose config lacks an account id."""
    seeded = await seed_connector(
        async_engine,
        connector_type=SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR,
        name_prefix="gmail-noid",
        config={},
    )
    yield seeded
    await cleanup_space(async_engine, seeded["search_space_id"])
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def native_gmail(async_engine):
    """Committed native Gmail connector with fake OAuth fields."""
    seeded = await seed_connector(
        async_engine,
        connector_type=SearchSourceConnectorType.GOOGLE_GMAIL_CONNECTOR,
        name_prefix="gmail-native",
        config={
            "token": "fake",
            "refresh_token": "fake",
            "client_id": "fake",
            "client_secret": "fake",
            "token_uri": "https://oauth2.googleapis.com/token",
        },
    )
    yield seeded
    await cleanup_space(async_engine, seeded["search_space_id"])
|
||||
|
||||
|
||||
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.GoogleGmailConnector")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_composio_gmail_uses_composio_credentials(
    mock_build_creds, mock_gmail_cls, mock_tl_cls, async_engine, composio_gmail,
):
    """Gmail indexer calls build_composio_credentials for a Composio connector."""
    from app.tasks.connector_indexers.google_gmail_indexer import index_google_gmail_messages

    composio_creds = MagicMock(name="composio-creds")
    mock_build_creds.return_value = composio_creds
    mock_tl_cls.return_value = mock_task_logger()

    # Gmail API boundary: no messages, no error.
    gmail_stub = MagicMock()
    gmail_stub.get_recent_messages = AsyncMock(return_value=([], None))
    mock_gmail_cls.return_value = gmail_stub

    session_maker = make_session_factory(async_engine)
    async with session_maker() as session:
        await index_google_gmail_messages(
            session=session,
            connector_id=composio_gmail["connector_id"],
            search_space_id=composio_gmail["search_space_id"],
            user_id=composio_gmail["user_id"],
        )

    mock_build_creds.assert_called_once_with(_COMPOSIO_ACCOUNT_ID)
    mock_gmail_cls.assert_called_once()
    # GoogleGmailConnector receives the credentials as its first positional arg.
    call_args, _ = mock_gmail_cls.call_args
    assert call_args[0] is composio_creds
|
||||
|
||||
|
||||
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_composio_gmail_without_account_id_returns_error(
    mock_build_creds, mock_tl_cls, async_engine, composio_gmail_no_id,
):
    """Gmail indexer returns error when Composio connector lacks connected_account_id."""
    from app.tasks.connector_indexers.google_gmail_indexer import index_google_gmail_messages

    mock_tl_cls.return_value = mock_task_logger()

    session_maker = make_session_factory(async_engine)
    async with session_maker() as session:
        indexed_count, error = await index_google_gmail_messages(
            session=session,
            connector_id=composio_gmail_no_id["connector_id"],
            search_space_id=composio_gmail_no_id["search_space_id"],
            user_id=composio_gmail_no_id["user_id"],
        )

    assert indexed_count == 0
    assert error is not None
    assert "composio" in error.lower()
    mock_build_creds.assert_not_called()
|
||||
|
||||
|
||||
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.GoogleGmailConnector")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_native_gmail_does_not_use_composio_credentials(
    mock_build_creds, mock_gmail_cls, mock_tl_cls, async_engine, native_gmail,
):
    """Gmail indexer does NOT call build_composio_credentials for a native connector."""
    from app.tasks.connector_indexers.google_gmail_indexer import index_google_gmail_messages

    mock_tl_cls.return_value = mock_task_logger()

    # Gmail API boundary: no messages, no error.
    gmail_stub = MagicMock()
    gmail_stub.get_recent_messages = AsyncMock(return_value=([], None))
    mock_gmail_cls.return_value = gmail_stub

    session_maker = make_session_factory(async_engine)
    async with session_maker() as session:
        await index_google_gmail_messages(
            session=session,
            connector_id=native_gmail["connector_id"],
            search_space_id=native_gmail["search_space_id"],
            user_id=native_gmail["user_id"],
        )

    mock_build_creds.assert_not_called()
|
||||
|
|
@ -0,0 +1,76 @@
|
|||
"""Integration tests: hybrid search correctly filters by document type lists.
|
||||
|
||||
These tests exercise the public ``hybrid_search`` method on
|
||||
``ChucksHybridSearchRetriever`` with a real PostgreSQL database.
|
||||
They verify that the ``.in_()`` SQL path works for list-of-types filtering,
|
||||
which is the foundation of the Google unification changes.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
from app.config import config as app_config
|
||||
from app.retriever.chunks_hybrid_search import ChucksHybridSearchRetriever
|
||||
|
||||
from .conftest import DUMMY_EMBEDDING
|
||||
|
||||
pytestmark = pytest.mark.integration
|
||||
|
||||
|
||||
async def test_list_of_types_returns_both_matching_doc_types(
    db_session, seed_google_docs
):
    """Searching with a list of document types returns documents of ALL listed types."""
    requested_types = ["GOOGLE_DRIVE_FILE", "COMPOSIO_GOOGLE_DRIVE_CONNECTOR"]
    space = seed_google_docs["search_space"]

    hits = await ChucksHybridSearchRetriever(db_session).hybrid_search(
        query_text="quarterly report",
        top_k=10,
        search_space_id=space.id,
        document_type=requested_types,
        query_embedding=DUMMY_EMBEDDING,
    )

    observed_types = {
        hit["document"]["document_type"] for hit in hits if hit.get("document")
    }
    # Every requested type must be represented; unrelated FILE docs must not leak in.
    for expected in requested_types:
        assert expected in observed_types
    assert "FILE" not in observed_types
|
||||
|
||||
|
||||
async def test_single_string_type_returns_only_that_type(
    db_session, seed_google_docs
):
    """Searching with a single string type returns only documents of that exact type."""
    space_id = seed_google_docs["search_space"].id
    retriever = ChucksHybridSearchRetriever(db_session)

    hits = await retriever.hybrid_search(
        query_text="quarterly report",
        top_k=10,
        search_space_id=space_id,
        document_type="GOOGLE_DRIVE_FILE",
        query_embedding=DUMMY_EMBEDDING,
    )

    observed_types = {
        hit["document"]["document_type"]
        for hit in hits
        if hit.get("document")
    }
    assert observed_types == {"GOOGLE_DRIVE_FILE"}
|
||||
|
||||
|
||||
async def test_all_invalid_types_returns_empty(db_session, seed_google_docs):
    """Searching with a list of nonexistent types returns an empty list, no exceptions."""
    search_space = seed_google_docs["search_space"]

    hits = await ChucksHybridSearchRetriever(db_session).hybrid_search(
        query_text="quarterly report",
        top_k=10,
        search_space_id=search_space.id,
        document_type=["NONEXISTENT_TYPE"],
        query_embedding=DUMMY_EMBEDDING,
    )

    # An unknown type list filters out everything rather than raising.
    assert hits == []
|
||||
|
|
@ -0,0 +1,73 @@
|
|||
"""Integration tests: ConnectorService search transparently includes legacy Composio docs.
|
||||
|
||||
These tests exercise ``ConnectorService.search_google_drive`` and
|
||||
``ConnectorService.search_files`` through a real PostgreSQL database.
|
||||
They verify that the legacy-type alias expansion works end-to-end:
|
||||
searching for native Google Drive docs also returns old Composio-typed docs.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from app.services.connector_service import ConnectorService
|
||||
|
||||
pytestmark = pytest.mark.integration
|
||||
|
||||
|
||||
async def test_search_google_drive_includes_legacy_composio_docs(
    async_engine, committed_google_data, patched_session_factory, patched_embed
):
    """search_google_drive returns both GOOGLE_DRIVE_FILE and COMPOSIO_GOOGLE_DRIVE_CONNECTOR docs.

    Exercises the legacy-type alias expansion end-to-end: a native Drive search
    must transparently include documents indexed under the old Composio Drive
    connector type, while plain FILE documents stay excluded.
    """
    space_id = committed_google_data["search_space_id"]

    async with patched_session_factory() as session:
        service = ConnectorService(session, search_space_id=space_id)
        # The result object is not inspected here; only the raw docs matter.
        _result_object, raw_docs = await service.search_google_drive(
            user_query="quarterly budget",
            search_space_id=space_id,
            top_k=10,
        )

    # Distinct, non-empty document types present in the results
    # (set comprehension replaces the original manual append loop).
    returned_types = {
        dtype
        for doc in raw_docs
        if (dtype := doc.get("document", {}).get("document_type"))
    }

    assert "GOOGLE_DRIVE_FILE" in returned_types, (
        "Native Drive docs should appear in search_google_drive results"
    )
    assert "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" in returned_types, (
        "Legacy Composio Drive docs should appear in search_google_drive results"
    )
    assert "FILE" not in returned_types, (
        "Plain FILE docs should NOT appear in search_google_drive results"
    )
|
||||
|
||||
|
||||
async def test_search_files_does_not_include_google_types(
    async_engine, committed_google_data, patched_session_factory, patched_embed
):
    """search_files returns only FILE docs, not Google Drive docs.

    The exclusion asserts run unconditionally: ``x not in set()`` is trivially
    true, so the original ``if returned_types:`` guard was redundant and has
    been removed (behavior-identical, and the intent reads more directly).
    """
    space_id = committed_google_data["search_space_id"]

    async with patched_session_factory() as session:
        service = ConnectorService(session, search_space_id=space_id)
        # The result object is not inspected here; only the raw docs matter.
        _result_object, raw_docs = await service.search_files(
            user_query="quarterly budget",
            search_space_id=space_id,
            top_k=10,
        )

    # Distinct, non-empty document types present in the results
    # (set comprehension replaces the original manual append loop).
    returned_types = {
        dtype
        for doc in raw_docs
        if (dtype := doc.get("document", {}).get("document_type"))
    }

    assert "GOOGLE_DRIVE_FILE" not in returned_types
    assert "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" not in returned_types
|
||||
Loading…
Add table
Add a link
Reference in a new issue