test(integration): enhance Drive indexer credential resolution tests for Composio and native connectors

This commit is contained in:
Anish Sarkar 2026-05-09 05:26:36 +05:30
parent dbf575fbd0
commit f7bac59a4b
2 changed files with 178 additions and 37 deletions

View file

@ -1,13 +1,26 @@
"""Integration tests: Drive indexer credential resolution for Composio vs native connectors.
"""Integration tests: Drive indexer client + credential resolution.
Exercises ``index_google_drive_files`` with a real PostgreSQL database
containing seeded connector records. Google API and Composio SDK are
mocked at their system boundaries.
Locks in the post-cea8618 architectural contract:
- Composio Drive connectors MUST use ``ComposioDriveClient`` (which routes
through ``composio.tools.execute``) and MUST NOT depend on a raw OAuth
access token via ``ComposioService.get_access_token``.
- Native Drive connectors MUST continue to use ``GoogleDriveClient`` with
credentials loaded from the connector config (no Composio involvement).
- Composio Drive connectors missing ``composio_connected_account_id`` MUST
short-circuit with a clear error before any client is constructed.
Background: prior to ``cea8618`` the Composio path used
``build_composio_credentials GoogleDriveClient``. That broke in production
once Composio's "Mask Connected Account Secrets" project toggle was on,
because the masked token failed the ``len(access_token) >= 20`` guard in
``ComposioService.get_access_token``. The structural assertions here make
any future regression to that token-based path fail at PR time.
"""
from __future__ import annotations
from unittest.mock import MagicMock, patch
from unittest.mock import ANY, AsyncMock, MagicMock, patch
import pytest
import pytest_asyncio
@ -25,6 +38,21 @@ pytestmark = pytest.mark.integration
_COMPOSIO_ACCOUNT_ID = "composio-test-account-123"
_INDEXER_MODULE = "app.tasks.connector_indexers.google_drive_indexer"
_GET_ACCESS_TOKEN = "app.services.composio_service.ComposioService.get_access_token"
def _mock_drive_client(
*, list_files_return: tuple = ([], None, None)
) -> MagicMock:
"""Duck-typed client mock whose ``list_files`` yields the supplied tuple.
Returning an empty file list short-circuits the indexer's full-scan
loop after the first page so the test exercises only the
construction + listing path, not download / ETL / DB writes.
"""
mock = MagicMock()
mock.list_files = AsyncMock(return_value=list_files_return)
return mock
@pytest_asyncio.fixture
@ -69,25 +97,31 @@ async def committed_composio_no_account_id(async_engine):
await cleanup_space(async_engine, data["search_space_id"])
@patch(_GET_ACCESS_TOKEN)
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.GoogleDriveClient")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_composio_connector_uses_composio_credentials(
mock_build_creds,
mock_client_cls,
@patch(f"{_INDEXER_MODULE}.ComposioDriveClient")
async def test_composio_drive_indexer_uses_composio_drive_client(
mock_composio_client_cls,
mock_native_client_cls,
mock_task_logger_cls,
mock_get_access_token,
async_engine,
committed_drive_connector,
):
"""Drive indexer calls build_composio_credentials for a Composio connector
and passes the result to GoogleDriveClient."""
"""Composio Drive must construct ComposioDriveClient and never read raw tokens.
Reverting to the pre-cea8618 ``build_composio_credentials GoogleDriveClient``
path would either trip ``mock_native_client_cls.assert_not_called()`` (because
GoogleDriveClient would be constructed) or ``mock_get_access_token.assert_not_called()``
(because the credential builder reads the raw token).
"""
from app.tasks.connector_indexers.google_drive_indexer import (
index_google_drive_files,
)
data = committed_drive_connector
mock_creds = MagicMock(name="composio-credentials")
mock_build_creds.return_value = mock_creds
mock_composio_client_cls.return_value = _mock_drive_client()
mock_task_logger_cls.return_value = mock_task_logger()
maker = make_session_factory(async_engine)
@ -100,21 +134,29 @@ async def test_composio_connector_uses_composio_credentials(
folder_id="test-folder-id",
)
mock_build_creds.assert_called_once_with(_COMPOSIO_ACCOUNT_ID)
mock_client_cls.assert_called_once()
_, kwargs = mock_client_cls.call_args
assert kwargs.get("credentials") is mock_creds
mock_composio_client_cls.assert_called_once_with(
ANY,
data["connector_id"],
_COMPOSIO_ACCOUNT_ID,
entity_id=f"surfsense_{data['user_id']}",
)
mock_native_client_cls.assert_not_called()
mock_get_access_token.assert_not_called()
@patch(_GET_ACCESS_TOKEN)
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
@patch(f"{_INDEXER_MODULE}.GoogleDriveClient")
@patch(f"{_INDEXER_MODULE}.ComposioDriveClient")
async def test_composio_connector_without_account_id_returns_error(
mock_build_creds,
mock_composio_client_cls,
mock_native_client_cls,
mock_task_logger_cls,
mock_get_access_token,
async_engine,
committed_composio_no_account_id,
):
"""Drive indexer returns an error when Composio connector lacks connected_account_id."""
"""Missing ``composio_connected_account_id`` must short-circuit before any client construction."""
from app.tasks.connector_indexers.google_drive_indexer import (
index_google_drive_files,
)
@ -134,28 +176,32 @@ async def test_composio_connector_without_account_id_returns_error(
assert count == 0
assert error is not None
assert (
"composio_connected_account_id" in error.lower() or "composio" in error.lower()
)
mock_build_creds.assert_not_called()
assert "composio" in error.lower()
assert "connected_account_id" in error.lower()
mock_composio_client_cls.assert_not_called()
mock_native_client_cls.assert_not_called()
mock_get_access_token.assert_not_called()
@patch(_GET_ACCESS_TOKEN)
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.ComposioDriveClient")
@patch(f"{_INDEXER_MODULE}.GoogleDriveClient")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_native_connector_does_not_use_composio_credentials(
mock_build_creds,
mock_client_cls,
async def test_native_connector_uses_google_drive_client(
mock_native_client_cls,
mock_composio_client_cls,
mock_task_logger_cls,
mock_get_access_token,
async_engine,
committed_native_drive_connector,
):
"""Drive indexer does NOT call build_composio_credentials for a native connector."""
"""Native Drive connector must use GoogleDriveClient (no Composio involvement at all)."""
from app.tasks.connector_indexers.google_drive_indexer import (
index_google_drive_files,
)
data = committed_native_drive_connector
mock_native_client_cls.return_value = _mock_drive_client()
mock_task_logger_cls.return_value = mock_task_logger()
maker = make_session_factory(async_engine)
@ -168,7 +214,6 @@ async def test_native_connector_does_not_use_composio_credentials(
folder_id="test-folder-id",
)
mock_build_creds.assert_not_called()
mock_client_cls.assert_called_once()
_, kwargs = mock_client_cls.call_args
assert kwargs.get("credentials") is None
mock_native_client_cls.assert_called_once_with(ANY, data["connector_id"])
mock_composio_client_cls.assert_not_called()
mock_get_access_token.assert_not_called()

View file

@ -1,8 +1,17 @@
"""Unit tests: build_composio_credentials returns valid Google Credentials.
"""Unit tests: Composio credential helpers + ``get_access_token`` masking guard.
Mocks the Composio SDK (external system boundary) and verifies that the
returned ``google.oauth2.credentials.Credentials`` object is correctly
configured with a token and a working refresh handler.
Covers two seams between Surfsense and Composio:
1. ``build_composio_credentials`` returns a ``google.oauth2.credentials.Credentials``
object with a working refresh handler (mocks the whole ``ComposioService``).
2. ``ComposioService.get_access_token`` rejects masked / missing tokens with
actionable error messages (mocks only the Composio SDK boundary so the
real guard logic is exercised).
The masking guard is the boundary handler that production tripped over when
Composio's "Mask Connected Account Secrets" project setting was enabled.
The corresponding fix landed in ``cea8618``; these tests lock that contract
in place so any future weakening of the guard surfaces immediately.
"""
from datetime import UTC, datetime
@ -14,6 +23,11 @@ from google.oauth2.credentials import Credentials
pytestmark = pytest.mark.unit
# ---------------------------------------------------------------------------
# build_composio_credentials — high-level wrapper tests
# ---------------------------------------------------------------------------
@patch("app.services.composio_service.ComposioService")
def test_returns_credentials_with_token_and_expiry(mock_composio_service):
"""build_composio_credentials returns a Credentials object with the Composio access token."""
@ -54,3 +68,85 @@ def test_refresh_handler_fetches_fresh_token(mock_composio_service):
assert new_token == "refreshed-token"
assert new_expiry > datetime.now(UTC).replace(tzinfo=None)
assert mock_service.get_access_token.call_count == 2
# ---------------------------------------------------------------------------
# ComposioService.get_access_token — boundary masking guard tests
# ---------------------------------------------------------------------------
def _service_with_account(account: object):
"""Build a real ``ComposioService`` whose underlying Composio SDK is faked.
Only the SDK boundary is patched the real ``get_access_token`` method
runs, so changes to the masking / missing-token guards surface here.
"""
from app.services import composio_service as composio_service_module
with patch.object(composio_service_module, "Composio") as mock_composio_cls:
mock_client = MagicMock()
mock_client.connected_accounts.get.return_value = account
mock_composio_cls.return_value = mock_client
service = composio_service_module.ComposioService(api_key="unit-test-api-key")
# ``service.client`` already references ``mock_client`` even after the
# patch context exits because the constructor captured it during init.
return service
@pytest.mark.parametrize("masked_token", ["x", "xxxxxxxx", "x" * 19])
def test_get_access_token_raises_on_masked_token(masked_token):
"""Tokens shorter than the 20-char unmask threshold must raise with the dashboard hint.
Composio masks ``state.val.access_token`` by default (project setting
"Mask Connected Account Secrets"). A masked token will always silently
fail downstream OAuth calls, so the guard surfaces it with the exact
text needed to fix the dashboard config.
"""
fake_account = MagicMock()
fake_account.state.val.access_token = masked_token
service = _service_with_account(fake_account)
with pytest.raises(ValueError, match="Mask Connected Account Secrets"):
service.get_access_token("any-account-id")
def test_get_access_token_raises_when_state_val_missing():
"""No ``state.val`` on the connected account is a hard failure with an account-id hint."""
fake_account = MagicMock()
fake_account.state = None
service = _service_with_account(fake_account)
with pytest.raises(ValueError, match="No state.val.*missing-state-account"):
service.get_access_token("missing-state-account")
def test_get_access_token_raises_when_access_token_empty():
"""``state.val`` present but ``access_token`` empty must fail before the masking check."""
fake_account = MagicMock()
fake_account.state.val.access_token = ""
service = _service_with_account(fake_account)
with pytest.raises(ValueError, match="No access_token.*missing-token-account"):
service.get_access_token("missing-token-account")
def test_get_access_token_raises_when_access_token_none():
"""``state.val.access_token = None`` must fail before the masking check."""
fake_account = MagicMock()
fake_account.state.val.access_token = None
service = _service_with_account(fake_account)
with pytest.raises(ValueError, match="No access_token.*none-token-account"):
service.get_access_token("none-token-account")
def test_get_access_token_returns_unmasked_token():
"""Happy path: a >=20-char access token is returned verbatim."""
fake_account = MagicMock()
unmasked = "u" * 32
fake_account.state.val.access_token = unmasked
service = _service_with_account(fake_account)
assert service.get_access_token("happy-account") == unmasked