From f7bac59a4bf97c317f75dfccfd50757165504738 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Sat, 9 May 2026 05:26:36 +0530 Subject: [PATCH] test(integration): enhance Drive indexer credential resolution tests for Composio and native connectors --- .../test_drive_indexer_credentials.py | 111 ++++++++++++------ .../test_composio_credentials.py | 104 +++++++++++++++- 2 files changed, 178 insertions(+), 37 deletions(-) diff --git a/surfsense_backend/tests/integration/google_unification/test_drive_indexer_credentials.py b/surfsense_backend/tests/integration/google_unification/test_drive_indexer_credentials.py index e669fa143..fceda8453 100644 --- a/surfsense_backend/tests/integration/google_unification/test_drive_indexer_credentials.py +++ b/surfsense_backend/tests/integration/google_unification/test_drive_indexer_credentials.py @@ -1,13 +1,26 @@ -"""Integration tests: Drive indexer credential resolution for Composio vs native connectors. +"""Integration tests: Drive indexer client + credential resolution. -Exercises ``index_google_drive_files`` with a real PostgreSQL database -containing seeded connector records. Google API and Composio SDK are -mocked at their system boundaries. +Locks in the post-cea8618 architectural contract: + +- Composio Drive connectors MUST use ``ComposioDriveClient`` (which routes + through ``composio.tools.execute``) and MUST NOT depend on a raw OAuth + access token via ``ComposioService.get_access_token``. +- Native Drive connectors MUST continue to use ``GoogleDriveClient`` with + credentials loaded from the connector config (no Composio involvement). +- Composio Drive connectors missing ``composio_connected_account_id`` MUST + short-circuit with a clear error before any client is constructed. + +Background: prior to ``cea8618`` the Composio path used +``build_composio_credentials → GoogleDriveClient``. That broke in production +once Composio's "Mask Connected Account Secrets" project toggle was on, +because the masked token failed the ``len(access_token) >= 20`` guard in +``ComposioService.get_access_token``. The structural assertions here make +any future regression to that token-based path fail at PR time. """ from __future__ import annotations -from unittest.mock import MagicMock, patch +from unittest.mock import ANY, AsyncMock, MagicMock, patch import pytest import pytest_asyncio @@ -25,6 +38,21 @@ pytestmark = pytest.mark.integration _COMPOSIO_ACCOUNT_ID = "composio-test-account-123" _INDEXER_MODULE = "app.tasks.connector_indexers.google_drive_indexer" +_GET_ACCESS_TOKEN = "app.services.composio_service.ComposioService.get_access_token" + + +def _mock_drive_client( + *, list_files_return: tuple = ([], None, None) +) -> MagicMock: + """Duck-typed client mock whose ``list_files`` yields the supplied tuple. + + Returning an empty file list short-circuits the indexer's full-scan + loop after the first page so the test exercises only the + construction + listing path, not download / ETL / DB writes. + """ + mock = MagicMock() + mock.list_files = AsyncMock(return_value=list_files_return) + return mock @pytest_asyncio.fixture @@ -69,25 +97,31 @@ async def committed_composio_no_account_id(async_engine): await cleanup_space(async_engine, data["search_space_id"]) +@patch(_GET_ACCESS_TOKEN) @patch(f"{_INDEXER_MODULE}.TaskLoggingService") @patch(f"{_INDEXER_MODULE}.GoogleDriveClient") -@patch(f"{_INDEXER_MODULE}.build_composio_credentials") -async def test_composio_connector_uses_composio_credentials( - mock_build_creds, - mock_client_cls, +@patch(f"{_INDEXER_MODULE}.ComposioDriveClient") +async def test_composio_drive_indexer_uses_composio_drive_client( + mock_composio_client_cls, + mock_native_client_cls, mock_task_logger_cls, + mock_get_access_token, async_engine, committed_drive_connector, ): - """Drive indexer calls build_composio_credentials for a Composio connector - and passes the result to GoogleDriveClient.""" + """Composio Drive must construct ComposioDriveClient and never read raw tokens. + + Reverting to the pre-cea8618 ``build_composio_credentials → GoogleDriveClient`` + path would either trip ``mock_native_client_cls.assert_not_called()`` (because + GoogleDriveClient would be constructed) or ``mock_get_access_token.assert_not_called()`` + (because the credential builder reads the raw token). + """ from app.tasks.connector_indexers.google_drive_indexer import ( index_google_drive_files, ) data = committed_drive_connector - mock_creds = MagicMock(name="composio-credentials") - mock_build_creds.return_value = mock_creds + mock_composio_client_cls.return_value = _mock_drive_client() mock_task_logger_cls.return_value = mock_task_logger() maker = make_session_factory(async_engine) @@ -100,21 +134,29 @@ async def test_composio_connector_uses_composio_credentials( folder_id="test-folder-id", ) - mock_build_creds.assert_called_once_with(_COMPOSIO_ACCOUNT_ID) - mock_client_cls.assert_called_once() - _, kwargs = mock_client_cls.call_args - assert kwargs.get("credentials") is mock_creds + mock_composio_client_cls.assert_called_once_with( + ANY, + data["connector_id"], + _COMPOSIO_ACCOUNT_ID, + entity_id=f"surfsense_{data['user_id']}", + ) + mock_native_client_cls.assert_not_called() + mock_get_access_token.assert_not_called() +@patch(_GET_ACCESS_TOKEN) @patch(f"{_INDEXER_MODULE}.TaskLoggingService") -@patch(f"{_INDEXER_MODULE}.build_composio_credentials") +@patch(f"{_INDEXER_MODULE}.GoogleDriveClient") +@patch(f"{_INDEXER_MODULE}.ComposioDriveClient") async def test_composio_connector_without_account_id_returns_error( - mock_build_creds, + mock_composio_client_cls, + mock_native_client_cls, mock_task_logger_cls, + mock_get_access_token, async_engine, committed_composio_no_account_id, ): - """Drive indexer returns an error when Composio connector lacks connected_account_id.""" + """Missing ``composio_connected_account_id`` must short-circuit before any client construction.""" from app.tasks.connector_indexers.google_drive_indexer import ( index_google_drive_files, ) @@ -134,28 +176,32 @@ async def test_composio_connector_without_account_id_returns_error( assert count == 0 assert error is not None - assert ( - "composio_connected_account_id" in error.lower() or "composio" in error.lower() - ) - mock_build_creds.assert_not_called() + assert "composio" in error.lower() + assert "connected_account_id" in error.lower() + mock_composio_client_cls.assert_not_called() + mock_native_client_cls.assert_not_called() + mock_get_access_token.assert_not_called() +@patch(_GET_ACCESS_TOKEN) @patch(f"{_INDEXER_MODULE}.TaskLoggingService") +@patch(f"{_INDEXER_MODULE}.ComposioDriveClient") @patch(f"{_INDEXER_MODULE}.GoogleDriveClient") -@patch(f"{_INDEXER_MODULE}.build_composio_credentials") -async def test_native_connector_does_not_use_composio_credentials( - mock_build_creds, - mock_client_cls, +async def test_native_connector_uses_google_drive_client( + mock_native_client_cls, + mock_composio_client_cls, mock_task_logger_cls, + mock_get_access_token, async_engine, committed_native_drive_connector, ): - """Drive indexer does NOT call build_composio_credentials for a native connector.""" + """Native Drive connector must use GoogleDriveClient (no Composio involvement at all).""" from app.tasks.connector_indexers.google_drive_indexer import ( index_google_drive_files, ) data = committed_native_drive_connector + mock_native_client_cls.return_value = _mock_drive_client() mock_task_logger_cls.return_value = mock_task_logger() maker = make_session_factory(async_engine) @@ -168,7 +214,6 @@ async def test_native_connector_does_not_use_composio_credentials( folder_id="test-folder-id", ) - mock_build_creds.assert_not_called() - mock_client_cls.assert_called_once() - _, kwargs = mock_client_cls.call_args - assert kwargs.get("credentials") is None + mock_native_client_cls.assert_called_once_with(ANY, data["connector_id"]) + mock_composio_client_cls.assert_not_called() + mock_get_access_token.assert_not_called() diff --git a/surfsense_backend/tests/unit/google_unification/test_composio_credentials.py b/surfsense_backend/tests/unit/google_unification/test_composio_credentials.py index 9ab618aa3..32f42437d 100644 --- a/surfsense_backend/tests/unit/google_unification/test_composio_credentials.py +++ b/surfsense_backend/tests/unit/google_unification/test_composio_credentials.py @@ -1,8 +1,17 @@ -"""Unit tests: build_composio_credentials returns valid Google Credentials. +"""Unit tests: Composio credential helpers + ``get_access_token`` masking guard. -Mocks the Composio SDK (external system boundary) and verifies that the -returned ``google.oauth2.credentials.Credentials`` object is correctly -configured with a token and a working refresh handler. +Covers two seams between Surfsense and Composio: + +1. ``build_composio_credentials`` returns a ``google.oauth2.credentials.Credentials`` + object with a working refresh handler (mocks the whole ``ComposioService``). +2. ``ComposioService.get_access_token`` rejects masked / missing tokens with + actionable error messages (mocks only the Composio SDK boundary so the + real guard logic is exercised). + +The masking guard is the boundary handler that production tripped over when +Composio's "Mask Connected Account Secrets" project setting was enabled. +The corresponding fix landed in ``cea8618``; these tests lock that contract +in place so any future weakening of the guard surfaces immediately. """ from datetime import UTC, datetime @@ -14,6 +23,11 @@ from google.oauth2.credentials import Credentials pytestmark = pytest.mark.unit +# --------------------------------------------------------------------------- +# build_composio_credentials — high-level wrapper tests +# --------------------------------------------------------------------------- + + @patch("app.services.composio_service.ComposioService") def test_returns_credentials_with_token_and_expiry(mock_composio_service): """build_composio_credentials returns a Credentials object with the Composio access token.""" @@ -54,3 +68,85 @@ def test_refresh_handler_fetches_fresh_token(mock_composio_service): assert new_token == "refreshed-token" assert new_expiry > datetime.now(UTC).replace(tzinfo=None) assert mock_service.get_access_token.call_count == 2 + + +# --------------------------------------------------------------------------- +# ComposioService.get_access_token — boundary masking guard tests +# --------------------------------------------------------------------------- + + +def _service_with_account(account: object): + """Build a real ``ComposioService`` whose underlying Composio SDK is faked. + + Only the SDK boundary is patched — the real ``get_access_token`` method + runs, so changes to the masking / missing-token guards surface here. + """ + from app.services import composio_service as composio_service_module + + with patch.object(composio_service_module, "Composio") as mock_composio_cls: + mock_client = MagicMock() + mock_client.connected_accounts.get.return_value = account + mock_composio_cls.return_value = mock_client + + service = composio_service_module.ComposioService(api_key="unit-test-api-key") + + # ``service.client`` already references ``mock_client`` even after the + # patch context exits because the constructor captured it during init. + return service + + +@pytest.mark.parametrize("masked_token", ["x", "xxxxxxxx", "x" * 19]) +def test_get_access_token_raises_on_masked_token(masked_token): + """Tokens shorter than the 20-char unmask threshold must raise with the dashboard hint. + + Composio masks ``state.val.access_token`` by default (project setting + "Mask Connected Account Secrets"). A masked token will always silently + fail downstream OAuth calls, so the guard surfaces it with the exact + text needed to fix the dashboard config. + """ + fake_account = MagicMock() + fake_account.state.val.access_token = masked_token + service = _service_with_account(fake_account) + + with pytest.raises(ValueError, match="Mask Connected Account Secrets"): + service.get_access_token("any-account-id") + + +def test_get_access_token_raises_when_state_val_missing(): + """No ``state.val`` on the connected account is a hard failure with an account-id hint.""" + fake_account = MagicMock() + fake_account.state = None + service = _service_with_account(fake_account) + + with pytest.raises(ValueError, match="No state.val.*missing-state-account"): + service.get_access_token("missing-state-account") + + +def test_get_access_token_raises_when_access_token_empty(): + """``state.val`` present but ``access_token`` empty must fail before the masking check.""" + fake_account = MagicMock() + fake_account.state.val.access_token = "" + service = _service_with_account(fake_account) + + with pytest.raises(ValueError, match="No access_token.*missing-token-account"): + service.get_access_token("missing-token-account") + + +def test_get_access_token_raises_when_access_token_none(): + """``state.val.access_token = None`` must fail before the masking check.""" + fake_account = MagicMock() + fake_account.state.val.access_token = None + service = _service_with_account(fake_account) + + with pytest.raises(ValueError, match="No access_token.*none-token-account"): + service.get_access_token("none-token-account") + + +def test_get_access_token_returns_unmasked_token(): + """Happy path: a >=20-char access token is returned verbatim.""" + fake_account = MagicMock() + unmasked = "u" * 32 + fake_account.state.val.access_token = unmasked + service = _service_with_account(fake_account) + + assert service.get_access_token("happy-account") == unmasked