Merge pull request #1390 from AnishSarkar22/fix/backend-tests

fix: unit and integration tests
This commit is contained in:
Rohan Verma 2026-05-17 18:15:53 -07:00 committed by GitHub
commit 3c27fe688a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 197 additions and 82 deletions

View file

@ -350,6 +350,25 @@ def _drive_list_files(args: dict[str, Any]) -> dict[str, Any]:
folder id and serve the matching fixture list.
"""
q = args.get("q", "")
if "in owners" in q:
return {
"data": {
"files": [
{
"id": "fake-file-owner-probe",
"name": "owner-probe",
"owners": [
{
"me": True,
"emailAddress": "e2e-fake@surfsense.example",
}
],
}
],
"nextPageToken": None,
}
}
folder_id = "root"
if "in parents" in q:
# q looks like: '<folder_id>' in parents and trashed = false ...

View file

@ -1,13 +1,11 @@
"""Composio route integration fixtures.
The sys.modules hijack happens at module import time, before importing
app.app, so production `from composio import Composio` bindings resolve to
the strict E2E fake in this pytest process too.
The `composio` sys.modules hijack lives in the parent integration conftest
so it runs before any sibling suite imports `app.routes`.
"""
from __future__ import annotations
import sys
from collections.abc import AsyncGenerator
import httpx
@ -16,19 +14,15 @@ import pytest_asyncio
from httpx import ASGITransport
from sqlalchemy.ext.asyncio import AsyncSession
from tests.e2e.fakes import composio_module as _fake_composio
sys.modules["composio"] = _fake_composio
from app.app import app, limiter # noqa: E402
from app.config import config # noqa: E402
from app.db import ( # noqa: E402
from app.app import app, limiter
from app.config import config
from app.db import (
SearchSourceConnector,
SearchSourceConnectorType,
User,
get_async_session,
)
from app.users import current_active_user # noqa: E402
from app.users import current_active_user
pytestmark = pytest.mark.integration

View file

@ -1,3 +1,5 @@
import importlib
import sys
import uuid
from unittest.mock import AsyncMock, MagicMock
@ -7,17 +9,27 @@ from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
from sqlalchemy.pool import NullPool
from app.config import config as app_config
from app.db import (
Base,
DocumentType,
SearchSourceConnector,
SearchSourceConnectorType,
SearchSpace,
User,
)
from app.indexing_pipeline.connector_document import ConnectorDocument
from tests.conftest import TEST_DATABASE_URL
# Hijack `composio` before any `from app.*` import; the `from composio import
# Composio` in app.services.composio_service binds once at first import.
from tests.e2e.fakes import composio_module as _fake_composio
sys.modules["composio"] = _fake_composio
app_config = importlib.import_module("app.config").config
app_db = importlib.import_module("app.db")
Base = app_db.Base
DocumentType = app_db.DocumentType
SearchSourceConnector = app_db.SearchSourceConnector
SearchSourceConnectorType = app_db.SearchSourceConnectorType
SearchSpace = app_db.SearchSpace
User = app_db.User
ConnectorDocument = importlib.import_module(
"app.indexing_pipeline.connector_document"
).ConnectorDocument
create_default_roles_and_membership = importlib.import_module(
"app.routes.search_spaces_routes"
).create_default_roles_and_membership
TEST_DATABASE_URL = importlib.import_module("tests.conftest").TEST_DATABASE_URL
_EMBEDDING_DIM = app_config.embedding_model_instance.dimension
@ -105,6 +117,9 @@ async def db_search_space(db_session: AsyncSession, db_user: User) -> SearchSpac
)
db_session.add(space)
await db_session.flush()
# Mirror POST /searchspaces so routes guarded by check_permission find a membership.
await create_default_roles_and_membership(db_session, space.id, db_user.id)
await db_session.flush()
return space
@ -145,6 +160,10 @@ def patched_chunk_text(monkeypatch) -> MagicMock:
"app.indexing_pipeline.indexing_pipeline_service.chunk_text",
mock,
)
monkeypatch.setattr(
"app.indexing_pipeline.indexing_pipeline_service.chunk_text_hybrid",
mock,
)
return mock

View file

@ -204,6 +204,7 @@ class TestStripeCheckoutSessionCreation:
assert (
fake_client.last_params["success_url"]
== f"http://localhost:3000/dashboard/{search_space_id}/purchase-success"
"?session_id={CHECKOUT_SESSION_ID}"
)
assert (
fake_client.last_params["cancel_url"]

View file

@ -7,7 +7,7 @@ mocked at their system boundaries.
from __future__ import annotations
from unittest.mock import AsyncMock, MagicMock, patch
from unittest.mock import ANY, AsyncMock, MagicMock, patch
import pytest
import pytest_asyncio
@ -25,6 +25,7 @@ pytestmark = pytest.mark.integration
_COMPOSIO_ACCOUNT_ID = "composio-calendar-test-789"
_INDEXER_MODULE = "app.tasks.connector_indexers.google_calendar_indexer"
_GET_ACCESS_TOKEN = "app.services.composio_service.ComposioService.get_access_token"
@pytest_asyncio.fixture
@ -69,32 +70,29 @@ async def native_calendar(async_engine):
await cleanup_space(async_engine, data["search_space_id"])
@patch(_GET_ACCESS_TOKEN)
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.GoogleCalendarConnector")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_composio_calendar_uses_composio_credentials(
mock_build_creds,
@patch(f"{_INDEXER_MODULE}.ComposioService")
async def test_composio_calendar_uses_composio_service(
mock_composio_service_cls,
mock_cal_cls,
mock_tl_cls,
mock_get_access_token,
async_engine,
composio_calendar,
):
"""Calendar indexer calls build_composio_credentials for a Composio connector."""
"""Calendar indexer uses Composio tools directly for a Composio connector."""
from app.tasks.connector_indexers.google_calendar_indexer import (
index_google_calendar_events,
)
data = composio_calendar
mock_creds = MagicMock(name="composio-creds")
mock_build_creds.return_value = mock_creds
mock_composio_service = MagicMock()
mock_composio_service.get_calendar_events = AsyncMock(return_value=([], None))
mock_composio_service_cls.return_value = mock_composio_service
mock_tl_cls.return_value = mock_task_logger()
mock_cal_instance = MagicMock()
mock_cal_instance.get_all_primary_calendar_events = AsyncMock(
return_value=([], None)
)
mock_cal_cls.return_value = mock_cal_instance
maker = make_session_factory(async_engine)
async with maker() as session:
await index_google_calendar_events(
@ -104,17 +102,25 @@ async def test_composio_calendar_uses_composio_credentials(
user_id=data["user_id"],
)
mock_build_creds.assert_called_once_with(_COMPOSIO_ACCOUNT_ID)
mock_cal_cls.assert_called_once()
_, kwargs = mock_cal_cls.call_args
assert kwargs.get("credentials") is mock_creds
mock_composio_service_cls.assert_called_once()
mock_composio_service.get_calendar_events.assert_called_once_with(
connected_account_id=_COMPOSIO_ACCOUNT_ID,
entity_id=f"surfsense_{data['user_id']}",
time_min=ANY,
time_max=ANY,
max_results=250,
)
mock_cal_cls.assert_not_called()
mock_get_access_token.assert_not_called()
@patch(_GET_ACCESS_TOKEN)
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
@patch(f"{_INDEXER_MODULE}.ComposioService")
async def test_composio_calendar_without_account_id_returns_error(
mock_build_creds,
mock_composio_service_cls,
mock_tl_cls,
mock_get_access_token,
async_engine,
composio_calendar_no_id,
):
@ -138,20 +144,23 @@ async def test_composio_calendar_without_account_id_returns_error(
assert count == 0
assert error is not None
assert "composio" in error.lower()
mock_build_creds.assert_not_called()
mock_composio_service_cls.assert_not_called()
mock_get_access_token.assert_not_called()
@patch(_GET_ACCESS_TOKEN)
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.ComposioService")
@patch(f"{_INDEXER_MODULE}.GoogleCalendarConnector")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_native_calendar_does_not_use_composio_credentials(
mock_build_creds,
async def test_native_calendar_uses_google_calendar_connector(
mock_cal_cls,
mock_composio_service_cls,
mock_tl_cls,
mock_get_access_token,
async_engine,
native_calendar,
):
"""Calendar indexer does NOT call build_composio_credentials for a native connector."""
"""Native Calendar connector uses GoogleCalendarConnector with no Composio path."""
from app.tasks.connector_indexers.google_calendar_indexer import (
index_google_calendar_events,
)
@ -174,4 +183,6 @@ async def test_native_calendar_does_not_use_composio_credentials(
user_id=data["user_id"],
)
mock_build_creds.assert_not_called()
mock_cal_cls.assert_called_once()
mock_composio_service_cls.assert_not_called()
mock_get_access_token.assert_not_called()

View file

@ -7,7 +7,7 @@ mocked at their system boundaries.
from __future__ import annotations
from unittest.mock import AsyncMock, MagicMock, patch
from unittest.mock import ANY, AsyncMock, MagicMock, patch
import pytest
import pytest_asyncio
@ -25,6 +25,7 @@ pytestmark = pytest.mark.integration
_COMPOSIO_ACCOUNT_ID = "composio-gmail-test-456"
_INDEXER_MODULE = "app.tasks.connector_indexers.google_gmail_indexer"
_GET_ACCESS_TOKEN = "app.services.composio_service.ComposioService.get_access_token"
@pytest_asyncio.fixture
@ -69,30 +70,32 @@ async def native_gmail(async_engine):
await cleanup_space(async_engine, data["search_space_id"])
@patch(_GET_ACCESS_TOKEN)
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.GoogleGmailConnector")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_composio_gmail_uses_composio_credentials(
mock_build_creds,
@patch(f"{_INDEXER_MODULE}.ComposioService")
async def test_composio_gmail_uses_composio_service(
mock_composio_service_cls,
mock_gmail_cls,
mock_tl_cls,
mock_get_access_token,
async_engine,
composio_gmail,
):
"""Gmail indexer calls build_composio_credentials for a Composio connector."""
"""Gmail indexer uses Composio tools directly for a Composio connector."""
from app.tasks.connector_indexers.google_gmail_indexer import (
index_google_gmail_messages,
)
data = composio_gmail
mock_creds = MagicMock(name="composio-creds")
mock_build_creds.return_value = mock_creds
mock_composio_service = MagicMock()
mock_composio_service.get_gmail_messages = AsyncMock(
return_value=([], None, None, None)
)
mock_composio_service.get_gmail_message_detail = AsyncMock(return_value=({}, None))
mock_composio_service_cls.return_value = mock_composio_service
mock_tl_cls.return_value = mock_task_logger()
mock_gmail_instance = MagicMock()
mock_gmail_instance.get_recent_messages = AsyncMock(return_value=([], None))
mock_gmail_cls.return_value = mock_gmail_instance
maker = make_session_factory(async_engine)
async with maker() as session:
await index_google_gmail_messages(
@ -102,17 +105,25 @@ async def test_composio_gmail_uses_composio_credentials(
user_id=data["user_id"],
)
mock_build_creds.assert_called_once_with(_COMPOSIO_ACCOUNT_ID)
mock_gmail_cls.assert_called_once()
args, _ = mock_gmail_cls.call_args
assert args[0] is mock_creds
mock_composio_service_cls.assert_called_once()
mock_composio_service.get_gmail_messages.assert_called_once_with(
connected_account_id=_COMPOSIO_ACCOUNT_ID,
entity_id=f"surfsense_{data['user_id']}",
query=ANY,
max_results=ANY,
page_token=None,
)
mock_gmail_cls.assert_not_called()
mock_get_access_token.assert_not_called()
@patch(_GET_ACCESS_TOKEN)
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
@patch(f"{_INDEXER_MODULE}.ComposioService")
async def test_composio_gmail_without_account_id_returns_error(
mock_build_creds,
mock_composio_service_cls,
mock_tl_cls,
mock_get_access_token,
async_engine,
composio_gmail_no_id,
):
@ -136,20 +147,23 @@ async def test_composio_gmail_without_account_id_returns_error(
assert count == 0
assert error is not None
assert "composio" in error.lower()
mock_build_creds.assert_not_called()
mock_composio_service_cls.assert_not_called()
mock_get_access_token.assert_not_called()
@patch(_GET_ACCESS_TOKEN)
@patch(f"{_INDEXER_MODULE}.TaskLoggingService")
@patch(f"{_INDEXER_MODULE}.ComposioService")
@patch(f"{_INDEXER_MODULE}.GoogleGmailConnector")
@patch(f"{_INDEXER_MODULE}.build_composio_credentials")
async def test_native_gmail_does_not_use_composio_credentials(
mock_build_creds,
async def test_native_gmail_uses_google_gmail_connector(
mock_gmail_cls,
mock_composio_service_cls,
mock_tl_cls,
mock_get_access_token,
async_engine,
native_gmail,
):
"""Gmail indexer does NOT call build_composio_credentials for a native connector."""
"""Native Gmail connector uses GoogleGmailConnector with no Composio path."""
from app.tasks.connector_indexers.google_gmail_indexer import (
index_google_gmail_messages,
)
@ -170,4 +184,6 @@ async def test_native_gmail_does_not_use_composio_credentials(
user_id=data["user_id"],
)
mock_build_creds.assert_not_called()
mock_gmail_cls.assert_called_once()
mock_composio_service_cls.assert_not_called()
mock_get_access_token.assert_not_called()

View file

@ -200,7 +200,7 @@ async def test_reindex_sets_status_ready(db_session, db_search_space, db_user, m
async def test_reindex_replaces_chunks(db_session, db_search_space, db_user, mocker):
"""Reindexing replaces old chunks with new content rather than appending."""
mocker.patch(
"app.indexing_pipeline.indexing_pipeline_service.chunk_text",
"app.indexing_pipeline.indexing_pipeline_service.chunk_text_hybrid",
side_effect=[["Original chunk."], ["Updated chunk."]],
)

View file

@ -37,7 +37,12 @@ def _make_orm_doc(connector_doc, doc_id):
async def test_index_calls_embed_and_chunk_via_to_thread(
pipeline, make_connector_document, monkeypatch
):
"""index() runs embed_texts and the chunker via asyncio.to_thread, not blocking the loop."""
"""index() runs the chunker and embed_texts via asyncio.to_thread, not blocking the loop.
Routing between ``chunk_text`` (code path) and ``chunk_text_hybrid`` (default
path, see issue #1334) is verified separately in
``test_non_code_documents_use_hybrid_chunker``.
"""
to_thread_calls = []
original_to_thread = asyncio.to_thread
@ -51,12 +56,6 @@ async def test_index_calls_embed_and_chunk_via_to_thread(
"app.indexing_pipeline.indexing_pipeline_service.summarize_document",
AsyncMock(return_value="Summary."),
)
mock_chunk = MagicMock(return_value=["chunk1"])
mock_chunk.__name__ = "chunk_text"
monkeypatch.setattr(
"app.indexing_pipeline.indexing_pipeline_service.chunk_text",
mock_chunk,
)
mock_chunk_hybrid = MagicMock(return_value=["chunk1"])
mock_chunk_hybrid.__name__ = "chunk_text_hybrid"
monkeypatch.setattr(
@ -71,6 +70,11 @@ async def test_index_calls_embed_and_chunk_via_to_thread(
"app.indexing_pipeline.indexing_pipeline_service.embed_texts",
mock_embed,
)
# Bypass set_committed_value, which requires a real ORM instance (not MagicMock).
monkeypatch.setattr(
"app.indexing_pipeline.indexing_pipeline_service.attach_chunks_to_document",
MagicMock(),
)
connector_doc = make_connector_document(
document_type=DocumentType.GOOGLE_GMAIL_CONNECTOR,
@ -83,11 +87,62 @@ async def test_index_calls_embed_and_chunk_via_to_thread(
await pipeline.index(document, connector_doc, llm=MagicMock())
# Non-code documents now route through the table-aware hybrid chunker
# (see commit 2f3a33c9). Either chunker entry point satisfies the
# "chunking runs off the event loop" contract this test guards.
# Either chunker entry point satisfies the "chunking runs off the event
# loop" contract this test guards. Routing between the two is verified
# in test_non_code_documents_use_hybrid_chunker.
assert {"chunk_text", "chunk_text_hybrid"} & set(to_thread_calls)
assert "embed_texts" in to_thread_calls
assert document.status == DocumentStatus.ready()
async def test_non_code_documents_use_hybrid_chunker(
pipeline, make_connector_document, monkeypatch
):
"""Non-code documents route through ``chunk_text_hybrid`` (issue #1334).
The hybrid chunker preserves Markdown table integrity by avoiding splits
mid-row. Only documents flagged with ``should_use_code_chunker=True``
should take the ``chunk_text`` path.
"""
monkeypatch.setattr(
"app.indexing_pipeline.indexing_pipeline_service.summarize_document",
AsyncMock(return_value="Summary."),
)
mock_chunk_hybrid = MagicMock(return_value=["chunk1"])
mock_chunk_hybrid.__name__ = "chunk_text_hybrid"
monkeypatch.setattr(
"app.indexing_pipeline.indexing_pipeline_service.chunk_text_hybrid",
mock_chunk_hybrid,
)
mock_chunk_code = MagicMock(return_value=["chunk1"])
mock_chunk_code.__name__ = "chunk_text"
monkeypatch.setattr(
"app.indexing_pipeline.indexing_pipeline_service.chunk_text",
mock_chunk_code,
)
monkeypatch.setattr(
"app.indexing_pipeline.indexing_pipeline_service.embed_texts",
MagicMock(side_effect=lambda texts: [[0.1] * _EMBEDDING_DIM for _ in texts]),
)
monkeypatch.setattr(
"app.indexing_pipeline.indexing_pipeline_service.attach_chunks_to_document",
MagicMock(),
)
connector_doc = make_connector_document(
document_type=DocumentType.GOOGLE_GMAIL_CONNECTOR,
unique_id="msg-1",
search_space_id=1,
should_use_code_chunker=False,
)
document = MagicMock(spec=Document)
document.id = 1
document.status = DocumentStatus.pending()
await pipeline.index(document, connector_doc, llm=MagicMock())
mock_chunk_hybrid.assert_called_once()
mock_chunk_code.assert_not_called()
def _mock_session_factory(orm_docs_by_id):