mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-26 01:06:23 +02:00
feat: add integration and unit tests for Google unification connectors
- Introduced comprehensive integration tests for Google Drive, Gmail, and Calendar indexers, ensuring proper credential handling for both Composio and native connectors.
- Added unit tests to validate the acceptance of Composio-sourced credentials across various connector types.
- Implemented fixtures to seed test data and facilitate testing of hybrid search functionality, ensuring accurate document type filtering.
This commit is contained in:
parent
83152e8e7e
commit
36f4709225
12 changed files with 1310 additions and 0 deletions
|
|
@ -0,0 +1,328 @@
|
|||
"""Shared fixtures for Google unification integration tests."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from contextlib import asynccontextmanager
|
||||
from datetime import UTC, datetime
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
||||
|
||||
from app.config import config as app_config
|
||||
from app.db import (
|
||||
Chunk,
|
||||
Document,
|
||||
DocumentType,
|
||||
SearchSourceConnector,
|
||||
SearchSourceConnectorType,
|
||||
SearchSpace,
|
||||
User,
|
||||
)
|
||||
|
||||
# Embedding width is taken from the configured model so seeded rows match the
# live pgvector column dimension.
EMBEDDING_DIM = app_config.embedding_model_instance.dimension
# Constant stand-in vector used wherever a real embedding would be computed.
DUMMY_EMBEDDING = [0.1] * EMBEDDING_DIM
|
||||
|
||||
|
||||
def make_document(
    *,
    title: str,
    document_type: DocumentType,
    content: str,
    search_space_id: int,
    created_by_id: str,
) -> Document:
    """Construct a ``Document`` with unique hash fields and a dummy embedding.

    A fresh 12-hex-char suffix keeps ``content_hash`` and
    ``unique_identifier_hash`` unique across repeated calls.
    """
    suffix = uuid.uuid4().hex[:12]
    fields = {
        "title": title,
        "document_type": document_type,
        "content": content,
        "content_hash": f"content-{suffix}",
        "unique_identifier_hash": f"uid-{suffix}",
        # Markdown mirrors the plain content for test purposes.
        "source_markdown": content,
        "search_space_id": search_space_id,
        "created_by_id": created_by_id,
        "embedding": DUMMY_EMBEDDING,
        "updated_at": datetime.now(UTC),
        "status": {"state": "ready"},
    }
    return Document(**fields)
|
||||
|
||||
|
||||
def make_chunk(*, content: str, document_id: int) -> Chunk:
    """Build a ``Chunk`` for *document_id* carrying the shared dummy embedding."""
    chunk = Chunk(
        content=content,
        document_id=document_id,
        embedding=DUMMY_EMBEDDING,
    )
    return chunk
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Savepoint-based fixture (used by retriever tests that receive db_session)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def seed_google_docs(
    db_session: AsyncSession, db_user: "User", db_search_space: "SearchSpace"
):
    """Insert a native Drive doc, a legacy Composio Drive doc, and a FILE doc.

    Returns a dict with keys ``native_doc``, ``legacy_doc``, ``file_doc``,
    plus ``search_space`` and ``user``.
    """
    owner_id = str(db_user.id)
    space_id = db_search_space.id

    # (result key, title, document type, content) — chunk content equals
    # document content, so one spec row drives both inserts.
    doc_specs = [
        (
            "native_doc",
            "Native Drive Document",
            DocumentType.GOOGLE_DRIVE_FILE,
            "quarterly report from native google drive connector",
        ),
        (
            "legacy_doc",
            "Legacy Composio Drive Document",
            DocumentType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
            "quarterly report from composio google drive connector",
        ),
        (
            "file_doc",
            "Uploaded PDF",
            DocumentType.FILE,
            "unrelated uploaded file about quarterly reports",
        ),
    ]

    docs = {
        key: make_document(
            title=title,
            document_type=doc_type,
            content=body,
            search_space_id=space_id,
            created_by_id=owner_id,
        )
        for key, title, doc_type, body in doc_specs
    }
    db_session.add_all(list(docs.values()))
    # Flush so each document gets a primary key the chunks can reference.
    await db_session.flush()

    db_session.add_all(
        [make_chunk(content=doc.content, document_id=doc.id) for doc in docs.values()]
    )
    await db_session.flush()

    return {
        **docs,
        "search_space": db_search_space,
        "user": db_user,
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Committed-data fixture (used by service / browse tests that create their
|
||||
# own sessions internally and therefore cannot see savepoint-scoped data)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def committed_google_data(async_engine):
    """Insert native, legacy, and FILE docs via a committed transaction.

    Yields ``{"search_space_id": int, "user_id": str}``.
    Cleans up by deleting the search space (cascades to documents / chunks).
    """
    space_id = None

    # engine.begin() commits the outer transaction on clean exit, making the
    # seeded rows visible to services that open their own sessions.
    async with async_engine.begin() as conn:
        session = AsyncSession(bind=conn, expire_on_commit=False)

        owner = User(
            id=uuid.uuid4(),
            email=f"google-test-{uuid.uuid4().hex[:6]}@surfsense.net",
            hashed_password="hashed",
            is_active=True,
            is_superuser=False,
            is_verified=True,
        )
        session.add(owner)
        await session.flush()

        space = SearchSpace(
            name=f"Google Test {uuid.uuid4().hex[:6]}", user_id=owner.id
        )
        session.add(space)
        await session.flush()
        space_id = space.id
        user_id = str(owner.id)

        # (title, document type, content) for the three comparison docs.
        doc_specs = [
            (
                "Native Drive Doc",
                DocumentType.GOOGLE_DRIVE_FILE,
                "quarterly budget from native google drive",
            ),
            (
                "Legacy Composio Drive Doc",
                DocumentType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
                "quarterly budget from composio google drive",
            ),
            (
                "Plain File",
                DocumentType.FILE,
                "quarterly budget uploaded as file",
            ),
        ]
        docs = [
            make_document(
                title=title,
                document_type=doc_type,
                content=body,
                search_space_id=space_id,
                created_by_id=user_id,
            )
            for title, doc_type, body in doc_specs
        ]
        session.add_all(docs)
        await session.flush()

        # One chunk per document; chunk text mirrors the document content.
        for doc in docs:
            session.add(
                Chunk(
                    content=doc.content,
                    document_id=doc.id,
                    embedding=DUMMY_EMBEDDING,
                )
            )
        await session.flush()

    yield {"search_space_id": space_id, "user_id": user_id}

    # Teardown: deleting the space cascades to documents and chunks.
    async with async_engine.begin() as conn:
        await conn.execute(
            text("DELETE FROM searchspaces WHERE id = :sid"), {"sid": space_id}
        )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Monkeypatch fixtures for system boundaries
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
def patched_session_factory(async_engine, monkeypatch):
    """Replace ``async_session_maker`` in connector_service with one bound to the test engine."""
    engine_bound_maker = async_sessionmaker(async_engine, expire_on_commit=False)
    monkeypatch.setattr(
        "app.services.connector_service.async_session_maker", engine_bound_maker
    )
    return engine_bound_maker
|
||||
|
||||
|
||||
@pytest.fixture
def patched_embed(monkeypatch):
    """Mock the embedding model (system boundary) to return a fixed vector."""
    embed_mock = MagicMock(return_value=DUMMY_EMBEDDING)
    monkeypatch.setattr(
        "app.config.config.embedding_model_instance.embed", embed_mock
    )
    return embed_mock
|
||||
|
||||
|
||||
@pytest.fixture
def patched_shielded_session(async_engine, monkeypatch):
    """Swap ``shielded_async_session`` in the knowledge_base module for a
    context manager yielding sessions bound to the test engine."""
    factory = async_sessionmaker(async_engine, expire_on_commit=False)

    @asynccontextmanager
    async def _engine_scoped_session():
        # Hand out a session from the test-engine factory instead of the
        # application's shielded session helper.
        async with factory() as session:
            yield session

    monkeypatch.setattr(
        "app.agents.new_chat.tools.knowledge_base.shielded_async_session",
        _engine_scoped_session,
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Indexer test helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def make_session_factory(async_engine):
    """Return an ``async_sessionmaker`` bound to the test engine."""
    factory = async_sessionmaker(async_engine, expire_on_commit=False)
    return factory
|
||||
|
||||
|
||||
def mock_task_logger():
    """Return a fully-mocked TaskLoggingService with async methods."""
    from unittest.mock import AsyncMock, MagicMock

    logger = AsyncMock()
    # log_task_start must yield a log-entry object the indexer can pass around.
    logger.log_task_start = AsyncMock(return_value=MagicMock())
    for method_name in ("log_task_progress", "log_task_failure", "log_task_success"):
        setattr(logger, method_name, AsyncMock())
    return logger
|
||||
|
||||
|
||||
async def seed_connector(
    async_engine,
    *,
    connector_type: "SearchSourceConnectorType",
    config: dict,
    name_prefix: str = "test",
):
    """Seed a user, search space, and connector via a committed transaction.

    Returns ``{"connector_id": int, "search_space_id": int, "user_id": str}``.
    The rows are committed (not savepoint-scoped), so they are visible to
    code that opens its own sessions; remove them afterwards with
    :func:`cleanup_space` using the returned ``search_space_id``.
    """
    space_id = None

    # engine.begin() commits on clean exit, so everything flushed below
    # becomes visible to independently-created sessions.
    async with async_engine.begin() as conn:
        session = AsyncSession(bind=conn, expire_on_commit=False)

        user = User(
            id=uuid.uuid4(),
            # Random suffix keeps the unique email constraint satisfied
            # across repeated seeding in one test run.
            email=f"{name_prefix}-{uuid.uuid4().hex[:6]}@surfsense.net",
            hashed_password="hashed",
            is_active=True,
            is_superuser=False,
            is_verified=True,
        )
        session.add(user)
        await session.flush()

        space = SearchSpace(
            name=f"{name_prefix} {uuid.uuid4().hex[:6]}", user_id=user.id
        )
        session.add(space)
        await session.flush()
        space_id = space.id

        connector = SearchSourceConnector(
            name=f"{name_prefix} connector",
            connector_type=connector_type,
            is_indexable=True,
            config=config,
            search_space_id=space_id,
            user_id=user.id,
        )
        session.add(connector)
        # Flush to obtain the generated connector primary key before returning.
        await session.flush()
        connector_id = connector.id
        user_id = str(user.id)

    return {
        "connector_id": connector_id,
        "search_space_id": space_id,
        "user_id": user_id,
    }
|
||||
|
||||
|
||||
async def cleanup_space(async_engine, space_id: int):
    """Delete a search space (cascades to connectors/documents)."""
    delete_stmt = text("DELETE FROM searchspaces WHERE id = :sid")
    async with async_engine.begin() as conn:
        await conn.execute(delete_stmt, {"sid": space_id})
|
||||
Loading…
Add table
Add a link
Reference in a new issue