# Mirror of https://github.com/MODSetter/SurfSense.git (synced 2026-04-26).
#
# Shared pytest fixtures for the Google unification integration tests:
# Google Drive / Gmail / Calendar indexers with credential handling for both
# Composio and native connectors, plus seed data for validating hybrid-search
# document-type filtering.
"""Shared fixtures for Google unification integration tests."""
|
|
|
|
from __future__ import annotations

import uuid
from contextlib import asynccontextmanager
from datetime import UTC, datetime
from unittest.mock import AsyncMock, MagicMock

import pytest
import pytest_asyncio
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker

from app.config import config as app_config
from app.db import (
    Chunk,
    Document,
    DocumentType,
    SearchSourceConnector,
    SearchSourceConnectorType,
    SearchSpace,
    User,
)
|
|
|
|
# Dimension of the configured embedding model; seeded vectors must match it
# or pgvector inserts would fail.
EMBEDDING_DIM = app_config.embedding_model_instance.dimension
# Deterministic placeholder vector shared by every seeded document and chunk.
DUMMY_EMBEDDING = [0.1] * EMBEDDING_DIM
|
|
|
|
|
|
def make_document(
    *,
    title: str,
    document_type: DocumentType,
    content: str,
    search_space_id: int,
    created_by_id: str,
) -> Document:
    """Build a Document instance with unique hashes and a dummy embedding."""
    # Random suffix keeps content/identifier hashes unique across documents.
    suffix = uuid.uuid4().hex[:12]
    doc = Document(
        title=title,
        document_type=document_type,
        content=content,
        content_hash=f"content-{suffix}",
        unique_identifier_hash=f"uid-{suffix}",
        source_markdown=content,
        search_space_id=search_space_id,
        created_by_id=created_by_id,
        embedding=DUMMY_EMBEDDING,
        updated_at=datetime.now(UTC),
        status={"state": "ready"},
    )
    return doc
|
|
|
|
|
|
def make_chunk(*, content: str, document_id: int) -> Chunk:
    """Build a Chunk for *document_id* carrying the shared dummy embedding."""
    fields = {
        "content": content,
        "document_id": document_id,
        "embedding": DUMMY_EMBEDDING,
    }
    return Chunk(**fields)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Savepoint-based fixture (used by retriever tests that receive db_session)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest_asyncio.fixture
async def seed_google_docs(
    db_session: AsyncSession, db_user: "User", db_search_space: "SearchSpace"
):
    """Insert a native Drive doc, a legacy Composio Drive doc, and a FILE doc.

    Returns a dict with keys ``native_doc``, ``legacy_doc``, ``file_doc``,
    plus ``search_space`` and ``user``.
    """
    owner_id = str(db_user.id)
    space_id = db_search_space.id

    # (title, document_type, content) for each seeded document; chunk content
    # mirrors the document content exactly.
    specs = [
        (
            "Native Drive Document",
            DocumentType.GOOGLE_DRIVE_FILE,
            "quarterly report from native google drive connector",
        ),
        (
            "Legacy Composio Drive Document",
            DocumentType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
            "quarterly report from composio google drive connector",
        ),
        (
            "Uploaded PDF",
            DocumentType.FILE,
            "unrelated uploaded file about quarterly reports",
        ),
    ]

    docs = [
        make_document(
            title=title,
            document_type=doc_type,
            content=body,
            search_space_id=space_id,
            created_by_id=owner_id,
        )
        for title, doc_type, body in specs
    ]
    db_session.add_all(docs)
    # Flush so the documents receive primary keys for the chunk FKs below.
    await db_session.flush()

    db_session.add_all(
        make_chunk(content=doc.content, document_id=doc.id) for doc in docs
    )
    await db_session.flush()

    native_doc, legacy_doc, file_doc = docs
    return {
        "native_doc": native_doc,
        "legacy_doc": legacy_doc,
        "file_doc": file_doc,
        "search_space": db_search_space,
        "user": db_user,
    }
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Committed-data fixture (used by service / browse tests that create their
|
|
# own sessions internally and therefore cannot see savepoint-scoped data)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest_asyncio.fixture
async def committed_google_data(async_engine):
    """Insert native, legacy, and FILE docs via a committed transaction.

    Yields ``{"search_space_id": int, "user_id": str}``.
    Cleans up by deleting the search space (cascades to documents / chunks).
    """
    space_id = None

    # engine.begin() commits everything when the block exits, so sessions
    # created elsewhere in the app can see this data.
    async with async_engine.begin() as conn:
        session = AsyncSession(bind=conn, expire_on_commit=False)

        user = User(
            id=uuid.uuid4(),
            email=f"google-test-{uuid.uuid4().hex[:6]}@surfsense.net",
            hashed_password="hashed",
            is_active=True,
            is_superuser=False,
            is_verified=True,
        )
        session.add(user)
        await session.flush()

        space = SearchSpace(
            name=f"Google Test {uuid.uuid4().hex[:6]}", user_id=user.id
        )
        session.add(space)
        await session.flush()
        space_id = space.id
        user_id = str(user.id)

        # (title, document_type, content) for each committed document.
        doc_specs = [
            (
                "Native Drive Doc",
                DocumentType.GOOGLE_DRIVE_FILE,
                "quarterly budget from native google drive",
            ),
            (
                "Legacy Composio Drive Doc",
                DocumentType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
                "quarterly budget from composio google drive",
            ),
            (
                "Plain File",
                DocumentType.FILE,
                "quarterly budget uploaded as file",
            ),
        ]
        docs = [
            make_document(
                title=title,
                document_type=doc_type,
                content=body,
                search_space_id=space_id,
                created_by_id=user_id,
            )
            for title, doc_type, body in doc_specs
        ]
        session.add_all(docs)
        await session.flush()

        # One chunk per document, mirroring the document content.
        session.add_all(
            Chunk(content=doc.content, document_id=doc.id, embedding=DUMMY_EMBEDDING)
            for doc in docs
        )
        await session.flush()

    yield {"search_space_id": space_id, "user_id": user_id}

    async with async_engine.begin() as conn:
        await conn.execute(
            text("DELETE FROM searchspaces WHERE id = :sid"), {"sid": space_id}
        )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Monkeypatch fixtures for system boundaries
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.fixture
def patched_session_factory(async_engine, monkeypatch):
    """Replace ``async_session_maker`` in connector_service with one bound to the test engine."""
    factory = async_sessionmaker(async_engine, expire_on_commit=False)
    target = "app.services.connector_service.async_session_maker"
    monkeypatch.setattr(target, factory)
    return factory
|
|
|
|
|
|
@pytest.fixture
def patched_embed(monkeypatch):
    """Mock the embedding model (system boundary) to return a fixed vector."""
    embed_mock = MagicMock(return_value=DUMMY_EMBEDDING)
    monkeypatch.setattr(
        "app.config.config.embedding_model_instance.embed", embed_mock
    )
    return embed_mock
|
|
|
|
|
|
@pytest.fixture
def patched_shielded_session(async_engine, monkeypatch):
    """Replace ``shielded_async_session`` in the knowledge_base module
    with one that yields sessions from the test engine."""
    factory = async_sessionmaker(async_engine, expire_on_commit=False)

    @asynccontextmanager
    async def _shielded_from_test_engine():
        async with factory() as session:
            yield session

    target = "app.agents.new_chat.tools.knowledge_base.shielded_async_session"
    monkeypatch.setattr(target, _shielded_from_test_engine)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Indexer test helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def make_session_factory(async_engine):
    """Create a session factory bound to the test engine."""
    return async_sessionmaker(bind=async_engine, expire_on_commit=False)
|
|
|
|
|
|
def mock_task_logger():
    """Return a fully-mocked TaskLoggingService with async methods.

    ``log_task_start`` resolves to a MagicMock standing in for the created
    log record; progress/failure/success are plain AsyncMocks so calls can
    be awaited and asserted on.
    """
    # AsyncMock/MagicMock come from the module-level import; the previous
    # function-local re-import shadowed the top-level MagicMock needlessly.
    mock = AsyncMock()
    mock.log_task_start = AsyncMock(return_value=MagicMock())
    mock.log_task_progress = AsyncMock()
    mock.log_task_failure = AsyncMock()
    mock.log_task_success = AsyncMock()
    return mock
|
|
|
|
|
|
async def seed_connector(
    async_engine,
    *,
    connector_type: "SearchSourceConnectorType",
    config: dict,
    name_prefix: str = "test",
):
    """Seed a user, search space, and connector in a committed transaction.

    Returns ``{"connector_id", "search_space_id", "user_id"}``.
    The caller is responsible for cleanup, e.g. :func:`cleanup_space` with
    the returned ``search_space_id`` (deleting the space cascades to the
    connector).
    """
    space_id = None  # assigned once the SearchSpace row has been flushed

    # engine.begin() commits on exit, so the seeded rows are visible to
    # sessions the code under test opens on its own.
    async with async_engine.begin() as conn:
        session = AsyncSession(bind=conn, expire_on_commit=False)

        user = User(
            id=uuid.uuid4(),
            email=f"{name_prefix}-{uuid.uuid4().hex[:6]}@surfsense.net",
            hashed_password="hashed",
            is_active=True,
            is_superuser=False,
            is_verified=True,
        )
        session.add(user)
        # Flush so user.id is available for the search space FK.
        await session.flush()

        space = SearchSpace(
            name=f"{name_prefix} {uuid.uuid4().hex[:6]}", user_id=user.id
        )
        session.add(space)
        await session.flush()
        space_id = space.id

        connector = SearchSourceConnector(
            name=f"{name_prefix} connector",
            connector_type=connector_type,
            is_indexable=True,
            config=config,
            search_space_id=space_id,
            user_id=user.id,
        )
        session.add(connector)
        await session.flush()
        # Capture scalars before the transaction closes the session's bind.
        connector_id = connector.id
        user_id = str(user.id)

    return {
        "connector_id": connector_id,
        "search_space_id": space_id,
        "user_id": user_id,
    }
|
|
|
|
|
|
async def cleanup_space(async_engine, space_id: int):
    """Delete a search space (cascades to connectors/documents)."""
    stmt = text("DELETE FROM searchspaces WHERE id = :sid")
    async with async_engine.begin() as conn:
        await conn.execute(stmt, {"sid": space_id})
|