mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-25 16:56:22 +02:00
328 lines
10 KiB
Python
328 lines
10 KiB
Python
"""Shared fixtures for Google unification integration tests."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import uuid
|
|
from contextlib import asynccontextmanager
|
|
from datetime import UTC, datetime
|
|
from unittest.mock import MagicMock
|
|
|
|
import pytest
|
|
import pytest_asyncio
|
|
from sqlalchemy import text
|
|
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
|
|
|
from app.config import config as app_config
|
|
from app.db import (
|
|
Chunk,
|
|
Document,
|
|
DocumentType,
|
|
SearchSourceConnector,
|
|
SearchSourceConnectorType,
|
|
SearchSpace,
|
|
User,
|
|
)
|
|
|
|
# Dimensionality is taken from the configured embedding model so fixture rows
# match the real pgvector column width; a constant vector keeps tests cheap
# and deterministic (no model inference during seeding).
EMBEDDING_DIM = app_config.embedding_model_instance.dimension
DUMMY_EMBEDDING = [0.1] * EMBEDDING_DIM
|
|
|
|
|
|
def make_document(
    *,
    title: str,
    document_type: DocumentType,
    content: str,
    search_space_id: int,
    created_by_id: str,
) -> Document:
    """Build a Document instance with unique hashes and a dummy embedding.

    A random 12-hex suffix makes ``content_hash`` and
    ``unique_identifier_hash`` unique per call, so repeated seeding never
    collides on unique constraints.
    """
    suffix = uuid.uuid4().hex[:12]
    document = Document(
        title=title,
        document_type=document_type,
        content=content,
        content_hash=f"content-{suffix}",
        unique_identifier_hash=f"uid-{suffix}",
        source_markdown=content,
        search_space_id=search_space_id,
        created_by_id=created_by_id,
        embedding=DUMMY_EMBEDDING,
        updated_at=datetime.now(UTC),
        status={"state": "ready"},
    )
    return document
|
|
|
|
|
|
def make_chunk(*, content: str, document_id: int) -> Chunk:
    """Build a Chunk for *document_id* carrying *content* and the dummy embedding."""
    fields = {
        "content": content,
        "document_id": document_id,
        "embedding": DUMMY_EMBEDDING,
    }
    return Chunk(**fields)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Savepoint-based fixture (used by retriever tests that receive db_session)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest_asyncio.fixture
async def seed_google_docs(
    db_session: AsyncSession, db_user: "User", db_search_space: "SearchSpace"
):
    """Insert a native Drive doc, a legacy Composio Drive doc, and a FILE doc.

    Returns a dict with keys ``native_doc``, ``legacy_doc``, ``file_doc``,
    plus ``search_space`` and ``user``.

    Data is only flushed (not committed), so it is visible solely within the
    savepoint-scoped ``db_session`` transaction.
    """
    user_id = str(db_user.id)
    space_id = db_search_space.id

    native_doc = make_document(
        title="Native Drive Document",
        document_type=DocumentType.GOOGLE_DRIVE_FILE,
        content="quarterly report from native google drive connector",
        search_space_id=space_id,
        created_by_id=user_id,
    )
    legacy_doc = make_document(
        title="Legacy Composio Drive Document",
        document_type=DocumentType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
        content="quarterly report from composio google drive connector",
        search_space_id=space_id,
        created_by_id=user_id,
    )
    file_doc = make_document(
        title="Uploaded PDF",
        document_type=DocumentType.FILE,
        content="unrelated uploaded file about quarterly reports",
        search_space_id=space_id,
        created_by_id=user_id,
    )

    docs = [native_doc, legacy_doc, file_doc]
    db_session.add_all(docs)
    # Flush to assign primary keys before chunks reference them.
    await db_session.flush()

    # One chunk per document, mirroring the document content — derived from
    # doc.content instead of repeating the literals, so the two cannot drift.
    db_session.add_all(
        make_chunk(content=doc.content, document_id=doc.id) for doc in docs
    )
    await db_session.flush()

    return {
        "native_doc": native_doc,
        "legacy_doc": legacy_doc,
        "file_doc": file_doc,
        "search_space": db_search_space,
        "user": db_user,
    }
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Committed-data fixture (used by service / browse tests that create their
|
|
# own sessions internally and therefore cannot see savepoint-scoped data)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest_asyncio.fixture
async def committed_google_data(async_engine):
    """Insert native, legacy, and FILE docs via a committed transaction.

    Yields ``{"search_space_id": int, "user_id": str}``.
    Cleans up by deleting the search space (cascades to documents / chunks).
    """
    space_id = None

    # engine.begin() commits on clean exit, so the seeded rows become visible
    # to sessions the code-under-test opens on its own.
    async with async_engine.begin() as conn:
        session = AsyncSession(bind=conn, expire_on_commit=False)

        user = User(
            id=uuid.uuid4(),
            email=f"google-test-{uuid.uuid4().hex[:6]}@surfsense.net",
            hashed_password="hashed",
            is_active=True,
            is_superuser=False,
            is_verified=True,
        )
        session.add(user)
        await session.flush()

        space = SearchSpace(name=f"Google Test {uuid.uuid4().hex[:6]}", user_id=user.id)
        session.add(space)
        await session.flush()
        space_id = space.id
        user_id = str(user.id)

        doc_specs = [
            (
                "Native Drive Doc",
                DocumentType.GOOGLE_DRIVE_FILE,
                "quarterly budget from native google drive",
            ),
            (
                "Legacy Composio Drive Doc",
                DocumentType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
                "quarterly budget from composio google drive",
            ),
            (
                "Plain File",
                DocumentType.FILE,
                "quarterly budget uploaded as file",
            ),
        ]
        docs = [
            make_document(
                title=doc_title,
                document_type=doc_type,
                content=doc_content,
                search_space_id=space_id,
                created_by_id=user_id,
            )
            for doc_title, doc_type, doc_content in doc_specs
        ]
        session.add_all(docs)
        await session.flush()

        # One chunk per document, reusing the document content verbatim.
        for doc in docs:
            session.add(
                Chunk(
                    content=doc.content,
                    document_id=doc.id,
                    embedding=DUMMY_EMBEDDING,
                )
            )
        await session.flush()

    yield {"search_space_id": space_id, "user_id": user_id}

    # Teardown: committed rows must be removed explicitly.
    async with async_engine.begin() as conn:
        await conn.execute(
            text("DELETE FROM searchspaces WHERE id = :sid"), {"sid": space_id}
        )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Monkeypatch fixtures for system boundaries
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.fixture
def patched_session_factory(async_engine, monkeypatch):
    """Replace ``async_session_maker`` in connector_service with one bound to the test engine."""
    factory = async_sessionmaker(async_engine, expire_on_commit=False)
    monkeypatch.setattr("app.services.connector_service.async_session_maker", factory)
    return factory
|
|
|
|
|
|
@pytest.fixture
def patched_embed(monkeypatch):
    """Mock the embedding model (system boundary) to return a fixed vector."""
    embed_mock = MagicMock(return_value=DUMMY_EMBEDDING)
    monkeypatch.setattr(
        "app.config.config.embedding_model_instance.embed", embed_mock
    )
    return embed_mock
|
|
|
|
|
|
@pytest.fixture
def patched_shielded_session(async_engine, monkeypatch):
    """Replace ``shielded_async_session`` in the knowledge_base module
    with one that yields sessions from the test engine."""
    factory = async_sessionmaker(async_engine, expire_on_commit=False)

    @asynccontextmanager
    async def _shielded_for_tests():
        async with factory() as session:
            yield session

    monkeypatch.setattr(
        "app.agents.new_chat.tools.knowledge_base.shielded_async_session",
        _shielded_for_tests,
    )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Indexer test helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def make_session_factory(async_engine):
    """Create a session factory bound to the test engine."""
    return async_sessionmaker(bind=async_engine, expire_on_commit=False)
|
|
|
|
|
|
def mock_task_logger():
    """Return a fully-mocked TaskLoggingService with async methods.

    ``log_task_start`` resolves to a MagicMock (stands in for the created log
    row); the other logging methods are plain AsyncMocks.
    """
    from unittest.mock import AsyncMock, MagicMock

    service = AsyncMock()
    service.log_task_start = AsyncMock(return_value=MagicMock())
    for method_name in ("log_task_progress", "log_task_failure", "log_task_success"):
        setattr(service, method_name, AsyncMock())
    return service
|
|
|
|
|
|
async def seed_connector(
    async_engine,
    *,
    connector_type: "SearchSourceConnectorType",
    config: dict,
    name_prefix: str = "test",
):
    """Seed a user, search space, and connector in one committed transaction.

    Args:
        async_engine: Engine bound to the test database.
        connector_type: The connector type to create.
        config: Connector configuration payload.
        name_prefix: Prefix used for the generated user email, space name,
            and connector name.

    Returns:
        ``{"connector_id": int, "search_space_id": int, "user_id": str}``.
        Callers are responsible for cleanup, e.g. via :func:`cleanup_space`.
    """
    # engine.begin() commits on clean exit, so the rows are visible to
    # sessions the code-under-test opens independently.
    async with async_engine.begin() as conn:
        session = AsyncSession(bind=conn, expire_on_commit=False)

        user = User(
            id=uuid.uuid4(),
            email=f"{name_prefix}-{uuid.uuid4().hex[:6]}@surfsense.net",
            hashed_password="hashed",
            is_active=True,
            is_superuser=False,
            is_verified=True,
        )
        session.add(user)
        await session.flush()

        space = SearchSpace(
            name=f"{name_prefix} {uuid.uuid4().hex[:6]}", user_id=user.id
        )
        session.add(space)
        await session.flush()
        space_id = space.id

        connector = SearchSourceConnector(
            name=f"{name_prefix} connector",
            connector_type=connector_type,
            is_indexable=True,
            config=config,
            search_space_id=space_id,
            user_id=user.id,
        )
        session.add(connector)
        # Flush to obtain the generated connector primary key.
        await session.flush()
        connector_id = connector.id
        user_id = str(user.id)

    return {
        "connector_id": connector_id,
        "search_space_id": space_id,
        "user_id": user_id,
    }
|
|
|
|
|
|
async def cleanup_space(async_engine, space_id: int):
    """Delete a search space (cascades to connectors/documents)."""
    delete_stmt = text("DELETE FROM searchspaces WHERE id = :sid")
    async with async_engine.begin() as conn:
        await conn.execute(delete_stmt, {"sid": space_id})