diff --git a/surfsense_backend/app/indexing_pipeline/connector_document.py b/surfsense_backend/app/indexing_pipeline/connector_document.py index 71e3ab700..3ae7f57c4 100644 --- a/surfsense_backend/app/indexing_pipeline/connector_document.py +++ b/surfsense_backend/app/indexing_pipeline/connector_document.py @@ -1,4 +1,4 @@ -from pydantic import BaseModel, field_validator +from pydantic import BaseModel, Field, field_validator from app.db import DocumentType @@ -9,14 +9,14 @@ class ConnectorDocument(BaseModel): source_markdown: str unique_id: str document_type: DocumentType - search_space_id: int + search_space_id: int = Field(gt=0) should_summarize: bool = True should_use_code_chunker: bool = False metadata: dict = {} - connector_id: int | None = None - created_by_id: str | None = None + connector_id: int = Field(gt=0) + created_by_id: str - @field_validator("title", "source_markdown", "unique_id") + @field_validator("title", "source_markdown", "unique_id", "created_by_id") @classmethod def not_empty(cls, v: str, info) -> str: if not v.strip(): diff --git a/surfsense_backend/app/indexing_pipeline/indexing_pipeline_service.py b/surfsense_backend/app/indexing_pipeline/indexing_pipeline_service.py index ed117eb4c..05bb79218 100644 --- a/surfsense_backend/app/indexing_pipeline/indexing_pipeline_service.py +++ b/surfsense_backend/app/indexing_pipeline/indexing_pipeline_service.py @@ -116,7 +116,6 @@ class IndexingPipelineService: ) ] - document.source_markdown = connector_doc.source_markdown document.content = content document.embedding = embedding _safe_set_chunks(document, chunks) diff --git a/surfsense_backend/tests/conftest.py b/surfsense_backend/tests/conftest.py index dd12bad7e..c5d3b191b 100644 --- a/surfsense_backend/tests/conftest.py +++ b/surfsense_backend/tests/conftest.py @@ -28,6 +28,8 @@ def make_connector_document(): "unique_id": "test-id-001", "document_type": DocumentType.CLICKUP_CONNECTOR, "search_space_id": 1, + "connector_id": 1, + "created_by_id": "00000000-0000-0000-0000-000000000001", } defaults.update(overrides) return ConnectorDocument(**defaults) diff --git a/surfsense_backend/tests/integration/conftest.py b/surfsense_backend/tests/integration/conftest.py index 155f8e74d..99e182c6b 100644 --- a/surfsense_backend/tests/integration/conftest.py +++ b/surfsense_backend/tests/integration/conftest.py @@ -9,8 +9,10 @@ from sqlalchemy import text from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine from sqlalchemy.pool import NullPool -from app.db import Base, SearchSpace +from app.db import Base, SearchSpace, SearchSourceConnector, SearchSourceConnectorType from app.db import User +from app.db import DocumentType +from app.indexing_pipeline.connector_document import ConnectorDocument _EMBEDDING_DIM = 1024 # must match the Vector() dimension used in DB column creation @@ -77,6 +79,20 @@ async def db_user(db_session: AsyncSession) -> User: return user +@pytest_asyncio.fixture +async def db_connector(db_session: AsyncSession, db_user: User, db_search_space: "SearchSpace") -> SearchSourceConnector: + connector = SearchSourceConnector( + name="Test Connector", + connector_type=SearchSourceConnectorType.CLICKUP_CONNECTOR, + config={}, + search_space_id=db_search_space.id, + user_id=db_user.id, + ) + db_session.add(connector) + await db_session.flush() + return connector + + @pytest_asyncio.fixture async def db_search_space(db_session: AsyncSession, db_user: User) -> SearchSpace: space = SearchSpace( @@ -128,3 +144,21 @@ def patched_chunk_text(monkeypatch) -> MagicMock: return mock +@pytest.fixture +def make_connector_document(db_connector, db_user): + """Integration-scoped override: uses real DB connector and user IDs.""" + def _make(**overrides): + defaults = { + "title": "Test Document", + "source_markdown": "## Heading\n\nSome content.", + "unique_id": "test-id-001", + "document_type": DocumentType.CLICKUP_CONNECTOR, + "search_space_id": db_connector.search_space_id, + "connector_id": db_connector.id, + "created_by_id": str(db_user.id), + } + defaults.update(overrides) + return ConnectorDocument(**defaults) + return _make + + diff --git a/surfsense_backend/tests/integration/indexing_pipeline/test_prepare_for_indexing.py b/surfsense_backend/tests/integration/indexing_pipeline/test_prepare_for_indexing.py index 2f6cbf47a..b3cce7eaa 100644 --- a/surfsense_backend/tests/integration/indexing_pipeline/test_prepare_for_indexing.py +++ b/surfsense_backend/tests/integration/indexing_pipeline/test_prepare_for_indexing.py @@ -180,21 +180,6 @@ async def test_updated_at_advances_when_content_changes( assert updated_at_v2 > updated_at_v1 -async def test_updated_at_is_set_on_creation( - db_session, db_search_space, make_connector_document -): - doc = make_connector_document(search_space_id=db_search_space.id) - service = IndexingPipelineService(session=db_session) - - results = await service.prepare_for_indexing([doc]) - document_id = results[0].id - - result = await db_session.execute(select(Document).filter(Document.id == document_id)) - reloaded = result.scalars().first() - - assert reloaded.updated_at is not None - - async def test_same_content_from_different_source_is_skipped( db_session, db_search_space, make_connector_document ): diff --git a/surfsense_backend/tests/unit/indexing_pipeline/test_connector_document.py b/surfsense_backend/tests/unit/indexing_pipeline/test_connector_document.py index fe727a590..6bf953f25 100644 --- a/surfsense_backend/tests/unit/indexing_pipeline/test_connector_document.py +++ b/surfsense_backend/tests/unit/indexing_pipeline/test_connector_document.py @@ -5,19 +5,45 @@ from app.db import DocumentType from app.indexing_pipeline.connector_document import ConnectorDocument -def test_valid_document_created_with_defaults(): +def test_valid_document_created_with_required_fields(): doc = ConnectorDocument( title="Task", source_markdown="## Task\n\nSome content.", unique_id="task-1", document_type=DocumentType.CLICKUP_CONNECTOR, search_space_id=1, + connector_id=42, + created_by_id="00000000-0000-0000-0000-000000000001", ) assert doc.should_summarize is True assert doc.should_use_code_chunker is False assert doc.metadata == {} - assert doc.connector_id is None - assert doc.created_by_id is None + assert doc.connector_id == 42 + assert doc.created_by_id == "00000000-0000-0000-0000-000000000001" + + +def test_omitting_connector_id_raises(): + with pytest.raises(ValidationError): + ConnectorDocument( + title="Task", + source_markdown="## Content", + unique_id="task-1", + document_type=DocumentType.CLICKUP_CONNECTOR, + search_space_id=1, + created_by_id="00000000-0000-0000-0000-000000000001", + ) + + +def test_omitting_created_by_id_raises(): + with pytest.raises(ValidationError): + ConnectorDocument( + title="Task", + source_markdown="## Content", + unique_id="task-1", + document_type=DocumentType.CLICKUP_CONNECTOR, + search_space_id=1, + connector_id=42, + ) def test_empty_source_markdown_raises(): @@ -42,30 +68,6 @@ def test_whitespace_only_source_markdown_raises(): ) -def test_should_summarize_false_accepted(): - doc = ConnectorDocument( - title="Message", - source_markdown="Hello world.", - unique_id="msg-1", - document_type=DocumentType.SLACK_CONNECTOR, - search_space_id=1, - should_summarize=False, - ) - assert doc.should_summarize is False - - -def test_should_use_code_chunker_accepted(): - doc = ConnectorDocument( - title="Repository", - source_markdown="def hello():\n pass", - unique_id="repo-1", - document_type=DocumentType.GITHUB_CONNECTOR, - search_space_id=1, - should_use_code_chunker=True, - ) - assert doc.should_use_code_chunker is True - - def test_empty_title_raises(): with pytest.raises(ValidationError): ConnectorDocument( @@ -77,6 +79,45 @@ def test_empty_title_raises(): ) +def test_empty_created_by_id_raises(): + with pytest.raises(ValidationError): + ConnectorDocument( + title="Task", + source_markdown="## Content", + unique_id="task-1", + document_type=DocumentType.CLICKUP_CONNECTOR, + search_space_id=1, + connector_id=42, + created_by_id="", + ) + + +def test_zero_connector_id_raises(): + with pytest.raises(ValidationError): + ConnectorDocument( + title="Task", + source_markdown="## Content", + unique_id="task-1", + document_type=DocumentType.CLICKUP_CONNECTOR, + search_space_id=1, + connector_id=0, + created_by_id="00000000-0000-0000-0000-000000000001", + ) + + +def test_zero_search_space_id_raises(): + with pytest.raises(ValidationError): + ConnectorDocument( + title="Task", + source_markdown="## Content", + unique_id="task-1", + document_type=DocumentType.CLICKUP_CONNECTOR, + search_space_id=0, + connector_id=42, + created_by_id="00000000-0000-0000-0000-000000000001", + ) + + def test_empty_unique_id_raises(): with pytest.raises(ValidationError): ConnectorDocument(