Add the `indexing_pipeline` package with a validated `ConnectorDocument` model.

Files touched by this patch:
  * surfsense_backend/app/indexing_pipeline/__init__.py
      New, empty package marker.
  * surfsense_backend/app/indexing_pipeline/connector_document.py
      New pydantic model `ConnectorDocument`. The `not_empty` validator rejects
      `title`, `source_markdown` and `unique_id` values that are empty or
      whitespace-only.
  * surfsense_backend/tests/conftest.py
      New `make_connector_document` fixture: a factory that builds a
      ConnectorDocument from a set of defaults plus keyword overrides.
  * surfsense_backend/tests/unit/indexing_pipeline/test_connector_document.py
      New unit tests covering the model defaults, the two boolean flags and the
      empty-field validation.

NOTE(review): payload indentation and the placement of blank context lines in
the first conftest.py hunk were re-derived from the hunk line counts, and
whitespace inside string literals is kept exactly as found. Confirm against
the original commit before applying.

diff --git a/surfsense_backend/app/indexing_pipeline/__init__.py b/surfsense_backend/app/indexing_pipeline/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/surfsense_backend/app/indexing_pipeline/connector_document.py b/surfsense_backend/app/indexing_pipeline/connector_document.py
new file mode 100644
index 000000000..cf756ba42
--- /dev/null
+++ b/surfsense_backend/app/indexing_pipeline/connector_document.py
@@ -0,0 +1,23 @@
+from pydantic import BaseModel, field_validator
+
+from app.db import DocumentType
+
+
+class ConnectorDocument(BaseModel):
+    title: str
+    source_markdown: str
+    unique_id: str
+    document_type: DocumentType
+    search_space_id: int
+    should_summarize: bool = True
+    should_use_code_chunker: bool = False
+    metadata: dict = {}
+    connector_id: int | None = None
+    created_by_id: str | None = None
+
+    @field_validator("title", "source_markdown", "unique_id")
+    @classmethod
+    def not_empty(cls, v: str, info) -> str:
+        if not v.strip():
+            raise ValueError(f"{info.field_name} must not be empty or whitespace")
+        return v
diff --git a/surfsense_backend/tests/conftest.py b/surfsense_backend/tests/conftest.py
index df36827b1..dd12bad7e 100644
--- a/surfsense_backend/tests/conftest.py
+++ b/surfsense_backend/tests/conftest.py
@@ -1,5 +1,8 @@
 import pytest
 
+from app.db import DocumentType
+from app.indexing_pipeline.connector_document import ConnectorDocument
+
 
 @pytest.fixture
 def sample_user_id() -> str:
@@ -14,3 +17,18 @@ def sample_search_space_id() -> int:
 @pytest.fixture
 def sample_connector_id() -> int:
     return 42
+
+
+@pytest.fixture
+def make_connector_document():
+    def _make(**overrides):
+        defaults = {
+            "title": "Test Document",
+            "source_markdown": "## Heading\n\nSome content.",
+            "unique_id": "test-id-001",
+            "document_type": DocumentType.CLICKUP_CONNECTOR,
+            "search_space_id": 1,
+        }
+        defaults.update(overrides)
+        return ConnectorDocument(**defaults)
+    return _make
diff --git a/surfsense_backend/tests/unit/indexing_pipeline/test_connector_document.py b/surfsense_backend/tests/unit/indexing_pipeline/test_connector_document.py
new file mode 100644
index 000000000..fe727a590
--- /dev/null
+++ b/surfsense_backend/tests/unit/indexing_pipeline/test_connector_document.py
@@ -0,0 +1,88 @@
+import pytest
+from pydantic import ValidationError
+
+from app.db import DocumentType
+from app.indexing_pipeline.connector_document import ConnectorDocument
+
+
+def test_valid_document_created_with_defaults():
+    doc = ConnectorDocument(
+        title="Task",
+        source_markdown="## Task\n\nSome content.",
+        unique_id="task-1",
+        document_type=DocumentType.CLICKUP_CONNECTOR,
+        search_space_id=1,
+    )
+    assert doc.should_summarize is True
+    assert doc.should_use_code_chunker is False
+    assert doc.metadata == {}
+    assert doc.connector_id is None
+    assert doc.created_by_id is None
+
+
+def test_empty_source_markdown_raises():
+    with pytest.raises(ValidationError):
+        ConnectorDocument(
+            title="Task",
+            source_markdown="",
+            unique_id="task-1",
+            document_type=DocumentType.CLICKUP_CONNECTOR,
+            search_space_id=1,
+        )
+
+
+def test_whitespace_only_source_markdown_raises():
+    with pytest.raises(ValidationError):
+        ConnectorDocument(
+            title="Task",
+            source_markdown=" \n\t ",
+            unique_id="task-1",
+            document_type=DocumentType.CLICKUP_CONNECTOR,
+            search_space_id=1,
+        )
+
+
+def test_should_summarize_false_accepted():
+    doc = ConnectorDocument(
+        title="Message",
+        source_markdown="Hello world.",
+        unique_id="msg-1",
+        document_type=DocumentType.SLACK_CONNECTOR,
+        search_space_id=1,
+        should_summarize=False,
+    )
+    assert doc.should_summarize is False
+
+
+def test_should_use_code_chunker_accepted():
+    doc = ConnectorDocument(
+        title="Repository",
+        source_markdown="def hello():\n pass",
+        unique_id="repo-1",
+        document_type=DocumentType.GITHUB_CONNECTOR,
+        search_space_id=1,
+        should_use_code_chunker=True,
+    )
+    assert doc.should_use_code_chunker is True
+
+
+def test_empty_title_raises():
+    with pytest.raises(ValidationError):
+        ConnectorDocument(
+            title="",
+            source_markdown="## Content",
+            unique_id="task-1",
+            document_type=DocumentType.CLICKUP_CONNECTOR,
+            search_space_id=1,
+        )
+
+
+def test_empty_unique_id_raises():
+    with pytest.raises(ValidationError):
+        ConnectorDocument(
+            title="Task",
+            source_markdown="## Content",
+            unique_id="",
+            document_type=DocumentType.CLICKUP_CONNECTOR,
+            search_space_id=1,
+        )