mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-26 01:06:23 +02:00
test: add document hashing unit tests and clean up conftest mocks
This commit is contained in:
parent
d5e10bd8f9
commit
a0134a5830
4 changed files with 79 additions and 50 deletions
|
|
@ -0,0 +1,42 @@
|
|||
import pytest
|
||||
|
||||
from app.db import DocumentType
|
||||
from app.indexing_pipeline.document_hashing import compute_content_hash, compute_unique_identifier_hash
|
||||
|
||||
# Module-level pytest mark: applies the `unit` marker to every test in this file.
pytestmark = pytest.mark.unit
|
||||
|
||||
|
||||
def test_different_unique_id_produces_different_hash(make_connector_document):
    """Documents that differ only in ``unique_id`` get different identifier hashes.

    ``make_connector_document`` is a factory that builds a connector document
    and accepts keyword overrides; every field not overridden here is left at
    the factory's default for both documents.
    """
    doc_a = make_connector_document(unique_id="id-001")
    doc_b = make_connector_document(unique_id="id-002")

    hash_a = compute_unique_identifier_hash(doc_a)
    hash_b = compute_unique_identifier_hash(doc_b)

    assert hash_a != hash_b
|
||||
|
||||
|
||||
def test_different_search_space_produces_different_identifier_hash(make_connector_document):
    """Documents that differ only in ``search_space_id`` get different identifier hashes.

    ``make_connector_document`` is a factory that builds a connector document
    and accepts keyword overrides; every field not overridden here is left at
    the factory's default for both documents.
    """
    doc_a = make_connector_document(search_space_id=1)
    doc_b = make_connector_document(search_space_id=2)

    hash_a = compute_unique_identifier_hash(doc_a)
    hash_b = compute_unique_identifier_hash(doc_b)

    assert hash_a != hash_b
|
||||
|
||||
|
||||
def test_different_document_type_produces_different_identifier_hash(make_connector_document):
    """Documents that differ only in ``document_type`` get different identifier hashes.

    ``make_connector_document`` is a factory that builds a connector document
    and accepts keyword overrides; every field not overridden here is left at
    the factory's default for both documents.
    """
    doc_a = make_connector_document(document_type=DocumentType.CLICKUP_CONNECTOR)
    doc_b = make_connector_document(document_type=DocumentType.NOTION_CONNECTOR)

    hash_a = compute_unique_identifier_hash(doc_a)
    hash_b = compute_unique_identifier_hash(doc_b)

    assert hash_a != hash_b
|
||||
|
||||
|
||||
def test_same_content_same_space_produces_same_content_hash(make_connector_document):
    """Identical markdown in the same search space yields identical content hashes.

    Both documents are built by the ``make_connector_document`` factory with
    the same ``source_markdown`` and ``search_space_id`` overrides.
    """
    doc_a = make_connector_document(source_markdown="Hello world", search_space_id=1)
    doc_b = make_connector_document(source_markdown="Hello world", search_space_id=1)

    hash_a = compute_content_hash(doc_a)
    hash_b = compute_content_hash(doc_b)

    assert hash_a == hash_b
|
||||
|
||||
|
||||
def test_same_content_different_space_produces_different_content_hash(make_connector_document):
    """Identical markdown in different search spaces yields different content hashes.

    Both documents are built by the ``make_connector_document`` factory with
    the same ``source_markdown``; only ``search_space_id`` differs.
    """
    doc_a = make_connector_document(source_markdown="Hello world", search_space_id=1)
    doc_b = make_connector_document(source_markdown="Hello world", search_space_id=2)

    hash_a = compute_content_hash(doc_a)
    hash_b = compute_content_hash(doc_b)

    assert hash_a != hash_b
|
||||
|
||||
|
||||
def test_different_content_produces_different_content_hash(make_connector_document):
    """Documents that differ only in ``source_markdown`` get different content hashes.

    ``make_connector_document`` is a factory that builds a connector document
    and accepts keyword overrides; every field not overridden here is left at
    the factory's default for both documents.
    """
    doc_a = make_connector_document(source_markdown="Original content")
    doc_b = make_connector_document(source_markdown="Updated content")

    hash_a = compute_content_hash(doc_a)
    hash_b = compute_content_hash(doc_b)

    assert hash_a != hash_b
|
||||
Loading…
Add table
Add a link
Reference in a new issue