test: add document hashing unit tests and clean up conftest mocks

This commit is contained in:
CREDO23 2026-02-24 22:48:40 +02:00
parent d5e10bd8f9
commit a0134a5830
4 changed files with 79 additions and 50 deletions

View file

@@ -0,0 +1,13 @@
import hashlib
from app.indexing_pipeline.connector_document import ConnectorDocument
def compute_unique_identifier_hash(doc: ConnectorDocument) -> str:
    """Hash a document's type, unique id and search space id.

    The three fields are joined with ``:`` in the order
    ``document_type.value``, ``unique_id``, ``search_space_id``, encoded as
    UTF-8 and fed to SHA-256. The document body is not part of the input.

    Returns:
        The SHA-256 digest as a lowercase hexadecimal string.
    """
    hasher = hashlib.sha256()
    identity = f"{doc.document_type.value}:{doc.unique_id}:{doc.search_space_id}"
    hasher.update(identity.encode("utf-8"))
    return hasher.hexdigest()
def compute_content_hash(doc: ConnectorDocument) -> str:
    """Hash a document's markdown content together with its search space id.

    The input to SHA-256 is the UTF-8 encoding of
    ``"<search_space_id>:<source_markdown>"``, so identical markdown yields a
    different digest under a different search space id.

    Returns:
        The SHA-256 digest as a lowercase hexadecimal string.
    """
    scoped_content = f"{doc.search_space_id}:{doc.source_markdown}"
    raw_bytes = scoped_content.encode("utf-8")
    digest = hashlib.sha256(raw_bytes)
    return digest.hexdigest()