mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-26 01:06:23 +02:00
test: add document hashing unit tests and clean up conftest mocks
This commit is contained in:
parent
d5e10bd8f9
commit
a0134a5830
4 changed files with 79 additions and 50 deletions
13
surfsense_backend/app/indexing_pipeline/document_hashing.py
Normal file
13
surfsense_backend/app/indexing_pipeline/document_hashing.py
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
import hashlib
|
||||
|
||||
from app.indexing_pipeline.connector_document import ConnectorDocument
|
||||
|
||||
|
||||
def compute_unique_identifier_hash(doc: ConnectorDocument) -> str:
|
||||
combined = f"{doc.document_type.value}:{doc.unique_id}:{doc.search_space_id}"
|
||||
return hashlib.sha256(combined.encode("utf-8")).hexdigest()
|
||||
|
||||
|
||||
def compute_content_hash(doc: ConnectorDocument) -> str:
|
||||
combined = f"{doc.search_space_id}:{doc.source_markdown}"
|
||||
return hashlib.sha256(combined.encode("utf-8")).hexdigest()
|
||||
Loading…
Add table
Add a link
Reference in a new issue