diff --git a/surfsense_backend/app/indexing_pipeline/document_hashing.py b/surfsense_backend/app/indexing_pipeline/document_hashing.py new file mode 100644 index 000000000..6b352a953 --- /dev/null +++ b/surfsense_backend/app/indexing_pipeline/document_hashing.py @@ -0,0 +1,15 @@ +import hashlib + +from app.indexing_pipeline.connector_document import ConnectorDocument + + +def compute_unique_identifier_hash(doc: ConnectorDocument) -> str: + """Return a SHA-256 hex digest identifying the document by its type, connector unique_id, and search space.""" + combined = f"{doc.document_type.value}:{doc.unique_id}:{doc.search_space_id}" + return hashlib.sha256(combined.encode("utf-8")).hexdigest() + + +def compute_content_hash(doc: ConnectorDocument) -> str: + """Return a SHA-256 hex digest of the document's source markdown, scoped to its search space.""" + combined = f"{doc.search_space_id}:{doc.source_markdown}" + return hashlib.sha256(combined.encode("utf-8")).hexdigest() diff --git a/surfsense_backend/tests/integration/conftest.py b/surfsense_backend/tests/integration/conftest.py index f36649c5b..3fd4e1a31 100644 --- a/surfsense_backend/tests/integration/conftest.py +++ b/surfsense_backend/tests/integration/conftest.py @@ -1,13 +1,17 @@ import os +from unittest.mock import AsyncMock, MagicMock +import pytest import pytest_asyncio from sqlalchemy import text -from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine +from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine from sqlalchemy.pool import NullPool from app.db import Base +_EMBEDDING_DIM = 4  # keep vectors tiny; real model uses 768+ + _DEFAULT_TEST_DB = "postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense_test" TEST_DATABASE_URL = os.environ.get("TEST_DATABASE_URL", _DEFAULT_TEST_DB) @@ -44,3 +48,19 @@ async def db_session(async_engine) -> AsyncSession: ) as session: yield session await transaction.rollback() + + +@pytest.fixture +def mock_llm() -> AsyncMock: + llm = AsyncMock() + llm.ainvoke = AsyncMock(return_value=MagicMock(content="Mocked summary.")) + return llm + + +@pytest.fixture +def mock_embedding_model() -> MagicMock: + model = MagicMock() + model.embed 
= MagicMock( + side_effect=lambda texts: [[0.1] * _EMBEDDING_DIM for _ in texts] + ) + return model diff --git a/surfsense_backend/tests/unit/conftest.py b/surfsense_backend/tests/unit/conftest.py index 98fcfc147..4e5c81bcb 100644 --- a/surfsense_backend/tests/unit/conftest.py +++ b/surfsense_backend/tests/unit/conftest.py @@ -1,49 +1,3 @@ -from unittest.mock import AsyncMock, MagicMock - -import pytest - -_EMBEDDING_DIM = 4  # keep vectors tiny in tests; real model uses 768+ - - -@pytest.fixture -def mock_session() -> AsyncMock: - session = AsyncMock() - session.add = MagicMock()  # synchronous in real SQLAlchemy - session.execute = AsyncMock() - session.scalar = AsyncMock() - session.scalars = AsyncMock() - session.flush = AsyncMock() - session.commit = AsyncMock() - session.rollback = AsyncMock() - session.refresh = AsyncMock() - return session - - -@pytest.fixture -def mock_llm() -> AsyncMock: - llm = AsyncMock() - llm.ainvoke = AsyncMock(return_value=MagicMock(content="Mocked summary.")) - return llm - - -@pytest.fixture -def mock_embedding_model() -> MagicMock: - model = MagicMock() - model.embed = MagicMock( - side_effect=lambda texts: [[0.1] * _EMBEDDING_DIM for _ in texts] - ) - return model - - -@pytest.fixture -def mock_chunker() -> MagicMock: - chunker = MagicMock() - chunker.chunk = MagicMock(return_value=["chunk one", "chunk two"]) - return chunker - - -@pytest.fixture -def mock_code_chunker() -> MagicMock: - chunker = MagicMock() - chunker.chunk = MagicMock(return_value=["chunk one", "chunk two"]) - return chunker +# No project fixtures are defined in this conftest; external-boundary mocks (llm, embedding_model) live in tests/integration/conftest.py. +# Unit tests cover pure functions and value objects with no external dependencies. +# NOTE(review): tests/unit/indexing_pipeline/test_document_hashing.py requests a make_connector_document fixture — confirm a parent conftest (e.g. tests/conftest.py) provides it, otherwise collection fails. 
diff --git a/surfsense_backend/tests/unit/indexing_pipeline/test_document_hashing.py b/surfsense_backend/tests/unit/indexing_pipeline/test_document_hashing.py new file mode 100644 index 000000000..c8e2e97e9 --- /dev/null +++ b/surfsense_backend/tests/unit/indexing_pipeline/test_document_hashing.py @@ -0,0 +1,42 @@ +import pytest + +from app.db import DocumentType +from app.indexing_pipeline.document_hashing import compute_content_hash, compute_unique_identifier_hash + +pytestmark = pytest.mark.unit + + +def test_different_unique_id_produces_different_hash(make_connector_document): + doc_a = make_connector_document(unique_id="id-001") + doc_b = make_connector_document(unique_id="id-002") + assert compute_unique_identifier_hash(doc_a) != compute_unique_identifier_hash(doc_b) + + +def test_different_search_space_produces_different_identifier_hash(make_connector_document): + doc_a = make_connector_document(search_space_id=1) + doc_b = make_connector_document(search_space_id=2) + assert compute_unique_identifier_hash(doc_a) != compute_unique_identifier_hash(doc_b) + + +def test_different_document_type_produces_different_identifier_hash(make_connector_document): + doc_a = make_connector_document(document_type=DocumentType.CLICKUP_CONNECTOR) + doc_b = make_connector_document(document_type=DocumentType.NOTION_CONNECTOR) + assert compute_unique_identifier_hash(doc_a) != compute_unique_identifier_hash(doc_b) + + +def test_same_content_same_space_produces_same_content_hash(make_connector_document): + doc_a = make_connector_document(source_markdown="Hello world", search_space_id=1) + doc_b = make_connector_document(source_markdown="Hello world", search_space_id=1) + assert compute_content_hash(doc_a) == compute_content_hash(doc_b) + + +def test_same_content_different_space_produces_different_content_hash(make_connector_document): + doc_a = make_connector_document(source_markdown="Hello world", search_space_id=1) + doc_b = make_connector_document(source_markdown="Hello world", 
search_space_id=2) + assert compute_content_hash(doc_a) != compute_content_hash(doc_b) + + +def test_different_content_produces_different_content_hash(make_connector_document): + doc_a = make_connector_document(source_markdown="Original content") + doc_b = make_connector_document(source_markdown="Updated content") + assert compute_content_hash(doc_a) != compute_content_hash(doc_b)