diff --git a/surfsense_backend/tests/unit/etl_pipeline/cache/conftest.py b/surfsense_backend/tests/unit/etl_pipeline/cache/conftest.py
new file mode 100644
index 000000000..c6efddc09
--- /dev/null
+++ b/surfsense_backend/tests/unit/etl_pipeline/cache/conftest.py
@@ -0,0 +1,28 @@
+"""Stub the cache package __init__s so unit tests import only pure leaf modules.
+
+The real ``cache``/``storage``/``eviction``/``persistence`` __init__s eagerly
+import the facade, file storage, Celery, and ``app.db`` -- none of which a pure
+unit test should need. Turning those packages into bare namespace packages lets
+``from app.etl_pipeline.cache.<pkg>.<leaf> import ...`` resolve the leaf module
+without running the heavy __init__. ``schemas`` is left real (it is pure).
+"""
+
+import sys
+import types
+from pathlib import Path
+
+_CACHE_DIR = Path(__file__).resolve().parents[4] / "app" / "etl_pipeline" / "cache"
+
+
+def _stub_namespace_package(dotted: str, fs_dir: Path) -> None:
+    if dotted in sys.modules:
+        return
+    module = types.ModuleType(dotted)
+    module.__path__ = [str(fs_dir)]
+    module.__package__ = dotted
+    sys.modules[dotted] = module
+
+
+_stub_namespace_package("app.etl_pipeline.cache", _CACHE_DIR)
+_stub_namespace_package("app.etl_pipeline.cache.storage", _CACHE_DIR / "storage")
+_stub_namespace_package("app.etl_pipeline.cache.eviction", _CACHE_DIR / "eviction")
diff --git a/surfsense_backend/tests/unit/etl_pipeline/cache/test_parse_key.py b/surfsense_backend/tests/unit/etl_pipeline/cache/test_parse_key.py
new file mode 100644
index 000000000..d69e74ee0
--- /dev/null
+++ b/surfsense_backend/tests/unit/etl_pipeline/cache/test_parse_key.py
@@ -0,0 +1,70 @@
+"""Content-addressing: equal (bytes + recipe) must map to one storage location.
+
+This is the dedup guarantee the whole cache rests on -- two users uploading the
+same file under the same parser settings have to land on the same object key, and
+any change to bytes or recipe has to land somewhere else.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from app.etl_pipeline.cache.schemas import ParseKey
+from app.etl_pipeline.cache.storage.object_keys import (
+    CACHE_PREFIX,
+    build_parse_object_key,
+)
+
+pytestmark = pytest.mark.unit
+
+
+def _key(**overrides) -> ParseKey:
+    base = {
+        "source_sha256": "a" * 64,
+        "etl_service": "LLAMACLOUD",
+        "mode": "basic",
+        "version": 1,
+    }
+    base.update(overrides)
+    return ParseKey.for_document(
+        base["source_sha256"],
+        etl_service=base["etl_service"],
+        mode=base["mode"],
+        version=base["version"],
+    )
+
+
+def test_same_bytes_and_recipe_produce_the_same_object_key():
+    assert build_parse_object_key(_key()) == build_parse_object_key(_key())
+
+
+def test_different_bytes_produce_different_object_keys():
+    assert build_parse_object_key(
+        _key(source_sha256="a" * 64)
+    ) != build_parse_object_key(_key(source_sha256="b" * 64))
+
+
+@pytest.mark.parametrize(
+    "field, value",
+    [
+        ("etl_service", "DOCLING"),
+        ("mode", "premium"),
+        ("version", 2),
+    ],
+)
+def test_any_recipe_change_produces_a_different_object_key(field, value):
+    # Same bytes but a different parser/mode/version must not collide: the recipe
+    # is part of the identity, so changing it has to re-parse, not reuse.
+    assert build_parse_object_key(_key()) != build_parse_object_key(
+        _key(**{field: value})
+    )
+
+
+def test_object_key_is_prefixed_and_sharded_by_source_hash():
+    # Shape matters operationally: a dedicated top-level prefix keeps cache blobs
+    # out of the normal store, and the sha directory groups every recipe variant
+    # of one file together.
+    key = _key()
+    assert build_parse_object_key(key) == (
+        f"{CACHE_PREFIX}/{key.source_sha256}/LLAMACLOUD.basic.v1.md"
+    )