feat(obsidian_plugin): validate binary attachments and enforce MIME type checks

This commit is contained in:
Anish Sarkar 2026-04-25 00:23:17 +05:30
parent 3b9be79d65
commit e84dc87c5b
9 changed files with 275 additions and 40 deletions

View file

@ -490,6 +490,7 @@ class TestWireContractSmoke:
binary_note.extension = "png"
binary_note.is_binary = True
binary_note.binary_base64 = "aGVsbG8="
binary_note.mime_type = "image/png"
binary_note.content = ""
with (
@ -517,3 +518,107 @@ class TestWireContractSmoke:
assert statuses == {"ok.md": "ok", "image.png": "queued"}
assert upsert_mock.await_count == 1
queue_mock.assert_called_once()
async def test_sync_rejects_unsupported_attachment_extension(
    self, db_session: AsyncSession, db_user: User, db_search_space: SearchSpace
):
    """Sync a batch holding one markdown note and one ``.heic`` binary note.

    Expected outcome: the markdown note is indexed, the ``photo.heic`` item
    comes back with status ``"error"`` and an "unsupported attachment
    extension" message, and the attachment queue helper is never called.
    """
    vault_id = str(uuid.uuid4())
    connect_request = ConnectRequest(
        vault_id=vault_id,
        vault_name="Reject Vault",
        search_space_id=db_search_space.id,
        vault_fingerprint="fp-" + uuid.uuid4().hex,
    )
    await obsidian_connect(connect_request, user=db_user, session=db_session)

    fake_doc = type("FakeDoc", (), {"id": 12345})()

    # Turn a regular note payload into a binary attachment with an extension
    # the sync route is expected to refuse. Fields are set in a fixed order.
    bad_note = _make_note_payload(vault_id, "photo.heic", "hash-heic")
    for field_name, field_value in (
        ("extension", "heic"),
        ("is_binary", True),
        ("binary_base64", "aGVsbG8="),
        ("mime_type", "image/heic"),
        ("content", ""),
    ):
        setattr(bad_note, field_name, field_value)

    with (
        patch(
            "app.routes.obsidian_plugin_routes.upsert_note",
            new=AsyncMock(return_value=fake_doc),
        ),
        patch("app.routes.obsidian_plugin_routes._queue_obsidian_attachment") as queue_mock,
    ):
        batch = SyncBatchRequest(
            vault_id=vault_id,
            notes=[
                _make_note_payload(vault_id, "ok.md", "hash-ok"),
                bad_note,
            ],
        )
        sync_resp = await obsidian_sync(batch, user=db_user, session=db_session)

    assert sync_resp.indexed == 1
    assert sync_resp.failed == 1

    items_by_path = {}
    for item in sync_resp.items:
        items_by_path[item.path] = item

    assert items_by_path["ok.md"].status == "ok"
    rejected = items_by_path["photo.heic"]
    assert rejected.status == "error"
    assert "unsupported attachment extension" in (rejected.error or "")
    queue_mock.assert_not_called()
async def test_sync_rejects_mime_extension_mismatch(
    self, db_session: AsyncSession, db_user: User, db_search_space: SearchSpace
):
    """Sync a batch where a ``.png`` attachment declares a PDF MIME type.

    Expected outcome: the markdown note is indexed, the ``image.png`` item
    comes back with status ``"error"`` and a "does not match extension"
    message, and the attachment queue helper is never called.
    """
    vault_id = str(uuid.uuid4())
    connect_request = ConnectRequest(
        vault_id=vault_id,
        vault_name="Mismatch Vault",
        search_space_id=db_search_space.id,
        vault_fingerprint="fp-" + uuid.uuid4().hex,
    )
    await obsidian_connect(connect_request, user=db_user, session=db_session)

    fake_doc = type("FakeDoc", (), {"id": 12345})()

    # Binary attachment whose declared MIME type disagrees with its extension.
    # Fields are set in a fixed order.
    mismatched = _make_note_payload(vault_id, "image.png", "hash-png")
    for field_name, field_value in (
        ("extension", "png"),
        ("is_binary", True),
        ("binary_base64", "aGVsbG8="),
        ("mime_type", "application/pdf"),
        ("content", ""),
    ):
        setattr(mismatched, field_name, field_value)

    with (
        patch(
            "app.routes.obsidian_plugin_routes.upsert_note",
            new=AsyncMock(return_value=fake_doc),
        ),
        patch("app.routes.obsidian_plugin_routes._queue_obsidian_attachment") as queue_mock,
    ):
        batch = SyncBatchRequest(
            vault_id=vault_id,
            notes=[
                _make_note_payload(vault_id, "ok.md", "hash-ok"),
                mismatched,
            ],
        )
        sync_resp = await obsidian_sync(batch, user=db_user, session=db_session)

    assert sync_resp.indexed == 1
    assert sync_resp.failed == 1

    items_by_path = {}
    for item in sync_resp.items:
        items_by_path[item.path] = item

    assert items_by_path["ok.md"].status == "ok"
    rejected = items_by_path["image.png"]
    assert rejected.status == "error"
    assert "does not match extension" in (rejected.error or "")
    queue_mock.assert_not_called()

View file

@ -4,6 +4,7 @@ import base64
from datetime import UTC, datetime
import pytest
from pydantic import ValidationError
from app.etl_pipeline.etl_document import EtlResult
from app.schemas.obsidian_plugin import HeadingRef, NotePayload
@ -15,6 +16,9 @@ from app.services.obsidian_plugin_indexer import (
)
# Base64 text of the 8-byte PNG file signature; a small stand-in value for the
# ``binary_base64`` field of the binary NotePayload fixtures in this module.
_FAKE_PNG_B64 = base64.b64encode(b"\x89PNG\r\n\x1a\n").decode("ascii")
def test_build_metadata_serializes_headings_to_plain_json() -> None:
now = datetime.now(UTC)
payload = NotePayload(
@ -46,6 +50,7 @@ def test_build_metadata_marks_binary_attachment_fields() -> None:
mtime=now,
ctime=now,
is_binary=True,
binary_base64=_FAKE_PNG_B64,
mime_type="image/png",
)
@ -69,6 +74,7 @@ async def test_extract_binary_attachment_markdown_handles_invalid_base64() -> No
ctime=now,
is_binary=True,
binary_base64="not-valid-base64!!",
mime_type="image/png",
)
content, metadata = await _extract_binary_attachment_markdown(
@ -93,6 +99,7 @@ async def test_extract_binary_attachment_markdown_uses_etl(monkeypatch) -> None:
ctime=now,
is_binary=True,
binary_base64=base64.b64encode(b"%PDF-1.7 fake bytes").decode("ascii"),
mime_type="application/pdf",
)
async def _fake_run_etl_extract( # noqa: ANN001
@ -133,6 +140,8 @@ def test_is_image_attachment_detects_image_extensions() -> None:
mtime=now,
ctime=now,
is_binary=True,
binary_base64=_FAKE_PNG_B64,
mime_type="image/png",
)
pdf_payload = NotePayload(
vault_id="vault-1",
@ -144,12 +153,67 @@ def test_is_image_attachment_detects_image_extensions() -> None:
mtime=now,
ctime=now,
is_binary=True,
binary_base64=_FAKE_PNG_B64,
mime_type="application/pdf",
)
assert _is_image_attachment(image_payload) is True
assert _is_image_attachment(pdf_payload) is False
def test_note_payload_rejects_binary_without_base64() -> None:
    """Building a binary ``NotePayload`` with no ``binary_base64`` must fail validation."""
    timestamp = datetime.now(UTC)
    # Every field of a binary attachment except the base64 body itself.
    fields = {
        "vault_id": "vault-1",
        "path": "assets/diagram.png",
        "name": "diagram",
        "extension": "png",
        "content": "",
        "content_hash": "abc123",
        "mtime": timestamp,
        "ctime": timestamp,
        "is_binary": True,
        "mime_type": "image/png",
    }
    with pytest.raises(ValidationError, match="binary_base64 is required"):
        NotePayload(**fields)
def test_note_payload_rejects_binary_without_mime_type() -> None:
    """Building a binary ``NotePayload`` with no ``mime_type`` must fail validation."""
    timestamp = datetime.now(UTC)
    # Every field of a binary attachment except the MIME type.
    fields = {
        "vault_id": "vault-1",
        "path": "assets/diagram.png",
        "name": "diagram",
        "extension": "png",
        "content": "",
        "content_hash": "abc123",
        "mtime": timestamp,
        "ctime": timestamp,
        "is_binary": True,
        "binary_base64": _FAKE_PNG_B64,
    }
    with pytest.raises(ValidationError, match="mime_type is required"):
        NotePayload(**fields)
def test_note_payload_rejects_markdown_with_binary_fields() -> None:
    """A payload that leaves ``is_binary`` unset but carries ``binary_base64`` must fail validation."""
    timestamp = datetime.now(UTC)
    # A markdown note that wrongly includes a base64 body; is_binary is left unset.
    fields = {
        "vault_id": "vault-1",
        "path": "notes.md",
        "name": "notes",
        "extension": "md",
        "content": "# Notes",
        "content_hash": "abc123",
        "mtime": timestamp,
        "ctime": timestamp,
        "binary_base64": _FAKE_PNG_B64,
    }
    expected_message = (
        "binary_base64 and mime_type must be omitted when is_binary is False"
    )
    with pytest.raises(ValidationError, match=expected_message):
        NotePayload(**fields)
def test_require_extracted_attachment_content_rejects_empty_content() -> None:
with pytest.raises(
RuntimeError, match="Attachment extraction failed for assets/img.png"