mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-02 04:12:47 +02:00
feat(obsidian_plugin): validate binary attachments and enforce MIME type checks
This commit is contained in:
parent
3b9be79d65
commit
e84dc87c5b
9 changed files with 275 additions and 40 deletions
|
|
@ -490,6 +490,7 @@ class TestWireContractSmoke:
|
|||
binary_note.extension = "png"
|
||||
binary_note.is_binary = True
|
||||
binary_note.binary_base64 = "aGVsbG8="
|
||||
binary_note.mime_type = "image/png"
|
||||
binary_note.content = ""
|
||||
|
||||
with (
|
||||
|
|
@ -517,3 +518,107 @@ class TestWireContractSmoke:
|
|||
assert statuses == {"ok.md": "ok", "image.png": "queued"}
|
||||
assert upsert_mock.await_count == 1
|
||||
queue_mock.assert_called_once()
|
||||
|
||||
async def test_sync_rejects_unsupported_attachment_extension(
    self, db_session: AsyncSession, db_user: User, db_search_space: SearchSpace
):
    """A binary note with a ``heic`` extension is reported as a per-item error.

    Syncs one markdown note together with a ``photo.heic`` attachment and
    checks that the markdown note comes back ``"ok"``, the attachment comes
    back ``"error"`` with an "unsupported attachment extension" message,
    and the attachment queue helper is never called.
    """
    vault_id = str(uuid.uuid4())
    connect_request = ConnectRequest(
        vault_id=vault_id,
        vault_name="Reject Vault",
        search_space_id=db_search_space.id,
        vault_fingerprint="fp-" + uuid.uuid4().hex,
    )
    await obsidian_connect(connect_request, user=db_user, session=db_session)

    # Stand-in object returned by the patched upsert_note.
    fake_doc_cls = type("FakeDoc", (), {"id": 12345})
    stub_doc = fake_doc_cls()

    heic_note = _make_note_payload(vault_id, "photo.heic", "hash-heic")
    # Turn the payload into a binary attachment, one field at a time and in
    # a fixed order.
    for field_name, field_value in (
        ("extension", "heic"),
        ("is_binary", True),
        ("binary_base64", "aGVsbG8="),
        ("mime_type", "image/heic"),
        ("content", ""),
    ):
        setattr(heic_note, field_name, field_value)

    upsert_patch = patch(
        "app.routes.obsidian_plugin_routes.upsert_note",
        new=AsyncMock(return_value=stub_doc),
    )
    queue_patch = patch(
        "app.routes.obsidian_plugin_routes._queue_obsidian_attachment"
    )
    with upsert_patch, queue_patch as queue_mock:
        batch = SyncBatchRequest(
            vault_id=vault_id,
            notes=[
                _make_note_payload(vault_id, "ok.md", "hash-ok"),
                heic_note,
            ],
        )
        sync_resp = await obsidian_sync(batch, user=db_user, session=db_session)

    assert sync_resp.indexed == 1
    assert sync_resp.failed == 1

    by_path = {item.path: item for item in sync_resp.items}
    assert by_path["ok.md"].status == "ok"

    rejected = by_path["photo.heic"]
    assert rejected.status == "error"
    assert "unsupported attachment extension" in (rejected.error or "")

    queue_mock.assert_not_called()
|
||||
|
||||
async def test_sync_rejects_mime_extension_mismatch(
    self, db_session: AsyncSession, db_user: User, db_search_space: SearchSpace
):
    """A ``png`` attachment declared as ``application/pdf`` is a per-item error.

    Syncs one markdown note together with an ``image.png`` attachment whose
    ``mime_type`` is ``application/pdf`` and checks that the markdown note
    comes back ``"ok"``, the attachment comes back ``"error"`` with a
    "does not match extension" message, and the attachment queue helper is
    never called.
    """
    vault_id = str(uuid.uuid4())
    connect_request = ConnectRequest(
        vault_id=vault_id,
        vault_name="Mismatch Vault",
        search_space_id=db_search_space.id,
        vault_fingerprint="fp-" + uuid.uuid4().hex,
    )
    await obsidian_connect(connect_request, user=db_user, session=db_session)

    # Stand-in object returned by the patched upsert_note.
    fake_doc_cls = type("FakeDoc", (), {"id": 12345})
    stub_doc = fake_doc_cls()

    png_note = _make_note_payload(vault_id, "image.png", "hash-png")
    # Turn the payload into a binary attachment whose MIME type disagrees
    # with its extension, one field at a time and in a fixed order.
    for field_name, field_value in (
        ("extension", "png"),
        ("is_binary", True),
        ("binary_base64", "aGVsbG8="),
        ("mime_type", "application/pdf"),
        ("content", ""),
    ):
        setattr(png_note, field_name, field_value)

    upsert_patch = patch(
        "app.routes.obsidian_plugin_routes.upsert_note",
        new=AsyncMock(return_value=stub_doc),
    )
    queue_patch = patch(
        "app.routes.obsidian_plugin_routes._queue_obsidian_attachment"
    )
    with upsert_patch, queue_patch as queue_mock:
        batch = SyncBatchRequest(
            vault_id=vault_id,
            notes=[
                _make_note_payload(vault_id, "ok.md", "hash-ok"),
                png_note,
            ],
        )
        sync_resp = await obsidian_sync(batch, user=db_user, session=db_session)

    assert sync_resp.indexed == 1
    assert sync_resp.failed == 1

    by_path = {item.path: item for item in sync_resp.items}
    assert by_path["ok.md"].status == "ok"

    rejected = by_path["image.png"]
    assert rejected.status == "error"
    assert "does not match extension" in (rejected.error or "")

    queue_mock.assert_not_called()
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import base64
|
|||
from datetime import UTC, datetime
|
||||
|
||||
import pytest
|
||||
from pydantic import ValidationError
|
||||
|
||||
from app.etl_pipeline.etl_document import EtlResult
|
||||
from app.schemas.obsidian_plugin import HeadingRef, NotePayload
|
||||
|
|
@ -15,6 +16,9 @@ from app.services.obsidian_plugin_indexer import (
|
|||
)
|
||||
|
||||
|
||||
_FAKE_PNG_B64 = base64.b64encode(b"\x89PNG\r\n\x1a\n").decode("ascii")
|
||||
|
||||
|
||||
def test_build_metadata_serializes_headings_to_plain_json() -> None:
|
||||
now = datetime.now(UTC)
|
||||
payload = NotePayload(
|
||||
|
|
@ -46,6 +50,7 @@ def test_build_metadata_marks_binary_attachment_fields() -> None:
|
|||
mtime=now,
|
||||
ctime=now,
|
||||
is_binary=True,
|
||||
binary_base64=_FAKE_PNG_B64,
|
||||
mime_type="image/png",
|
||||
)
|
||||
|
||||
|
|
@ -69,6 +74,7 @@ async def test_extract_binary_attachment_markdown_handles_invalid_base64() -> No
|
|||
ctime=now,
|
||||
is_binary=True,
|
||||
binary_base64="not-valid-base64!!",
|
||||
mime_type="image/png",
|
||||
)
|
||||
|
||||
content, metadata = await _extract_binary_attachment_markdown(
|
||||
|
|
@ -93,6 +99,7 @@ async def test_extract_binary_attachment_markdown_uses_etl(monkeypatch) -> None:
|
|||
ctime=now,
|
||||
is_binary=True,
|
||||
binary_base64=base64.b64encode(b"%PDF-1.7 fake bytes").decode("ascii"),
|
||||
mime_type="application/pdf",
|
||||
)
|
||||
|
||||
async def _fake_run_etl_extract( # noqa: ANN001
|
||||
|
|
@ -133,6 +140,8 @@ def test_is_image_attachment_detects_image_extensions() -> None:
|
|||
mtime=now,
|
||||
ctime=now,
|
||||
is_binary=True,
|
||||
binary_base64=_FAKE_PNG_B64,
|
||||
mime_type="image/png",
|
||||
)
|
||||
pdf_payload = NotePayload(
|
||||
vault_id="vault-1",
|
||||
|
|
@ -144,12 +153,67 @@ def test_is_image_attachment_detects_image_extensions() -> None:
|
|||
mtime=now,
|
||||
ctime=now,
|
||||
is_binary=True,
|
||||
binary_base64=_FAKE_PNG_B64,
|
||||
mime_type="application/pdf",
|
||||
)
|
||||
|
||||
assert _is_image_attachment(image_payload) is True
|
||||
assert _is_image_attachment(pdf_payload) is False
|
||||
|
||||
|
||||
def test_note_payload_rejects_binary_without_base64() -> None:
    """A binary payload that omits ``binary_base64`` must raise ValidationError."""
    timestamp = datetime.now(UTC)
    # A binary PNG payload with everything set except the base64 body.
    fields = {
        "vault_id": "vault-1",
        "path": "assets/diagram.png",
        "name": "diagram",
        "extension": "png",
        "content": "",
        "content_hash": "abc123",
        "mtime": timestamp,
        "ctime": timestamp,
        "is_binary": True,
        "mime_type": "image/png",
    }
    with pytest.raises(ValidationError, match="binary_base64 is required"):
        NotePayload(**fields)
|
||||
|
||||
|
||||
def test_note_payload_rejects_binary_without_mime_type() -> None:
    """A binary payload that omits ``mime_type`` must raise ValidationError."""
    timestamp = datetime.now(UTC)
    # A binary PNG payload with everything set except the MIME type.
    fields = {
        "vault_id": "vault-1",
        "path": "assets/diagram.png",
        "name": "diagram",
        "extension": "png",
        "content": "",
        "content_hash": "abc123",
        "mtime": timestamp,
        "ctime": timestamp,
        "is_binary": True,
        "binary_base64": _FAKE_PNG_B64,
    }
    with pytest.raises(ValidationError, match="mime_type is required"):
        NotePayload(**fields)
|
||||
|
||||
|
||||
def test_note_payload_rejects_markdown_with_binary_fields() -> None:
    """A payload without ``is_binary`` that carries ``binary_base64`` must fail."""
    timestamp = datetime.now(UTC)
    expected_message = (
        "binary_base64 and mime_type must be omitted when is_binary is False"
    )
    # A markdown note that also carries a base64 body; is_binary is left
    # unset.
    fields = {
        "vault_id": "vault-1",
        "path": "notes.md",
        "name": "notes",
        "extension": "md",
        "content": "# Notes",
        "content_hash": "abc123",
        "mtime": timestamp,
        "ctime": timestamp,
        "binary_base64": _FAKE_PNG_B64,
    }
    with pytest.raises(ValidationError, match=expected_message):
        NotePayload(**fields)
|
||||
|
||||
|
||||
def test_require_extracted_attachment_content_rejects_empty_content() -> None:
|
||||
with pytest.raises(
|
||||
RuntimeError, match="Attachment extraction failed for assets/img.png"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue