feat(obsidian_plugin): validate binary attachments and enforce MIME type checks

This commit is contained in:
Anish Sarkar 2026-04-25 00:23:17 +05:30
parent 3b9be79d65
commit e84dc87c5b
9 changed files with 275 additions and 40 deletions

View file

@ -26,6 +26,8 @@ from app.db import (
get_async_session,
)
from app.schemas.obsidian_plugin import (
ALLOWED_ATTACHMENT_EXTENSIONS,
ATTACHMENT_MIME_TYPES,
ConnectRequest,
ConnectResponse,
DeleteAck,
@ -465,6 +467,31 @@ async def obsidian_sync(
for note in payload.notes:
try:
if note.is_binary:
ext = note.extension.lstrip(".").lower()
if ext not in ALLOWED_ATTACHMENT_EXTENSIONS:
failed += 1
items.append(
SyncAckItem(
path=note.path,
status="error",
error=f"unsupported attachment extension: .{ext}",
)
)
continue
expected_mime = ATTACHMENT_MIME_TYPES[ext]
if note.mime_type != expected_mime:
failed += 1
items.append(
SyncAckItem(
path=note.path,
status="error",
error=(
f"mime_type '{note.mime_type}' does not match "
f"extension .{ext}"
),
)
)
continue
_queue_obsidian_attachment(
connector_id=connector.id,
note_payload=note.model_dump(mode="json"),

View file

@ -10,11 +10,26 @@ from __future__ import annotations
from datetime import datetime
from typing import Any, Literal
from pydantic import BaseModel, ConfigDict, Field
from pydantic import BaseModel, ConfigDict, Field, model_validator
_PLUGIN_MODEL_CONFIG = ConfigDict(extra="ignore")
# Source of truth for the attachment whitelist. Mirrors MIME_BY_EXTENSION in
# surfsense_obsidian/src/sync-engine.ts — keep in sync.
# Maps each accepted attachment extension (lowercase, no leading dot) to the
# single MIME type that is considered valid for it.
ATTACHMENT_MIME_TYPES: dict[str, str] = {
    "pdf": "application/pdf",
    "png": "image/png",
    # Both common JPEG spellings resolve to the same MIME type.
    "jpg": "image/jpeg",
    "jpeg": "image/jpeg",
    "gif": "image/gif",
    "webp": "image/webp",
    "svg": "image/svg+xml",
    "txt": "text/plain",
}

# Immutable view of the accepted extensions, derived from the mapping above so
# the two can never drift apart.
ALLOWED_ATTACHMENT_EXTENSIONS: frozenset[str] = frozenset(
    ATTACHMENT_MIME_TYPES.keys()
)
class _PluginBase(BaseModel):
"""Base schema carrying the shared forward-compatibility config."""
@ -78,6 +93,19 @@ class NotePayload(_PluginBase):
mtime: datetime
ctime: datetime
@model_validator(mode="after")
def _enforce_binary_invariants(self) -> NotePayload:
    """Check that the binary-only fields agree with ``is_binary``.

    A binary payload must carry non-empty ``binary_base64`` and
    ``mime_type`` values; a non-binary payload must leave both as ``None``.

    Returns:
        The validated instance, unchanged.

    Raises:
        ValueError: If either rule above is violated.
    """
    if not self.is_binary:
        # Text notes may not smuggle in attachment fields.
        has_binary_fields = (
            self.binary_base64 is not None or self.mime_type is not None
        )
        if has_binary_fields:
            raise ValueError(
                "binary_base64 and mime_type must be omitted when is_binary is False",
            )
        return self

    # Binary path: empty strings are rejected just like missing values.
    if not self.binary_base64:
        raise ValueError("binary_base64 is required when is_binary is True")
    if not self.mime_type:
        raise ValueError("mime_type is required when is_binary is True")
    return self
class SyncBatchRequest(_PluginBase):
"""Batch upsert; plugin sends 10-20 notes per request."""

View file

@ -119,8 +119,7 @@ def _build_metadata(
}
if payload.is_binary:
meta["is_binary"] = True
if payload.mime_type:
meta["mime_type"] = payload.mime_type
meta["mime_type"] = payload.mime_type
if extra:
meta.update(extra)
return meta
@ -154,9 +153,6 @@ def _build_document_string(
async def _extract_binary_attachment_markdown(
payload: NotePayload, *, vision_llm
) -> tuple[str, dict[str, Any]]:
if not payload.binary_base64:
return "", {"attachment_extraction_status": "missing_binary_payload"}
try:
raw_bytes = base64.b64decode(payload.binary_base64, validate=True)
except Exception:
@ -208,7 +204,7 @@ async def _run_etl_extract(*, file_path: str, filename: str, vision_llm):
def _is_image_attachment(payload: NotePayload) -> bool:
    """Return True when the payload's extension names a supported image format.

    The extension is lower-cased and stripped of leading dots before the
    lookup, so ``".PNG"`` and ``"png"`` are treated the same.

    Args:
        payload: Note payload whose ``extension`` attribute is inspected.

    Returns:
        True for png, jpg, jpeg, gif, webp and svg; False otherwise.
    """
    ext = payload.extension.lower().lstrip(".")
    # Exactly one return: bmp/tiff are deliberately absent because they are
    # not part of ATTACHMENT_MIME_TYPES (app.schemas.obsidian_plugin), the
    # attachment whitelist.
    return ext in {"png", "jpg", "jpeg", "gif", "webp", "svg"}
async def _resolve_attachment_vision_llm(