mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-24 21:38:09 +02:00
feat(obsidian_plugin): validate binary attachments and enforce MIME type checks
This commit is contained in:
parent
3b9be79d65
commit
e84dc87c5b
9 changed files with 275 additions and 40 deletions
|
|
@ -26,6 +26,8 @@ from app.db import (
|
|||
get_async_session,
|
||||
)
|
||||
from app.schemas.obsidian_plugin import (
|
||||
ALLOWED_ATTACHMENT_EXTENSIONS,
|
||||
ATTACHMENT_MIME_TYPES,
|
||||
ConnectRequest,
|
||||
ConnectResponse,
|
||||
DeleteAck,
|
||||
|
|
@ -465,6 +467,31 @@ async def obsidian_sync(
|
|||
for note in payload.notes:
|
||||
try:
|
||||
if note.is_binary:
|
||||
ext = note.extension.lstrip(".").lower()
|
||||
if ext not in ALLOWED_ATTACHMENT_EXTENSIONS:
|
||||
failed += 1
|
||||
items.append(
|
||||
SyncAckItem(
|
||||
path=note.path,
|
||||
status="error",
|
||||
error=f"unsupported attachment extension: .{ext}",
|
||||
)
|
||||
)
|
||||
continue
|
||||
expected_mime = ATTACHMENT_MIME_TYPES[ext]
|
||||
if note.mime_type != expected_mime:
|
||||
failed += 1
|
||||
items.append(
|
||||
SyncAckItem(
|
||||
path=note.path,
|
||||
status="error",
|
||||
error=(
|
||||
f"mime_type '{note.mime_type}' does not match "
|
||||
f"extension .{ext}"
|
||||
),
|
||||
)
|
||||
)
|
||||
continue
|
||||
_queue_obsidian_attachment(
|
||||
connector_id=connector.id,
|
||||
note_payload=note.model_dump(mode="json"),
|
||||
|
|
|
|||
|
|
@ -10,11 +10,26 @@ from __future__ import annotations
|
|||
from datetime import datetime
|
||||
from typing import Any, Literal
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
from pydantic import BaseModel, ConfigDict, Field, model_validator
|
||||
|
||||
_PLUGIN_MODEL_CONFIG = ConfigDict(extra="ignore")
|
||||
|
||||
|
||||
# Whitelist of binary attachment types accepted from the Obsidian plugin.
# This mapping is the source of truth on the backend and mirrors
# MIME_BY_EXTENSION in surfsense_obsidian/src/sync-engine.ts — keep the two
# tables in sync whenever an entry is added or removed.
#
# Keys are lowercase extensions without the leading dot; each value is the one
# MIME type a payload with that extension is expected to declare.
ATTACHMENT_MIME_TYPES: dict[str, str] = {
    # Documents
    "pdf": "application/pdf",
    # Images
    "png": "image/png",
    "jpg": "image/jpeg",
    "jpeg": "image/jpeg",
    "gif": "image/gif",
    "webp": "image/webp",
    "svg": "image/svg+xml",
    # Plain text
    "txt": "text/plain",
}

# Derived from the mapping so the extension whitelist can never drift from it.
ALLOWED_ATTACHMENT_EXTENSIONS: frozenset[str] = frozenset(ATTACHMENT_MIME_TYPES)
|
||||
|
||||
|
||||
class _PluginBase(BaseModel):
|
||||
"""Base schema carrying the shared forward-compatibility config."""
|
||||
|
||||
|
|
@ -78,6 +93,19 @@ class NotePayload(_PluginBase):
|
|||
mtime: datetime
|
||||
ctime: datetime
|
||||
|
||||
@model_validator(mode="after")
def _enforce_binary_invariants(self) -> NotePayload:
    """Check that the binary-only fields agree with ``is_binary``.

    A binary payload must carry both a non-empty ``binary_base64`` and a
    non-empty ``mime_type``. A non-binary payload must leave both fields
    unset (``None``).

    Returns:
        The validated instance, unchanged.

    Raises:
        ValueError: If either rule above is violated.
    """
    if not self.is_binary:
        # Text notes: both binary-only fields have to be absent.
        if self.binary_base64 is None and self.mime_type is None:
            return self
        raise ValueError(
            "binary_base64 and mime_type must be omitted when is_binary is False",
        )

    # Binary attachments: the content is checked first, then its declared type.
    if not self.binary_base64:
        raise ValueError("binary_base64 is required when is_binary is True")
    if not self.mime_type:
        raise ValueError("mime_type is required when is_binary is True")
    return self
|
||||
|
||||
|
||||
class SyncBatchRequest(_PluginBase):
|
||||
"""Batch upsert; plugin sends 10-20 notes per request."""
|
||||
|
|
|
|||
|
|
@ -119,8 +119,7 @@ def _build_metadata(
|
|||
}
|
||||
if payload.is_binary:
|
||||
meta["is_binary"] = True
|
||||
if payload.mime_type:
|
||||
meta["mime_type"] = payload.mime_type
|
||||
meta["mime_type"] = payload.mime_type
|
||||
if extra:
|
||||
meta.update(extra)
|
||||
return meta
|
||||
|
|
@ -154,9 +153,6 @@ def _build_document_string(
|
|||
async def _extract_binary_attachment_markdown(
|
||||
payload: NotePayload, *, vision_llm
|
||||
) -> tuple[str, dict[str, Any]]:
|
||||
if not payload.binary_base64:
|
||||
return "", {"attachment_extraction_status": "missing_binary_payload"}
|
||||
|
||||
try:
|
||||
raw_bytes = base64.b64decode(payload.binary_base64, validate=True)
|
||||
except Exception:
|
||||
|
|
@ -208,7 +204,7 @@ async def _run_etl_extract(*, file_path: str, filename: str, vision_llm):
|
|||
|
||||
def _is_image_attachment(payload: NotePayload) -> bool:
    """Return True when the payload's extension is a supported image type.

    The extension is lowercased and stripped of leading dots before the
    lookup, so ``".PNG"`` and ``"png"`` are treated the same.

    The accepted set matches the image entries of ``ATTACHMENT_MIME_TYPES``
    in ``app.schemas.obsidian_plugin``. ``bmp`` and ``tiff`` are not in that
    whitelist and are therefore not reported as images here.

    Args:
        payload: Note payload whose ``extension`` attribute is inspected.

    Returns:
        ``True`` for png, jpg, jpeg, gif, webp and svg; ``False`` otherwise.
    """
    ext = payload.extension.lower().lstrip(".")
    return ext in {"png", "jpg", "jpeg", "gif", "webp", "svg"}
|
||||
|
||||
|
||||
async def _resolve_attachment_vision_llm(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue