mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-03 12:52:39 +02:00
Merge pull request #1286 from AnishSarkar22/feat/obsidian-plugin
feat: introduce Obsidian vault sync plugin
This commit is contained in:
commit
f607636ba6
83 changed files with 12540 additions and 1837 deletions
225
surfsense_backend/tests/unit/test_obsidian_plugin_indexer.py
Normal file
225
surfsense_backend/tests/unit/test_obsidian_plugin_indexer.py
Normal file
|
|
@ -0,0 +1,225 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
from datetime import UTC, datetime
|
||||
|
||||
import pytest
|
||||
from pydantic import ValidationError
|
||||
|
||||
from app.etl_pipeline.etl_document import EtlResult
|
||||
from app.schemas.obsidian_plugin import HeadingRef, NotePayload
|
||||
from app.services.obsidian_plugin_indexer import (
|
||||
_build_metadata,
|
||||
_extract_binary_attachment_markdown,
|
||||
_is_image_attachment,
|
||||
_require_extracted_attachment_content,
|
||||
)
|
||||
|
||||
|
||||
_FAKE_PNG_B64 = base64.b64encode(b"\x89PNG\r\n\x1a\n").decode("ascii")
|
||||
|
||||
|
||||
def test_build_metadata_serializes_headings_to_plain_json() -> None:
|
||||
now = datetime.now(UTC)
|
||||
payload = NotePayload(
|
||||
vault_id="vault-1",
|
||||
path="notes.md",
|
||||
name="notes",
|
||||
extension="md",
|
||||
content="# Notes",
|
||||
headings=[HeadingRef(heading="Notes", level=1)],
|
||||
content_hash="abc123",
|
||||
mtime=now,
|
||||
ctime=now,
|
||||
)
|
||||
|
||||
metadata = _build_metadata(payload, vault_name="My Vault", connector_id=42)
|
||||
|
||||
assert metadata["headings"] == [{"heading": "Notes", "level": 1}]
|
||||
|
||||
|
||||
def test_build_metadata_marks_binary_attachment_fields() -> None:
|
||||
now = datetime.now(UTC)
|
||||
payload = NotePayload(
|
||||
vault_id="vault-1",
|
||||
path="assets/diagram.png",
|
||||
name="diagram",
|
||||
extension="png",
|
||||
content="",
|
||||
content_hash="abc123",
|
||||
mtime=now,
|
||||
ctime=now,
|
||||
is_binary=True,
|
||||
binary_base64=_FAKE_PNG_B64,
|
||||
mime_type="image/png",
|
||||
)
|
||||
|
||||
metadata = _build_metadata(payload, vault_name="My Vault", connector_id=42)
|
||||
|
||||
assert metadata["is_binary"] is True
|
||||
assert metadata["mime_type"] == "image/png"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extract_binary_attachment_markdown_handles_invalid_base64() -> None:
|
||||
now = datetime.now(UTC)
|
||||
payload = NotePayload(
|
||||
vault_id="vault-1",
|
||||
path="assets/diagram.png",
|
||||
name="diagram",
|
||||
extension="png",
|
||||
content="",
|
||||
content_hash="abc123",
|
||||
mtime=now,
|
||||
ctime=now,
|
||||
is_binary=True,
|
||||
binary_base64="not-valid-base64!!",
|
||||
mime_type="image/png",
|
||||
)
|
||||
|
||||
content, metadata = await _extract_binary_attachment_markdown(
|
||||
payload, vision_llm=None
|
||||
)
|
||||
|
||||
assert content == ""
|
||||
assert metadata["attachment_extraction_status"] == "invalid_binary_payload"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extract_binary_attachment_markdown_uses_etl(monkeypatch) -> None:
|
||||
now = datetime.now(UTC)
|
||||
payload = NotePayload(
|
||||
vault_id="vault-1",
|
||||
path="assets/spec.pdf",
|
||||
name="spec",
|
||||
extension="pdf",
|
||||
content="",
|
||||
content_hash="abc123",
|
||||
mtime=now,
|
||||
ctime=now,
|
||||
is_binary=True,
|
||||
binary_base64=base64.b64encode(b"%PDF-1.7 fake bytes").decode("ascii"),
|
||||
mime_type="application/pdf",
|
||||
)
|
||||
|
||||
async def _fake_run_etl_extract( # noqa: ANN001
|
||||
*, file_path, filename, vision_llm
|
||||
):
|
||||
assert filename == "spec.pdf"
|
||||
assert file_path
|
||||
assert vision_llm is None
|
||||
return EtlResult(
|
||||
markdown_content="Extracted content",
|
||||
etl_service="TEST_ETL",
|
||||
content_type="document",
|
||||
)
|
||||
|
||||
monkeypatch.setattr(
|
||||
"app.services.obsidian_plugin_indexer._run_etl_extract",
|
||||
_fake_run_etl_extract,
|
||||
)
|
||||
|
||||
content, metadata = await _extract_binary_attachment_markdown(
|
||||
payload, vision_llm=None
|
||||
)
|
||||
|
||||
assert content == "Extracted content"
|
||||
assert metadata["attachment_extraction_status"] == "ok"
|
||||
assert metadata["attachment_etl_service"] == "TEST_ETL"
|
||||
|
||||
|
||||
def test_is_image_attachment_detects_image_extensions() -> None:
|
||||
now = datetime.now(UTC)
|
||||
image_payload = NotePayload(
|
||||
vault_id="vault-1",
|
||||
path="assets/screenshot.PNG",
|
||||
name="screenshot",
|
||||
extension="PNG",
|
||||
content="",
|
||||
content_hash="abc123",
|
||||
mtime=now,
|
||||
ctime=now,
|
||||
is_binary=True,
|
||||
binary_base64=_FAKE_PNG_B64,
|
||||
mime_type="image/png",
|
||||
)
|
||||
pdf_payload = NotePayload(
|
||||
vault_id="vault-1",
|
||||
path="assets/spec.pdf",
|
||||
name="spec",
|
||||
extension="pdf",
|
||||
content="",
|
||||
content_hash="abc123",
|
||||
mtime=now,
|
||||
ctime=now,
|
||||
is_binary=True,
|
||||
binary_base64=_FAKE_PNG_B64,
|
||||
mime_type="application/pdf",
|
||||
)
|
||||
|
||||
assert _is_image_attachment(image_payload) is True
|
||||
assert _is_image_attachment(pdf_payload) is False
|
||||
|
||||
|
||||
def test_note_payload_rejects_binary_without_base64() -> None:
|
||||
now = datetime.now(UTC)
|
||||
with pytest.raises(ValidationError, match="binary_base64 is required"):
|
||||
NotePayload(
|
||||
vault_id="vault-1",
|
||||
path="assets/diagram.png",
|
||||
name="diagram",
|
||||
extension="png",
|
||||
content="",
|
||||
content_hash="abc123",
|
||||
mtime=now,
|
||||
ctime=now,
|
||||
is_binary=True,
|
||||
mime_type="image/png",
|
||||
)
|
||||
|
||||
|
||||
def test_note_payload_rejects_binary_without_mime_type() -> None:
|
||||
now = datetime.now(UTC)
|
||||
with pytest.raises(ValidationError, match="mime_type is required"):
|
||||
NotePayload(
|
||||
vault_id="vault-1",
|
||||
path="assets/diagram.png",
|
||||
name="diagram",
|
||||
extension="png",
|
||||
content="",
|
||||
content_hash="abc123",
|
||||
mtime=now,
|
||||
ctime=now,
|
||||
is_binary=True,
|
||||
binary_base64=_FAKE_PNG_B64,
|
||||
)
|
||||
|
||||
|
||||
def test_note_payload_rejects_markdown_with_binary_fields() -> None:
|
||||
now = datetime.now(UTC)
|
||||
with pytest.raises(
|
||||
ValidationError,
|
||||
match="binary_base64 and mime_type must be omitted when is_binary is False",
|
||||
):
|
||||
NotePayload(
|
||||
vault_id="vault-1",
|
||||
path="notes.md",
|
||||
name="notes",
|
||||
extension="md",
|
||||
content="# Notes",
|
||||
content_hash="abc123",
|
||||
mtime=now,
|
||||
ctime=now,
|
||||
binary_base64=_FAKE_PNG_B64,
|
||||
)
|
||||
|
||||
|
||||
def test_require_extracted_attachment_content_rejects_empty_content() -> None:
|
||||
with pytest.raises(
|
||||
RuntimeError, match="Attachment extraction failed for assets/img.png"
|
||||
):
|
||||
_require_extracted_attachment_content(
|
||||
content=" ",
|
||||
etl_meta={"attachment_extraction_status": "etl_failed"},
|
||||
path="assets/img.png",
|
||||
)
|
||||
Loading…
Add table
Add a link
Reference in a new issue