mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-25 00:36:31 +02:00
feat: implement background processing for binary attachments in Obsidian plugin
- Added a new Celery task for indexing non-markdown attachments. - Enhanced the Obsidian plugin schema to support binary attachments. - Updated routes to enqueue binary attachments for background processing. - Improved metadata handling for binary attachments during indexing. - Added tests for binary attachment processing and validation.
This commit is contained in:
parent
5047527b47
commit
6ac5256431
11 changed files with 519 additions and 68 deletions
|
|
@ -90,6 +90,7 @@ celery_app = Celery(
|
|||
"app.tasks.celery_tasks.podcast_tasks",
|
||||
"app.tasks.celery_tasks.video_presentation_tasks",
|
||||
"app.tasks.celery_tasks.connector_tasks",
|
||||
"app.tasks.celery_tasks.obsidian_tasks",
|
||||
"app.tasks.celery_tasks.schedule_checker_task",
|
||||
"app.tasks.celery_tasks.document_reindex_tasks",
|
||||
"app.tasks.celery_tasks.stale_notification_cleanup_task",
|
||||
|
|
@ -153,6 +154,7 @@ celery_app.conf.update(
|
|||
"index_crawled_urls": {"queue": CONNECTORS_QUEUE},
|
||||
"index_bookstack_pages": {"queue": CONNECTORS_QUEUE},
|
||||
"index_composio_connector": {"queue": CONNECTORS_QUEUE},
|
||||
"index_obsidian_attachment": {"queue": CONNECTORS_QUEUE},
|
||||
# Everything else (document processing, podcasts, reindexing,
|
||||
# schedule checker, cleanup) stays on the default fast queue.
|
||||
},
|
||||
|
|
|
|||
|
|
@ -49,6 +49,7 @@ from app.services.obsidian_plugin_indexer import (
|
|||
rename_note,
|
||||
upsert_note,
|
||||
)
|
||||
from app.tasks.celery_tasks.obsidian_tasks import index_obsidian_attachment_task
|
||||
from app.users import current_active_user
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -204,6 +205,17 @@ async def _resolve_vault_connector(
|
|||
)
|
||||
|
||||
|
||||
def _queue_obsidian_attachment(
    *, connector_id: int, note_payload: dict, user_id: str
) -> None:
    """Hand one non-markdown Obsidian note to Celery for background ETL/indexing.

    ``note_payload`` is the JSON-serialized NotePayload dict; the worker
    re-validates it before indexing.
    """
    task_kwargs = {
        "connector_id": connector_id,
        "payload_data": note_payload,
        "user_id": user_id,
    }
    index_obsidian_attachment_task.delay(**task_kwargs)
|
||||
|
||||
|
||||
async def _ensure_search_space_access(
|
||||
session: AsyncSession,
|
||||
*,
|
||||
|
|
@ -452,6 +464,16 @@ async def obsidian_sync(
|
|||
|
||||
for note in payload.notes:
|
||||
try:
|
||||
if note.is_binary:
|
||||
_queue_obsidian_attachment(
|
||||
connector_id=connector.id,
|
||||
note_payload=note.model_dump(mode="json"),
|
||||
user_id=str(user.id),
|
||||
)
|
||||
indexed += 1
|
||||
items.append(SyncAckItem(path=note.path, status="queued"))
|
||||
continue
|
||||
|
||||
doc = await upsert_note(
|
||||
session, connector=connector, payload=note, user_id=str(user.id)
|
||||
)
|
||||
|
|
|
|||
|
|
@ -52,6 +52,24 @@ class NotePayload(_PluginBase):
|
|||
content_hash: str = Field(
|
||||
..., description="Plugin-computed SHA-256 of the raw content"
|
||||
)
|
||||
is_binary: bool = Field(
|
||||
default=False,
|
||||
description=(
|
||||
"True when payload represents a non-markdown attachment. "
|
||||
"If set, the plugin may include binary_base64 for ETL extraction."
|
||||
),
|
||||
)
|
||||
binary_base64: str | None = Field(
|
||||
default=None,
|
||||
description=(
|
||||
"Base64-encoded raw file bytes for binary attachments. "
|
||||
"Used by the backend ETL pipeline."
|
||||
),
|
||||
)
|
||||
mime_type: str | None = Field(
|
||||
default=None,
|
||||
description="Optional MIME type hint for binary attachments.",
|
||||
)
|
||||
size: int | None = Field(
|
||||
default=None,
|
||||
ge=0,
|
||||
|
|
@ -138,7 +156,7 @@ class HealthResponse(_PluginBase):
|
|||
|
||||
class SyncAckItem(_PluginBase):
|
||||
path: str
|
||||
status: Literal["ok", "error"]
|
||||
status: Literal["ok", "queued", "error"]
|
||||
document_id: int | None = None
|
||||
error: str | None = None
|
||||
|
||||
|
|
|
|||
|
|
@ -32,7 +32,11 @@ compare without re-downloading content.
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import contextlib
|
||||
import logging
|
||||
import os
|
||||
import tempfile
|
||||
from datetime import UTC, datetime
|
||||
from typing import Any
|
||||
from urllib.parse import quote
|
||||
|
|
@ -113,12 +117,18 @@ def _build_metadata(
|
|||
"connector_id": connector_id,
|
||||
"url": _build_source_url(vault_name, payload.path),
|
||||
}
|
||||
if payload.is_binary:
|
||||
meta["is_binary"] = True
|
||||
if payload.mime_type:
|
||||
meta["mime_type"] = payload.mime_type
|
||||
if extra:
|
||||
meta.update(extra)
|
||||
return meta
|
||||
|
||||
|
||||
def _build_document_string(payload: NotePayload, vault_name: str) -> str:
|
||||
def _build_document_string(
|
||||
payload: NotePayload, vault_name: str, *, content_override: str | None = None
|
||||
) -> str:
|
||||
"""Compose the indexable string the pipeline embeds and chunks.
|
||||
|
||||
Mirrors the legacy obsidian indexer's METADATA + CONTENT framing so
|
||||
|
|
@ -126,6 +136,7 @@ def _build_document_string(payload: NotePayload, vault_name: str) -> str:
|
|||
"""
|
||||
tags_line = ", ".join(payload.tags) if payload.tags else "None"
|
||||
links_line = ", ".join(payload.resolved_links) if payload.resolved_links else "None"
|
||||
body = payload.content if content_override is None else content_override
|
||||
return (
|
||||
"<METADATA>\n"
|
||||
f"Title: {payload.name}\n"
|
||||
|
|
@ -135,11 +146,120 @@ def _build_document_string(payload: NotePayload, vault_name: str) -> str:
|
|||
f"Links to: {links_line}\n"
|
||||
"</METADATA>\n\n"
|
||||
"<CONTENT>\n"
|
||||
f"{payload.content}\n"
|
||||
f"{body}\n"
|
||||
"</CONTENT>\n"
|
||||
)
|
||||
|
||||
|
||||
async def _extract_binary_attachment_markdown(
|
||||
payload: NotePayload, *, vision_llm
|
||||
) -> tuple[str, dict[str, Any]]:
|
||||
if not payload.binary_base64:
|
||||
return "", {"attachment_extraction_status": "missing_binary_payload"}
|
||||
|
||||
try:
|
||||
raw_bytes = base64.b64decode(payload.binary_base64, validate=True)
|
||||
except Exception:
|
||||
logger.warning("obsidian attachment payload had invalid base64: %s", payload.path)
|
||||
return "", {"attachment_extraction_status": "invalid_binary_payload"}
|
||||
|
||||
suffix = f".{payload.extension.lstrip('.')}" if payload.extension else ""
|
||||
temp_path: str | None = None
|
||||
filename = payload.path.rsplit("/", 1)[-1] or payload.name
|
||||
try:
|
||||
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
|
||||
tmp.write(raw_bytes)
|
||||
temp_path = tmp.name
|
||||
|
||||
result = await _run_etl_extract(
|
||||
file_path=temp_path,
|
||||
filename=filename,
|
||||
vision_llm=vision_llm,
|
||||
)
|
||||
metadata: dict[str, Any] = {
|
||||
"attachment_extraction_status": "ok",
|
||||
"attachment_etl_service": result.etl_service,
|
||||
"attachment_content_type": result.content_type,
|
||||
}
|
||||
return result.markdown_content, metadata
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"obsidian attachment ETL failed for %s: %s", payload.path, exc, exc_info=True
|
||||
)
|
||||
return "", {
|
||||
"attachment_extraction_status": "etl_failed",
|
||||
"attachment_extraction_error": str(exc)[:300],
|
||||
}
|
||||
finally:
|
||||
if temp_path and os.path.exists(temp_path):
|
||||
with contextlib.suppress(Exception):
|
||||
os.unlink(temp_path)
|
||||
|
||||
|
||||
async def _run_etl_extract(*, file_path: str, filename: str, vision_llm):
    """Run the ETL extraction pipeline on one local file.

    Imports are deferred to call time to avoid module-import cycles between
    the indexer and the ETL pipeline packages.
    """
    from app.etl_pipeline.etl_document import EtlRequest
    from app.etl_pipeline.etl_pipeline_service import EtlPipelineService

    service = EtlPipelineService(vision_llm=vision_llm)
    request = EtlRequest(file_path=file_path, filename=filename)
    return await service.extract(request)
|
||||
|
||||
|
||||
def _is_image_attachment(payload: NotePayload) -> bool:
|
||||
ext = payload.extension.lower().lstrip(".")
|
||||
return ext in {"png", "jpg", "jpeg", "gif", "webp", "bmp", "tiff", "svg"}
|
||||
|
||||
|
||||
async def _resolve_attachment_vision_llm(
|
||||
session: AsyncSession,
|
||||
*,
|
||||
connector: SearchSourceConnector,
|
||||
search_space_id: int,
|
||||
payload: NotePayload,
|
||||
):
|
||||
"""Match connector indexers: only fetch vision LLM for image attachments
|
||||
when the connector has vision indexing enabled."""
|
||||
if not payload.is_binary:
|
||||
return None
|
||||
if not _is_image_attachment(payload):
|
||||
return None
|
||||
if not getattr(connector, "enable_vision_llm", False):
|
||||
return None
|
||||
|
||||
from app.services.llm_service import get_vision_llm
|
||||
|
||||
return await get_vision_llm(session, search_space_id)
|
||||
|
||||
|
||||
async def _resolve_summary_llm(
|
||||
session: AsyncSession, *, user_id: str, search_space_id: int, should_summarize: bool
|
||||
):
|
||||
"""Fetch summary LLM only when indexing summary is enabled."""
|
||||
if not should_summarize:
|
||||
return None
|
||||
|
||||
from app.services.llm_service import get_user_long_context_llm
|
||||
|
||||
return await get_user_long_context_llm(session, user_id, search_space_id)
|
||||
|
||||
|
||||
def _require_extracted_attachment_content(
|
||||
*, content: str, etl_meta: dict[str, Any], path: str
|
||||
) -> str:
|
||||
extracted = content.strip()
|
||||
if extracted:
|
||||
return extracted
|
||||
|
||||
status = etl_meta.get("attachment_extraction_status", "unknown")
|
||||
reason = etl_meta.get("attachment_extraction_error")
|
||||
if reason:
|
||||
raise RuntimeError(
|
||||
f"Attachment extraction failed for {path} ({status}): {reason}"
|
||||
)
|
||||
raise RuntimeError(f"Attachment extraction failed for {path} ({status})")
|
||||
|
||||
|
||||
async def _find_existing_document(
|
||||
session: AsyncSession,
|
||||
*,
|
||||
|
|
@ -207,11 +327,42 @@ async def upsert_note(
|
|||
exc_info=True,
|
||||
)
|
||||
|
||||
document_string = _build_document_string(payload, vault_name)
|
||||
content_for_index = payload.content
|
||||
extra_meta: dict[str, Any] = {}
|
||||
vision_llm = None
|
||||
if payload.is_binary:
|
||||
vision_llm = await _resolve_attachment_vision_llm(
|
||||
session,
|
||||
connector=connector,
|
||||
search_space_id=search_space_id,
|
||||
payload=payload,
|
||||
)
|
||||
content_for_index, etl_meta = await _extract_binary_attachment_markdown(
|
||||
payload, vision_llm=vision_llm
|
||||
)
|
||||
extra_meta.update(etl_meta)
|
||||
# Strict KB behavior: do not index metadata-only attachments.
|
||||
content_for_index = _require_extracted_attachment_content(
|
||||
content=content_for_index,
|
||||
etl_meta=etl_meta,
|
||||
path=payload.path,
|
||||
)
|
||||
|
||||
llm = await _resolve_summary_llm(
|
||||
session,
|
||||
user_id=str(user_id),
|
||||
search_space_id=search_space_id,
|
||||
should_summarize=connector.enable_summary,
|
||||
)
|
||||
|
||||
document_string = _build_document_string(
|
||||
payload, vault_name, content_override=content_for_index
|
||||
)
|
||||
metadata = _build_metadata(
|
||||
payload,
|
||||
vault_name=vault_name,
|
||||
connector_id=connector.id,
|
||||
extra=extra_meta,
|
||||
)
|
||||
|
||||
connector_doc = ConnectorDocument(
|
||||
|
|
@ -223,7 +374,7 @@ async def upsert_note(
|
|||
connector_id=connector.id,
|
||||
created_by_id=str(user_id),
|
||||
should_summarize=connector.enable_summary,
|
||||
fallback_summary=f"Obsidian Note: {payload.name}\n\n{payload.content}",
|
||||
fallback_summary=f"Obsidian Note: {payload.name}\n\n{content_for_index}",
|
||||
metadata=metadata,
|
||||
)
|
||||
|
||||
|
|
@ -236,9 +387,6 @@ async def upsert_note(
|
|||
|
||||
document = prepared[0]
|
||||
|
||||
from app.services.llm_service import get_user_long_context_llm
|
||||
|
||||
llm = await get_user_long_context_llm(session, str(user_id), search_space_id)
|
||||
return await pipeline.index(document, connector_doc, llm)
|
||||
|
||||
|
||||
|
|
|
|||
59
surfsense_backend/app/tasks/celery_tasks/obsidian_tasks.py
Normal file
59
surfsense_backend/app/tasks/celery_tasks/obsidian_tasks.py
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
"""Celery tasks for Obsidian plugin background processing."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
|
||||
from app.celery_app import celery_app
|
||||
from app.db import SearchSourceConnector
|
||||
from app.schemas.obsidian_plugin import NotePayload
|
||||
from app.services.obsidian_plugin_indexer import upsert_note
|
||||
from app.tasks.celery_tasks import get_celery_session_maker
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@celery_app.task(name="index_obsidian_attachment", bind=True)
|
||||
def index_obsidian_attachment_task(
|
||||
self,
|
||||
connector_id: int,
|
||||
payload_data: dict,
|
||||
user_id: str,
|
||||
) -> None:
|
||||
"""Process one Obsidian non-markdown attachment asynchronously."""
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
try:
|
||||
loop.run_until_complete(
|
||||
_index_obsidian_attachment(
|
||||
connector_id=connector_id,
|
||||
payload_data=payload_data,
|
||||
user_id=user_id,
|
||||
)
|
||||
)
|
||||
finally:
|
||||
loop.close()
|
||||
|
||||
|
||||
async def _index_obsidian_attachment(
    *,
    connector_id: int,
    payload_data: dict,
    user_id: str,
) -> None:
    """Load the connector, re-validate the payload, and upsert the attachment.

    Missing connectors are logged and skipped (the connector may have been
    deleted between enqueue and execution).
    """
    session_maker = get_celery_session_maker()
    async with session_maker() as session:
        connector = await session.get(SearchSourceConnector, connector_id)
        if connector is None:
            logger.warning(
                "obsidian attachment task skipped: connector %s not found", connector_id
            )
            return

        payload = NotePayload.model_validate(payload_data)
        await upsert_note(
            session,
            connector=connector,
            payload=payload,
            user_id=user_id,
        )
|
||||
|
|
@ -469,3 +469,51 @@ class TestWireContractSmoke:
|
|||
assert stats_resp.vault_id == vault_id
|
||||
assert stats_resp.files_synced == 0
|
||||
assert stats_resp.last_sync_at is None
|
||||
|
||||
async def test_sync_queues_binary_attachments(
|
||||
self, db_session: AsyncSession, db_user: User, db_search_space: SearchSpace
|
||||
):
|
||||
vault_id = str(uuid.uuid4())
|
||||
await obsidian_connect(
|
||||
ConnectRequest(
|
||||
vault_id=vault_id,
|
||||
vault_name="Queue Vault",
|
||||
search_space_id=db_search_space.id,
|
||||
vault_fingerprint="fp-" + uuid.uuid4().hex,
|
||||
),
|
||||
user=db_user,
|
||||
session=db_session,
|
||||
)
|
||||
|
||||
fake_doc = type("FakeDoc", (), {"id": 12345})()
|
||||
binary_note = _make_note_payload(vault_id, "image.png", "hash-bin")
|
||||
binary_note.extension = "png"
|
||||
binary_note.is_binary = True
|
||||
binary_note.binary_base64 = "aGVsbG8="
|
||||
binary_note.content = ""
|
||||
|
||||
with (
|
||||
patch(
|
||||
"app.routes.obsidian_plugin_routes.upsert_note",
|
||||
new=AsyncMock(return_value=fake_doc),
|
||||
) as upsert_mock,
|
||||
patch("app.routes.obsidian_plugin_routes._queue_obsidian_attachment") as queue_mock,
|
||||
):
|
||||
sync_resp = await obsidian_sync(
|
||||
SyncBatchRequest(
|
||||
vault_id=vault_id,
|
||||
notes=[
|
||||
_make_note_payload(vault_id, "ok.md", "hash-ok"),
|
||||
binary_note,
|
||||
],
|
||||
),
|
||||
user=db_user,
|
||||
session=db_session,
|
||||
)
|
||||
|
||||
assert sync_resp.indexed == 2
|
||||
assert sync_resp.failed == 0
|
||||
statuses = {it.path: it.status for it in sync_resp.items}
|
||||
assert statuses == {"ok.md": "ok", "image.png": "queued"}
|
||||
assert upsert_mock.await_count == 1
|
||||
queue_mock.assert_called_once()
|
||||
|
|
|
|||
|
|
@ -1,9 +1,18 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
from datetime import UTC, datetime
|
||||
|
||||
import pytest
|
||||
|
||||
from app.etl_pipeline.etl_document import EtlResult
|
||||
from app.schemas.obsidian_plugin import HeadingRef, NotePayload
|
||||
from app.services.obsidian_plugin_indexer import _build_metadata
|
||||
from app.services.obsidian_plugin_indexer import (
|
||||
_build_metadata,
|
||||
_extract_binary_attachment_markdown,
|
||||
_is_image_attachment,
|
||||
_require_extracted_attachment_content,
|
||||
)
|
||||
|
||||
|
||||
def test_build_metadata_serializes_headings_to_plain_json() -> None:
|
||||
|
|
@ -23,3 +32,130 @@ def test_build_metadata_serializes_headings_to_plain_json() -> None:
|
|||
metadata = _build_metadata(payload, vault_name="My Vault", connector_id=42)
|
||||
|
||||
assert metadata["headings"] == [{"heading": "Notes", "level": 1}]
|
||||
|
||||
|
||||
def test_build_metadata_marks_binary_attachment_fields() -> None:
|
||||
now = datetime.now(UTC)
|
||||
payload = NotePayload(
|
||||
vault_id="vault-1",
|
||||
path="assets/diagram.png",
|
||||
name="diagram",
|
||||
extension="png",
|
||||
content="",
|
||||
content_hash="abc123",
|
||||
mtime=now,
|
||||
ctime=now,
|
||||
is_binary=True,
|
||||
mime_type="image/png",
|
||||
)
|
||||
|
||||
metadata = _build_metadata(payload, vault_name="My Vault", connector_id=42)
|
||||
|
||||
assert metadata["is_binary"] is True
|
||||
assert metadata["mime_type"] == "image/png"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extract_binary_attachment_markdown_handles_invalid_base64() -> None:
|
||||
now = datetime.now(UTC)
|
||||
payload = NotePayload(
|
||||
vault_id="vault-1",
|
||||
path="assets/diagram.png",
|
||||
name="diagram",
|
||||
extension="png",
|
||||
content="",
|
||||
content_hash="abc123",
|
||||
mtime=now,
|
||||
ctime=now,
|
||||
is_binary=True,
|
||||
binary_base64="not-valid-base64!!",
|
||||
)
|
||||
|
||||
content, metadata = await _extract_binary_attachment_markdown(
|
||||
payload, vision_llm=None
|
||||
)
|
||||
|
||||
assert content == ""
|
||||
assert metadata["attachment_extraction_status"] == "invalid_binary_payload"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extract_binary_attachment_markdown_uses_etl(monkeypatch) -> None:
|
||||
now = datetime.now(UTC)
|
||||
payload = NotePayload(
|
||||
vault_id="vault-1",
|
||||
path="assets/spec.pdf",
|
||||
name="spec",
|
||||
extension="pdf",
|
||||
content="",
|
||||
content_hash="abc123",
|
||||
mtime=now,
|
||||
ctime=now,
|
||||
is_binary=True,
|
||||
binary_base64=base64.b64encode(b"%PDF-1.7 fake bytes").decode("ascii"),
|
||||
)
|
||||
|
||||
async def _fake_run_etl_extract( # noqa: ANN001
|
||||
*, file_path, filename, vision_llm
|
||||
):
|
||||
assert filename == "spec.pdf"
|
||||
assert file_path
|
||||
assert vision_llm is None
|
||||
return EtlResult(
|
||||
markdown_content="Extracted content",
|
||||
etl_service="TEST_ETL",
|
||||
content_type="document",
|
||||
)
|
||||
|
||||
monkeypatch.setattr(
|
||||
"app.services.obsidian_plugin_indexer._run_etl_extract",
|
||||
_fake_run_etl_extract,
|
||||
)
|
||||
|
||||
content, metadata = await _extract_binary_attachment_markdown(
|
||||
payload, vision_llm=None
|
||||
)
|
||||
|
||||
assert content == "Extracted content"
|
||||
assert metadata["attachment_extraction_status"] == "ok"
|
||||
assert metadata["attachment_etl_service"] == "TEST_ETL"
|
||||
|
||||
|
||||
def test_is_image_attachment_detects_image_extensions() -> None:
|
||||
now = datetime.now(UTC)
|
||||
image_payload = NotePayload(
|
||||
vault_id="vault-1",
|
||||
path="assets/screenshot.PNG",
|
||||
name="screenshot",
|
||||
extension="PNG",
|
||||
content="",
|
||||
content_hash="abc123",
|
||||
mtime=now,
|
||||
ctime=now,
|
||||
is_binary=True,
|
||||
)
|
||||
pdf_payload = NotePayload(
|
||||
vault_id="vault-1",
|
||||
path="assets/spec.pdf",
|
||||
name="spec",
|
||||
extension="pdf",
|
||||
content="",
|
||||
content_hash="abc123",
|
||||
mtime=now,
|
||||
ctime=now,
|
||||
is_binary=True,
|
||||
)
|
||||
|
||||
assert _is_image_attachment(image_payload) is True
|
||||
assert _is_image_attachment(pdf_payload) is False
|
||||
|
||||
|
||||
def test_require_extracted_attachment_content_rejects_empty_content() -> None:
    """Whitespace-only extraction must raise, with the path and status in the message."""
    with pytest.raises(
        RuntimeError, match="Attachment extraction failed for assets/img.png"
    ):
        _require_extracted_attachment_content(
            content=" ",
            etl_meta={"attachment_extraction_status": "etl_failed"},
            path="assets/img.png",
        )
|
||||
|
|
|
|||
|
|
@ -138,7 +138,7 @@ export interface ManifestResponse {
|
|||
/** Per-item ack shapes — mirror `app/schemas/obsidian_plugin.py` 1:1. */
|
||||
export interface SyncAckItem {
|
||||
path: string;
|
||||
status: "ok" | "error";
|
||||
status: "ok" | "queued" | "error";
|
||||
document_id?: number;
|
||||
error?: string;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -108,7 +108,7 @@ const PluginStats: FC<{ config: Record<string, unknown> }> = ({ config }) => {
|
|||
const tileRows = useMemo(() => {
|
||||
const placeholder = statsError ? "—" : stats ? null : "…";
|
||||
return [
|
||||
{ label: "Vault", value: (config.vault_name as string) || "—" },
|
||||
{ label: "Vault name", value: (config.vault_name as string) || "—" },
|
||||
{
|
||||
label: "Last sync",
|
||||
value: placeholder ?? formatTimestamp(stats?.last_sync_at ?? null),
|
||||
|
|
@ -134,7 +134,7 @@ const PluginStats: FC<{ config: Record<string, unknown> }> = ({ config }) => {
|
|||
</Alert>
|
||||
|
||||
<div className="rounded-xl bg-slate-400/5 p-3 sm:p-6 dark:bg-white/5">
|
||||
<h3 className="mb-3 text-sm font-medium sm:text-base">Vault status</h3>
|
||||
<h3 className="mb-3 text-sm font-medium sm:text-base">Vault Status</h3>
|
||||
<dl className="grid grid-cols-1 gap-3 sm:grid-cols-2">
|
||||
{tileRows.map((stat) => (
|
||||
<div key={stat.label} className="rounded-lg bg-background/50 p-3">
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
"use client";
|
||||
|
||||
import { useAtomValue } from "jotai";
|
||||
import { ArrowLeft, Info, RefreshCw, Trash2 } from "lucide-react";
|
||||
import { ArrowLeft, Info, RefreshCw } from "lucide-react";
|
||||
import { type FC, useCallback, useEffect, useMemo, useRef, useState } from "react";
|
||||
import { toast } from "sonner";
|
||||
import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms";
|
||||
|
|
@ -19,6 +19,14 @@ import { VisionLLMConfig } from "../../components/vision-llm-config";
|
|||
import { getConnectorDisplayName } from "../../tabs/all-connectors-tab";
|
||||
import { getConnectorConfigComponent } from "../index";
|
||||
|
||||
const VISION_LLM_CONNECTOR_TYPES = new Set<SearchSourceConnector["connector_type"]>([
|
||||
EnumConnectorName.GOOGLE_DRIVE_CONNECTOR,
|
||||
EnumConnectorName.COMPOSIO_GOOGLE_DRIVE_CONNECTOR,
|
||||
EnumConnectorName.DROPBOX_CONNECTOR,
|
||||
EnumConnectorName.ONEDRIVE_CONNECTOR,
|
||||
EnumConnectorName.OBSIDIAN_CONNECTOR,
|
||||
]);
|
||||
|
||||
const REAUTH_ENDPOINTS: Partial<Record<string, string>> = {
|
||||
[EnumConnectorName.LINEAR_CONNECTOR]: "/api/v1/auth/linear/connector/reauth",
|
||||
[EnumConnectorName.NOTION_CONNECTOR]: "/api/v1/auth/notion/connector/reauth",
|
||||
|
|
@ -87,10 +95,9 @@ export const ConnectorEditView: FC<ConnectorEditViewProps> = ({
|
|||
const isAuthExpired = connector.config?.auth_expired === true;
|
||||
const reauthEndpoint = REAUTH_ENDPOINTS[connector.connector_type];
|
||||
const [reauthing, setReauthing] = useState(false);
|
||||
// Obsidian is plugin-driven: name + config are owned by the plugin, so
|
||||
// the web edit view has nothing the user can persist back. Hide Save
|
||||
// (and re-auth, which Obsidian never uses) entirely for that type.
|
||||
const isPluginManagedReadOnly = connector.connector_type === EnumConnectorName.OBSIDIAN_CONNECTOR;
|
||||
const supportsVisionLlm = VISION_LLM_CONNECTOR_TYPES.has(connector.connector_type);
|
||||
const showsAiToggles =
|
||||
connector.is_indexable || connector.connector_type === EnumConnectorName.OBSIDIAN_CONNECTOR;
|
||||
|
||||
const handleReauth = useCallback(async () => {
|
||||
const spaceId = searchSpaceId ?? searchSpaceIdAtom;
|
||||
|
|
@ -275,25 +282,23 @@ export const ConnectorEditView: FC<ConnectorEditViewProps> = ({
|
|||
/>
|
||||
)}
|
||||
|
||||
{/* Summary and sync settings - only shown for indexable connectors */}
|
||||
{connector.is_indexable && (
|
||||
{/* Summary + vision toggles (Obsidian is plugin-push, non-indexable by design) */}
|
||||
{showsAiToggles && (
|
||||
<>
|
||||
{/* AI Summary toggle */}
|
||||
<SummaryConfig enabled={enableSummary} onEnabledChange={onEnableSummaryChange} />
|
||||
|
||||
{/* Vision LLM toggle - only for file-based connectors */}
|
||||
{(connector.connector_type === "GOOGLE_DRIVE_CONNECTOR" ||
|
||||
connector.connector_type === "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" ||
|
||||
connector.connector_type === "DROPBOX_CONNECTOR" ||
|
||||
connector.connector_type === "ONEDRIVE_CONNECTOR") && (
|
||||
{/* Vision LLM toggle for file/attachment connectors */}
|
||||
{supportsVisionLlm && (
|
||||
<VisionLLMConfig
|
||||
enabled={enableVisionLlm}
|
||||
onEnabledChange={onEnableVisionLlmChange}
|
||||
/>
|
||||
)}
|
||||
|
||||
{/* Date range selector - not shown for file-based connectors (Drive, Dropbox, OneDrive), Webcrawler, GitHub, or Local Folder */}
|
||||
{connector.connector_type !== "GOOGLE_DRIVE_CONNECTOR" &&
|
||||
{/* Date-range and periodic sync stay indexable-only */}
|
||||
{connector.is_indexable &&
|
||||
connector.connector_type !== "GOOGLE_DRIVE_CONNECTOR" &&
|
||||
connector.connector_type !== "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" &&
|
||||
connector.connector_type !== "DROPBOX_CONNECTOR" &&
|
||||
connector.connector_type !== "ONEDRIVE_CONNECTOR" &&
|
||||
|
|
@ -313,37 +318,40 @@ export const ConnectorEditView: FC<ConnectorEditViewProps> = ({
|
|||
/>
|
||||
)}
|
||||
|
||||
{(() => {
|
||||
const isGoogleDrive = connector.connector_type === "GOOGLE_DRIVE_CONNECTOR";
|
||||
const isComposioGoogleDrive =
|
||||
connector.connector_type === "COMPOSIO_GOOGLE_DRIVE_CONNECTOR";
|
||||
const requiresFolderSelection = isGoogleDrive || isComposioGoogleDrive;
|
||||
const selectedFolders =
|
||||
(connector.config?.selected_folders as
|
||||
| Array<{ id: string; name: string }>
|
||||
| undefined) || [];
|
||||
const selectedFiles =
|
||||
(connector.config?.selected_files as
|
||||
| Array<{ id: string; name: string }>
|
||||
| undefined) || [];
|
||||
const hasItemsSelected = selectedFolders.length > 0 || selectedFiles.length > 0;
|
||||
const isDisabled = requiresFolderSelection && !hasItemsSelected;
|
||||
{connector.is_indexable &&
|
||||
(() => {
|
||||
const isGoogleDrive =
|
||||
connector.connector_type === "GOOGLE_DRIVE_CONNECTOR";
|
||||
const isComposioGoogleDrive =
|
||||
connector.connector_type === "COMPOSIO_GOOGLE_DRIVE_CONNECTOR";
|
||||
const requiresFolderSelection = isGoogleDrive || isComposioGoogleDrive;
|
||||
const selectedFolders =
|
||||
(connector.config?.selected_folders as
|
||||
| Array<{ id: string; name: string }>
|
||||
| undefined) || [];
|
||||
const selectedFiles =
|
||||
(connector.config?.selected_files as
|
||||
| Array<{ id: string; name: string }>
|
||||
| undefined) || [];
|
||||
const hasItemsSelected =
|
||||
selectedFolders.length > 0 || selectedFiles.length > 0;
|
||||
const isDisabled = requiresFolderSelection && !hasItemsSelected;
|
||||
|
||||
return (
|
||||
<PeriodicSyncConfig
|
||||
enabled={periodicEnabled}
|
||||
frequencyMinutes={frequencyMinutes}
|
||||
onEnabledChange={onPeriodicEnabledChange}
|
||||
onFrequencyChange={onFrequencyChange}
|
||||
disabled={isDisabled}
|
||||
disabledMessage={
|
||||
isDisabled
|
||||
? "Select at least one folder or file above to enable periodic sync"
|
||||
: undefined
|
||||
}
|
||||
/>
|
||||
);
|
||||
})()}
|
||||
return (
|
||||
<PeriodicSyncConfig
|
||||
enabled={periodicEnabled}
|
||||
frequencyMinutes={frequencyMinutes}
|
||||
onEnabledChange={onPeriodicEnabledChange}
|
||||
onFrequencyChange={onFrequencyChange}
|
||||
disabled={isDisabled}
|
||||
disabledMessage={
|
||||
isDisabled
|
||||
? "Select at least one folder or file above to enable periodic sync"
|
||||
: undefined
|
||||
}
|
||||
/>
|
||||
);
|
||||
})()}
|
||||
</>
|
||||
)}
|
||||
|
||||
|
|
@ -412,11 +420,10 @@ export const ConnectorEditView: FC<ConnectorEditViewProps> = ({
|
|||
disabled={isSaving || isDisconnecting}
|
||||
className="text-xs sm:text-sm flex-1 sm:flex-initial h-12 sm:h-auto py-3 sm:py-2"
|
||||
>
|
||||
<Trash2 className="mr-2 h-4 w-4" />
|
||||
Disconnect
|
||||
</Button>
|
||||
)}
|
||||
{isPluginManagedReadOnly ? null : isAuthExpired && reauthEndpoint ? (
|
||||
{isAuthExpired && reauthEndpoint ? (
|
||||
<Button
|
||||
onClick={handleReauth}
|
||||
disabled={reauthing || isDisconnecting}
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import { ArrowLeft, Check, Info } from "lucide-react";
|
|||
import { type FC, useCallback, useEffect, useMemo, useRef, useState } from "react";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import { Spinner } from "@/components/ui/spinner";
|
||||
import { EnumConnectorName } from "@/contracts/enums/connector";
|
||||
import type { SearchSourceConnector } from "@/contracts/types/connector.types";
|
||||
import { getConnectorTypeDisplay } from "@/lib/connectors/utils";
|
||||
import { cn } from "@/lib/utils";
|
||||
|
|
@ -15,6 +16,14 @@ import type { IndexingConfigState } from "../../constants/connector-constants";
|
|||
import { getConnectorDisplayName } from "../../tabs/all-connectors-tab";
|
||||
import { getConnectorConfigComponent } from "../index";
|
||||
|
||||
const VISION_LLM_CONNECTOR_TYPES = new Set<string>([
|
||||
"GOOGLE_DRIVE_CONNECTOR",
|
||||
"COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
|
||||
"DROPBOX_CONNECTOR",
|
||||
"ONEDRIVE_CONNECTOR",
|
||||
"OBSIDIAN_CONNECTOR",
|
||||
]);
|
||||
|
||||
interface IndexingConfigurationViewProps {
|
||||
config: IndexingConfigState;
|
||||
connector?: SearchSourceConnector;
|
||||
|
|
@ -63,6 +72,9 @@ export const IndexingConfigurationView: FC<IndexingConfigurationViewProps> = ({
|
|||
() => (connector ? getConnectorConfigComponent(connector.connector_type) : null),
|
||||
[connector]
|
||||
);
|
||||
const showsAiToggles =
|
||||
(connector?.is_indexable ?? false) ||
|
||||
connector?.connector_type === EnumConnectorName.OBSIDIAN_CONNECTOR;
|
||||
const [isScrolled, setIsScrolled] = useState(false);
|
||||
const [hasMoreContent, setHasMoreContent] = useState(false);
|
||||
const scrollContainerRef = useRef<HTMLDivElement>(null);
|
||||
|
|
@ -157,25 +169,23 @@ export const IndexingConfigurationView: FC<IndexingConfigurationViewProps> = ({
|
|||
<ConnectorConfigComponent connector={connector} onConfigChange={onConfigChange} />
|
||||
)}
|
||||
|
||||
{/* Summary and sync settings - only shown for indexable connectors */}
|
||||
{connector?.is_indexable && (
|
||||
{/* Summary + vision toggles (Obsidian is plugin-push, non-indexable by design) */}
|
||||
{showsAiToggles && (
|
||||
<>
|
||||
{/* AI Summary toggle */}
|
||||
<SummaryConfig enabled={enableSummary} onEnabledChange={onEnableSummaryChange} />
|
||||
|
||||
{/* Vision LLM toggle - only for file-based connectors */}
|
||||
{(config.connectorType === "GOOGLE_DRIVE_CONNECTOR" ||
|
||||
config.connectorType === "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" ||
|
||||
config.connectorType === "DROPBOX_CONNECTOR" ||
|
||||
config.connectorType === "ONEDRIVE_CONNECTOR") && (
|
||||
{/* Vision LLM toggle for file/attachment connectors */}
|
||||
{VISION_LLM_CONNECTOR_TYPES.has(config.connectorType) && (
|
||||
<VisionLLMConfig
|
||||
enabled={enableVisionLlm}
|
||||
onEnabledChange={onEnableVisionLlmChange}
|
||||
/>
|
||||
)}
|
||||
|
||||
{/* Date range selector - not shown for file-based connectors (Drive, Dropbox, OneDrive), Webcrawler, GitHub, or Local Folder */}
|
||||
{config.connectorType !== "GOOGLE_DRIVE_CONNECTOR" &&
|
||||
{/* Date-range and periodic sync stay indexable-only */}
|
||||
{connector?.is_indexable &&
|
||||
config.connectorType !== "GOOGLE_DRIVE_CONNECTOR" &&
|
||||
config.connectorType !== "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" &&
|
||||
config.connectorType !== "DROPBOX_CONNECTOR" &&
|
||||
config.connectorType !== "ONEDRIVE_CONNECTOR" &&
|
||||
|
|
@ -195,7 +205,8 @@ export const IndexingConfigurationView: FC<IndexingConfigurationViewProps> = ({
|
|||
/>
|
||||
)}
|
||||
|
||||
{config.connectorType !== "GOOGLE_DRIVE_CONNECTOR" &&
|
||||
{connector?.is_indexable &&
|
||||
config.connectorType !== "GOOGLE_DRIVE_CONNECTOR" &&
|
||||
config.connectorType !== "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" &&
|
||||
config.connectorType !== "DROPBOX_CONNECTOR" &&
|
||||
config.connectorType !== "ONEDRIVE_CONNECTOR" && (
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue