refactor: remove legacy Obsidian connector support

This commit is contained in:
Anish Sarkar 2026-04-22 00:10:24 +05:30
parent 16ea8e2401
commit 99623a85d5
10 changed files with 44 additions and 1046 deletions

View file

@ -152,7 +152,6 @@ celery_app.conf.update(
"index_elasticsearch_documents": {"queue": CONNECTORS_QUEUE},
"index_crawled_urls": {"queue": CONNECTORS_QUEUE},
"index_bookstack_pages": {"queue": CONNECTORS_QUEUE},
"index_obsidian_vault": {"queue": CONNECTORS_QUEUE},
"index_composio_connector": {"queue": CONNECTORS_QUEUE},
# Everything else (document processing, podcasts, reindexing,
# schedule checker, cleanup) stays on the default fast queue.

View file

@ -1157,25 +1157,6 @@ async def index_connector_content(
)
response_message = "Web page indexing started in the background."
elif connector.connector_type == SearchSourceConnectorType.OBSIDIAN_CONNECTOR:
from app.config import config as app_config
from app.tasks.celery_tasks.connector_tasks import index_obsidian_vault_task
# Obsidian connector only available in self-hosted mode
if not app_config.is_self_hosted():
raise HTTPException(
status_code=400,
detail="Obsidian connector is only available in self-hosted mode",
)
logger.info(
f"Triggering Obsidian vault indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}"
)
index_obsidian_vault_task.delay(
connector_id, search_space_id, str(user.id), indexing_from, indexing_to
)
response_message = "Obsidian vault indexing started in the background."
elif (
connector.connector_type
== SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR
@ -3048,59 +3029,6 @@ async def run_bookstack_indexing(
)
# Add new helper functions for Obsidian indexing
async def run_obsidian_indexing_with_new_session(
    connector_id: int,
    search_space_id: int,
    user_id: str,
    start_date: str,
    end_date: str,
):
    """
    Run Obsidian indexing inside a database session opened just for this call.

    Args:
        connector_id: ID of the Obsidian connector
        search_space_id: ID of the search space
        user_id: ID of the user
        start_date: Start date for indexing
        end_date: End date for indexing
    """
    logger.info(
        f"Background task started: Indexing Obsidian connector {connector_id} into space {search_space_id} from {start_date} to {end_date}"
    )
    # Everything except the session is forwarded unchanged to the real worker.
    forwarded_args = (connector_id, search_space_id, user_id, start_date, end_date)
    async with async_session_maker() as db_session:
        await run_obsidian_indexing(db_session, *forwarded_args)
    logger.info(f"Background task finished: Indexing Obsidian connector {connector_id}")
async def run_obsidian_indexing(
    session: AsyncSession,
    connector_id: int,
    search_space_id: int,
    user_id: str,
    start_date: str,
    end_date: str,
):
    """
    Run Obsidian vault indexing through the shared notification-aware runner.

    Args:
        session: Database session
        connector_id: ID of the Obsidian connector
        search_space_id: ID of the search space
        user_id: ID of the user
        start_date: Start date for indexing
        end_date: End date for indexing
    """
    # Imported at call time, as in the rest of this module's indexing helpers.
    from app.tasks.connector_indexers import index_obsidian_vault

    runner_kwargs = {
        "session": session,
        "connector_id": connector_id,
        "search_space_id": search_space_id,
        "user_id": user_id,
        "start_date": start_date,
        "end_date": end_date,
        "indexing_function": index_obsidian_vault,
        "update_timestamp_func": _update_connector_timestamp_by_id,
        "supports_heartbeat_callback": True,
    }
    await _run_indexing_with_notifications(**runner_kwargs)
async def run_composio_indexing_with_new_session(
connector_id: int,
search_space_id: int,

View file

@ -1,59 +0,0 @@
"""
Obsidian Connector Credentials Schema.
Obsidian is a local-first note-taking app that stores notes as markdown files.
This connector supports indexing from local file system (self-hosted only).
"""
from pydantic import BaseModel, field_validator
class ObsidianAuthCredentialsBase(BaseModel):
    """
    Credentials/configuration for the Obsidian connector.

    Since Obsidian vaults are local directories, this schema primarily
    holds the vault path and configuration options rather than API tokens.
    """

    # Validators are skipped for default values unless this is enabled. Without
    # it, omitting `exclude_folders` left the field as None, while passing None
    # explicitly (as `from_dict` does) produced the default exclusion list.
    model_config = {"validate_default": True}

    vault_path: str
    vault_name: str | None = None
    exclude_folders: list[str] | None = None
    include_attachments: bool = False

    @field_validator("vault_path")
    @classmethod
    def validate_vault_path(cls, v: str) -> str:
        """Return the vault path stripped of whitespace; reject blank paths."""
        stripped = v.strip() if v else ""
        if not stripped:
            raise ValueError("Vault path is required")
        return stripped

    @field_validator("exclude_folders", mode="before")
    @classmethod
    def parse_exclude_folders(cls, v):
        """Normalise exclude_folders.

        None becomes the default exclusion list, a comma-separated string is
        split into its non-empty, stripped parts, and any other value is
        passed through for normal field validation.
        """
        if v is None:
            return [".trash", ".obsidian", "templates"]
        if isinstance(v, str):
            return [f.strip() for f in v.split(",") if f.strip()]
        return v

    def to_dict(self) -> dict:
        """Convert credentials to dictionary for storage."""
        return {
            "vault_path": self.vault_path,
            "vault_name": self.vault_name,
            "exclude_folders": self.exclude_folders,
            "include_attachments": self.include_attachments,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "ObsidianAuthCredentialsBase":
        """Create credentials from dictionary.

        A missing "vault_path" is passed as an empty string, which the
        vault_path validator rejects.
        """
        return cls(
            vault_path=data.get("vault_path", ""),
            vault_name=data.get("vault_name"),
            exclude_folders=data.get("exclude_folders"),
            include_attachments=data.get("include_attachments", False),
        )

View file

@ -883,49 +883,6 @@ async def _index_bookstack_pages(
)
@celery_app.task(name="index_obsidian_vault", bind=True)
def index_obsidian_vault_task(
    self,
    connector_id: int,
    search_space_id: int,
    user_id: str,
    start_date: str,
    end_date: str,
):
    """Celery entry point that indexes an Obsidian vault.

    Creates a fresh event loop, installs it as the current loop, drives the
    async indexing coroutine to completion on it, and always closes the loop
    afterwards.
    """
    import asyncio

    event_loop = asyncio.new_event_loop()
    asyncio.set_event_loop(event_loop)
    try:
        indexing_coro = _index_obsidian_vault(
            connector_id, search_space_id, user_id, start_date, end_date
        )
        event_loop.run_until_complete(indexing_coro)
    finally:
        event_loop.close()
async def _index_obsidian_vault(
    connector_id: int,
    search_space_id: int,
    user_id: str,
    start_date: str,
    end_date: str,
):
    """Open a Celery-side database session and run Obsidian indexing in it."""
    # Imported at call time rather than at module import.
    from app.routes.search_source_connectors_routes import run_obsidian_indexing

    session_factory = get_celery_session_maker()
    async with session_factory() as db_session:
        await run_obsidian_indexing(
            db_session, connector_id, search_space_id, user_id, start_date, end_date
        )
@celery_app.task(name="index_composio_connector", bind=True)
def index_composio_connector_task(
self,

View file

@ -46,7 +46,6 @@ from .linear_indexer import index_linear_issues
# Documentation and knowledge management
from .luma_indexer import index_luma_events
from .notion_indexer import index_notion_pages
from .obsidian_indexer import index_obsidian_vault
from .slack_indexer import index_slack_messages
from .webcrawler_indexer import index_crawled_urls
@ -69,7 +68,6 @@ __all__ = [ # noqa: RUF022
"index_linear_issues",
# Documentation and knowledge management
"index_notion_pages",
"index_obsidian_vault",
"index_crawled_urls",
# Communication platforms
"index_slack_messages",

View file

@ -1,676 +0,0 @@
"""
Obsidian connector indexer.
Indexes markdown notes from a local Obsidian vault.
This connector is only available in self-hosted mode.
Implements 2-phase document status updates for real-time UI feedback:
- Phase 1: Create all documents with 'pending' status (visible in UI immediately)
- Phase 2: Process each document: pending → processing → ready/failed
"""
import os
import re
import time
from collections.abc import Awaitable, Callable
from datetime import UTC, datetime
from pathlib import Path
import yaml
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.ext.asyncio import AsyncSession
from app.config import config
from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
from app.services.llm_service import get_user_long_context_llm
from app.services.task_logging_service import TaskLoggingService
from app.utils.document_converters import (
create_document_chunks,
embed_text,
generate_content_hash,
generate_document_summary,
generate_unique_identifier_hash,
)
from .base import (
build_document_metadata_string,
check_document_by_unique_identifier,
check_duplicate_document_by_hash,
get_connector_by_id,
get_current_timestamp,
logger,
safe_set_chunks,
update_connector_last_indexed,
)
# Type hint for heartbeat callback
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
# Heartbeat interval in seconds
HEARTBEAT_INTERVAL_SECONDS = 30
def parse_frontmatter(content: str) -> tuple[dict | None, str]:
"""
Parse YAML frontmatter from markdown content.
Args:
content: The full markdown content
Returns:
Tuple of (frontmatter dict or None, content without frontmatter)
"""
if not content.startswith("---"):
return None, content
# Find the closing ---
end_match = re.search(r"\n---\n", content[3:])
if not end_match:
return None, content
frontmatter_str = content[3 : end_match.start() + 3]
remaining_content = content[end_match.end() + 3 :]
try:
frontmatter = yaml.safe_load(frontmatter_str)
return frontmatter, remaining_content.strip()
except yaml.YAMLError:
return None, content
def extract_wiki_links(content: str) -> list[str]:
    """
    Extract [[wiki-style links]] from content.

    Handles `[[link]]`, `[[link|alias]]` and `[[link#heading]]`. Only the
    target note name is kept: aliases and heading/block anchors are dropped
    and surrounding whitespace is stripped. Links with no note name (such as
    `[[#heading]]`, which points into the current note) are skipped.

    Args:
        content: Markdown content

    Returns:
        List of unique linked note names, in order of first appearance
    """
    # Match [[link]] or [[link|alias]]
    pattern = r"\[\[([^\]|]+)(?:\|[^\]]+)?\]\]"
    targets = (
        raw_target.partition("#")[0].strip()
        for raw_target in re.findall(pattern, content)
    )
    # dict.fromkeys de-duplicates while keeping a stable, reproducible order
    # (a set would reorder the names from one run to the next).
    return list(dict.fromkeys(name for name in targets if name))
def extract_tags(content: str) -> list[str]:
    """
    Extract inline #tags from content.

    A tag must be preceded by whitespace or the start of the text and must
    begin with an ASCII letter, so markdown headers (`## Title`) and
    fragments such as `word#anchor` are not matched. Tags declared in YAML
    frontmatter are not handled here.

    Args:
        content: Markdown content

    Returns:
        List of unique tags (without # prefix), in order of first appearance
    """
    # Match #tag but not ## headers
    pattern = r"(?<!\S)#([a-zA-Z][a-zA-Z0-9_/-]*)"
    # dict.fromkeys de-duplicates while keeping a stable, reproducible order.
    return list(dict.fromkeys(re.findall(pattern, content)))
def scan_vault(
vault_path: str,
exclude_folders: list[str] | None = None,
) -> list[dict]:
"""
Scan an Obsidian vault for markdown files.
Args:
vault_path: Path to the Obsidian vault
exclude_folders: List of folder names to exclude
Returns:
List of file info dicts with path, name, modified time
"""
if exclude_folders is None:
exclude_folders = [".trash", ".obsidian", "templates"]
vault = Path(vault_path)
if not vault.exists():
raise ValueError(f"Vault path does not exist: {vault_path}")
files = []
for md_file in vault.rglob("*.md"):
# Check if file is in an excluded folder
relative_path = md_file.relative_to(vault)
parts = relative_path.parts
if any(excluded in parts for excluded in exclude_folders):
continue
try:
stat = md_file.stat()
files.append(
{
"path": str(md_file),
"relative_path": str(relative_path),
"name": md_file.stem,
"modified_at": datetime.fromtimestamp(stat.st_mtime, tz=UTC),
"created_at": datetime.fromtimestamp(stat.st_ctime, tz=UTC),
"size": stat.st_size,
}
)
except OSError as e:
logger.warning(f"Could not stat file {md_file}: {e}")
return files
async def index_obsidian_vault(
session: AsyncSession,
connector_id: int,
search_space_id: int,
user_id: str,
start_date: str | None = None,
end_date: str | None = None,
update_last_indexed: bool = True,
on_heartbeat_callback: HeartbeatCallbackType | None = None,
) -> tuple[int, str | None]:
"""
Index notes from a local Obsidian vault.
This indexer is only available in self-hosted mode as it requires
direct file system access to the user's Obsidian vault.
Args:
session: Database session
connector_id: ID of the Obsidian connector
search_space_id: ID of the search space to store documents in
user_id: ID of the user
start_date: Start date for filtering (YYYY-MM-DD format) - optional
end_date: End date for filtering (YYYY-MM-DD format) - optional
update_last_indexed: Whether to update the last_indexed_at timestamp
on_heartbeat_callback: Optional callback to update notification during long-running indexing.
Returns:
Tuple containing (number of documents indexed, error message or None)
"""
# Flow: self-hosted guard -> load connector and its config -> scan the vault ->
# optional date filter on modification time -> Phase 1 (create pending rows) ->
# Phase 2 (summarise / embed / chunk each note) -> final commit and task log.
task_logger = TaskLoggingService(session, search_space_id)
# Check if self-hosted mode
# This early return happens before log_task_start, so no task-log entry is written for it.
if not config.is_self_hosted():
return 0, "Obsidian connector is only available in self-hosted mode"
# Log task start
log_entry = await task_logger.log_task_start(
task_name="obsidian_vault_indexing",
source="connector_indexing_task",
message=f"Starting Obsidian vault indexing for connector {connector_id}",
metadata={
"connector_id": connector_id,
"user_id": str(user_id),
"start_date": start_date,
"end_date": end_date,
},
)
try:
# Get the connector
await task_logger.log_task_progress(
log_entry,
f"Retrieving Obsidian connector {connector_id} from database",
{"stage": "connector_retrieval"},
)
connector = await get_connector_by_id(
session, connector_id, SearchSourceConnectorType.OBSIDIAN_CONNECTOR
)
if not connector:
await task_logger.log_task_failure(
log_entry,
f"Connector with ID {connector_id} not found or is not an Obsidian connector",
"Connector not found",
{"error_type": "ConnectorNotFound"},
)
return (
0,
f"Connector with ID {connector_id} not found or is not an Obsidian connector",
)
# Get vault path from connector config
vault_path = connector.config.get("vault_path")
if not vault_path:
await task_logger.log_task_failure(
log_entry,
"Vault path not configured for this connector",
"Missing vault path",
{"error_type": "MissingVaultPath"},
)
return 0, "Vault path not configured for this connector"
# Validate vault path exists
if not os.path.exists(vault_path):
await task_logger.log_task_failure(
log_entry,
f"Vault path does not exist: {vault_path}",
"Vault path not found",
{"error_type": "VaultNotFound", "vault_path": vault_path},
)
return 0, f"Vault path does not exist: {vault_path}"
# Get configuration options
exclude_folders = connector.config.get(
"exclude_folders", [".trash", ".obsidian", "templates"]
)
# Fall back to the last path component when no explicit vault name is configured.
vault_name = connector.config.get("vault_name") or os.path.basename(vault_path)
await task_logger.log_task_progress(
log_entry,
f"Scanning Obsidian vault: {vault_name}",
{"stage": "vault_scan", "vault_path": vault_path},
)
# Scan vault for markdown files
try:
files = scan_vault(vault_path, exclude_folders)
except Exception as e:
await task_logger.log_task_failure(
log_entry,
f"Failed to scan vault: {e}",
"Vault scan error",
{"error_type": "VaultScanError"},
)
return 0, f"Failed to scan vault: {e}"
logger.info(f"Found {len(files)} markdown files in vault")
await task_logger.log_task_progress(
log_entry,
f"Found {len(files)} markdown files to process",
{"stage": "files_discovered", "file_count": len(files)},
)
# Filter by date if provided (handle "undefined" string from frontend)
# Also handle inverted dates (start > end) by skipping filtering
# A date string that is not YYYY-MM-DD makes strptime raise ValueError, which is
# handled by the generic `except Exception` at the end of this function.
start_dt = None
end_dt = None
if start_date and start_date != "undefined":
start_dt = datetime.strptime(start_date, "%Y-%m-%d").replace(tzinfo=UTC)
if end_date and end_date != "undefined":
# Make end_date inclusive (end of day)
end_dt = datetime.strptime(end_date, "%Y-%m-%d").replace(tzinfo=UTC)
# NOTE(review): microseconds stay at 0, so a file modified after 23:59:59.000000
# on the end date fails the `<= end_dt` comparison below.
end_dt = end_dt.replace(hour=23, minute=59, second=59)
# Only apply date filtering if dates are valid and in correct order
if start_dt and end_dt and start_dt > end_dt:
logger.warning(
f"start_date ({start_date}) is after end_date ({end_date}), skipping date filter"
)
else:
if start_dt:
files = [f for f in files if f["modified_at"] >= start_dt]
logger.info(
f"After start_date filter ({start_date}): {len(files)} files"
)
if end_dt:
files = [f for f in files if f["modified_at"] <= end_dt]
logger.info(f"After end_date filter ({end_date}): {len(files)} files")
logger.info(f"Processing {len(files)} files after date filtering")
indexed_count = 0
skipped_count = 0
failed_count = 0
duplicate_content_count = 0
# Heartbeat tracking - update notification periodically to prevent appearing stuck
last_heartbeat_time = time.time()
# =======================================================================
# PHASE 1: Analyze all files, create pending documents
# This makes ALL documents visible in the UI immediately with pending status
# =======================================================================
files_to_process = [] # List of dicts with document and file data
new_documents_created = False
for file_info in files:
try:
file_path = file_info["path"]
relative_path = file_info["relative_path"]
# Read file content
# NOTE(review): this is a synchronous read inside an async function. Only
# UnicodeDecodeError is treated as a skip; any other read error reaches the
# Phase 1 `except Exception` below and is counted as failed.
try:
with open(file_path, encoding="utf-8") as f:
content = f.read()
except UnicodeDecodeError:
logger.warning(f"Could not decode file {file_path}, skipping")
skipped_count += 1
continue
if not content.strip():
logger.debug(f"Empty file {file_path}, skipping")
skipped_count += 1
continue
# Parse frontmatter and extract metadata
# Links and tags are extracted from the full text (frontmatter included).
frontmatter, body_content = parse_frontmatter(content)
wiki_links = extract_wiki_links(content)
tags = extract_tags(content)
# Get title from frontmatter or filename
title = file_info["name"]
if frontmatter:
# `.get` needs a mapping: a truthy non-mapping frontmatter value would raise
# here and the file would be counted as failed by the Phase 1 handler.
# NOTE(review): the frontmatter title is used as-is, whatever its YAML type.
title = frontmatter.get("title", title)
# Also extract tags from frontmatter
fm_tags = frontmatter.get("tags", [])
if isinstance(fm_tags, list):
tags = list({*tags, *fm_tags})
elif isinstance(fm_tags, str):
tags = list({*tags, fm_tags})
# Generate unique identifier using vault name and relative path
unique_identifier = f"{vault_name}:{relative_path}"
unique_identifier_hash = generate_unique_identifier_hash(
DocumentType.OBSIDIAN_CONNECTOR,
unique_identifier,
search_space_id,
)
# Generate content hash
# Hash input is the raw file text (frontmatter included), not body_content.
content_hash = generate_content_hash(content, search_space_id)
# Check for existing document
existing_document = await check_document_by_unique_identifier(
session, unique_identifier_hash
)
if existing_document:
# Document exists - check if content has changed
if existing_document.content_hash == content_hash:
# Ensure status is ready (might have been stuck in processing/pending)
if not DocumentStatus.is_state(
existing_document.status, DocumentStatus.READY
):
existing_document.status = DocumentStatus.ready()
logger.debug(f"Note {title} unchanged, skipping")
skipped_count += 1
continue
# Queue existing document for update (will be set to processing in Phase 2)
files_to_process.append(
{
"document": existing_document,
"is_new": False,
"file_info": file_info,
"content": content,
"body_content": body_content,
"frontmatter": frontmatter,
"wiki_links": wiki_links,
"tags": tags,
"title": title,
"relative_path": relative_path,
"content_hash": content_hash,
"unique_identifier_hash": unique_identifier_hash,
}
)
continue
# Document doesn't exist by unique_identifier_hash
# Check if a document with the same content_hash exists (from another connector)
with session.no_autoflush:
duplicate_by_content = await check_duplicate_document_by_hash(
session, content_hash
)
if duplicate_by_content:
logger.info(
f"Obsidian note {title} already indexed by another connector "
f"(existing document ID: {duplicate_by_content.id}, "
f"type: {duplicate_by_content.document_type}). Skipping."
)
# Content duplicates are counted in both counters.
duplicate_content_count += 1
skipped_count += 1
continue
# Create new document with PENDING status (visible in UI immediately)
document = Document(
search_space_id=search_space_id,
title=title,
document_type=DocumentType.OBSIDIAN_CONNECTOR,
document_metadata={
"vault_name": vault_name,
"file_path": relative_path,
"connector_id": connector_id,
},
content="Pending...", # Placeholder until processed
content_hash=unique_identifier_hash, # Temporary unique value - updated when ready
unique_identifier_hash=unique_identifier_hash,
embedding=None,
chunks=[], # Empty at creation - safe for async
status=DocumentStatus.pending(), # Pending until processing starts
updated_at=get_current_timestamp(),
created_by_id=user_id,
connector_id=connector_id,
)
session.add(document)
new_documents_created = True
files_to_process.append(
{
"document": document,
"is_new": True,
"file_info": file_info,
"content": content,
"body_content": body_content,
"frontmatter": frontmatter,
"wiki_links": wiki_links,
"tags": tags,
"title": title,
"relative_path": relative_path,
"content_hash": content_hash,
"unique_identifier_hash": unique_identifier_hash,
}
)
except Exception as e:
logger.exception(
f"Error in Phase 1 for file {file_info.get('path', 'unknown')}: {e}"
)
failed_count += 1
continue
# Commit all pending documents - they all appear in UI now
# NOTE(review): this commit only runs when a new document was added; status resets made
# on unchanged existing documents above are committed by a later commit in this function.
if new_documents_created:
logger.info(
f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents"
)
await session.commit()
# =======================================================================
# PHASE 2: Process each document one by one
# Each document transitions: pending → processing → ready/failed
# =======================================================================
logger.info(f"Phase 2: Processing {len(files_to_process)} documents")
# Get LLM for summarization
# Fetched once and reused for every document in the loop below.
long_context_llm = await get_user_long_context_llm(
session, user_id, search_space_id
)
for item in files_to_process:
# Send heartbeat periodically
if on_heartbeat_callback:
current_time = time.time()
if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
await on_heartbeat_callback(indexed_count)
last_heartbeat_time = current_time
document = item["document"]
try:
# Set to PROCESSING and commit - shows "processing" in UI for THIS document only
# This commit also persists whatever the previous iteration left uncommitted.
document.status = DocumentStatus.processing()
await session.commit()
# Extract data from item
title = item["title"]
relative_path = item["relative_path"]
content = item["content"]
body_content = item["body_content"]
frontmatter = item["frontmatter"]
wiki_links = item["wiki_links"]
tags = item["tags"]
content_hash = item["content_hash"]
file_info = item["file_info"]
# Build metadata
# NOTE(review): `frontmatter` is stored as parsed by YAML; confirm every value it can
# hold (e.g. dates) is accepted by the document_metadata column.
document_metadata = {
"vault_name": vault_name,
"file_path": relative_path,
"tags": tags,
"outgoing_links": wiki_links,
"frontmatter": frontmatter,
"modified_at": file_info["modified_at"].isoformat(),
"created_at": file_info["created_at"].isoformat(),
"word_count": len(body_content.split()),
}
# Build document content with metadata
# NOTE(review): ', '.join(tags) requires every tag to be a string; frontmatter list
# entries are merged into `tags` unchecked in Phase 1.
metadata_sections = [
(
"METADATA",
[
f"Title: {title}",
f"Vault: {vault_name}",
f"Path: {relative_path}",
f"Tags: {', '.join(tags) if tags else 'None'}",
f"Links to: {', '.join(wiki_links) if wiki_links else 'None'}",
],
),
("CONTENT", [body_content]),
]
document_string = build_document_metadata_string(metadata_sections)
# Generate summary
# The summary stays an empty string when no LLM is available or the connector has
# summaries disabled.
summary_content = ""
if long_context_llm and connector.enable_summary:
summary_content, _ = await generate_document_summary(
document_string,
long_context_llm,
document_metadata,
)
# Generate embedding
embedding = embed_text(document_string)
# Add URL and summary to metadata
document_metadata["url"] = f"obsidian://{vault_name}/{relative_path}"
document_metadata["summary"] = summary_content
document_metadata["connector_id"] = connector_id
# Create chunks
chunks = await create_document_chunks(document_string)
# Update document to READY with actual content
document.title = title
document.content = document_string
# Replaces the temporary content_hash assigned in Phase 1 with the real one.
document.content_hash = content_hash
document.embedding = embedding
document.document_metadata = document_metadata
await safe_set_chunks(session, document, chunks)
document.updated_at = get_current_timestamp()
document.status = DocumentStatus.ready()
indexed_count += 1
# Batch commit every 10 documents (for ready status updates)
if indexed_count % 10 == 0:
logger.info(
f"Committing batch: {indexed_count} Obsidian notes processed so far"
)
await session.commit()
except Exception as e:
logger.exception(
f"Error processing file {item.get('file_info', {}).get('path', 'unknown')}: {e}"
)
# Mark document as failed with reason (visible in UI)
# NOTE(review): no session.rollback() here - if the exception came from a commit or
# flush, confirm the session is still usable for the following documents.
try:
document.status = DocumentStatus.failed(str(e))
document.updated_at = get_current_timestamp()
except Exception as status_error:
logger.error(
f"Failed to update document status to failed: {status_error}"
)
failed_count += 1
continue
# CRITICAL: Always update timestamp (even if 0 documents indexed) so Zero syncs
await update_connector_last_indexed(session, connector, update_last_indexed)
# Final commit for any remaining documents not yet committed in batches
logger.info(f"Final commit: Total {indexed_count} Obsidian notes processed")
try:
await session.commit()
logger.info(
"Successfully committed all Obsidian document changes to database"
)
except Exception as e:
# Handle any remaining integrity errors gracefully (race conditions, etc.)
# Detection is by substring match on the error text, not by exception type.
if (
"duplicate key value violates unique constraint" in str(e).lower()
or "uniqueviolationerror" in str(e).lower()
):
logger.warning(
f"Duplicate content_hash detected during final commit. "
f"This may occur if the same note was indexed by multiple connectors. "
f"Rolling back and continuing. Error: {e!s}"
)
await session.rollback()
# Don't fail the entire task - some documents may have been successfully indexed
else:
raise
# Build warning message if there were issues
warning_parts = []
if duplicate_content_count > 0:
warning_parts.append(f"{duplicate_content_count} duplicate")
if failed_count > 0:
warning_parts.append(f"{failed_count} failed")
warning_message = ", ".join(warning_parts) if warning_parts else None
# The returned count is the number of documents that reached READY; skipped and failed
# files are not included.
total_processed = indexed_count
await task_logger.log_task_success(
log_entry,
f"Successfully completed Obsidian vault indexing for connector {connector_id}",
{
"notes_processed": total_processed,
"documents_indexed": indexed_count,
"documents_skipped": skipped_count,
"documents_failed": failed_count,
"duplicate_content_count": duplicate_content_count,
},
)
logger.info(
f"Obsidian vault indexing completed: {indexed_count} ready, "
f"{skipped_count} skipped, {failed_count} failed "
f"({duplicate_content_count} duplicate content)"
)
return total_processed, warning_message
except SQLAlchemyError as e:
# Database errors roll the session back before the failure is logged.
logger.exception(f"Database error during Obsidian indexing: {e}")
await session.rollback()
await task_logger.log_task_failure(
log_entry,
f"Database error during Obsidian indexing: {e}",
"Database error",
{"error_type": "SQLAlchemyError"},
)
return 0, f"Database error: {e}"
except Exception as e:
# Any other error is logged and returned as the message; no rollback on this path.
logger.exception(f"Error during Obsidian indexing: {e}")
await task_logger.log_task_failure(
log_entry,
f"Error during Obsidian indexing: {e}",
"Unexpected error",
{"error_type": type(e).__name__},
)
return 0, str(e)

View file

@ -34,7 +34,6 @@ CONNECTOR_TASK_MAP = {
SearchSourceConnectorType.ELASTICSEARCH_CONNECTOR: "index_elasticsearch_documents",
SearchSourceConnectorType.WEBCRAWLER_CONNECTOR: "index_crawled_urls",
SearchSourceConnectorType.BOOKSTACK_CONNECTOR: "index_bookstack_pages",
SearchSourceConnectorType.OBSIDIAN_CONNECTOR: "index_obsidian_vault",
}
@ -100,7 +99,6 @@ def create_periodic_schedule(
index_linear_issues_task,
index_luma_events_task,
index_notion_pages_task,
index_obsidian_vault_task,
index_slack_messages_task,
)
@ -121,7 +119,6 @@ def create_periodic_schedule(
SearchSourceConnectorType.ELASTICSEARCH_CONNECTOR: index_elasticsearch_documents_task,
SearchSourceConnectorType.WEBCRAWLER_CONNECTOR: index_crawled_urls_task,
SearchSourceConnectorType.BOOKSTACK_CONNECTOR: index_bookstack_pages_task,
SearchSourceConnectorType.OBSIDIAN_CONNECTOR: index_obsidian_vault_task,
}
# Trigger the first run immediately

View file

@ -1,15 +1,11 @@
"use client";
import { AlertTriangle, Download, Info } from "lucide-react";
import { Info } from "lucide-react";
import { type FC, useEffect, useMemo, useState } from "react";
import { Alert, AlertDescription, AlertTitle } from "@/components/ui/alert";
import { Button } from "@/components/ui/button";
import { connectorsApiService, type ObsidianStats } from "@/lib/apis/connectors-api.service";
import type { ConnectorConfigProps } from "../index";
const PLUGIN_RELEASES_URL =
"https://github.com/MODSetter/SurfSense/releases?q=obsidian&expanded=true";
function formatTimestamp(value: unknown): string {
if (typeof value !== "string" || !value) return "—";
const d = new Date(value);
@ -26,78 +22,17 @@ function formatTimestamp(value: unknown): string {
* web UI doesn't expose a Name input or a Save button for Obsidian (the
* latter is suppressed in `connector-edit-view.tsx`).
*
* Renders one of three modes depending on the connector's `config`:
*
* 1. **Plugin connector** (`config.source === "plugin"`) read-only stats
* panel showing what the plugin most recently reported.
* 2. **Legacy server-path connector** (`config.legacy === true`, set by the
* Phase 3 alembic) migration banner, an "Install Plugin" CTA, and a
* short "how to migrate" checklist that ends with the user pressing the
* standard Disconnect button (which deletes this connector along with
* every document it previously indexed).
* 3. **Unknown** fallback for rows that escaped the alembic; suggests a
* clean re-install.
* Renders plugin stats when connector metadata comes from the plugin.
* If metadata is missing or malformed, we show a recovery hint.
*/
export const ObsidianConfig: FC<ConnectorConfigProps> = ({ connector }) => {
// A missing config is treated as an empty object, so both checks below are false
// and the component falls through to the unknown-connector state.
const config = (connector.config ?? {}) as Record<string, unknown>;
// Strict comparisons: only the boolean `true` / the exact string "plugin" count.
const isLegacy = config.legacy === true;
const isPlugin = config.source === "plugin";
// The legacy marker is checked first, so it wins when both markers are present.
if (isLegacy) return <LegacyBanner />;
if (isPlugin) return <PluginStats config={config} />;
return <UnknownConnectorState />;
};
// Static panel for connectors flagged as legacy: a warning alert, a link to the
// plugin releases page (PLUGIN_RELEASES_URL, opened in a new tab), and a
// three-step migration checklist with a data-loss warning about Disconnect.
// Takes no props and holds no state.
// NOTE(review): a <Button> is rendered inside an <a>; confirm this nesting is
// intended, since a button inside a link is interactive content within an anchor.
const LegacyBanner: FC = () => {
return (
<div className="space-y-4">
<Alert className="border-amber-500/40 bg-amber-500/10">
<AlertTriangle className="size-4 shrink-0 text-amber-500" />
<AlertTitle className="text-xs sm:text-sm">
Sync stopped install the plugin to migrate
</AlertTitle>
<AlertDescription className="text-[11px] sm:text-xs leading-relaxed">
This Obsidian connector used the legacy server-path scanner, which has been removed. The
notes already indexed remain searchable, but they no longer reflect changes made in your
vault.
</AlertDescription>
</Alert>
<a
href={PLUGIN_RELEASES_URL}
target="_blank"
rel="noopener noreferrer"
className="inline-flex"
>
<Button type="button" variant="outline" size="sm" className="gap-2">
<Download className="size-3.5" />
Install the plugin
</Button>
</a>
<div className="rounded-xl border border-border bg-slate-400/5 p-3 sm:p-6 dark:bg-white/5">
<h3 className="mb-3 text-sm font-medium sm:text-base">How to migrate</h3>
<ol className="list-decimal space-y-2 pl-5 text-[11px] leading-relaxed text-muted-foreground sm:text-xs">
<li>Install the SurfSense Obsidian plugin using the button above.</li>
<li>
In Obsidian, open Settings SurfSense, sign in, pick a search space, and wait for the
first sync to finish.
</li>
<li>
Confirm the new "Obsidian — &lt;vault&gt;" connector shows your notes, then return here
and use the Disconnect button below to remove this legacy connector.
</li>
</ol>
<p className="mt-3 text-[11px] leading-relaxed text-amber-600 dark:text-amber-400 sm:text-xs">
Heads up: Disconnect also deletes every document this connector previously indexed. Make
sure the plugin has finished its first sync before you disconnect, otherwise your Obsidian
notes will disappear from search until the plugin re-indexes them.
</p>
</div>
</div>
);
};
const PluginStats: FC<{ config: Record<string, unknown> }> = ({ config }) => {
const vaultId = typeof config.vault_id === "string" ? config.vault_id : null;
const [stats, setStats] = useState<ObsidianStats | null>(null);
@ -179,8 +114,8 @@ const UnknownConnectorState: FC = () => (
<Info className="size-4 shrink-0" />
<AlertTitle className="text-xs sm:text-sm">Unrecognized config</AlertTitle>
<AlertDescription className="text-[11px] sm:text-xs">
This connector has neither plugin metadata nor a legacy marker. It may predate the migration
you can safely delete it and re-install the SurfSense Obsidian plugin to resume syncing.
This connector is missing plugin metadata. Delete it, then reconnect your vault from the
SurfSense Obsidian plugin so sync can resume.
</AlertDescription>
</Alert>
);

View file

@ -111,7 +111,9 @@ export const ConnectorConnectView: FC<ConnectorConnectViewProps> = ({
: getConnectorTypeDisplay(connectorType)}
</h2>
<p className="text-xs sm:text-base text-muted-foreground mt-1">
Enter your connection details
{connectorType === "OBSIDIAN_CONNECTOR"
? "Follow the plugin setup steps below"
: "Enter your connection details"}
</p>
</div>
</div>

View file

@ -1,143 +1,60 @@
---
title: Obsidian
description: Connect your Obsidian vault to SurfSense
description: Sync your Obsidian vault with the SurfSense plugin
---
# Obsidian Integration Setup Guide
# Obsidian Plugin Setup Guide
This guide walks you through connecting your Obsidian vault to SurfSense for note search and AI-powered insights.
<Callout type="warn">
This connector requires direct file system access and only works with self-hosted SurfSense installations.
</Callout>
SurfSense integrates with Obsidian through the SurfSense Obsidian plugin.
The old server-side vault path scanner is no longer supported.
## How it works
The Obsidian connector scans your local Obsidian vault directory and indexes all Markdown files. It preserves your note structure and extracts metadata from YAML frontmatter.
The plugin runs inside your Obsidian app and pushes note updates to SurfSense over HTTPS.
This works with both cloud and self-hosted SurfSense deployments, and from Obsidian desktop and mobile apps.
- For follow-up indexing runs, the connector uses content hashing to skip unchanged files for faster sync.
- Indexing should be configured to run periodically, so updates should appear in your search results within minutes.
---
## What Gets Indexed
## What gets indexed
| Content Type | Description |
|--------------|-------------|
| Markdown Files | All `.md` files in your vault |
| Frontmatter | YAML metadata (title, tags, aliases, dates) |
| Wiki Links | Links between notes (`[[note]]`) |
| Inline Tags | Tags throughout your notes (`#tag`) |
| Note Content | Full content with intelligent chunking |
| Markdown files | Note content (`.md`) |
| Frontmatter | YAML metadata like title, tags, aliases, dates |
| Wiki links | Linked notes (`[[note]]`) |
| Tags | Inline and frontmatter tags |
| Vault metadata | Vault and path metadata used for deep links and sync state |
<Callout type="warn">
Binary files and attachments are not indexed by default. Enable "Include Attachments" to index embedded files.
</Callout>
## Quick start
---
## Quick Start (Local Installation)
1. Navigate to **Connectors** → **Add Connector** → **Obsidian**
2. Enter your vault path: `/Users/yourname/Documents/MyVault`
3. Enter a vault name (e.g., `Personal Notes`)
4. Click **Connect Obsidian**
1. Open **Connectors** in SurfSense and choose **Obsidian**.
2. Click **Open plugin releases** and install the latest SurfSense Obsidian plugin.
3. In Obsidian, open **Settings → SurfSense**.
4. Paste your SurfSense API token from the connector setup panel.
5. Paste your SurfSense backend URL in the plugin's **Server URL** setting.
6. Choose the Search Space in the plugin, then run the first sync.
7. Confirm the connector appears as **Obsidian — &lt;vault&gt;** in SurfSense.
<Callout type="info">
Find your vault path: In Obsidian, right-click any note → "Reveal in Finder" (macOS) or "Show in Explorer" (Windows).
You do not create or configure a vault path in the web UI. The connector row is created automatically when the plugin calls `/api/v1/obsidian/connect`.
</Callout>
<Callout type="info" title="Periodic Sync">
Enable periodic sync to automatically re-index notes when content changes. Available frequencies: Every 5 minutes, 15 minutes, hourly, every 6 hours, daily, or weekly.
</Callout>
## Self-hosted notes
---
## Docker Setup
For Docker deployments, you need to mount your Obsidian vault as a volume.
### Step 1: Update docker-compose.yml
Add your vault as a volume mount to the SurfSense backend service:
```yaml
services:
surfsense:
# ... other config
volumes:
- /path/to/your/obsidian/vault:/app/obsidian_vaults/my-vault:ro
```
<Callout type="info">
The `:ro` flag mounts the vault as read-only, which is recommended for security.
</Callout>
### Step 2: Configure the Connector
Use the **container path** (not your local path) when setting up the connector:
| Your Local Path | Container Path (use this) |
|-----------------|---------------------------|
| `/Users/john/Documents/MyVault` | `/app/obsidian_vaults/my-vault` |
| `C:\Users\john\Documents\MyVault` | `/app/obsidian_vaults/my-vault` |
### Example: Multiple Vaults
```yaml
volumes:
- /Users/john/Documents/PersonalNotes:/app/obsidian_vaults/personal:ro
- /Users/john/Documents/WorkNotes:/app/obsidian_vaults/work:ro
```
Then create separate connectors for each vault using `/app/obsidian_vaults/personal` and `/app/obsidian_vaults/work`.
---
## Connector Configuration
| Field | Description | Required |
|-------|-------------|----------|
| **Connector Name** | A friendly name to identify this connector | Yes |
| **Vault Path** | Absolute path to your vault (container path for Docker) | Yes |
| **Vault Name** | Display name for your vault in search results | Yes |
| **Exclude Folders** | Comma-separated folder names to skip | No |
| **Include Attachments** | Index embedded files (images, PDFs) | No |
---
## Recommended Exclusions
Common folders to exclude from indexing:
| Folder | Reason |
|--------|--------|
| `.obsidian` | Obsidian config files (always exclude) |
| `.trash` | Obsidian's trash folder |
| `templates` | Template files you don't want searchable |
| `daily-notes` | If you want to exclude daily notes |
| `attachments` | If not using "Include Attachments" |
Default exclusions: `.obsidian,.trash`
---
- Use a public or LAN backend URL that the device running Obsidian can reach.
- No Docker bind mount for the vault is required.
- If your instance is served over HTTPS, make sure the URL matches the certificate and that the certificate is trusted by the device running Obsidian.
## Troubleshooting
**Vault not found / Permission denied**
- Verify the path exists and is accessible
- For Docker: ensure the volume is mounted correctly in `docker-compose.yml`
- Check file permissions: SurfSense needs read access to the vault directory
**Plugin connects but no files appear**
- Verify the plugin is pointed to the correct Search Space.
- Trigger a manual sync from the plugin settings.
- Confirm your API token is valid and not expired.
**No notes indexed**
- Ensure your vault contains `.md` files
- Check that notes aren't in excluded folders
- Verify the path points to the vault root (contains `.obsidian` folder)
**Unauthorized / 401 errors**
- Regenerate and paste a fresh API token from SurfSense.
- Ensure the token belongs to the account that has access to the Search Space you are syncing into.
**Changes not appearing**
- Wait for the next sync cycle, or manually trigger re-indexing
- For Docker: restart the container if you modified volume mounts
**Docker: "path not found" error**
- Use the container path (`/app/obsidian_vaults/...`), not your local path
- Verify the volume mount in `docker-compose.yml` matches
**Cannot reach server URL**
- Check that the backend URL is reachable from the Obsidian device.
- For self-hosted setups, verify firewall and reverse proxy rules.
- Avoid using localhost unless SurfSense and Obsidian run on the same machine.