mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-08 20:25:19 +02:00
refactor: remove legacy Obsidian connector support
This commit is contained in:
parent
16ea8e2401
commit
99623a85d5
10 changed files with 44 additions and 1046 deletions
|
|
@ -152,7 +152,6 @@ celery_app.conf.update(
|
|||
"index_elasticsearch_documents": {"queue": CONNECTORS_QUEUE},
|
||||
"index_crawled_urls": {"queue": CONNECTORS_QUEUE},
|
||||
"index_bookstack_pages": {"queue": CONNECTORS_QUEUE},
|
||||
"index_obsidian_vault": {"queue": CONNECTORS_QUEUE},
|
||||
"index_composio_connector": {"queue": CONNECTORS_QUEUE},
|
||||
# Everything else (document processing, podcasts, reindexing,
|
||||
# schedule checker, cleanup) stays on the default fast queue.
|
||||
|
|
|
|||
|
|
@ -1157,25 +1157,6 @@ async def index_connector_content(
|
|||
)
|
||||
response_message = "Web page indexing started in the background."
|
||||
|
||||
elif connector.connector_type == SearchSourceConnectorType.OBSIDIAN_CONNECTOR:
|
||||
from app.config import config as app_config
|
||||
from app.tasks.celery_tasks.connector_tasks import index_obsidian_vault_task
|
||||
|
||||
# Obsidian connector only available in self-hosted mode
|
||||
if not app_config.is_self_hosted():
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="Obsidian connector is only available in self-hosted mode",
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"Triggering Obsidian vault indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}"
|
||||
)
|
||||
index_obsidian_vault_task.delay(
|
||||
connector_id, search_space_id, str(user.id), indexing_from, indexing_to
|
||||
)
|
||||
response_message = "Obsidian vault indexing started in the background."
|
||||
|
||||
elif (
|
||||
connector.connector_type
|
||||
== SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR
|
||||
|
|
@ -3048,59 +3029,6 @@ async def run_bookstack_indexing(
|
|||
)
|
||||
|
||||
|
||||
# Add new helper functions for Obsidian indexing
|
||||
async def run_obsidian_indexing_with_new_session(
    connector_id: int,
    search_space_id: int,
    user_id: str,
    start_date: str,
    end_date: str,
):
    """
    Run Obsidian indexing inside a freshly opened database session.

    Opens a session from ``async_session_maker``, delegates the work to
    ``run_obsidian_indexing`` and logs when the background task starts and
    finishes.

    Args:
        connector_id: ID of the Obsidian connector
        search_space_id: ID of the search space
        user_id: ID of the user
        start_date: Start date for indexing
        end_date: End date for indexing
    """
    started_message = f"Background task started: Indexing Obsidian connector {connector_id} into space {search_space_id} from {start_date} to {end_date}"
    logger.info(started_message)

    # The session only lives for the duration of this single indexing run.
    async with async_session_maker() as db_session:
        await run_obsidian_indexing(
            session=db_session,
            connector_id=connector_id,
            search_space_id=search_space_id,
            user_id=user_id,
            start_date=start_date,
            end_date=end_date,
        )

    finished_message = (
        f"Background task finished: Indexing Obsidian connector {connector_id}"
    )
    logger.info(finished_message)
|
||||
|
||||
|
||||
async def run_obsidian_indexing(
    session: AsyncSession,
    connector_id: int,
    search_space_id: int,
    user_id: str,
    start_date: str,
    end_date: str,
):
    """
    Index an Obsidian vault through the shared notification-aware runner.

    Args:
        session: Database session
        connector_id: ID of the Obsidian connector
        search_space_id: ID of the search space
        user_id: ID of the user
        start_date: Start date for indexing
        end_date: End date for indexing
    """
    # Imported lazily, as in the rest of this module's indexing helpers.
    from app.tasks.connector_indexers import index_obsidian_vault

    runner_options = {
        "session": session,
        "connector_id": connector_id,
        "search_space_id": search_space_id,
        "user_id": user_id,
        "start_date": start_date,
        "end_date": end_date,
        "indexing_function": index_obsidian_vault,
        "update_timestamp_func": _update_connector_timestamp_by_id,
        # The Obsidian indexer accepts an ``on_heartbeat_callback`` argument.
        "supports_heartbeat_callback": True,
    }
    await _run_indexing_with_notifications(**runner_options)
|
||||
|
||||
|
||||
async def run_composio_indexing_with_new_session(
|
||||
connector_id: int,
|
||||
search_space_id: int,
|
||||
|
|
|
|||
|
|
@ -1,59 +0,0 @@
|
|||
"""
|
||||
Obsidian Connector Credentials Schema.
|
||||
|
||||
Obsidian is a local-first note-taking app that stores notes as markdown files.
|
||||
This connector supports indexing from local file system (self-hosted only).
|
||||
"""
|
||||
|
||||
from pydantic import BaseModel, field_validator
|
||||
|
||||
|
||||
class ObsidianAuthCredentialsBase(BaseModel):
    """
    Credentials/configuration for the Obsidian connector.

    Since Obsidian vaults are local directories, this schema primarily
    holds the vault path and configuration options rather than API tokens.
    """

    # Pydantic does not run validators on default values unless asked to.
    # Without this, omitting ``exclude_folders`` left it as ``None`` and the
    # default exclusion list in ``parse_exclude_folders`` was only applied
    # when ``None`` was passed explicitly.
    model_config = {"validate_default": True}

    # Path of the vault directory (required, whitespace-stripped).
    vault_path: str
    # Optional display name for the vault.
    vault_name: str | None = None
    # Folder names to skip; normalized by ``parse_exclude_folders``.
    exclude_folders: list[str] | None = None
    # Whether attachments should be included.
    include_attachments: bool = False

    @field_validator("vault_path")
    @classmethod
    def validate_vault_path(cls, v: str) -> str:
        """
        Ensure vault path is provided and stripped of whitespace.

        Raises:
            ValueError: If the path is empty or whitespace only.
        """
        stripped = v.strip() if v else ""
        if not stripped:
            raise ValueError("Vault path is required")
        return stripped

    @field_validator("exclude_folders", mode="before")
    @classmethod
    def parse_exclude_folders(cls, v):
        """
        Normalize ``exclude_folders`` before type validation.

        ``None`` becomes the default exclusion list, a comma-separated
        string is split into its non-empty, stripped parts, and any other
        value is passed through unchanged for regular validation.
        """
        if v is None:
            return [".trash", ".obsidian", "templates"]
        if isinstance(v, str):
            return [f.strip() for f in v.split(",") if f.strip()]
        return v

    def to_dict(self) -> dict:
        """Convert credentials to dictionary for storage."""
        return {
            "vault_path": self.vault_path,
            "vault_name": self.vault_name,
            "exclude_folders": self.exclude_folders,
            "include_attachments": self.include_attachments,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "ObsidianAuthCredentialsBase":
        """
        Create credentials from dictionary.

        A missing ``vault_path`` is passed as an empty string and is
        therefore rejected by ``validate_vault_path``.
        """
        return cls(
            vault_path=data.get("vault_path", ""),
            vault_name=data.get("vault_name"),
            exclude_folders=data.get("exclude_folders"),
            include_attachments=data.get("include_attachments", False),
        )
|
||||
|
|
@ -883,49 +883,6 @@ async def _index_bookstack_pages(
|
|||
)
|
||||
|
||||
|
||||
@celery_app.task(name="index_obsidian_vault", bind=True)
def index_obsidian_vault_task(
    self,
    connector_id: int,
    search_space_id: int,
    user_id: str,
    start_date: str,
    end_date: str,
):
    """
    Celery task to index Obsidian vault notes.

    Runs the async ``_index_obsidian_vault`` coroutine to completion on a
    dedicated event loop that is closed once the run ends, whether it
    succeeded or raised.
    """
    import asyncio

    event_loop = asyncio.new_event_loop()
    asyncio.set_event_loop(event_loop)

    try:
        indexing_run = _index_obsidian_vault(
            connector_id, search_space_id, user_id, start_date, end_date
        )
        event_loop.run_until_complete(indexing_run)
    finally:
        event_loop.close()
|
||||
|
||||
|
||||
async def _index_obsidian_vault(
    connector_id: int,
    search_space_id: int,
    user_id: str,
    start_date: str,
    end_date: str,
):
    """Open a Celery-side database session and run Obsidian indexing in it."""
    # Imported inside the function, matching the original placement.
    from app.routes.search_source_connectors_routes import (
        run_obsidian_indexing,
    )

    session_factory = get_celery_session_maker()
    async with session_factory() as db_session:
        await run_obsidian_indexing(
            db_session, connector_id, search_space_id, user_id, start_date, end_date
        )
|
||||
|
||||
|
||||
@celery_app.task(name="index_composio_connector", bind=True)
|
||||
def index_composio_connector_task(
|
||||
self,
|
||||
|
|
|
|||
|
|
@ -46,7 +46,6 @@ from .linear_indexer import index_linear_issues
|
|||
# Documentation and knowledge management
|
||||
from .luma_indexer import index_luma_events
|
||||
from .notion_indexer import index_notion_pages
|
||||
from .obsidian_indexer import index_obsidian_vault
|
||||
from .slack_indexer import index_slack_messages
|
||||
from .webcrawler_indexer import index_crawled_urls
|
||||
|
||||
|
|
@ -69,7 +68,6 @@ __all__ = [ # noqa: RUF022
|
|||
"index_linear_issues",
|
||||
# Documentation and knowledge management
|
||||
"index_notion_pages",
|
||||
"index_obsidian_vault",
|
||||
"index_crawled_urls",
|
||||
# Communication platforms
|
||||
"index_slack_messages",
|
||||
|
|
|
|||
|
|
@ -1,676 +0,0 @@
|
|||
"""
|
||||
Obsidian connector indexer.
|
||||
|
||||
Indexes markdown notes from a local Obsidian vault.
|
||||
This connector is only available in self-hosted mode.
|
||||
|
||||
Implements 2-phase document status updates for real-time UI feedback:
|
||||
- Phase 1: Create all documents with 'pending' status (visible in UI immediately)
|
||||
- Phase 2: Process each document: pending → processing → ready/failed
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from collections.abc import Awaitable, Callable
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.config import config
|
||||
from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
|
||||
from app.services.llm_service import get_user_long_context_llm
|
||||
from app.services.task_logging_service import TaskLoggingService
|
||||
from app.utils.document_converters import (
|
||||
create_document_chunks,
|
||||
embed_text,
|
||||
generate_content_hash,
|
||||
generate_document_summary,
|
||||
generate_unique_identifier_hash,
|
||||
)
|
||||
|
||||
from .base import (
|
||||
build_document_metadata_string,
|
||||
check_document_by_unique_identifier,
|
||||
check_duplicate_document_by_hash,
|
||||
get_connector_by_id,
|
||||
get_current_timestamp,
|
||||
logger,
|
||||
safe_set_chunks,
|
||||
update_connector_last_indexed,
|
||||
)
|
||||
|
||||
# Type hint for heartbeat callback
|
||||
HeartbeatCallbackType = Callable[[int], Awaitable[None]]
|
||||
|
||||
# Heartbeat interval in seconds
|
||||
HEARTBEAT_INTERVAL_SECONDS = 30
|
||||
|
||||
|
||||
def parse_frontmatter(content: str) -> tuple[dict | None, str]:
|
||||
"""
|
||||
Parse YAML frontmatter from markdown content.
|
||||
|
||||
Args:
|
||||
content: The full markdown content
|
||||
|
||||
Returns:
|
||||
Tuple of (frontmatter dict or None, content without frontmatter)
|
||||
"""
|
||||
if not content.startswith("---"):
|
||||
return None, content
|
||||
|
||||
# Find the closing ---
|
||||
end_match = re.search(r"\n---\n", content[3:])
|
||||
if not end_match:
|
||||
return None, content
|
||||
|
||||
frontmatter_str = content[3 : end_match.start() + 3]
|
||||
remaining_content = content[end_match.end() + 3 :]
|
||||
|
||||
try:
|
||||
frontmatter = yaml.safe_load(frontmatter_str)
|
||||
return frontmatter, remaining_content.strip()
|
||||
except yaml.YAMLError:
|
||||
return None, content
|
||||
|
||||
|
||||
def extract_wiki_links(content: str) -> list[str]:
    """
    Extract [[wiki-style links]] from content.

    Args:
        content: Markdown content

    Returns:
        List of unique link targets in order of first appearance. For
        ``[[link|alias]]`` only the ``link`` part is returned.
    """
    # Match [[link]] or [[link|alias]]
    pattern = r"\[\[([^\]|]+)(?:\|[^\]]+)?\]\]"
    matches = re.findall(pattern, content)
    # dict.fromkeys de-duplicates while keeping a stable order; a plain set
    # would reorder the links from one run to the next.
    return list(dict.fromkeys(matches))
|
||||
|
||||
|
||||
def extract_tags(content: str) -> list[str]:
    """
    Extract inline #tags from content.

    Only ``#tag`` tokens in the text are matched; tags declared in a YAML
    frontmatter ``tags:`` key are not parsed here.

    Args:
        content: Markdown content

    Returns:
        List of unique tags (without # prefix) in order of first appearance
    """
    # Match #tag but not ## headers: the '#' must not follow a non-space
    # character and must be followed by a letter.
    pattern = r"(?<!\S)#([a-zA-Z][a-zA-Z0-9_/-]*)"
    matches = re.findall(pattern, content)
    # dict.fromkeys de-duplicates while keeping a stable order; a plain set
    # would reorder the tags from one run to the next.
    return list(dict.fromkeys(matches))
|
||||
|
||||
|
||||
def scan_vault(
|
||||
vault_path: str,
|
||||
exclude_folders: list[str] | None = None,
|
||||
) -> list[dict]:
|
||||
"""
|
||||
Scan an Obsidian vault for markdown files.
|
||||
|
||||
Args:
|
||||
vault_path: Path to the Obsidian vault
|
||||
exclude_folders: List of folder names to exclude
|
||||
|
||||
Returns:
|
||||
List of file info dicts with path, name, modified time
|
||||
"""
|
||||
if exclude_folders is None:
|
||||
exclude_folders = [".trash", ".obsidian", "templates"]
|
||||
|
||||
vault = Path(vault_path)
|
||||
if not vault.exists():
|
||||
raise ValueError(f"Vault path does not exist: {vault_path}")
|
||||
|
||||
files = []
|
||||
for md_file in vault.rglob("*.md"):
|
||||
# Check if file is in an excluded folder
|
||||
relative_path = md_file.relative_to(vault)
|
||||
parts = relative_path.parts
|
||||
|
||||
if any(excluded in parts for excluded in exclude_folders):
|
||||
continue
|
||||
|
||||
try:
|
||||
stat = md_file.stat()
|
||||
files.append(
|
||||
{
|
||||
"path": str(md_file),
|
||||
"relative_path": str(relative_path),
|
||||
"name": md_file.stem,
|
||||
"modified_at": datetime.fromtimestamp(stat.st_mtime, tz=UTC),
|
||||
"created_at": datetime.fromtimestamp(stat.st_ctime, tz=UTC),
|
||||
"size": stat.st_size,
|
||||
}
|
||||
)
|
||||
except OSError as e:
|
||||
logger.warning(f"Could not stat file {md_file}: {e}")
|
||||
|
||||
return files
|
||||
|
||||
|
||||
def _to_json_safe(value):
    """Recursively convert *value* into plain str/int/float/bool/None/list/dict."""
    if value is None or isinstance(value, (str, int, float, bool)):
        return value
    if isinstance(value, dict):
        return {str(key): _to_json_safe(item) for key, item in value.items()}
    if isinstance(value, (list, tuple, set)):
        return [_to_json_safe(item) for item in value]
    # Anything else (e.g. dates produced by the YAML parser) is stringified.
    return str(value)


async def index_obsidian_vault(
    session: AsyncSession,
    connector_id: int,
    search_space_id: int,
    user_id: str,
    start_date: str | None = None,
    end_date: str | None = None,
    update_last_indexed: bool = True,
    on_heartbeat_callback: HeartbeatCallbackType | None = None,
) -> tuple[int, str | None]:
    """
    Index notes from a local Obsidian vault.

    This indexer is only available in self-hosted mode as it requires
    direct file system access to the user's Obsidian vault.

    The work is done in two phases: Phase 1 reads every markdown file and
    creates a ``pending`` document for each new note; Phase 2 processes the
    queued notes one by one (pending → processing → ready/failed).

    Args:
        session: Database session
        connector_id: ID of the Obsidian connector
        search_space_id: ID of the search space to store documents in
        user_id: ID of the user
        start_date: Start date for filtering (YYYY-MM-DD format) - optional.
            The literal string "undefined" is treated as not provided.
        end_date: End date for filtering (YYYY-MM-DD format) - optional,
            inclusive. The literal string "undefined" is treated as not
            provided.
        update_last_indexed: Whether to update the last_indexed_at timestamp
        on_heartbeat_callback: Optional callback to update notification during
            long-running indexing. It is awaited with the number of notes
            indexed so far, at most once per HEARTBEAT_INTERVAL_SECONDS.

    Returns:
        Tuple containing (number of documents indexed, error message or None).
        On success the second element is ``None`` or a short warning such as
        "2 duplicate, 1 failed"; on failure the count is 0 and the second
        element describes the error.
    """
    task_logger = TaskLoggingService(session, search_space_id)

    # Check if self-hosted mode
    if not config.is_self_hosted():
        return 0, "Obsidian connector is only available in self-hosted mode"

    # Log task start
    log_entry = await task_logger.log_task_start(
        task_name="obsidian_vault_indexing",
        source="connector_indexing_task",
        message=f"Starting Obsidian vault indexing for connector {connector_id}",
        metadata={
            "connector_id": connector_id,
            "user_id": str(user_id),
            "start_date": start_date,
            "end_date": end_date,
        },
    )

    try:
        # Get the connector
        await task_logger.log_task_progress(
            log_entry,
            f"Retrieving Obsidian connector {connector_id} from database",
            {"stage": "connector_retrieval"},
        )

        connector = await get_connector_by_id(
            session, connector_id, SearchSourceConnectorType.OBSIDIAN_CONNECTOR
        )

        if not connector:
            await task_logger.log_task_failure(
                log_entry,
                f"Connector with ID {connector_id} not found or is not an Obsidian connector",
                "Connector not found",
                {"error_type": "ConnectorNotFound"},
            )
            return (
                0,
                f"Connector with ID {connector_id} not found or is not an Obsidian connector",
            )

        # Get vault path from connector config
        vault_path = connector.config.get("vault_path")
        if not vault_path:
            await task_logger.log_task_failure(
                log_entry,
                "Vault path not configured for this connector",
                "Missing vault path",
                {"error_type": "MissingVaultPath"},
            )
            return 0, "Vault path not configured for this connector"

        # Validate vault path exists
        if not os.path.exists(vault_path):
            await task_logger.log_task_failure(
                log_entry,
                f"Vault path does not exist: {vault_path}",
                "Vault path not found",
                {"error_type": "VaultNotFound", "vault_path": vault_path},
            )
            return 0, f"Vault path does not exist: {vault_path}"

        # Get configuration options
        exclude_folders = connector.config.get(
            "exclude_folders", [".trash", ".obsidian", "templates"]
        )
        vault_name = connector.config.get("vault_name") or os.path.basename(vault_path)

        await task_logger.log_task_progress(
            log_entry,
            f"Scanning Obsidian vault: {vault_name}",
            {"stage": "vault_scan", "vault_path": vault_path},
        )

        # Scan vault for markdown files
        try:
            files = scan_vault(vault_path, exclude_folders)
        except Exception as e:
            await task_logger.log_task_failure(
                log_entry,
                f"Failed to scan vault: {e}",
                "Vault scan error",
                {"error_type": "VaultScanError"},
            )
            return 0, f"Failed to scan vault: {e}"

        logger.info(f"Found {len(files)} markdown files in vault")

        await task_logger.log_task_progress(
            log_entry,
            f"Found {len(files)} markdown files to process",
            {"stage": "files_discovered", "file_count": len(files)},
        )

        # Filter by date if provided (handle "undefined" string from frontend)
        # Also handle inverted dates (start > end) by skipping filtering
        start_dt = None
        end_dt = None

        if start_date and start_date != "undefined":
            start_dt = datetime.strptime(start_date, "%Y-%m-%d").replace(tzinfo=UTC)

        if end_date and end_date != "undefined":
            # Make end_date inclusive (end of day). Microseconds are set too,
            # otherwise files modified in the final second of the day
            # (23:59:59.x) would be filtered out.
            end_dt = datetime.strptime(end_date, "%Y-%m-%d").replace(tzinfo=UTC)
            end_dt = end_dt.replace(
                hour=23, minute=59, second=59, microsecond=999999
            )

        # Only apply date filtering if dates are valid and in correct order
        if start_dt and end_dt and start_dt > end_dt:
            logger.warning(
                f"start_date ({start_date}) is after end_date ({end_date}), skipping date filter"
            )
        else:
            if start_dt:
                files = [f for f in files if f["modified_at"] >= start_dt]
                logger.info(
                    f"After start_date filter ({start_date}): {len(files)} files"
                )
            if end_dt:
                files = [f for f in files if f["modified_at"] <= end_dt]
                logger.info(f"After end_date filter ({end_date}): {len(files)} files")

        logger.info(f"Processing {len(files)} files after date filtering")

        indexed_count = 0
        skipped_count = 0
        failed_count = 0
        duplicate_content_count = 0

        # Heartbeat tracking - update notification periodically to prevent appearing stuck
        last_heartbeat_time = time.time()

        # =======================================================================
        # PHASE 1: Analyze all files, create pending documents
        # This makes ALL documents visible in the UI immediately with pending status
        # =======================================================================
        files_to_process = []  # List of dicts with document and file data
        new_documents_created = False

        for file_info in files:
            try:
                file_path = file_info["path"]
                relative_path = file_info["relative_path"]

                # Read file content
                try:
                    with open(file_path, encoding="utf-8") as f:
                        content = f.read()
                except UnicodeDecodeError:
                    logger.warning(f"Could not decode file {file_path}, skipping")
                    skipped_count += 1
                    continue

                if not content.strip():
                    logger.debug(f"Empty file {file_path}, skipping")
                    skipped_count += 1
                    continue

                # Parse frontmatter and extract metadata
                frontmatter, body_content = parse_frontmatter(content)
                wiki_links = extract_wiki_links(content)
                tags = extract_tags(content)

                # Get title from frontmatter or filename
                title = file_info["name"]
                if isinstance(frontmatter, dict):
                    # Ignore a missing/empty title and coerce non-string
                    # titles (YAML may yield numbers or dates) to text.
                    fm_title = frontmatter.get("title")
                    if fm_title is not None and str(fm_title).strip():
                        title = str(fm_title).strip()

                    # Also extract tags from frontmatter. YAML list items may
                    # be non-strings (or null), so normalize them to text;
                    # dict.fromkeys keeps a stable, de-duplicated order.
                    fm_tags = frontmatter.get("tags", [])
                    if isinstance(fm_tags, list):
                        fm_tag_names = [str(t) for t in fm_tags if t is not None]
                        tags = list(dict.fromkeys([*tags, *fm_tag_names]))
                    elif isinstance(fm_tags, str):
                        tags = list(dict.fromkeys([*tags, fm_tags]))
                else:
                    # Frontmatter that is not a mapping carries no usable
                    # metadata; do not call .get() on it or store it.
                    frontmatter = None

                # Generate unique identifier using vault name and relative path
                unique_identifier = f"{vault_name}:{relative_path}"
                unique_identifier_hash = generate_unique_identifier_hash(
                    DocumentType.OBSIDIAN_CONNECTOR,
                    unique_identifier,
                    search_space_id,
                )

                # Generate content hash
                content_hash = generate_content_hash(content, search_space_id)

                # Check for existing document
                existing_document = await check_document_by_unique_identifier(
                    session, unique_identifier_hash
                )

                if existing_document:
                    # Document exists - check if content has changed
                    if existing_document.content_hash == content_hash:
                        # Ensure status is ready (might have been stuck in processing/pending)
                        if not DocumentStatus.is_state(
                            existing_document.status, DocumentStatus.READY
                        ):
                            existing_document.status = DocumentStatus.ready()
                        logger.debug(f"Note {title} unchanged, skipping")
                        skipped_count += 1
                        continue

                    # Queue existing document for update (will be set to processing in Phase 2)
                    files_to_process.append(
                        {
                            "document": existing_document,
                            "is_new": False,
                            "file_info": file_info,
                            "content": content,
                            "body_content": body_content,
                            "frontmatter": frontmatter,
                            "wiki_links": wiki_links,
                            "tags": tags,
                            "title": title,
                            "relative_path": relative_path,
                            "content_hash": content_hash,
                            "unique_identifier_hash": unique_identifier_hash,
                        }
                    )
                    continue

                # Document doesn't exist by unique_identifier_hash
                # Check if a document with the same content_hash exists (from another connector)
                with session.no_autoflush:
                    duplicate_by_content = await check_duplicate_document_by_hash(
                        session, content_hash
                    )

                if duplicate_by_content:
                    logger.info(
                        f"Obsidian note {title} already indexed by another connector "
                        f"(existing document ID: {duplicate_by_content.id}, "
                        f"type: {duplicate_by_content.document_type}). Skipping."
                    )
                    duplicate_content_count += 1
                    skipped_count += 1
                    continue

                # Create new document with PENDING status (visible in UI immediately)
                document = Document(
                    search_space_id=search_space_id,
                    title=title,
                    document_type=DocumentType.OBSIDIAN_CONNECTOR,
                    document_metadata={
                        "vault_name": vault_name,
                        "file_path": relative_path,
                        "connector_id": connector_id,
                    },
                    content="Pending...",  # Placeholder until processed
                    content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
                    unique_identifier_hash=unique_identifier_hash,
                    embedding=None,
                    chunks=[],  # Empty at creation - safe for async
                    status=DocumentStatus.pending(),  # Pending until processing starts
                    updated_at=get_current_timestamp(),
                    created_by_id=user_id,
                    connector_id=connector_id,
                )
                session.add(document)
                new_documents_created = True

                files_to_process.append(
                    {
                        "document": document,
                        "is_new": True,
                        "file_info": file_info,
                        "content": content,
                        "body_content": body_content,
                        "frontmatter": frontmatter,
                        "wiki_links": wiki_links,
                        "tags": tags,
                        "title": title,
                        "relative_path": relative_path,
                        "content_hash": content_hash,
                        "unique_identifier_hash": unique_identifier_hash,
                    }
                )

            except Exception as e:
                logger.exception(
                    f"Error in Phase 1 for file {file_info.get('path', 'unknown')}: {e}"
                )
                failed_count += 1
                continue

        # Commit all pending documents - they all appear in UI now
        if new_documents_created:
            logger.info(
                f"Phase 1: Committing {sum(1 for f in files_to_process if f['is_new'])} pending documents"
            )
            await session.commit()

        # =======================================================================
        # PHASE 2: Process each document one by one
        # Each document transitions: pending → processing → ready/failed
        # =======================================================================
        logger.info(f"Phase 2: Processing {len(files_to_process)} documents")

        # Get LLM for summarization
        long_context_llm = await get_user_long_context_llm(
            session, user_id, search_space_id
        )

        for item in files_to_process:
            # Send heartbeat periodically
            if on_heartbeat_callback:
                current_time = time.time()
                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
                    await on_heartbeat_callback(indexed_count)
                    last_heartbeat_time = current_time

            document = item["document"]
            try:
                # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
                document.status = DocumentStatus.processing()
                await session.commit()

                # Extract data from item
                title = item["title"]
                relative_path = item["relative_path"]
                content = item["content"]
                body_content = item["body_content"]
                frontmatter = item["frontmatter"]
                wiki_links = item["wiki_links"]
                tags = item["tags"]
                content_hash = item["content_hash"]
                file_info = item["file_info"]

                # Build metadata. Frontmatter is reduced to plain builtins
                # because the YAML parser can produce values such as dates;
                # presumably the metadata is stored as JSON - verify against
                # the Document model.
                document_metadata = {
                    "vault_name": vault_name,
                    "file_path": relative_path,
                    "tags": tags,
                    "outgoing_links": wiki_links,
                    "frontmatter": _to_json_safe(frontmatter),
                    "modified_at": file_info["modified_at"].isoformat(),
                    "created_at": file_info["created_at"].isoformat(),
                    "word_count": len(body_content.split()),
                }

                # Build document content with metadata
                metadata_sections = [
                    (
                        "METADATA",
                        [
                            f"Title: {title}",
                            f"Vault: {vault_name}",
                            f"Path: {relative_path}",
                            f"Tags: {', '.join(tags) if tags else 'None'}",
                            f"Links to: {', '.join(wiki_links) if wiki_links else 'None'}",
                        ],
                    ),
                    ("CONTENT", [body_content]),
                ]
                document_string = build_document_metadata_string(metadata_sections)

                # Generate summary
                summary_content = ""
                if long_context_llm and connector.enable_summary:
                    summary_content, _ = await generate_document_summary(
                        document_string,
                        long_context_llm,
                        document_metadata,
                    )

                # Generate embedding
                embedding = embed_text(document_string)

                # Add URL and summary to metadata
                document_metadata["url"] = f"obsidian://{vault_name}/{relative_path}"
                document_metadata["summary"] = summary_content
                document_metadata["connector_id"] = connector_id

                # Create chunks
                chunks = await create_document_chunks(document_string)

                # Update document to READY with actual content
                document.title = title
                document.content = document_string
                document.content_hash = content_hash
                document.embedding = embedding
                document.document_metadata = document_metadata
                await safe_set_chunks(session, document, chunks)
                document.updated_at = get_current_timestamp()
                document.status = DocumentStatus.ready()

                indexed_count += 1

                # Batch commit every 10 documents (for ready status updates)
                if indexed_count % 10 == 0:
                    logger.info(
                        f"Committing batch: {indexed_count} Obsidian notes processed so far"
                    )
                    await session.commit()

            except Exception as e:
                logger.exception(
                    f"Error processing file {item.get('file_info', {}).get('path', 'unknown')}: {e}"
                )
                # Mark document as failed with reason (visible in UI)
                # NOTE(review): if the exception came from session.commit(),
                # the session may need a rollback before further use - confirm.
                try:
                    document.status = DocumentStatus.failed(str(e))
                    document.updated_at = get_current_timestamp()
                except Exception as status_error:
                    logger.error(
                        f"Failed to update document status to failed: {status_error}"
                    )
                failed_count += 1
                continue

        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Zero syncs
        await update_connector_last_indexed(session, connector, update_last_indexed)

        # Final commit for any remaining documents not yet committed in batches
        logger.info(f"Final commit: Total {indexed_count} Obsidian notes processed")
        try:
            await session.commit()
            logger.info(
                "Successfully committed all Obsidian document changes to database"
            )
        except Exception as e:
            # Handle any remaining integrity errors gracefully (race conditions, etc.)
            if (
                "duplicate key value violates unique constraint" in str(e).lower()
                or "uniqueviolationerror" in str(e).lower()
            ):
                logger.warning(
                    f"Duplicate content_hash detected during final commit. "
                    f"This may occur if the same note was indexed by multiple connectors. "
                    f"Rolling back and continuing. Error: {e!s}"
                )
                await session.rollback()
                # Don't fail the entire task - some documents may have been successfully indexed
            else:
                raise

        # Build warning message if there were issues
        warning_parts = []
        if duplicate_content_count > 0:
            warning_parts.append(f"{duplicate_content_count} duplicate")
        if failed_count > 0:
            warning_parts.append(f"{failed_count} failed")
        warning_message = ", ".join(warning_parts) if warning_parts else None

        total_processed = indexed_count

        await task_logger.log_task_success(
            log_entry,
            f"Successfully completed Obsidian vault indexing for connector {connector_id}",
            {
                "notes_processed": total_processed,
                "documents_indexed": indexed_count,
                "documents_skipped": skipped_count,
                "documents_failed": failed_count,
                "duplicate_content_count": duplicate_content_count,
            },
        )

        logger.info(
            f"Obsidian vault indexing completed: {indexed_count} ready, "
            f"{skipped_count} skipped, {failed_count} failed "
            f"({duplicate_content_count} duplicate content)"
        )
        return total_processed, warning_message

    except SQLAlchemyError as e:
        logger.exception(f"Database error during Obsidian indexing: {e}")
        await session.rollback()
        await task_logger.log_task_failure(
            log_entry,
            f"Database error during Obsidian indexing: {e}",
            "Database error",
            {"error_type": "SQLAlchemyError"},
        )
        return 0, f"Database error: {e}"

    except Exception as e:
        logger.exception(f"Error during Obsidian indexing: {e}")
        await task_logger.log_task_failure(
            log_entry,
            f"Error during Obsidian indexing: {e}",
            "Unexpected error",
            {"error_type": type(e).__name__},
        )
        return 0, str(e)
|
||||
|
|
@ -34,7 +34,6 @@ CONNECTOR_TASK_MAP = {
|
|||
SearchSourceConnectorType.ELASTICSEARCH_CONNECTOR: "index_elasticsearch_documents",
|
||||
SearchSourceConnectorType.WEBCRAWLER_CONNECTOR: "index_crawled_urls",
|
||||
SearchSourceConnectorType.BOOKSTACK_CONNECTOR: "index_bookstack_pages",
|
||||
SearchSourceConnectorType.OBSIDIAN_CONNECTOR: "index_obsidian_vault",
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -100,7 +99,6 @@ def create_periodic_schedule(
|
|||
index_linear_issues_task,
|
||||
index_luma_events_task,
|
||||
index_notion_pages_task,
|
||||
index_obsidian_vault_task,
|
||||
index_slack_messages_task,
|
||||
)
|
||||
|
||||
|
|
@ -121,7 +119,6 @@ def create_periodic_schedule(
|
|||
SearchSourceConnectorType.ELASTICSEARCH_CONNECTOR: index_elasticsearch_documents_task,
|
||||
SearchSourceConnectorType.WEBCRAWLER_CONNECTOR: index_crawled_urls_task,
|
||||
SearchSourceConnectorType.BOOKSTACK_CONNECTOR: index_bookstack_pages_task,
|
||||
SearchSourceConnectorType.OBSIDIAN_CONNECTOR: index_obsidian_vault_task,
|
||||
}
|
||||
|
||||
# Trigger the first run immediately
|
||||
|
|
|
|||
|
|
@ -1,15 +1,11 @@
|
|||
"use client";
|
||||
|
||||
import { AlertTriangle, Download, Info } from "lucide-react";
|
||||
import { Info } from "lucide-react";
|
||||
import { type FC, useEffect, useMemo, useState } from "react";
|
||||
import { Alert, AlertDescription, AlertTitle } from "@/components/ui/alert";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import { connectorsApiService, type ObsidianStats } from "@/lib/apis/connectors-api.service";
|
||||
import type { ConnectorConfigProps } from "../index";
|
||||
|
||||
const PLUGIN_RELEASES_URL =
|
||||
"https://github.com/MODSetter/SurfSense/releases?q=obsidian&expanded=true";
|
||||
|
||||
function formatTimestamp(value: unknown): string {
|
||||
if (typeof value !== "string" || !value) return "—";
|
||||
const d = new Date(value);
|
||||
|
|
@ -26,78 +22,17 @@ function formatTimestamp(value: unknown): string {
|
|||
* web UI doesn't expose a Name input or a Save button for Obsidian (the
|
||||
* latter is suppressed in `connector-edit-view.tsx`).
|
||||
*
|
||||
* Renders one of three modes depending on the connector's `config`:
|
||||
*
|
||||
* 1. **Plugin connector** (`config.source === "plugin"`) — read-only stats
|
||||
* panel showing what the plugin most recently reported.
|
||||
* 2. **Legacy server-path connector** (`config.legacy === true`, set by the
|
||||
* Phase 3 alembic) — migration banner, an "Install Plugin" CTA, and a
|
||||
* short "how to migrate" checklist that ends with the user pressing the
|
||||
* standard Disconnect button (which deletes this connector along with
|
||||
* every document it previously indexed).
|
||||
* 3. **Unknown** — fallback for rows that escaped the alembic; suggests a
|
||||
* clean re-install.
|
||||
* Renders plugin stats when connector metadata comes from the plugin.
|
||||
* If metadata is missing or malformed, we show a recovery hint.
|
||||
*/
|
||||
export const ObsidianConfig: FC<ConnectorConfigProps> = ({ connector }) => {
|
||||
const config = (connector.config ?? {}) as Record<string, unknown>;
|
||||
const isLegacy = config.legacy === true;
|
||||
const isPlugin = config.source === "plugin";
|
||||
|
||||
if (isLegacy) return <LegacyBanner />;
|
||||
if (isPlugin) return <PluginStats config={config} />;
|
||||
return <UnknownConnectorState />;
|
||||
};
|
||||
|
||||
const LegacyBanner: FC = () => {
|
||||
return (
|
||||
<div className="space-y-4">
|
||||
<Alert className="border-amber-500/40 bg-amber-500/10">
|
||||
<AlertTriangle className="size-4 shrink-0 text-amber-500" />
|
||||
<AlertTitle className="text-xs sm:text-sm">
|
||||
Sync stopped — install the plugin to migrate
|
||||
</AlertTitle>
|
||||
<AlertDescription className="text-[11px] sm:text-xs leading-relaxed">
|
||||
This Obsidian connector used the legacy server-path scanner, which has been removed. The
|
||||
notes already indexed remain searchable, but they no longer reflect changes made in your
|
||||
vault.
|
||||
</AlertDescription>
|
||||
</Alert>
|
||||
|
||||
<a
|
||||
href={PLUGIN_RELEASES_URL}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="inline-flex"
|
||||
>
|
||||
<Button type="button" variant="outline" size="sm" className="gap-2">
|
||||
<Download className="size-3.5" />
|
||||
Install the plugin
|
||||
</Button>
|
||||
</a>
|
||||
|
||||
<div className="rounded-xl border border-border bg-slate-400/5 p-3 sm:p-6 dark:bg-white/5">
|
||||
<h3 className="mb-3 text-sm font-medium sm:text-base">How to migrate</h3>
|
||||
<ol className="list-decimal space-y-2 pl-5 text-[11px] leading-relaxed text-muted-foreground sm:text-xs">
|
||||
<li>Install the SurfSense Obsidian plugin using the button above.</li>
|
||||
<li>
|
||||
In Obsidian, open Settings → SurfSense, sign in, pick a search space, and wait for the
|
||||
first sync to finish.
|
||||
</li>
|
||||
<li>
|
||||
Confirm the new "Obsidian — <vault>" connector shows your notes, then return here
|
||||
and use the Disconnect button below to remove this legacy connector.
|
||||
</li>
|
||||
</ol>
|
||||
<p className="mt-3 text-[11px] leading-relaxed text-amber-600 dark:text-amber-400 sm:text-xs">
|
||||
Heads up: Disconnect also deletes every document this connector previously indexed. Make
|
||||
sure the plugin has finished its first sync before you disconnect, otherwise your Obsidian
|
||||
notes will disappear from search until the plugin re-indexes them.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
const PluginStats: FC<{ config: Record<string, unknown> }> = ({ config }) => {
|
||||
const vaultId = typeof config.vault_id === "string" ? config.vault_id : null;
|
||||
const [stats, setStats] = useState<ObsidianStats | null>(null);
|
||||
|
|
@ -179,8 +114,8 @@ const UnknownConnectorState: FC = () => (
|
|||
<Info className="size-4 shrink-0" />
|
||||
<AlertTitle className="text-xs sm:text-sm">Unrecognized config</AlertTitle>
|
||||
<AlertDescription className="text-[11px] sm:text-xs">
|
||||
This connector has neither plugin metadata nor a legacy marker. It may predate the migration —
|
||||
you can safely delete it and re-install the SurfSense Obsidian plugin to resume syncing.
|
||||
This connector is missing plugin metadata. Delete it, then reconnect your vault from the
|
||||
SurfSense Obsidian plugin so sync can resume.
|
||||
</AlertDescription>
|
||||
</Alert>
|
||||
);
|
||||
|
|
|
|||
|
|
@ -111,7 +111,9 @@ export const ConnectorConnectView: FC<ConnectorConnectViewProps> = ({
|
|||
: getConnectorTypeDisplay(connectorType)}
|
||||
</h2>
|
||||
<p className="text-xs sm:text-base text-muted-foreground mt-1">
|
||||
Enter your connection details
|
||||
{connectorType === "OBSIDIAN_CONNECTOR"
|
||||
? "Follow the plugin setup steps below"
|
||||
: "Enter your connection details"}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -1,143 +1,60 @@
|
|||
---
|
||||
title: Obsidian
|
||||
description: Connect your Obsidian vault to SurfSense
|
||||
description: Sync your Obsidian vault with the SurfSense plugin
|
||||
---
|
||||
|
||||
# Obsidian Integration Setup Guide
|
||||
# Obsidian Plugin Setup Guide
|
||||
|
||||
This guide walks you through connecting your Obsidian vault to SurfSense for note search and AI-powered insights.
|
||||
|
||||
<Callout type="warn">
|
||||
This connector requires direct file system access and only works with self-hosted SurfSense installations.
|
||||
</Callout>
|
||||
SurfSense integrates with Obsidian through the SurfSense Obsidian plugin.
|
||||
The old server-side vault path scanner is no longer supported.
|
||||
|
||||
## How it works
|
||||
|
||||
The Obsidian connector scans your local Obsidian vault directory and indexes all Markdown files. It preserves your note structure and extracts metadata from YAML frontmatter.
|
||||
The plugin runs inside your Obsidian app and pushes note updates to SurfSense over HTTPS.
|
||||
This works with both cloud and self-hosted SurfSense deployments, and from both desktop and mobile Obsidian clients.
|
||||
|
||||
- For follow-up indexing runs, the connector uses content hashing to skip unchanged files for faster sync.
|
||||
- Indexing should be configured to run periodically, so updates should appear in your search results within minutes.
|
||||
|
||||
---
|
||||
|
||||
## What Gets Indexed
|
||||
## What gets indexed
|
||||
|
||||
| Content Type | Description |
|
||||
|--------------|-------------|
|
||||
| Markdown Files | All `.md` files in your vault |
|
||||
| Frontmatter | YAML metadata (title, tags, aliases, dates) |
|
||||
| Wiki Links | Links between notes (`[[note]]`) |
|
||||
| Inline Tags | Tags throughout your notes (`#tag`) |
|
||||
| Note Content | Full content with intelligent chunking |
|
||||
| Markdown files | Note content (`.md`) |
|
||||
| Frontmatter | YAML metadata like title, tags, aliases, dates |
|
||||
| Wiki links | Linked notes (`[[note]]`) |
|
||||
| Tags | Inline and frontmatter tags |
|
||||
| Vault metadata | Vault and path metadata used for deep links and sync state |
|
||||
|
||||
<Callout type="warn">
|
||||
Binary files and attachments are not indexed by default. Enable "Include Attachments" to index embedded files.
|
||||
</Callout>
|
||||
## Quick start
|
||||
|
||||
---
|
||||
|
||||
## Quick Start (Local Installation)
|
||||
|
||||
1. Navigate to **Connectors** → **Add Connector** → **Obsidian**
|
||||
2. Enter your vault path: `/Users/yourname/Documents/MyVault`
|
||||
3. Enter a vault name (e.g., `Personal Notes`)
|
||||
4. Click **Connect Obsidian**
|
||||
1. Open **Connectors** in SurfSense and choose **Obsidian**.
|
||||
2. Click **Open plugin releases** and install the latest SurfSense Obsidian plugin.
|
||||
3. In Obsidian, open **Settings → SurfSense**.
|
||||
4. Paste your SurfSense API token from the connector setup panel.
|
||||
5. Paste your SurfSense backend URL in the plugin's **Server URL** setting.
|
||||
6. Choose the Search Space in the plugin, then run the first sync.
|
||||
7. Confirm the connector appears as **Obsidian — &lt;vault&gt;** in SurfSense.
|
||||
|
||||
<Callout type="info">
|
||||
Find your vault path: In Obsidian, right-click any note → "Reveal in Finder" (macOS) or "Show in Explorer" (Windows).
|
||||
You do not create or configure a vault path in the web UI. The connector row is created automatically when the plugin calls `/api/v1/obsidian/connect`.
|
||||
</Callout>
|
||||
|
||||
<Callout type="info" title="Periodic Sync">
|
||||
Enable periodic sync to automatically re-index notes when content changes. Available frequencies: Every 5 minutes, 15 minutes, hourly, every 6 hours, daily, or weekly.
|
||||
</Callout>
|
||||
## Self-hosted notes
|
||||
|
||||
---
|
||||
|
||||
## Docker Setup
|
||||
|
||||
For Docker deployments, you need to mount your Obsidian vault as a volume.
|
||||
|
||||
### Step 1: Update docker-compose.yml
|
||||
|
||||
Add your vault as a volume mount to the SurfSense backend service:
|
||||
|
||||
```yaml
|
||||
services:
|
||||
surfsense:
|
||||
# ... other config
|
||||
volumes:
|
||||
- /path/to/your/obsidian/vault:/app/obsidian_vaults/my-vault:ro
|
||||
```
|
||||
|
||||
<Callout type="info">
|
||||
The `:ro` flag mounts the vault as read-only, which is recommended for security.
|
||||
</Callout>
|
||||
|
||||
### Step 2: Configure the Connector
|
||||
|
||||
Use the **container path** (not your local path) when setting up the connector:
|
||||
|
||||
| Your Local Path | Container Path (use this) |
|
||||
|-----------------|---------------------------|
|
||||
| `/Users/john/Documents/MyVault` | `/app/obsidian_vaults/my-vault` |
|
||||
| `C:\Users\john\Documents\MyVault` | `/app/obsidian_vaults/my-vault` |
|
||||
|
||||
### Example: Multiple Vaults
|
||||
|
||||
```yaml
|
||||
volumes:
|
||||
- /Users/john/Documents/PersonalNotes:/app/obsidian_vaults/personal:ro
|
||||
- /Users/john/Documents/WorkNotes:/app/obsidian_vaults/work:ro
|
||||
```
|
||||
|
||||
Then create separate connectors for each vault using `/app/obsidian_vaults/personal` and `/app/obsidian_vaults/work`.
|
||||
|
||||
---
|
||||
|
||||
## Connector Configuration
|
||||
|
||||
| Field | Description | Required |
|
||||
|-------|-------------|----------|
|
||||
| **Connector Name** | A friendly name to identify this connector | Yes |
|
||||
| **Vault Path** | Absolute path to your vault (container path for Docker) | Yes |
|
||||
| **Vault Name** | Display name for your vault in search results | Yes |
|
||||
| **Exclude Folders** | Comma-separated folder names to skip | No |
|
||||
| **Include Attachments** | Index embedded files (images, PDFs) | No |
|
||||
|
||||
---
|
||||
|
||||
## Recommended Exclusions
|
||||
|
||||
Common folders to exclude from indexing:
|
||||
|
||||
| Folder | Reason |
|
||||
|--------|--------|
|
||||
| `.obsidian` | Obsidian config files (always exclude) |
|
||||
| `.trash` | Obsidian's trash folder |
|
||||
| `templates` | Template files you don't want searchable |
|
||||
| `daily-notes` | If you want to exclude daily notes |
|
||||
| `attachments` | If not using "Include Attachments" |
|
||||
|
||||
Default exclusions: `.obsidian,.trash`
|
||||
|
||||
---
|
||||
- Use a public or LAN backend URL that the device running Obsidian can reach.
|
||||
- No Docker bind mount for the vault is required.
|
||||
- If your instance is served over HTTPS, make sure its TLS certificate matches the URL's hostname and is trusted by the device running Obsidian.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
**Vault not found / Permission denied**
|
||||
- Verify the path exists and is accessible
|
||||
- For Docker: ensure the volume is mounted correctly in `docker-compose.yml`
|
||||
- Check file permissions: SurfSense needs read access to the vault directory
|
||||
**Plugin connects but no files appear**
|
||||
- Verify the plugin is pointed to the correct Search Space.
|
||||
- Trigger a manual sync from the plugin settings.
|
||||
- Confirm your API token is valid and not expired.
|
||||
|
||||
**No notes indexed**
|
||||
- Ensure your vault contains `.md` files
|
||||
- Check that notes aren't in excluded folders
|
||||
- Verify the path points to the vault root (contains `.obsidian` folder)
|
||||
**Unauthorized / 401 errors**
|
||||
- Regenerate and paste a fresh API token from SurfSense.
|
||||
- Ensure the token belongs to the same account as the Search Space you are syncing into.
|
||||
|
||||
**Changes not appearing**
|
||||
- Wait for the next sync cycle, or manually trigger re-indexing
|
||||
- For Docker: restart the container if you modified volume mounts
|
||||
|
||||
**Docker: "path not found" error**
|
||||
- Use the container path (`/app/obsidian_vaults/...`), not your local path
|
||||
- Verify the volume mount in `docker-compose.yml` matches
|
||||
**Cannot reach server URL**
|
||||
- Check that the backend URL is reachable from the Obsidian device.
|
||||
- For self-hosted setups, verify firewall and reverse proxy rules.
|
||||
- Avoid using localhost unless SurfSense and Obsidian run on the same machine.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue