refactor: remove legacy Obsidian connector support

2026-07-24 23:41:10 +02:00 · 2026-04-22 00:10:24 +05:30 · 2026-04-22 00:10:24 +05:30 · 99623a85d5
commit 99623a85d5
parent 16ea8e2401
10 changed files with 44 additions and 1046 deletions
--- a/surfsense_backend/app/celery_app.py
+++ b/surfsense_backend/app/celery_app.py
@@ -152,7 +152,6 @@ celery_app.conf.update(
        "index_elasticsearch_documents": {"queue": CONNECTORS_QUEUE},
        "index_crawled_urls": {"queue": CONNECTORS_QUEUE},
        "index_bookstack_pages": {"queue": CONNECTORS_QUEUE},
-        "index_obsidian_vault": {"queue": CONNECTORS_QUEUE},
        "index_composio_connector": {"queue": CONNECTORS_QUEUE},
        # Everything else (document processing, podcasts, reindexing,
        # schedule checker, cleanup) stays on the default fast queue.
--- a/surfsense_backend/app/routes/search_source_connectors_routes.py
+++ b/surfsense_backend/app/routes/search_source_connectors_routes.py
@@ -1157,25 +1157,6 @@ async def index_connector_content(
                )
                response_message = "Web page indexing started in the background."

-        elif connector.connector_type == SearchSourceConnectorType.OBSIDIAN_CONNECTOR:
-            from app.config import config as app_config
-            from app.tasks.celery_tasks.connector_tasks import index_obsidian_vault_task
-
-            # Obsidian connector only available in self-hosted mode
-            if not app_config.is_self_hosted():
-                raise HTTPException(
-                    status_code=400,
-                    detail="Obsidian connector is only available in self-hosted mode",
-                )
-
-            logger.info(
-                f"Triggering Obsidian vault indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}"
-            )
-            index_obsidian_vault_task.delay(
-                connector_id, search_space_id, str(user.id), indexing_from, indexing_to
-            )
-            response_message = "Obsidian vault indexing started in the background."
-
        elif (
            connector.connector_type
            == SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR
@@ -3048,59 +3029,6 @@ async def run_bookstack_indexing(
    )


-# Add new helper functions for Obsidian indexing
-async def run_obsidian_indexing_with_new_session(
-    connector_id: int,
-    search_space_id: int,
-    user_id: str,
-    start_date: str,
-    end_date: str,
-):
-    """Wrapper to run Obsidian indexing with its own database session."""
-    logger.info(
-        f"Background task started: Indexing Obsidian connector {connector_id} into space {search_space_id} from {start_date} to {end_date}"
-    )
-    async with async_session_maker() as session:
-        await run_obsidian_indexing(
-            session, connector_id, search_space_id, user_id, start_date, end_date
-        )
-    logger.info(f"Background task finished: Indexing Obsidian connector {connector_id}")
-
-
-async def run_obsidian_indexing(
-    session: AsyncSession,
-    connector_id: int,
-    search_space_id: int,
-    user_id: str,
-    start_date: str,
-    end_date: str,
-):
-    """
-    Background task to run Obsidian vault indexing.
-
-    Args:
-        session: Database session
-        connector_id: ID of the Obsidian connector
-        search_space_id: ID of the search space
-        user_id: ID of the user
-        start_date: Start date for indexing
-        end_date: End date for indexing
-    """
-    from app.tasks.connector_indexers import index_obsidian_vault
-
-    await _run_indexing_with_notifications(
-        session=session,
-        connector_id=connector_id,
-        search_space_id=search_space_id,
-        user_id=user_id,
-        start_date=start_date,
-        end_date=end_date,
-        indexing_function=index_obsidian_vault,
-        update_timestamp_func=_update_connector_timestamp_by_id,
-        supports_heartbeat_callback=True,
-    )
-
-
 async def run_composio_indexing_with_new_session(
    connector_id: int,
    search_space_id: int,
--- a/surfsense_backend/app/schemas/obsidian_auth_credentials.py
+++ b/surfsense_backend/app/schemas/obsidian_auth_credentials.py
@@ -1,59 +0,0 @@
-"""
-Obsidian Connector Credentials Schema.
-
-Obsidian is a local-first note-taking app that stores notes as markdown files.
-This connector supports indexing from local file system (self-hosted only).
-"""
-
-from pydantic import BaseModel, field_validator
-
-
-class ObsidianAuthCredentialsBase(BaseModel):
-    """
-    Credentials/configuration for the Obsidian connector.
-
-    Since Obsidian vaults are local directories, this schema primarily
-    holds the vault path and configuration options rather than API tokens.
-    """
-
-    vault_path: str
-    vault_name: str | None = None
-    exclude_folders: list[str] | None = None
-    include_attachments: bool = False
-
-    @field_validator("vault_path")
-    @classmethod
-    def validate_vault_path(cls, v: str) -> str:
-        """Ensure vault path is provided and stripped of whitespace."""
-        if not v or not v.strip():
-            raise ValueError("Vault path is required")
-        return v.strip()
-
-    @field_validator("exclude_folders", mode="before")
-    @classmethod
-    def parse_exclude_folders(cls, v):
-        """Parse exclude_folders from string if needed."""
-        if v is None:
-            return [".trash", ".obsidian", "templates"]
-        if isinstance(v, str):
-            return [f.strip() for f in v.split(",") if f.strip()]
-        return v
-
-    def to_dict(self) -> dict:
-        """Convert credentials to dictionary for storage."""
-        return {
-            "vault_path": self.vault_path,
-            "vault_name": self.vault_name,
-            "exclude_folders": self.exclude_folders,
-            "include_attachments": self.include_attachments,
-        }
-
-    @classmethod
-    def from_dict(cls, data: dict) -> "ObsidianAuthCredentialsBase":
-        """Create credentials from dictionary."""
-        return cls(
-            vault_path=data.get("vault_path", ""),
-            vault_name=data.get("vault_name"),
-            exclude_folders=data.get("exclude_folders"),
-            include_attachments=data.get("include_attachments", False),
-        )
--- a/surfsense_backend/app/tasks/celery_tasks/connector_tasks.py
+++ b/surfsense_backend/app/tasks/celery_tasks/connector_tasks.py
@@ -883,49 +883,6 @@ async def _index_bookstack_pages(
        )


-@celery_app.task(name="index_obsidian_vault", bind=True)
-def index_obsidian_vault_task(
-    self,
-    connector_id: int,
-    search_space_id: int,
-    user_id: str,
-    start_date: str,
-    end_date: str,
-):
-    """Celery task to index Obsidian vault notes."""
-    import asyncio
-
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
-
-    try:
-        loop.run_until_complete(
-            _index_obsidian_vault(
-                connector_id, search_space_id, user_id, start_date, end_date
-            )
-        )
-    finally:
-        loop.close()
-
-
-async def _index_obsidian_vault(
-    connector_id: int,
-    search_space_id: int,
-    user_id: str,
-    start_date: str,
-    end_date: str,
-):
-    """Index Obsidian vault with new session."""
-    from app.routes.search_source_connectors_routes import (
-        run_obsidian_indexing,
-    )
-
-    async with get_celery_session_maker()() as session:
-        await run_obsidian_indexing(
-            session, connector_id, search_space_id, user_id, start_date, end_date
-        )
-
-
@celery_app.task(name="index_composio_connector", bind=True)
 def index_composio_connector_task(
    self,
--- a/surfsense_backend/app/tasks/connector_indexers/__init__.py
+++ b/surfsense_backend/app/tasks/connector_indexers/__init__.py
@@ -46,7 +46,6 @@ from .linear_indexer import index_linear_issues
 # Documentation and knowledge management
 from .luma_indexer import index_luma_events
 from .notion_indexer import index_notion_pages
-from .obsidian_indexer import index_obsidian_vault
 from .slack_indexer import index_slack_messages
 from .webcrawler_indexer import index_crawled_urls

@@ -69,7 +68,6 @@ __all__ = [  # noqa: RUF022
    "index_linear_issues",
    # Documentation and knowledge management
    "index_notion_pages",
-    "index_obsidian_vault",
    "index_crawled_urls",
    # Communication platforms
    "index_slack_messages",
--- a/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py
@ -1,676 +0,0 @@
-"""
-Obsidian connector indexer.
-
-Indexes markdown notes from a local Obsidian vault.
-This connector is only available in self-hosted mode.
-
-Implements 2-phase document status updates for real-time UI feedback:
- Phase 1: Create all documents with 'pending' status (visible in UI immediately)
- Phase 2: Process each document: pending → processing → ready/failed
-"""
-
-import os
-import re
-import time
-from collections.abc import Awaitable, Callable
-from datetime import UTC, datetime
-from pathlib import Path
-
-import yaml
-from sqlalchemy.exc import SQLAlchemyError
-from sqlalchemy.ext.asyncio import AsyncSession
-
-from app.config import config
-from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
-from app.services.llm_service import get_user_long_context_llm
-from app.services.task_logging_service import TaskLoggingService
-from app.utils.document_converters import (
-    create_document_chunks,
-    embed_text,
-    generate_content_hash,
-    generate_document_summary,
-    generate_unique_identifier_hash,
-)
-
-from .base import (
-    build_document_metadata_string,
-    check_document_by_unique_identifier,
-    check_duplicate_document_by_hash,
-    get_connector_by_id,
-    get_current_timestamp,
-    logger,
-    safe_set_chunks,
-    update_connector_last_indexed,
-)
-
-# Type hint for heartbeat callback
-HeartbeatCallbackType = Callable[[int], Awaitable[None]]
-
-# Heartbeat interval in seconds
-HEARTBEAT_INTERVAL_SECONDS = 30
-
-
-def parse_frontmatter(content: str) -> tuple[dict | None, str]:
-    """
-    Parse YAML frontmatter from markdown content.
-
-    Args:
-        content: The full markdown content
-
-    Returns:
-        Tuple of (frontmatter dict or None, content without frontmatter)
-    """
-    if not content.startswith("---"):
-        return None, content
-
-    # Find the closing ---
-    end_match = re.search(r"\n---\n", content[3:])
-    if not end_match:
-        return None, content
-
-    frontmatter_str = content[3 : end_match.start() + 3]
-    remaining_content = content[end_match.end() + 3 :]
-
-    try:
-        frontmatter = yaml.safe_load(frontmatter_str)
-        return frontmatter, remaining_content.strip()
-    except yaml.YAMLError:
-        return None, content
-
-
-def extract_wiki_links(content: str) -> list[str]:
-    """
-    Extract [[wiki-style links]] from content.
-
-    Args:
-        content: Markdown content
-
-    Returns:
-        List of linked note names
-    """
-    # Match [[link]] or [[link|alias]]
-    pattern = r"\[\[([^\]|]+)(?:\|[^\]]+)?\]\]"
-    matches = re.findall(pattern, content)
-    return list(set(matches))
-
-
-def extract_tags(content: str) -> list[str]:
-    """
-    Extract #tags from content (both inline and frontmatter).
-
-    Args:
-        content: Markdown content
-
-    Returns:
-        List of tags (without # prefix)
-    """
-    # Match #tag but not ## headers
-    pattern = r"(?<!\S)#([a-zA-Z][a-zA-Z0-9_/-]*)"
-    matches = re.findall(pattern, content)
-    return list(set(matches))
-
-
-def scan_vault(
-    vault_path: str,
-    exclude_folders: list[str] | None = None,
-) -> list[dict]:
-    """
-    Scan an Obsidian vault for markdown files.
-
-    Args:
-        vault_path: Path to the Obsidian vault
-        exclude_folders: List of folder names to exclude
-
-    Returns:
-        List of file info dicts with path, name, modified time
-    """
-    if exclude_folders is None:
-        exclude_folders = [".trash", ".obsidian", "templates"]
-
-    vault = Path(vault_path)
-    if not vault.exists():
-        raise ValueError(f"Vault path does not exist: {vault_path}")
-
-    files = []
-    for md_file in vault.rglob("*.md"):
-        # Check if file is in an excluded folder
-        relative_path = md_file.relative_to(vault)
-        parts = relative_path.parts
-
-        if any(excluded in parts for excluded in exclude_folders):
-            continue
-
-        try:
-            stat = md_file.stat()
-            files.append(
-                {
-                    "path": str(md_file),
-                    "relative_path": str(relative_path),
-                    "name": md_file.stem,
-                    "modified_at": datetime.fromtimestamp(stat.st_mtime, tz=UTC),
-                    "created_at": datetime.fromtimestamp(stat.st_ctime, tz=UTC),
-                    "size": stat.st_size,
-                }
-            )
-        except OSError as e:
-            logger.warning(f"Could not stat file {md_file}: {e}")
-
-    return files
-
-
-async def index_obsidian_vault(
-    session: AsyncSession,
-    connector_id: int,
-    search_space_id: int,
-    user_id: str,
-    start_date: str | None = None,
-    end_date: str | None = None,
-    update_last_indexed: bool = True,
-    on_heartbeat_callback: HeartbeatCallbackType | None = None,
-) -> tuple[int, str | None]:
-    """
-    Index notes from a local Obsidian vault.
-
-    This indexer is only available in self-hosted mode as it requires
-    direct file system access to the user's Obsidian vault.
-
-    Args:
-        session: Database session
-        connector_id: ID of the Obsidian connector
-        search_space_id: ID of the search space to store documents in
-        user_id: ID of the user
-        start_date: Start date for filtering (YYYY-MM-DD format) - optional
-        end_date: End date for filtering (YYYY-MM-DD format) - optional
-        update_last_indexed: Whether to update the last_indexed_at timestamp
-        on_heartbeat_callback: Optional callback to update notification during long-running indexing.
-
-    Returns:
-        Tuple containing (number of documents indexed, error message or None)
-    """
-    task_logger = TaskLoggingService(session, search_space_id)
-
-    # Check if self-hosted mode
-    if not config.is_self_hosted():
-        return 0, "Obsidian connector is only available in self-hosted mode"
-
-    # Log task start
-    log_entry = await task_logger.log_task_start(
-        task_name="obsidian_vault_indexing",
-        source="connector_indexing_task",
-        message=f"Starting Obsidian vault indexing for connector {connector_id}",
-        metadata={
-            "connector_id": connector_id,
-            "user_id": str(user_id),
-            "start_date": start_date,
-            "end_date": end_date,
-        },
-    )
-
-    try:
-        # Get the connector
-        await task_logger.log_task_progress(
-            log_entry,
-            f"Retrieving Obsidian connector {connector_id} from database",
-            {"stage": "connector_retrieval"},
-        )
-
-        connector = await get_connector_by_id(
-            session, connector_id, SearchSourceConnectorType.OBSIDIAN_CONNECTOR
-        )
-
-        if not connector:
-            await task_logger.log_task_failure(
-                log_entry,
-                f"Connector with ID {connector_id} not found or is not an Obsidian connector",
-                "Connector not found",
-                {"error_type": "ConnectorNotFound"},
-            )
-            return (
-                0,
-                f"Connector with ID {connector_id} not found or is not an Obsidian connector",
-            )
-
-        # Get vault path from connector config
-        vault_path = connector.config.get("vault_path")
-        if not vault_path:
-            await task_logger.log_task_failure(
-                log_entry,
-                "Vault path not configured for this connector",
-                "Missing vault path",
-                {"error_type": "MissingVaultPath"},
-            )
-            return 0, "Vault path not configured for this connector"
-
-        # Validate vault path exists
-        if not os.path.exists(vault_path):
-            await task_logger.log_task_failure(
-                log_entry,
-                f"Vault path does not exist: {vault_path}",
-                "Vault path not found",
-                {"error_type": "VaultNotFound", "vault_path": vault_path},
-            )
-            return 0, f"Vault path does not exist: {vault_path}"
-
-        # Get configuration options
-        exclude_folders = connector.config.get(
-            "exclude_folders", [".trash", ".obsidian", "templates"]
-        )
-        vault_name = connector.config.get("vault_name") or os.path.basename(vault_path)
-
-        await task_logger.log_task_progress(
-            log_entry,
-            f"Scanning Obsidian vault: {vault_name}",
-            {"stage": "vault_scan", "vault_path": vault_path},
-        )
-
-        # Scan vault for markdown files
-        try:
-            files = scan_vault(vault_path, exclude_folders)
-        except Exception as e:
-            await task_logger.log_task_failure(
-                log_entry,
-                f"Failed to scan vault: {e}",
-                "Vault scan error",
-                {"error_type": "VaultScanError"},
-            )
-            return 0, f"Failed to scan vault: {e}"
-
-        logger.info(f"Found {len(files)} markdown files in vault")
-
-        await task_logger.log_task_progress(
-            log_entry,
-            f"Found {len(files)} markdown files to process",
-            {"stage": "files_discovered", "file_count": len(files)},
-        )
-
-        # Filter by date if provided (handle "undefined" string from frontend)
-        # Also handle inverted dates (start > end) by skipping filtering
-        start_dt = None
-        end_dt = None
-
-        if start_date and start_date != "undefined":
-            start_dt = datetime.strptime(start_date, "%Y-%m-%d").replace(tzinfo=UTC)
-
-        if end_date and end_date != "undefined":
-            # Make end_date inclusive (end of day)
-            end_dt = datetime.strptime(end_date, "%Y-%m-%d").replace(tzinfo=UTC)
-            end_dt = end_dt.replace(hour=23, minute=59, second=59)
-
-        # Only apply date filtering if dates are valid and in correct order
-        if start_dt and end_dt and start_dt > end_dt:
-            logger.warning(
-                f"start_date ({start_date}) is after end_date ({end_date}), skipping date filter"
-            )
-        else:
-            if start_dt:
-                files = [f for f in files if f["modified_at"] >= start_dt]
-                logger.info(
-                    f"After start_date filter ({start_date}): {len(files)} files"
-                )
-            if end_dt:
-                files = [f for f in files if f["modified_at"] <= end_dt]
-                logger.info(f"After end_date filter ({end_date}): {len(files)} files")
-
-        logger.info(f"Processing {len(files)} files after date filtering")
-
-        indexed_count = 0
-        skipped_count = 0
-        failed_count = 0
-        duplicate_content_count = 0
-
-        # Heartbeat tracking - update notification periodically to prevent appearing stuck
-        last_heartbeat_time = time.time()
-
-        # =======================================================================
-        # PHASE 1: Analyze all files, create pending documents
-        # This makes ALL documents visible in the UI immediately with pending status
-        # =======================================================================
-        files_to_process = []  # List of dicts with document and file data
-        new_documents_created = False
-
-        for file_info in files:
-            try:
-                file_path = file_info["path"]
-                relative_path = file_info["relative_path"]
-
-                # Read file content
-                try:
-                    with open(file_path, encoding="utf-8") as f:
-                        content = f.read()
-                except UnicodeDecodeError:
-                    logger.warning(f"Could not decode file {file_path}, skipping")
-                    skipped_count += 1
-                    continue
-
-                if not content.strip():
-                    logger.debug(f"Empty file {file_path}, skipping")
-                    skipped_count += 1
-                    continue
-
-                # Parse frontmatter and extract metadata
-                frontmatter, body_content = parse_frontmatter(content)
-                wiki_links = extract_wiki_links(content)
-                tags = extract_tags(content)
-
-                # Get title from frontmatter or filename
-                title = file_info["name"]
-                if frontmatter:
-                    title = frontmatter.get("title", title)
-                    # Also extract tags from frontmatter
-                    fm_tags = frontmatter.get("tags", [])
-                    if isinstance(fm_tags, list):
-                        tags = list({*tags, *fm_tags})
-                    elif isinstance(fm_tags, str):
-                        tags = list({*tags, fm_tags})
-
-                # Generate unique identifier using vault name and relative path
-                unique_identifier = f"{vault_name}:{relative_path}"
-                unique_identifier_hash = generate_unique_identifier_hash(
-                    DocumentType.OBSIDIAN_CONNECTOR,
-                    unique_identifier,
-                    search_space_id,
-                )
-
-                # Generate content hash
-                content_hash = generate_content_hash(content, search_space_id)
-
-                # Check for existing document
-                existing_document = await check_document_by_unique_identifier(
-                    session, unique_identifier_hash
-                )
-
-                if existing_document:
-                    # Document exists - check if content has changed
-                    if existing_document.content_hash == content_hash:
-                        # Ensure status is ready (might have been stuck in processing/pending)
-                        if not DocumentStatus.is_state(
-                            existing_document.status, DocumentStatus.READY
-                        ):
-                            existing_document.status = DocumentStatus.ready()
-                        logger.debug(f"Note {title} unchanged, skipping")
-                        skipped_count += 1
-                        continue
-
-                    # Queue existing document for update (will be set to processing in Phase 2)
-                    files_to_process.append(
-                        {
-                            "document": existing_document,
-                            "is_new": False,
-                            "file_info": file_info,
-                            "content": content,
-                            "body_content": body_content,
-                            "frontmatter": frontmatter,
-                            "wiki_links": wiki_links,
-                            "tags": tags,
-                            "title": title,
-                            "relative_path": relative_path,
-                            "content_hash": content_hash,
-                            "unique_identifier_hash": unique_identifier_hash,
-                        }
-                    )
-                    continue
-
-                # Document doesn't exist by unique_identifier_hash
-                # Check if a document with the same content_hash exists (from another connector)
-                with session.no_autoflush:
-                    duplicate_by_content = await check_duplicate_document_by_hash(
-                        session, content_hash
-                    )
-
-                if duplicate_by_content:
-                    logger.info(
-                        f"Obsidian note {title} already indexed by another connector "
-                        f"(existing document ID: {duplicate_by_content.id}, "
-                        f"type: {duplicate_by_content.document_type}). Skipping."
-                    )
-                    duplicate_content_count += 1
-                    skipped_count += 1
-                    continue
-
-                # Create new document with PENDING status (visible in UI immediately)
-                document = Document(
-                    search_space_id=search_space_id,
-                    title=title,
-                    document_type=DocumentType.OBSIDIAN_CONNECTOR,
-                    document_metadata={
-                        "vault_name": vault_name,
-                        "file_path": relative_path,
-                        "connector_id": connector_id,
-                    },
-                    content="Pending...",  # Placeholder until processed
-                    content_hash=unique_identifier_hash,  # Temporary unique value - updated when ready
-                    unique_identifier_hash=unique_identifier_hash,
-                    embedding=None,
-                    chunks=[],  # Empty at creation - safe for async
-                    status=DocumentStatus.pending(),  # Pending until processing starts
-                    updated_at=get_current_timestamp(),
-                    created_by_id=user_id,
-                    connector_id=connector_id,
-                )
-                session.add(document)
-                new_documents_created = True
-
-                files_to_process.append(
-                    {
-                        "document": document,
-                        "is_new": True,
-                        "file_info": file_info,
-                        "content": content,
-                        "body_content": body_content,
-                        "frontmatter": frontmatter,
-                        "wiki_links": wiki_links,
-                        "tags": tags,
-                        "title": title,
-                        "relative_path": relative_path,
-                        "content_hash": content_hash,
-                        "unique_identifier_hash": unique_identifier_hash,
-                    }
-                )
-
-            except Exception as e:
-                logger.exception(
-                    f"Error in Phase 1 for file {file_info.get('path', 'unknown')}: {e}"
-                )
-                failed_count += 1
-                continue
-
-        # Commit all pending documents - they all appear in UI now
-        if new_documents_created:
-            logger.info(
-                f"Phase 1: Committing {len([f for f in files_to_process if f['is_new']])} pending documents"
-            )
-            await session.commit()
-
-        # =======================================================================
-        # PHASE 2: Process each document one by one
-        # Each document transitions: pending → processing → ready/failed
-        # =======================================================================
-        logger.info(f"Phase 2: Processing {len(files_to_process)} documents")
-
-        # Get LLM for summarization
-        long_context_llm = await get_user_long_context_llm(
-            session, user_id, search_space_id
-        )
-
-        for item in files_to_process:
-            # Send heartbeat periodically
-            if on_heartbeat_callback:
-                current_time = time.time()
-                if current_time - last_heartbeat_time >= HEARTBEAT_INTERVAL_SECONDS:
-                    await on_heartbeat_callback(indexed_count)
-                    last_heartbeat_time = current_time
-
-            document = item["document"]
-            try:
-                # Set to PROCESSING and commit - shows "processing" in UI for THIS document only
-                document.status = DocumentStatus.processing()
-                await session.commit()
-
-                # Extract data from item
-                title = item["title"]
-                relative_path = item["relative_path"]
-                content = item["content"]
-                body_content = item["body_content"]
-                frontmatter = item["frontmatter"]
-                wiki_links = item["wiki_links"]
-                tags = item["tags"]
-                content_hash = item["content_hash"]
-                file_info = item["file_info"]
-
-                # Build metadata
-                document_metadata = {
-                    "vault_name": vault_name,
-                    "file_path": relative_path,
-                    "tags": tags,
-                    "outgoing_links": wiki_links,
-                    "frontmatter": frontmatter,
-                    "modified_at": file_info["modified_at"].isoformat(),
-                    "created_at": file_info["created_at"].isoformat(),
-                    "word_count": len(body_content.split()),
-                }
-
-                # Build document content with metadata
-                metadata_sections = [
-                    (
-                        "METADATA",
-                        [
-                            f"Title: {title}",
-                            f"Vault: {vault_name}",
-                            f"Path: {relative_path}",
-                            f"Tags: {', '.join(tags) if tags else 'None'}",
-                            f"Links to: {', '.join(wiki_links) if wiki_links else 'None'}",
-                        ],
-                    ),
-                    ("CONTENT", [body_content]),
-                ]
-                document_string = build_document_metadata_string(metadata_sections)
-
-                # Generate summary
-                summary_content = ""
-                if long_context_llm and connector.enable_summary:
-                    summary_content, _ = await generate_document_summary(
-                        document_string,
-                        long_context_llm,
-                        document_metadata,
-                    )
-
-                # Generate embedding
-                embedding = embed_text(document_string)
-
-                # Add URL and summary to metadata
-                document_metadata["url"] = f"obsidian://{vault_name}/{relative_path}"
-                document_metadata["summary"] = summary_content
-                document_metadata["connector_id"] = connector_id
-
-                # Create chunks
-                chunks = await create_document_chunks(document_string)
-
-                # Update document to READY with actual content
-                document.title = title
-                document.content = document_string
-                document.content_hash = content_hash
-                document.embedding = embedding
-                document.document_metadata = document_metadata
-                await safe_set_chunks(session, document, chunks)
-                document.updated_at = get_current_timestamp()
-                document.status = DocumentStatus.ready()
-
-                indexed_count += 1
-
-                # Batch commit every 10 documents (for ready status updates)
-                if indexed_count % 10 == 0:
-                    logger.info(
-                        f"Committing batch: {indexed_count} Obsidian notes processed so far"
-                    )
-                    await session.commit()
-
-            except Exception as e:
-                logger.exception(
-                    f"Error processing file {item.get('file_info', {}).get('path', 'unknown')}: {e}"
-                )
-                # Mark document as failed with reason (visible in UI)
-                try:
-                    document.status = DocumentStatus.failed(str(e))
-                    document.updated_at = get_current_timestamp()
-                except Exception as status_error:
-                    logger.error(
-                        f"Failed to update document status to failed: {status_error}"
-                    )
-                failed_count += 1
-                continue
-
-        # CRITICAL: Always update timestamp (even if 0 documents indexed) so Zero syncs
-        await update_connector_last_indexed(session, connector, update_last_indexed)
-
-        # Final commit for any remaining documents not yet committed in batches
-        logger.info(f"Final commit: Total {indexed_count} Obsidian notes processed")
-        try:
-            await session.commit()
-            logger.info(
-                "Successfully committed all Obsidian document changes to database"
-            )
-        except Exception as e:
-            # Handle any remaining integrity errors gracefully (race conditions, etc.)
-            if (
-                "duplicate key value violates unique constraint" in str(e).lower()
-                or "uniqueviolationerror" in str(e).lower()
-            ):
-                logger.warning(
-                    f"Duplicate content_hash detected during final commit. "
-                    f"This may occur if the same note was indexed by multiple connectors. "
-                    f"Rolling back and continuing. Error: {e!s}"
-                )
-                await session.rollback()
-                # Don't fail the entire task - some documents may have been successfully indexed
-            else:
-                raise
-
-        # Build warning message if there were issues
-        warning_parts = []
-        if duplicate_content_count > 0:
-            warning_parts.append(f"{duplicate_content_count} duplicate")
-        if failed_count > 0:
-            warning_parts.append(f"{failed_count} failed")
-        warning_message = ", ".join(warning_parts) if warning_parts else None
-
-        total_processed = indexed_count
-
-        await task_logger.log_task_success(
-            log_entry,
-            f"Successfully completed Obsidian vault indexing for connector {connector_id}",
-            {
-                "notes_processed": total_processed,
-                "documents_indexed": indexed_count,
-                "documents_skipped": skipped_count,
-                "documents_failed": failed_count,
-                "duplicate_content_count": duplicate_content_count,
-            },
-        )
-
-        logger.info(
-            f"Obsidian vault indexing completed: {indexed_count} ready, "
-            f"{skipped_count} skipped, {failed_count} failed "
-            f"({duplicate_content_count} duplicate content)"
-        )
-        return total_processed, warning_message
-
-    except SQLAlchemyError as e:
-        logger.exception(f"Database error during Obsidian indexing: {e}")
-        await session.rollback()
-        await task_logger.log_task_failure(
-            log_entry,
-            f"Database error during Obsidian indexing: {e}",
-            "Database error",
-            {"error_type": "SQLAlchemyError"},
-        )
-        return 0, f"Database error: {e}"
-
-    except Exception as e:
-        logger.exception(f"Error during Obsidian indexing: {e}")
-        await task_logger.log_task_failure(
-            log_entry,
-            f"Error during Obsidian indexing: {e}",
-            "Unexpected error",
-            {"error_type": type(e).__name__},
-        )
-        return 0, str(e)
--- a/surfsense_backend/app/utils/periodic_scheduler.py
+++ b/surfsense_backend/app/utils/periodic_scheduler.py
@ -34,7 +34,6 @@ CONNECTOR_TASK_MAP = {
    SearchSourceConnectorType.ELASTICSEARCH_CONNECTOR: "index_elasticsearch_documents",
    SearchSourceConnectorType.WEBCRAWLER_CONNECTOR: "index_crawled_urls",
    SearchSourceConnectorType.BOOKSTACK_CONNECTOR: "index_bookstack_pages",
-    SearchSourceConnectorType.OBSIDIAN_CONNECTOR: "index_obsidian_vault",
 }


@ -100,7 +99,6 @@ def create_periodic_schedule(
            index_linear_issues_task,
            index_luma_events_task,
            index_notion_pages_task,
-            index_obsidian_vault_task,
            index_slack_messages_task,
        )

@ -121,7 +119,6 @@ def create_periodic_schedule(
            SearchSourceConnectorType.ELASTICSEARCH_CONNECTOR: index_elasticsearch_documents_task,
            SearchSourceConnectorType.WEBCRAWLER_CONNECTOR: index_crawled_urls_task,
            SearchSourceConnectorType.BOOKSTACK_CONNECTOR: index_bookstack_pages_task,
-            SearchSourceConnectorType.OBSIDIAN_CONNECTOR: index_obsidian_vault_task,
        }

        # Trigger the first run immediately
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/obsidian-config.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/obsidian-config.tsx
@ -1,15 +1,11 @@
 "use client";

-import { AlertTriangle, Download, Info } from "lucide-react";
+import { Info } from "lucide-react";
 import { type FC, useEffect, useMemo, useState } from "react";
 import { Alert, AlertDescription, AlertTitle } from "@/components/ui/alert";
-import { Button } from "@/components/ui/button";
 import { connectorsApiService, type ObsidianStats } from "@/lib/apis/connectors-api.service";
 import type { ConnectorConfigProps } from "../index";

-const PLUGIN_RELEASES_URL =
-	"https://github.com/MODSetter/SurfSense/releases?q=obsidian&expanded=true";
-
 function formatTimestamp(value: unknown): string {
 	if (typeof value !== "string" || !value) return "—";
 	const d = new Date(value);
@ -26,78 +22,17 @@ function formatTimestamp(value: unknown): string {
 * web UI doesn't expose a Name input or a Save button for Obsidian (the
 * latter is suppressed in `connector-edit-view.tsx`).
 *
- * Renders one of three modes depending on the connector's `config`:
- *
- * 1. **Plugin connector** (`config.source === "plugin"`) — read-only stats
- *    panel showing what the plugin most recently reported.
- * 2. **Legacy server-path connector** (`config.legacy === true`, set by the
- *    Phase 3 alembic) — migration banner, an "Install Plugin" CTA, and a
- *    short "how to migrate" checklist that ends with the user pressing the
- *    standard Disconnect button (which deletes this connector along with
- *    every document it previously indexed).
- * 3. **Unknown** — fallback for rows that escaped the alembic; suggests a
- *    clean re-install.
+ * Renders plugin stats when connector metadata comes from the plugin.
+ * If metadata is missing or malformed, we show a recovery hint.
 */
 export const ObsidianConfig: FC<ConnectorConfigProps> = ({ connector }) => {
 	const config = (connector.config ?? {}) as Record<string, unknown>;
-	const isLegacy = config.legacy === true;
 	const isPlugin = config.source === "plugin";

-	if (isLegacy) return <LegacyBanner />;
 	if (isPlugin) return <PluginStats config={config} />;
 	return <UnknownConnectorState />;
 };

-const LegacyBanner: FC = () => {
-	return (
-		<div className="space-y-4">
-			<Alert className="border-amber-500/40 bg-amber-500/10">
-				<AlertTriangle className="size-4 shrink-0 text-amber-500" />
-				<AlertTitle className="text-xs sm:text-sm">
-					Sync stopped — install the plugin to migrate
-				</AlertTitle>
-				<AlertDescription className="text-[11px] sm:text-xs leading-relaxed">
-					This Obsidian connector used the legacy server-path scanner, which has been removed. The
-					notes already indexed remain searchable, but they no longer reflect changes made in your
-					vault.
-				</AlertDescription>
-			</Alert>
-
-			<a
-				href={PLUGIN_RELEASES_URL}
-				target="_blank"
-				rel="noopener noreferrer"
-				className="inline-flex"
-			>
-				<Button type="button" variant="outline" size="sm" className="gap-2">
-					<Download className="size-3.5" />
-					Install the plugin
-				</Button>
-			</a>
-
-			<div className="rounded-xl border border-border bg-slate-400/5 p-3 sm:p-6 dark:bg-white/5">
-				<h3 className="mb-3 text-sm font-medium sm:text-base">How to migrate</h3>
-				<ol className="list-decimal space-y-2 pl-5 text-[11px] leading-relaxed text-muted-foreground sm:text-xs">
-					<li>Install the SurfSense Obsidian plugin using the button above.</li>
-					<li>
-						In Obsidian, open Settings → SurfSense, sign in, pick a search space, and wait for the
-						first sync to finish.
-					</li>
-					<li>
-						Confirm the new "Obsidian — &lt;vault&gt;" connector shows your notes, then return here
-						and use the Disconnect button below to remove this legacy connector.
-					</li>
-				</ol>
-				<p className="mt-3 text-[11px] leading-relaxed text-amber-600 dark:text-amber-400 sm:text-xs">
-					Heads up: Disconnect also deletes every document this connector previously indexed. Make
-					sure the plugin has finished its first sync before you disconnect, otherwise your Obsidian
-					notes will disappear from search until the plugin re-indexes them.
-				</p>
-			</div>
-		</div>
-	);
-};
-
 const PluginStats: FC<{ config: Record<string, unknown> }> = ({ config }) => {
 	const vaultId = typeof config.vault_id === "string" ? config.vault_id : null;
 	const [stats, setStats] = useState<ObsidianStats | null>(null);
@ -179,8 +114,8 @@ const UnknownConnectorState: FC = () => (
 		<Info className="size-4 shrink-0" />
 		<AlertTitle className="text-xs sm:text-sm">Unrecognized config</AlertTitle>
 		<AlertDescription className="text-[11px] sm:text-xs">
-			This connector has neither plugin metadata nor a legacy marker. It may predate the migration —
-			you can safely delete it and re-install the SurfSense Obsidian plugin to resume syncing.
+			This connector is missing plugin metadata. Delete it, then reconnect your vault from the
+			SurfSense Obsidian plugin so sync can resume.
 		</AlertDescription>
 	</Alert>
 );
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-connect-view.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-connect-view.tsx
@ -111,7 +111,9 @@ export const ConnectorConnectView: FC<ConnectorConnectViewProps> = ({
 								: getConnectorTypeDisplay(connectorType)}
 						</h2>
 						<p className="text-xs sm:text-base text-muted-foreground mt-1">
-							Enter your connection details
+							{connectorType === "OBSIDIAN_CONNECTOR"
+								? "Follow the plugin setup steps below"
+								: "Enter your connection details"}
 						</p>
 					</div>
 				</div>
--- a/surfsense_web/content/docs/connectors/obsidian.mdx
+++ b/surfsense_web/content/docs/connectors/obsidian.mdx
@ -1,143 +1,60 @@
 ---
 title: Obsidian
-description: Connect your Obsidian vault to SurfSense
+description: Sync your Obsidian vault with the SurfSense plugin
 ---

-# Obsidian Integration Setup Guide
+# Obsidian Plugin Setup Guide

-This guide walks you through connecting your Obsidian vault to SurfSense for note search and AI-powered insights.
-
-<Callout type="warn">
-    This connector requires direct file system access and only works with self-hosted SurfSense installations.
-</Callout>
+SurfSense integrates with Obsidian through the SurfSense Obsidian plugin.
+The old server-side vault path scanner is no longer supported.

 ## How it works

-The Obsidian connector scans your local Obsidian vault directory and indexes all Markdown files. It preserves your note structure and extracts metadata from YAML frontmatter.
+The plugin runs inside your Obsidian app and pushes note updates to SurfSense over HTTPS.
+This works for both cloud and self-hosted deployments, and with desktop and mobile Obsidian clients.

- For follow-up indexing runs, the connector uses content hashing to skip unchanged files for faster sync.
- Indexing should be configured to run periodically, so updates should appear in your search results within minutes.
-
---
-
-## What Gets Indexed
+## What gets indexed

 | Content Type | Description |
 |--------------|-------------|
-| Markdown Files | All `.md` files in your vault |
-| Frontmatter | YAML metadata (title, tags, aliases, dates) |
-| Wiki Links | Links between notes (`[[note]]`) |
-| Inline Tags | Tags throughout your notes (`#tag`) |
-| Note Content | Full content with intelligent chunking |
+| Markdown files | Note content (`.md`) |
+| Frontmatter | YAML metadata like title, tags, aliases, dates |
+| Wiki links | Linked notes (`[[note]]`) |
+| Tags | Inline and frontmatter tags |
+| Vault metadata | Vault and path metadata used for deep links and sync state |

-<Callout type="warn">
-    Binary files and attachments are not indexed by default. Enable "Include Attachments" to index embedded files.
-</Callout>
+## Quick start

---
-
-## Quick Start (Local Installation)
-
-1. Navigate to **Connectors** → **Add Connector** → **Obsidian**
-2. Enter your vault path: `/Users/yourname/Documents/MyVault`
-3. Enter a vault name (e.g., `Personal Notes`)
-4. Click **Connect Obsidian**
+1. Open **Connectors** in SurfSense and choose **Obsidian**.
+2. Click **Open plugin releases** and install the latest SurfSense Obsidian plugin.
+3. In Obsidian, open **Settings → SurfSense**.
+4. Paste your SurfSense API token from the connector setup panel.
+5. Paste your SurfSense backend URL in the plugin's **Server URL** setting.
+6. Choose the Search Space in the plugin, then run the first sync.
+7. Confirm the connector appears as **Obsidian — &lt;vault&gt;** in SurfSense.

 <Callout type="info">
-    Find your vault path: In Obsidian, right-click any note → "Reveal in Finder" (macOS) or "Show in Explorer" (Windows).
+    You do not create or configure a vault path in the web UI. The connector row is created automatically when the plugin calls `/api/v1/obsidian/connect`.
 </Callout>

-<Callout type="info" title="Periodic Sync">
-Enable periodic sync to automatically re-index notes when content changes. Available frequencies: Every 5 minutes, 15 minutes, hourly, every 6 hours, daily, or weekly.
-</Callout>
+## Self-hosted notes

---
-
-## Docker Setup
-
-For Docker deployments, you need to mount your Obsidian vault as a volume.
-
-### Step 1: Update docker-compose.yml
-
-Add your vault as a volume mount to the SurfSense backend service:
-
-```yaml
-services:
-  surfsense:
-    # ... other config
-    volumes:
-      - /path/to/your/obsidian/vault:/app/obsidian_vaults/my-vault:ro
-```
-
-<Callout type="info">
-    The `:ro` flag mounts the vault as read-only, which is recommended for security.
-</Callout>
-
-### Step 2: Configure the Connector
-
-Use the **container path** (not your local path) when setting up the connector:
-
-| Your Local Path | Container Path (use this) |
-|-----------------|---------------------------|
-| `/Users/john/Documents/MyVault` | `/app/obsidian_vaults/my-vault` |
-| `C:\Users\john\Documents\MyVault` | `/app/obsidian_vaults/my-vault` |
-
-### Example: Multiple Vaults
-
-```yaml
-volumes:
-  - /Users/john/Documents/PersonalNotes:/app/obsidian_vaults/personal:ro
-  - /Users/john/Documents/WorkNotes:/app/obsidian_vaults/work:ro
-```
-
-Then create separate connectors for each vault using `/app/obsidian_vaults/personal` and `/app/obsidian_vaults/work`.
-
---
-
-## Connector Configuration
-
-| Field | Description | Required |
-|-------|-------------|----------|
-| **Connector Name** | A friendly name to identify this connector | Yes |
-| **Vault Path** | Absolute path to your vault (container path for Docker) | Yes |
-| **Vault Name** | Display name for your vault in search results | Yes |
-| **Exclude Folders** | Comma-separated folder names to skip | No |
-| **Include Attachments** | Index embedded files (images, PDFs) | No |
-
---
-
-## Recommended Exclusions
-
-Common folders to exclude from indexing:
-
-| Folder | Reason |
-|--------|--------|
-| `.obsidian` | Obsidian config files (always exclude) |
-| `.trash` | Obsidian's trash folder |
-| `templates` | Template files you don't want searchable |
-| `daily-notes` | If you want to exclude daily notes |
-| `attachments` | If not using "Include Attachments" |
-
-Default exclusions: `.obsidian,.trash`
-
---
+- Use a public or LAN backend URL that the device running Obsidian can reach.
+- No Docker bind mount for the vault is required.
+- If your instance is served over HTTPS, ensure its certificate matches the URL and is trusted by the device running Obsidian.

 ## Troubleshooting

-**Vault not found / Permission denied**
- Verify the path exists and is accessible
- For Docker: ensure the volume is mounted correctly in `docker-compose.yml`
- Check file permissions: SurfSense needs read access to the vault directory
+**Plugin connects but no files appear**
+- Verify the plugin is pointed to the correct Search Space.
+- Trigger a manual sync from the plugin settings.
+- Confirm your API token is valid and not expired.

-**No notes indexed**
- Ensure your vault contains `.md` files
- Check that notes aren't in excluded folders
- Verify the path points to the vault root (contains `.obsidian` folder)
+**Unauthorized / 401 errors**
+- Regenerate and paste a fresh API token from SurfSense.
+- Ensure the token belongs to the same account and Search Space you are syncing into.

-**Changes not appearing**
- Wait for the next sync cycle, or manually trigger re-indexing
- For Docker: restart the container if you modified volume mounts
-
-**Docker: "path not found" error**
- Use the container path (`/app/obsidian_vaults/...`), not your local path
- Verify the volume mount in `docker-compose.yml` matches
+**Cannot reach server URL**
+- Check that the backend URL is reachable from the Obsidian device.
+- For self-hosted setups, verify firewall and reverse proxy rules.
+- Avoid using localhost unless SurfSense and Obsidian run on the same machine.