"""Add Obsidian connector enums

Revision ID: 74_add_obsidian_connector
Revises: 73_add_user_memories_table
Create Date: 2026-01-21

"""

from collections.abc import Sequence

from alembic import op

# revision identifiers, used by Alembic.
revision: str = "74_add_obsidian_connector"
down_revision: str | None = "73_add_user_memories_table"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None


def upgrade() -> None:
    """Add the ``OBSIDIAN_CONNECTOR`` value to both connector-related enums.

    Each statement uses ``IF NOT EXISTS``, so running the migration against a
    database that already has the value is a no-op for that enum.
    """
    # Executed in order: the document type enum first, then the connector
    # type enum.
    statements = (
        "ALTER TYPE documenttype ADD VALUE IF NOT EXISTS 'OBSIDIAN_CONNECTOR'",
        "ALTER TYPE searchsourceconnectortype ADD VALUE IF NOT EXISTS 'OBSIDIAN_CONNECTOR'",
    )
    for statement in statements:
        op.execute(statement)


def downgrade() -> None:
    """Leave the schema untouched.

    PostgreSQL has no direct way to drop a single enum value, so the values
    added by :func:`upgrade` simply stay in place, unused.
    """
    return None
+ # cloud: Only cloud-based connectors available + DEPLOYMENT_MODE = os.getenv("SURFSENSE_DEPLOYMENT_MODE", "self-hosted") + + @classmethod + def is_self_hosted(cls) -> bool: + """Check if running in self-hosted mode.""" + return cls.DEPLOYMENT_MODE == "self-hosted" + + @classmethod + def is_cloud(cls) -> bool: + """Check if running in cloud mode.""" + return cls.DEPLOYMENT_MODE == "cloud" + # Database DATABASE_URL = os.getenv("DATABASE_URL") diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index c56d19355..723aaaf8b 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -53,6 +53,7 @@ class DocumentType(str, Enum): ELASTICSEARCH_CONNECTOR = "ELASTICSEARCH_CONNECTOR" BOOKSTACK_CONNECTOR = "BOOKSTACK_CONNECTOR" CIRCLEBACK = "CIRCLEBACK" + OBSIDIAN_CONNECTOR = "OBSIDIAN_CONNECTOR" NOTE = "NOTE" @@ -80,6 +81,7 @@ class SearchSourceConnectorType(str, Enum): WEBCRAWLER_CONNECTOR = "WEBCRAWLER_CONNECTOR" BOOKSTACK_CONNECTOR = "BOOKSTACK_CONNECTOR" CIRCLEBACK_CONNECTOR = "CIRCLEBACK_CONNECTOR" + OBSIDIAN_CONNECTOR = "OBSIDIAN_CONNECTOR" # Self-hosted only - Local Obsidian vault indexing MCP_CONNECTOR = "MCP_CONNECTOR" # Model Context Protocol - User-defined API tools diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py index f6319653f..4fb8698ec 100644 --- a/surfsense_backend/app/routes/search_source_connectors_routes.py +++ b/surfsense_backend/app/routes/search_source_connectors_routes.py @@ -868,6 +868,25 @@ async def index_connector_content( ) response_message = "Web page indexing started in the background." 
# Obsidian indexing helper that manages its own database session
async def run_obsidian_indexing_with_new_session(
    connector_id: int,
    search_space_id: int,
    user_id: str,
    start_date: str,
    end_date: str,
):
    """
    Run Obsidian indexing inside a database session opened for this call.

    Logs a start line, opens a session from ``async_session_maker``,
    delegates to ``run_obsidian_indexing`` and logs a finish line after the
    session block has exited.

    Args:
        connector_id: ID of the Obsidian connector
        search_space_id: ID of the search space
        user_id: ID of the user
        start_date: Start date for indexing
        end_date: End date for indexing
    """
    started_message = f"Background task started: Indexing Obsidian connector {connector_id} into space {search_space_id} from {start_date} to {end_date}"
    logger.info(started_message)

    async with async_session_maker() as db_session:
        await run_obsidian_indexing(
            session=db_session,
            connector_id=connector_id,
            search_space_id=search_space_id,
            user_id=user_id,
            start_date=start_date,
            end_date=end_date,
        )

    finished_message = (
        f"Background task finished: Indexing Obsidian connector {connector_id}"
    )
    logger.info(finished_message)
class ObsidianAuthCredentialsBase(BaseModel):
    """
    Credentials/configuration for the Obsidian connector.

    Since Obsidian vaults are local directories, this schema primarily
    holds the vault path and configuration options rather than API tokens.

    Fields:
        vault_path: Path of the vault; required, stored stripped of whitespace.
        vault_name: Optional display name of the vault.
        exclude_folders: Folder names to skip. Accepts a list or a
            comma-separated string; ``None`` (or omitting the field) yields
            ``[".trash", ".obsidian", "templates"]``.
        include_attachments: Attachment flag, ``False`` by default.
    """

    # Pydantic skips validators for default values unless told otherwise.
    # Without this, omitting ``exclude_folders`` left it as ``None`` while
    # ``from_dict`` (which passes ``None`` explicitly) produced the default
    # folder list. Validating defaults makes both construction paths agree.
    model_config = {"validate_default": True}

    vault_path: str
    vault_name: str | None = None
    exclude_folders: list[str] | None = None
    include_attachments: bool = False

    @field_validator("vault_path")
    @classmethod
    def validate_vault_path(cls, v: str) -> str:
        """Ensure vault path is provided and stripped of whitespace.

        Raises:
            ValueError: If the path is empty or whitespace only.
        """
        if not v or not v.strip():
            raise ValueError("Vault path is required")
        return v.strip()

    @field_validator("exclude_folders", mode="before")
    @classmethod
    def parse_exclude_folders(cls, v):
        """Normalise ``exclude_folders`` before type validation.

        ``None`` becomes the default folder list, a string is split on commas
        (blank entries dropped), anything else is passed through unchanged for
        Pydantic to validate.
        """
        if v is None:
            return [".trash", ".obsidian", "templates"]
        if isinstance(v, str):
            return [f.strip() for f in v.split(",") if f.strip()]
        return v

    def to_dict(self) -> dict:
        """Convert credentials to dictionary for storage."""
        return {
            "vault_path": self.vault_path,
            "vault_name": self.vault_name,
            "exclude_folders": self.exclude_folders,
            "include_attachments": self.include_attachments,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "ObsidianAuthCredentialsBase":
        """Create credentials from dictionary.

        Missing keys fall back to an empty path (rejected by validation),
        ``None`` for the name and folder list, and ``False`` for attachments.
        """
        return cls(
            vault_path=data.get("vault_path", ""),
            vault_name=data.get("vault_name"),
            exclude_folders=data.get("exclude_folders"),
            include_attachments=data.get("include_attachments", False),
        )
async def _index_obsidian_vault(
    connector_id: int,
    search_space_id: int,
    user_id: str,
    start_date: str,
    end_date: str,
):
    """
    Run Obsidian indexing inside a session from the Celery session maker.

    ``run_obsidian_indexing`` is imported inside the function body rather
    than at module import time.
    """
    from app.routes.search_source_connectors_routes import (
        run_obsidian_indexing,
    )

    # get_celery_session_maker() returns the factory; calling the factory
    # yields the async session context manager.
    session_factory = get_celery_session_maker()
    async with session_factory() as db_session:
        await run_obsidian_indexing(
            session=db_session,
            connector_id=connector_id,
            search_space_id=search_space_id,
            user_id=user_id,
            start_date=start_date,
            end_date=end_date,
        )
+""" + +import os +import re +from datetime import datetime, UTC +from pathlib import Path + +import yaml +from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy.ext.asyncio import AsyncSession + +from app.config import config +from app.db import Document, DocumentType, SearchSourceConnectorType +from app.services.llm_service import get_user_long_context_llm +from app.services.task_logging_service import TaskLoggingService +from app.utils.document_converters import ( + create_document_chunks, + generate_content_hash, + generate_document_summary, + generate_unique_identifier_hash, +) + +from .base import ( + build_document_metadata_string, + check_document_by_unique_identifier, + get_connector_by_id, + get_current_timestamp, + logger, + update_connector_last_indexed, +) + + +def parse_frontmatter(content: str) -> tuple[dict | None, str]: + """ + Parse YAML frontmatter from markdown content. + + Args: + content: The full markdown content + + Returns: + Tuple of (frontmatter dict or None, content without frontmatter) + """ + if not content.startswith("---"): + return None, content + + # Find the closing --- + end_match = re.search(r"\n---\n", content[3:]) + if not end_match: + return None, content + + frontmatter_str = content[3 : end_match.start() + 3] + remaining_content = content[end_match.end() + 3 :] + + try: + frontmatter = yaml.safe_load(frontmatter_str) + return frontmatter, remaining_content.strip() + except yaml.YAMLError: + return None, content + + +def extract_wiki_links(content: str) -> list[str]: + """ + Extract [[wiki-style links]] from content. + + Args: + content: Markdown content + + Returns: + List of linked note names + """ + # Match [[link]] or [[link|alias]] + pattern = r"\[\[([^\]|]+)(?:\|[^\]]+)?\]\]" + matches = re.findall(pattern, content) + return list(set(matches)) + + +def extract_tags(content: str) -> list[str]: + """ + Extract #tags from content (both inline and frontmatter). 
def _json_safe(value):
    """Recursively convert YAML-parsed values into JSON-friendly ones."""
    if value is None or isinstance(value, (str, int, float, bool)):
        return value
    if isinstance(value, dict):
        return {str(key): _json_safe(item) for key, item in value.items()}
    if isinstance(value, (list, tuple, set)):
        return [_json_safe(item) for item in value]
    # YAML turns unquoted dates/timestamps into date/datetime objects.
    isoformat = getattr(value, "isoformat", None)
    if callable(isoformat):
        return isoformat()
    return str(value)


def _merge_tags(inline_tags, frontmatter_tags) -> list[str]:
    """Combine inline and frontmatter tags into an ordered, duplicate-free list."""
    if isinstance(frontmatter_tags, str):
        # A single string may hold several comma-separated tags.
        candidates = frontmatter_tags.split(",")
    elif isinstance(frontmatter_tags, (list, tuple, set)):
        candidates = [str(tag) for tag in frontmatter_tags if tag is not None]
    else:
        candidates = []
    cleaned = (tag.strip().lstrip("#") for tag in candidates)
    return list(dict.fromkeys([*inline_tags, *(tag for tag in cleaned if tag)]))


async def index_obsidian_vault(
    session: AsyncSession,
    connector_id: int,
    search_space_id: int,
    user_id: str,
    start_date: str | None = None,
    end_date: str | None = None,
    update_last_indexed: bool = True,
) -> tuple[int, str | None]:
    """
    Index notes from a local Obsidian vault.

    This indexer is only available in self-hosted mode as it requires
    direct file system access to the user's Obsidian vault.

    Notes whose content hash matches the stored document are skipped. Files
    that cannot be decoded or processed are logged and counted as skipped;
    database errors abort the run and are reported as a failure.

    Args:
        session: Database session
        connector_id: ID of the Obsidian connector
        search_space_id: ID of the search space to store documents in
        user_id: ID of the user
        start_date: Start date for filtering (YYYY-MM-DD format) - optional
        end_date: End date for filtering (YYYY-MM-DD format, inclusive) - optional
        update_last_indexed: Whether to update the last_indexed_at timestamp

    Returns:
        Tuple containing (number of documents indexed, error message or None)
    """
    task_logger = TaskLoggingService(session, search_space_id)

    # Check if self-hosted mode
    if not config.is_self_hosted():
        return 0, "Obsidian connector is only available in self-hosted mode"

    # Log task start
    log_entry = await task_logger.log_task_start(
        task_name="obsidian_vault_indexing",
        source="connector_indexing_task",
        message=f"Starting Obsidian vault indexing for connector {connector_id}",
        metadata={
            "connector_id": connector_id,
            "user_id": str(user_id),
            "start_date": start_date,
            "end_date": end_date,
        },
    )

    try:
        # Get the connector
        await task_logger.log_task_progress(
            log_entry,
            f"Retrieving Obsidian connector {connector_id} from database",
            {"stage": "connector_retrieval"},
        )

        connector = await get_connector_by_id(
            session, connector_id, SearchSourceConnectorType.OBSIDIAN_CONNECTOR
        )

        if not connector:
            await task_logger.log_task_failure(
                log_entry,
                f"Connector with ID {connector_id} not found or is not an Obsidian connector",
                "Connector not found",
                {"error_type": "ConnectorNotFound"},
            )
            return (
                0,
                f"Connector with ID {connector_id} not found or is not an Obsidian connector",
            )

        # Get vault path from connector config
        vault_path = connector.config.get("vault_path")
        if not vault_path:
            await task_logger.log_task_failure(
                log_entry,
                "Vault path not configured for this connector",
                "Missing vault path",
                {"error_type": "MissingVaultPath"},
            )
            return 0, "Vault path not configured for this connector"

        # Validate vault path exists
        if not os.path.exists(vault_path):
            await task_logger.log_task_failure(
                log_entry,
                f"Vault path does not exist: {vault_path}",
                "Vault path not found",
                {"error_type": "VaultNotFound", "vault_path": vault_path},
            )
            return 0, f"Vault path does not exist: {vault_path}"

        # Get configuration options
        exclude_folders = connector.config.get(
            "exclude_folders", [".trash", ".obsidian", "templates"]
        )
        # normpath drops a trailing slash first; basename("/vaults/notes/")
        # alone would be "" and produce an empty vault name.
        vault_name = connector.config.get("vault_name") or os.path.basename(
            os.path.normpath(vault_path)
        )

        await task_logger.log_task_progress(
            log_entry,
            f"Scanning Obsidian vault: {vault_name}",
            {"stage": "vault_scan", "vault_path": vault_path},
        )

        # Scan vault for markdown files
        try:
            files = scan_vault(vault_path, exclude_folders)
        except Exception as e:
            await task_logger.log_task_failure(
                log_entry,
                f"Failed to scan vault: {e}",
                "Vault scan error",
                {"error_type": "VaultScanError"},
            )
            return 0, f"Failed to scan vault: {e}"

        logger.info(f"Found {len(files)} markdown files in vault")

        await task_logger.log_task_progress(
            log_entry,
            f"Found {len(files)} markdown files to process",
            {"stage": "files_discovered", "file_count": len(files)},
        )

        # Filter by date if provided
        if start_date:
            start_dt = datetime.strptime(start_date, "%Y-%m-%d").replace(tzinfo=UTC)
            files = [f for f in files if f["modified_at"] >= start_dt]

        if end_date:
            # The end date is inclusive: compare against the last microsecond
            # of that day. Comparing against midnight would drop every note
            # modified on the end date itself.
            end_dt = datetime.strptime(end_date, "%Y-%m-%d").replace(
                hour=23, minute=59, second=59, microsecond=999999, tzinfo=UTC
            )
            files = [f for f in files if f["modified_at"] <= end_dt]

        # Get LLM for summarization
        long_context_llm = await get_user_long_context_llm(session, user_id)

        indexed_count = 0
        skipped_count = 0

        for file_info in files:
            try:
                file_path = file_info["path"]
                relative_path = file_info["relative_path"]

                # Read file content
                try:
                    with open(file_path, encoding="utf-8") as f:
                        content = f.read()
                except UnicodeDecodeError:
                    logger.warning(f"Could not decode file {file_path}, skipping")
                    skipped_count += 1
                    continue

                if not content.strip():
                    logger.debug(f"Empty file {file_path}, skipping")
                    skipped_count += 1
                    continue

                # Parse frontmatter and extract metadata
                frontmatter, body_content = parse_frontmatter(content)
                if not isinstance(frontmatter, dict):
                    # Only a mapping is usable as metadata; anything else
                    # (scalar, list) would break the .get() lookups below.
                    frontmatter = None
                wiki_links = extract_wiki_links(content)
                tags = extract_tags(content)

                # Get title from frontmatter or filename
                title = file_info["name"]
                if frontmatter:
                    # YAML may yield None or a non-string (e.g. a number) for
                    # the title; fall back to the filename when it is blank.
                    raw_title = frontmatter.get("title")
                    if raw_title is not None and str(raw_title).strip():
                        title = str(raw_title).strip()
                # Merge inline and frontmatter tags in a stable order
                tags = _merge_tags(
                    tags, frontmatter.get("tags") if frontmatter else None
                )

                # Generate unique identifier using vault name and relative path
                unique_identifier = f"{vault_name}:{relative_path}"
                unique_identifier_hash = generate_unique_identifier_hash(
                    DocumentType.OBSIDIAN_CONNECTOR,
                    unique_identifier,
                    search_space_id,
                )

                # Check for existing document
                existing_document = await check_document_by_unique_identifier(
                    session, unique_identifier_hash
                )

                # Generate content hash
                content_hash = generate_content_hash(content)

                # Build metadata
                # NOTE(review): frontmatter is normalised to JSON-friendly
                # values on the assumption that document_metadata is stored
                # as JSON - confirm against the Document model.
                document_metadata = {
                    "vault_name": vault_name,
                    "file_path": relative_path,
                    "tags": tags,
                    "outgoing_links": wiki_links,
                    "frontmatter": _json_safe(frontmatter),
                    "modified_at": file_info["modified_at"].isoformat(),
                    "created_at": file_info["created_at"].isoformat(),
                    "word_count": len(body_content.split()),
                }

                # Build document content with metadata
                metadata_sections = [
                    (
                        "METADATA",
                        [
                            f"Title: {title}",
                            f"Vault: {vault_name}",
                            f"Path: {relative_path}",
                            f"Tags: {', '.join(tags) if tags else 'None'}",
                            f"Links to: {', '.join(wiki_links) if wiki_links else 'None'}",
                        ],
                    ),
                    ("CONTENT", [body_content]),
                ]
                document_string = build_document_metadata_string(metadata_sections)

                if existing_document:
                    # Check if content has changed
                    if existing_document.content_hash == content_hash:
                        logger.debug(f"Note {title} unchanged, skipping")
                        skipped_count += 1
                        continue

                    # Update existing document
                    logger.info(f"Updating note: {title}")

                    # Generate new summary if content changed
                    if long_context_llm:
                        new_summary = await generate_document_summary(
                            content=document_string,
                            llm=long_context_llm,
                        )
                        existing_document.summary = new_summary

                    # Keep the stored title in sync with the note (it may
                    # have changed in the frontmatter).
                    existing_document.title = title
                    existing_document.content = document_string
                    existing_document.content_hash = content_hash
                    existing_document.document_metadata = document_metadata
                    existing_document.updated_at = get_current_timestamp()

                    # Update embedding
                    embedding = config.embedding_model_instance.embed(document_string)
                    existing_document.embedding = embedding

                    # Update chunks
                    await create_document_chunks(
                        session=session,
                        document=existing_document,
                        content=document_string,
                        chunker=config.chunker_instance,
                        embedding_model=config.embedding_model_instance,
                    )

                    indexed_count += 1

                else:
                    # Create new document
                    logger.info(f"Indexing new note: {title}")

                    # Generate summary
                    summary = ""
                    if long_context_llm:
                        summary = await generate_document_summary(
                            content=document_string,
                            llm=long_context_llm,
                        )

                    # Generate embedding
                    embedding = config.embedding_model_instance.embed(document_string)

                    # Create document
                    new_document = Document(
                        search_space_id=search_space_id,
                        title=title,
                        url=f"obsidian://{vault_name}/{relative_path}",
                        document_type=DocumentType.OBSIDIAN_CONNECTOR,
                        content=document_string,
                        content_hash=content_hash,
                        unique_identifier_hash=unique_identifier_hash,
                        document_metadata=document_metadata,
                        summary=summary,
                        embedding=embedding,
                        connector_id=connector_id,
                    )

                    session.add(new_document)
                    await session.flush()

                    # Create chunks
                    await create_document_chunks(
                        session=session,
                        document=new_document,
                        content=document_string,
                        chunker=config.chunker_instance,
                        embedding_model=config.embedding_model_instance,
                    )

                    indexed_count += 1

            except SQLAlchemyError:
                # A database error leaves the session unusable until it is
                # rolled back, so carrying on would only fail again for every
                # remaining file. Hand it to the outer handler, which rolls
                # back and records the failure.
                raise
            except Exception as e:
                logger.exception(
                    f"Error processing file {file_info.get('path', 'unknown')}: {e}"
                )
                skipped_count += 1
                continue

        # Update connector's last indexed timestamp
        await update_connector_last_indexed(session, connector, update_last_indexed)

        # Commit all changes
        await session.commit()

        await task_logger.log_task_success(
            log_entry,
            f"Successfully indexed {indexed_count} Obsidian notes (skipped {skipped_count})",
            {
                "indexed_count": indexed_count,
                "skipped_count": skipped_count,
                "total_files": len(files),
            },
        )

        return indexed_count, None

    except SQLAlchemyError as e:
        logger.exception(f"Database error during Obsidian indexing: {e}")
        await session.rollback()
        await task_logger.log_task_failure(
            log_entry,
            f"Database error during Obsidian indexing: {e}",
            "Database error",
            {"error_type": "SQLAlchemyError"},
        )
        return 0, f"Database error: {e}"

    except Exception as e:
        logger.exception(f"Error during Obsidian indexing: {e}")
        await task_logger.log_task_failure(
            log_entry,
            f"Error during Obsidian indexing: {e}",
            "Unexpected error",
            {"error_type": type(e).__name__},
        )
        return 0, str(e)
{ Alert, AlertDescription, AlertTitle } from "@/components/ui/alert"; +import { + Form, + FormControl, + FormDescription, + FormField, + FormItem, + FormLabel, + FormMessage, +} from "@/components/ui/form"; +import { Input } from "@/components/ui/input"; +import { Label } from "@/components/ui/label"; +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from "@/components/ui/select"; +import { Switch } from "@/components/ui/switch"; +import { EnumConnectorName } from "@/contracts/enums/connector"; +import { getConnectorBenefits } from "../connector-benefits"; +import type { ConnectFormProps } from "../index"; + +const obsidianConnectorFormSchema = z.object({ + name: z.string().min(3, { + message: "Connector name must be at least 3 characters.", + }), + vault_path: z.string().min(1, { + message: "Vault path is required.", + }), + vault_name: z.string().min(1, { + message: "Vault name is required.", + }), + exclude_folders: z.string().optional(), + include_attachments: z.boolean(), +}); + +type ObsidianConnectorFormValues = z.infer; + +export const ObsidianConnectForm: FC = ({ onSubmit, isSubmitting }) => { + const isSubmittingRef = useRef(false); + const [periodicEnabled, setPeriodicEnabled] = useState(true); + const [frequencyMinutes, setFrequencyMinutes] = useState("60"); + const form = useForm({ + resolver: zodResolver(obsidianConnectorFormSchema), + defaultValues: { + name: "Obsidian Vault", + vault_path: "", + vault_name: "", + exclude_folders: ".obsidian,.trash", + include_attachments: false, + }, + }); + + const handleSubmit = async (values: ObsidianConnectorFormValues) => { + // Prevent multiple submissions + if (isSubmittingRef.current || isSubmitting) { + return; + } + + isSubmittingRef.current = true; + try { + // Parse exclude_folders into an array + const excludeFolders = values.exclude_folders + ? 
values.exclude_folders + .split(",") + .map((f) => f.trim()) + .filter(Boolean) + : [".obsidian", ".trash"]; + + await onSubmit({ + name: values.name, + connector_type: EnumConnectorName.OBSIDIAN_CONNECTOR, + config: { + vault_path: values.vault_path, + vault_name: values.vault_name, + exclude_folders: excludeFolders, + include_attachments: values.include_attachments, + }, + is_indexable: true, + is_active: true, + last_indexed_at: null, + periodic_indexing_enabled: periodicEnabled, + indexing_frequency_minutes: periodicEnabled ? Number.parseInt(frequencyMinutes, 10) : null, + next_scheduled_at: null, + periodicEnabled, + frequencyMinutes, + }); + } finally { + isSubmittingRef.current = false; + } + }; + + return ( +
+ + +
+ Self-Hosted Only + + This connector requires direct file system access and only works with self-hosted + SurfSense installations. + +
+
+ +
+
+ + ( + + Connector Name + + + + + A friendly name to identify this connector. + + + + )} + /> + + ( + + Vault Path + + + + + The absolute path to your Obsidian vault on the server. This must be accessible + from the SurfSense backend. + + + + )} + /> + + ( + + Vault Name + + + + + A display name for your vault. This will be used in search results. + + + + )} + /> + + ( + + Exclude Folders + + + + + Comma-separated list of folder names to exclude from indexing. + + + + )} + /> + + ( + +
+ Include Attachments + + Index attachment folders and embedded files (images, PDFs, etc.) + +
+ + + +
+ )} + /> + + {/* Indexing Configuration */} +
+

Indexing Configuration

+ + {/* Periodic Sync Config */} +
+
+
+

Enable Periodic Sync

+

+ Automatically re-index at regular intervals +

+
+ +
+ + {periodicEnabled && ( +
+
+ + +
+
+ )} +
+
+ + +
+ + {/* What you get section */} + {getConnectorBenefits(EnumConnectorName.OBSIDIAN_CONNECTOR) && ( +
+

+ What you get with Obsidian integration: +

+
    + {getConnectorBenefits(EnumConnectorName.OBSIDIAN_CONNECTOR)?.map((benefit) => ( +
  • {benefit}
  • + ))} +
+
+ )} + + {/* Documentation Section */} + + + + Documentation + + +
+

How it works

+

+ The Obsidian connector scans your local Obsidian vault directory and indexes all + Markdown files. It preserves your note structure and extracts metadata from YAML + frontmatter. +

+
    +
  • + The connector parses frontmatter metadata (title, tags, aliases, dates, etc.) +
  • +
  • Wiki-style links ([[note]]) are extracted and preserved
  • +
  • Inline tags (#tag) are recognized and indexed
  • +
  • Content is chunked intelligently for optimal search results
  • +
  • + Subsequent indexing runs use content hashing to skip unchanged files for faster + sync +
  • +
+
+ +
+
+

Setup

+ + + + File System Access Required + + + The SurfSense backend must have read access to your Obsidian vault directory. For + Docker deployments, mount your vault as a volume. + + + +
+
+

+ Step 1: Locate your vault +

+
    +
  1. Open Obsidian and go to Settings → About
  2. +
  3. Look for "Vault path" to find the location
  4. +
  5. Or right-click any note and select "Reveal in Finder/Explorer"
  6. +
+
+ +
+

+ Step 2: Mount vault for Docker +

+

+ If running SurfSense in Docker, add a volume mount: +

+
+											{`volumes:
+  - /path/to/your/vault:/app/obsidian_vaults/my-vault:ro`}
+										
+

+ Then use /app/obsidian_vaults/my-vault as your vault path. +

+
+ +
+

+ Step 3: Configure exclusions +

+

+ Common folders to exclude: +

+
    +
  • + .obsidian - Obsidian config (always recommended) +
  • +
  • + .trash - Obsidian's trash folder +
  • +
  • + templates - If you have a templates folder +
  • +
  • + daily-notes - If you want to exclude daily notes +
  • +
+
+
+
+
+ +
+
+

What Gets Indexed

+ + + Indexed Content + +

The Obsidian connector indexes:

+
    +
  • All Markdown files (.md) in your vault
  • +
  • YAML frontmatter metadata (title, tags, aliases, dates)
  • +
  • Wiki-style links between notes
  • +
  • Inline tags throughout your notes
  • +
  • Full note content with proper chunking
  • +
+
+
+
+
+
+
+
+
+ ); +}; diff --git a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/connector-benefits.ts b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/connector-benefits.ts index 9a74132ed..392de4bc8 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/connector-benefits.ts +++ b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/connector-benefits.ts @@ -108,6 +108,14 @@ export function getConnectorBenefits(connectorType: string): string[] | null { "Real-time updates via webhook integration", "No manual indexing required - meetings are added automatically", ], + OBSIDIAN_CONNECTOR: [ + "Search through all your Obsidian notes and knowledge base", + "Access note content with YAML frontmatter metadata preserved", + "Wiki-style links ([[note]]) and #tags are indexed", + "Connect your personal knowledge base directly to your search space", + "Incremental sync - only changed files are re-indexed", + "Full support for your vault's folder structure", + ], }; return benefits[connectorType] || null; diff --git a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/index.tsx b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/index.tsx index 4710f9ad5..ffaeb1478 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/index.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/index.tsx @@ -7,6 +7,7 @@ import { GithubConnectForm } from "./components/github-connect-form"; import { LinkupApiConnectForm } from "./components/linkup-api-connect-form"; import { LumaConnectForm } from "./components/luma-connect-form"; import { MCPConnectForm } from "./components/mcp-connect-form"; +import { ObsidianConnectForm } from "./components/obsidian-connect-form"; import { SearxngConnectForm } from "./components/searxng-connect-form"; import { TavilyApiConnectForm } from "./components/tavily-api-connect-form"; @@ -58,6 +59,8 @@ export 
function getConnectFormComponent(connectorType: string): ConnectFormCompo return CirclebackConnectForm; case "MCP_CONNECTOR": return MCPConnectForm; + case "OBSIDIAN_CONNECTOR": + return ObsidianConnectForm; // Add other connector types here as needed default: return null; diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/obsidian-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/obsidian-config.tsx new file mode 100644 index 000000000..f79f2953a --- /dev/null +++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/obsidian-config.tsx @@ -0,0 +1,191 @@ +"use client"; + +import { FolderOpen } from "lucide-react"; +import type { FC } from "react"; +import { useEffect, useState } from "react"; +import { Input } from "@/components/ui/input"; +import { Label } from "@/components/ui/label"; +import { Switch } from "@/components/ui/switch"; +import type { ConnectorConfigProps } from "../index"; + +/** Props for the Obsidian connector config form: the shared ConnectorConfigProps plus an optional callback invoked with the new value when the connector name field is edited. */ export interface ObsidianConfigProps extends ConnectorConfigProps { + onNameChange?: (name: string) => void; +} + +/** Editable settings for an Obsidian connector. Mirrors connector.name and four connector.config keys (vault_path, vault_name, exclude_folders, include_attachments) in local state; every edit updates that state and, when the matching callback was supplied, reports it through onConfigChange or onNameChange. */ export const ObsidianConfig: FC = ({ + connector, + onConfigChange, + onNameChange, +}) => { + const [vaultPath, setVaultPath] = useState( + (connector.config?.vault_path as string) || "" + ); + const [vaultName, setVaultName] = useState( + (connector.config?.vault_name as string) || "" + ); + /* exclude_folders is edited as one comma-separated string: an array is joined with ", ", any other truthy value is used as-is, and ".obsidian, .trash" is the fallback. */ const [excludeFolders, setExcludeFolders] = useState(() => { + const folders = connector.config?.exclude_folders; + if (Array.isArray(folders)) { + return folders.join(", "); + } + return (folders as string) || ".obsidian, .trash"; + }); + const [includeAttachments, setIncludeAttachments] = useState( + (connector.config?.include_attachments as boolean) || false + ); + const [name, setName] = useState(connector.name || ""); + + // Update values when connector changes + useEffect(() => { + const path = (connector.config?.vault_path as string) || ""; +
const vName = (connector.config?.vault_name as string) || ""; + const folders = connector.config?.exclude_folders; + const attachments = (connector.config?.include_attachments as boolean) || false; + + setVaultPath(path); + setVaultName(vName); + setIncludeAttachments(attachments); + setName(connector.name || ""); + + /* Only overwrite the exclude-folders text when the incoming value is an array or a string; for anything else (e.g. undefined) the current text is kept. */ if (Array.isArray(folders)) { + setExcludeFolders(folders.join(", ")); + } else if (typeof folders === "string") { + setExcludeFolders(folders); + } + }, [connector.config, connector.name]); + + /* The four config handlers below share one shape: store the new value locally, then, if onConfigChange was provided, call it with connector.config spread plus the single changed key. NOTE(review): the spread reads connector.config from props, so successive edits accumulate only if the parent passes the updated config back in; confirm against the caller. */ const handleVaultPathChange = (value: string) => { + setVaultPath(value); + if (onConfigChange) { + onConfigChange({ + ...connector.config, + vault_path: value, + }); + } + }; + + const handleVaultNameChange = (value: string) => { + setVaultName(value); + if (onConfigChange) { + onConfigChange({ + ...connector.config, + vault_name: value, + }); + } + }; + + /* Keeps the raw text in local state but reports an array: split on commas, trim each entry, drop empty ones. NOTE(review): if the parent feeds that array back through connector.config, the sync effect in this component re-joins it and may rewrite what the user is typing (e.g. a trailing comma); verify. */ const handleExcludeFoldersChange = (value: string) => { + setExcludeFolders(value); + const foldersArray = value + .split(",") + .map((f) => f.trim()) + .filter(Boolean); + if (onConfigChange) { + onConfigChange({ + ...connector.config, + exclude_folders: foldersArray, + }); + } + }; + + const handleIncludeAttachmentsChange = (value: boolean) => { + setIncludeAttachments(value); + if (onConfigChange) { + onConfigChange({ + ...connector.config, + include_attachments: value, + }); + } + }; + + /* The name is not written into config; it is reported through the optional onNameChange callback instead. */ const handleNameChange = (value: string) => { + setName(value); + if (onNameChange) { + onNameChange(value); + } + }; + + return ( +
+ {/* Connector Name */} +
+
+ + handleNameChange(e.target.value)} + placeholder="My Obsidian Vault" + className="border-slate-400/20 focus-visible:border-slate-400/40" + /> +

+ A friendly name to identify this connector. +

+
+
+ + {/* Configuration */} +
+
+

+ + Vault Configuration +

+
+ +
+
+ + handleVaultPathChange(e.target.value)} + placeholder="/path/to/your/obsidian/vault" + className="border-slate-400/20 focus-visible:border-slate-400/40 font-mono" + /> +

+ The absolute path to your Obsidian vault on the server. +

+
+ +
+ + handleVaultNameChange(e.target.value)} + placeholder="My Knowledge Base" + className="border-slate-400/20 focus-visible:border-slate-400/40" + /> +

+ A display name for your vault in search results. +

+
+ +
+ + handleExcludeFoldersChange(e.target.value)} + placeholder=".obsidian, .trash, templates" + className="border-slate-400/20 focus-visible:border-slate-400/40 font-mono" + /> +

+ Comma-separated list of folder names to exclude from indexing. +

+
+ +
+
+ +

+ Index attachment folders and embedded files +

+
+ +
+
+
+
+ ); +}; diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/index.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/index.tsx index b493ce746..359343863 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/index.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/index.tsx @@ -15,6 +15,7 @@ import { JiraConfig } from "./components/jira-config"; import { LinkupApiConfig } from "./components/linkup-api-config"; import { LumaConfig } from "./components/luma-config"; import { MCPConfig } from "./components/mcp-config"; +import { ObsidianConfig } from "./components/obsidian-config"; import { SearxngConfig } from "./components/searxng-config"; import { SlackConfig } from "./components/slack-config"; import { TavilyApiConfig } from "./components/tavily-api-config"; @@ -73,6 +74,8 @@ export function getConnectorConfigComponent( return CirclebackConfig; case "MCP_CONNECTOR": return MCPConfig; + case "OBSIDIAN_CONNECTOR": + return ObsidianConfig; // OAuth connectors (Gmail, Calendar, Airtable, Notion) and others don't need special config UI default: return null; diff --git a/surfsense_web/components/assistant-ui/connector-popup/constants/connector-constants.ts b/surfsense_web/components/assistant-ui/connector-popup/constants/connector-constants.ts index 1ac3549df..e145a3d55 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/constants/connector-constants.ts +++ b/surfsense_web/components/assistant-ui/connector-popup/constants/connector-constants.ts @@ -166,6 +166,13 @@ export const OTHER_CONNECTORS = [ description: "Connect to MCP servers for AI tools", connectorType: EnumConnectorName.MCP_CONNECTOR, }, + { + id: "obsidian-connector", + title: "Obsidian", + description: "Index your Obsidian vault (self-hosted only)", + connectorType: EnumConnectorName.OBSIDIAN_CONNECTOR, + selfHostedOnly: true, + }, ] as const; // Re-export 
IndexingConfigState from schemas for backward compatibility diff --git a/surfsense_web/contracts/enums/connector.ts b/surfsense_web/contracts/enums/connector.ts index 7fe170eef..cd0703277 100644 --- a/surfsense_web/contracts/enums/connector.ts +++ b/surfsense_web/contracts/enums/connector.ts @@ -23,5 +23,6 @@ export enum EnumConnectorName { WEBCRAWLER_CONNECTOR = "WEBCRAWLER_CONNECTOR", YOUTUBE_CONNECTOR = "YOUTUBE_CONNECTOR", CIRCLEBACK_CONNECTOR = "CIRCLEBACK_CONNECTOR", + OBSIDIAN_CONNECTOR = "OBSIDIAN_CONNECTOR", MCP_CONNECTOR = "MCP_CONNECTOR", } diff --git a/surfsense_web/contracts/enums/connectorIcons.tsx b/surfsense_web/contracts/enums/connectorIcons.tsx index 9350b6a1e..f19f65162 100644 --- a/surfsense_web/contracts/enums/connectorIcons.tsx +++ b/surfsense_web/contracts/enums/connectorIcons.tsx @@ -66,6 +66,8 @@ export const getConnectorIcon = (connectorType: EnumConnectorName | string, clas return ; case EnumConnectorName.MCP_CONNECTOR: return MCP; + case EnumConnectorName.OBSIDIAN_CONNECTOR: + return Obsidian; // Additional cases for non-enum connector types case "YOUTUBE_CONNECTOR": return YouTube; diff --git a/surfsense_web/public/connectors/obsidian.svg b/surfsense_web/public/connectors/obsidian.svg new file mode 100644 index 000000000..9fe15c4a3 --- /dev/null +++ b/surfsense_web/public/connectors/obsidian.svg @@ -0,0 +1,12 @@ + + + + + + + + + + + +