diff --git a/surfsense_backend/alembic/versions/74_add_obsidian_connector.py b/surfsense_backend/alembic/versions/74_add_obsidian_connector.py index b34cd07a0..ea2b0c46e 100644 --- a/surfsense_backend/alembic/versions/74_add_obsidian_connector.py +++ b/surfsense_backend/alembic/versions/74_add_obsidian_connector.py @@ -1,7 +1,7 @@ """Add Obsidian connector enums Revision ID: 74_add_obsidian_connector -Revises: 73_add_user_memories_table +Revises: 73 Create Date: 2026-01-21 """ @@ -12,7 +12,7 @@ from alembic import op # revision identifiers, used by Alembic. revision: str = "74_add_obsidian_connector" -down_revision: str | None = "73_add_user_memories_table" +down_revision: str | None = "73" branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None diff --git a/surfsense_backend/app/agents/new_chat/tools/knowledge_base.py b/surfsense_backend/app/agents/new_chat/tools/knowledge_base.py index e91d865fa..84a0fdb3c 100644 --- a/surfsense_backend/app/agents/new_chat/tools/knowledge_base.py +++ b/surfsense_backend/app/agents/new_chat/tools/knowledge_base.py @@ -49,6 +49,7 @@ _ALL_CONNECTORS: list[str] = [ "BOOKSTACK_CONNECTOR", "CRAWLED_URL", "CIRCLEBACK", + "OBSIDIAN_CONNECTOR", ] @@ -508,6 +509,16 @@ async def search_knowledge_base_async( ) all_documents.extend(chunks) + elif connector == "OBSIDIAN_CONNECTOR": + _, chunks = await connector_service.search_obsidian( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + except Exception as e: print(f"Error searching connector {connector}: {e}") continue diff --git a/surfsense_backend/app/services/connector_service.py b/surfsense_backend/app/services/connector_service.py index 832aee4cc..dc43697e7 100644 --- a/surfsense_backend/app/services/connector_service.py +++ b/surfsense_backend/app/services/connector_service.py @@ -2780,3 +2780,94 @@ class ConnectorService: } return result_object, circleback_docs + + async def search_obsidian( + self, + user_query: str, + search_space_id: int, + top_k: int = 20, + start_date: datetime | None = None, + end_date: datetime | None = None, + ) -> tuple: + """ + Search for Obsidian vault notes and return both the source information and langchain documents. + + Uses combined chunk-level and document-level hybrid search with RRF fusion. + + Args: + user_query: The user's query + search_space_id: The search space ID to search in + top_k: Maximum number of results to return + start_date: Optional start date for filtering documents by updated_at + end_date: Optional end date for filtering documents by updated_at + + Returns: + tuple: (sources_info, langchain_documents) + """ + obsidian_docs = await self._combined_rrf_search( + query_text=user_query, + search_space_id=search_space_id, + document_type="OBSIDIAN_CONNECTOR", + top_k=top_k, + start_date=start_date, + end_date=end_date, + ) + + # Early return if no results + if not obsidian_docs: + return { + "id": 53, + "name": "Obsidian Vault", + "type": "OBSIDIAN_CONNECTOR", + "sources": [], + }, [] + + def _title_fn(doc_info: dict[str, Any], metadata: dict[str, Any]) -> str: + return doc_info.get("title", "Untitled Note") + + def _url_fn(doc_info: dict[str, Any], metadata: dict[str, Any]) -> str: + # Obsidian URL format: obsidian://vault_name/path + return doc_info.get("url", "") + + def _description_fn( + chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any] + ) -> str: + description = self._chunk_preview(chunk.get("content", ""), limit=200) + info_parts = [] + vault_name = metadata.get("vault_name") + tags = metadata.get("tags", []) + if vault_name: + info_parts.append(f"Vault: {vault_name}") + if tags and isinstance(tags, list) and len(tags) > 0: + info_parts.append(f"Tags: {', '.join(tags[:3])}") + if info_parts: + description = (description + " | " + " | ".join(info_parts)).strip(" |") + return description + + def _extra_fields_fn( + _chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any] + ) -> dict[str, Any]: + return { + "vault_name": metadata.get("vault_name", ""), + "file_path": metadata.get("file_path", ""), + "tags": metadata.get("tags", []), + "outgoing_links": metadata.get("outgoing_links", []), + } + + sources_list = self._build_chunk_sources_from_documents( + obsidian_docs, + title_fn=_title_fn, + url_fn=_url_fn, + description_fn=_description_fn, + extra_fields_fn=_extra_fields_fn, + ) + + # Create result object + result_object = { + "id": 53, + "name": "Obsidian Vault", + "type": "OBSIDIAN_CONNECTOR", + "sources": sources_list, + } + + return result_object, obsidian_docs diff --git a/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py b/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py index 15c88938a..4c4dab4c2 100644 --- a/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/obsidian_indexer.py @@ -7,7 +7,7 @@ This connector is only available in self-hosted mode. import os import re -from datetime import datetime, UTC +from datetime import UTC, datetime from pathlib import Path import yaml @@ -266,17 +266,40 @@ async def index_obsidian_vault( {"stage": "files_discovered", "file_count": len(files)}, ) - # Filter by date if provided - if start_date: - start_dt = datetime.strptime(start_date, "%Y-%m-%d").replace(tzinfo=UTC) - files = [f for f in files if f["modified_at"] >= start_dt] + # Filter by date if provided (handle "undefined" string from frontend) + # Also handle inverted dates (start > end) by skipping filtering + start_dt = None + end_dt = None - if end_date: + if start_date and start_date != "undefined": + start_dt = datetime.strptime(start_date, "%Y-%m-%d").replace(tzinfo=UTC) + + if end_date and end_date != "undefined": + # Make end_date inclusive (end of day) end_dt = datetime.strptime(end_date, "%Y-%m-%d").replace(tzinfo=UTC) - files = [f for f in files if f["modified_at"] <= end_dt] + end_dt = end_dt.replace(hour=23, minute=59, second=59) + + # Only apply date filtering if dates are valid and in correct order + if start_dt and end_dt and start_dt > end_dt: + logger.warning( + f"start_date ({start_date}) is after end_date ({end_date}), skipping date filter" + ) + else: + if start_dt: + files = [f for f in files if f["modified_at"] >= start_dt] + logger.info( + f"After start_date filter ({start_date}): {len(files)} files" + ) + if end_dt: + files = [f for f in files if f["modified_at"] <= end_dt] + logger.info(f"After end_date filter ({end_date}): {len(files)} files") + + logger.info(f"Processing {len(files)} files after date filtering") # Get LLM for summarization - long_context_llm = await get_user_long_context_llm(session, user_id) + long_context_llm = await get_user_long_context_llm( + session, user_id, search_space_id + ) indexed_count = 0 skipped_count = 0 @@ -312,9 +335,9 @@ async def index_obsidian_vault( # Also extract tags from frontmatter fm_tags = frontmatter.get("tags", []) if isinstance(fm_tags, list): - tags = list(set(tags + fm_tags)) + tags = list({*tags, *fm_tags}) elif isinstance(fm_tags, str): - tags = list(set(tags + [fm_tags])) + tags = list({*tags, fm_tags}) # Generate unique identifier using vault name and relative path unique_identifier = f"{vault_name}:{relative_path}" @@ -330,7 +353,7 @@ async def index_obsidian_vault( ) # Generate content hash - content_hash = generate_content_hash(content) + content_hash = generate_content_hash(content, search_space_id) # Build metadata document_metadata = { @@ -372,11 +395,19 @@ async def index_obsidian_vault( # Generate new summary if content changed if long_context_llm: - new_summary = await generate_document_summary( - content=document_string, - llm=long_context_llm, + new_summary, _ = await generate_document_summary( + document_string, + long_context_llm, + document_metadata, ) - existing_document.summary = new_summary + # Store summary in metadata + document_metadata["summary"] = new_summary + + # Add URL and connector_id to metadata + document_metadata["url"] = ( + f"obsidian://{vault_name}/{relative_path}" + ) + document_metadata["connector_id"] = connector_id existing_document.content = document_string existing_document.content_hash = content_hash @@ -387,14 +418,10 @@ async def index_obsidian_vault( embedding = config.embedding_model_instance.embed(document_string) existing_document.embedding = embedding - # Update chunks - await create_document_chunks( - session=session, - document=existing_document, - content=document_string, - chunker=config.chunker_instance, - embedding_model=config.embedding_model_instance, - ) + # Update chunks - delete old and create new + existing_document.chunks.clear() + new_chunks = await create_document_chunks(document_string) + existing_document.chunks = new_chunks indexed_count += 1 @@ -403,42 +430,42 @@ async def index_obsidian_vault( logger.info(f"Indexing new note: {title}") # Generate summary - summary = "" + summary_content = "" if long_context_llm: - summary = await generate_document_summary( - content=document_string, - llm=long_context_llm, + summary_content, _ = await generate_document_summary( + document_string, + long_context_llm, + document_metadata, ) # Generate embedding embedding = config.embedding_model_instance.embed(document_string) + # Add URL and summary to metadata + document_metadata["url"] = ( + f"obsidian://{vault_name}/{relative_path}" + ) + document_metadata["summary"] = summary_content + document_metadata["connector_id"] = connector_id + + # Create chunks + chunks = await create_document_chunks(document_string) + # Create document new_document = Document( search_space_id=search_space_id, title=title, - url=f"obsidian://{vault_name}/{relative_path}", document_type=DocumentType.OBSIDIAN_CONNECTOR, content=document_string, content_hash=content_hash, unique_identifier_hash=unique_identifier_hash, document_metadata=document_metadata, - summary=summary, embedding=embedding, - connector_id=connector_id, + chunks=chunks, + updated_at=get_current_timestamp(), ) session.add(new_document) - await session.flush() - - # Create chunks - await create_document_chunks( - session=session, - document=new_document, - content=document_string, - chunker=config.chunker_instance, - embedding_model=config.embedding_model_instance, - ) indexed_count += 1 diff --git a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/obsidian-connect-form.tsx b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/obsidian-connect-form.tsx index acdbc4c1f..064e10e2f 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/obsidian-connect-form.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/obsidian-connect-form.tsx @@ -362,8 +362,8 @@ export const ObsidianConnectForm: FC = ({ onSubmit, isSubmitti File System Access Required - The SurfSense backend must have read access to your Obsidian vault directory. For - Docker deployments, mount your vault as a volume. + The SurfSense backend must have read access to your Obsidian vault directory. + For Docker deployments, mount your vault as a volume. @@ -373,18 +373,34 @@ export const ObsidianConnectForm: FC = ({ onSubmit, isSubmitti Step 1: Locate your vault
    -
  1. Open Obsidian and go to Settings → About
  2. -
  3. Look for "Vault path" to find the location
  4. -
  5. Or right-click any note and select "Reveal in Finder/Explorer"
  6. +
  7. + macOS/Linux: Right-click any note in Obsidian → "Reveal in + Finder" to see the vault folder +
  8. +
  9. + Windows: Right-click any note → "Show in system explorer" +
  10. +
  11. + Or: Click the vault switcher (bottom-left icon) → "Open + folder" next to your vault name +

- Step 2: Mount vault for Docker + Step 2: Enter the path

- If running SurfSense in Docker, add a volume mount: + Running locally (no Docker): Use the direct path to your + vault: +

+
+											{`/Users/yourname/Documents/MyObsidianVault`}
+										
+

+ Running in Docker: Mount your vault as a volume in + docker-compose.yml:

 											{`volumes:
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-connect-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-connect-view.tsx
index 7666b775f..49d1ebacc 100644
--- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-connect-view.tsx
+++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-connect-view.tsx
@@ -57,6 +57,7 @@ export const ConnectorConnectView: FC = ({
 			LUMA_CONNECTOR: "luma-connect-form",
 			CIRCLEBACK_CONNECTOR: "circleback-connect-form",
 			MCP_CONNECTOR: "mcp-connect-form",
+			OBSIDIAN_CONNECTOR: "obsidian-connect-form",
 		};
 		const formId = formIdMap[connectorType];
 		if (formId) {
@@ -141,12 +142,10 @@ export const ConnectorConnectView: FC = ({
 							
 							Connecting
 						
+					) : connectorType === "MCP_CONNECTOR" ? (
+						"Connect"
 					) : (
-						<>
-							{connectorType === "MCP_CONNECTOR"
-								? "Connect"
-								: `Connect ${getConnectorTypeDisplay(connectorType)}`}
-						
+						`Connect ${getConnectorTypeDisplay(connectorType)}`
 					)}
 				
 			
diff --git a/surfsense_web/components/assistant-ui/connector-popup/utils/connector-document-mapping.ts b/surfsense_web/components/assistant-ui/connector-popup/utils/connector-document-mapping.ts index 433a51e8c..ab66a943a 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/utils/connector-document-mapping.ts +++ b/surfsense_web/components/assistant-ui/connector-popup/utils/connector-document-mapping.ts @@ -26,6 +26,7 @@ export const CONNECTOR_TO_DOCUMENT_TYPE: Record = { ELASTICSEARCH_CONNECTOR: "ELASTICSEARCH_CONNECTOR", BOOKSTACK_CONNECTOR: "BOOKSTACK_CONNECTOR", CIRCLEBACK_CONNECTOR: "CIRCLEBACK", + OBSIDIAN_CONNECTOR: "OBSIDIAN_CONNECTOR", // Special mappings (connector type differs from document type) GOOGLE_DRIVE_CONNECTOR: "GOOGLE_DRIVE_FILE", diff --git a/surfsense_web/contracts/types/connector.types.ts b/surfsense_web/contracts/types/connector.types.ts index afd20b474..0e298443e 100644 --- a/surfsense_web/contracts/types/connector.types.ts +++ b/surfsense_web/contracts/types/connector.types.ts @@ -27,6 +27,7 @@ export const searchSourceConnectorTypeEnum = z.enum([ "BOOKSTACK_CONNECTOR", "CIRCLEBACK_CONNECTOR", "MCP_CONNECTOR", + "OBSIDIAN_CONNECTOR", ]); export const searchSourceConnector = z.object({