This commit is contained in:
Manoj Aggarwal 2026-01-21 22:54:09 -08:00
parent 8a83424be5
commit 88a9a5bef2
8 changed files with 200 additions and 54 deletions

View file

@ -1,7 +1,7 @@
"""Add Obsidian connector enums
Revision ID: 74_add_obsidian_connector
Revises: 73_add_user_memories_table
Revises: 73
Create Date: 2026-01-21
"""
@ -12,7 +12,7 @@ from alembic import op
# revision identifiers, used by Alembic.
revision: str = "74_add_obsidian_connector"
down_revision: str | None = "73_add_user_memories_table"
down_revision: str | None = "73"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None

View file

@ -49,6 +49,7 @@ _ALL_CONNECTORS: list[str] = [
"BOOKSTACK_CONNECTOR",
"CRAWLED_URL",
"CIRCLEBACK",
"OBSIDIAN_CONNECTOR",
]
@ -508,6 +509,16 @@ async def search_knowledge_base_async(
)
all_documents.extend(chunks)
elif connector == "OBSIDIAN_CONNECTOR":
_, chunks = await connector_service.search_obsidian(
user_query=query,
search_space_id=search_space_id,
top_k=top_k,
start_date=resolved_start_date,
end_date=resolved_end_date,
)
all_documents.extend(chunks)
except Exception as e:
print(f"Error searching connector {connector}: {e}")
continue

View file

@ -2780,3 +2780,94 @@ class ConnectorService:
}
return result_object, circleback_docs
async def search_obsidian(
    self,
    user_query: str,
    search_space_id: int,
    top_k: int = 20,
    start_date: datetime | None = None,
    end_date: datetime | None = None,
) -> tuple:
    """
    Search indexed Obsidian vault notes and return sources plus raw documents.

    Retrieval is delegated to ``self._combined_rrf_search`` with the document
    type fixed to ``"OBSIDIAN_CONNECTOR"``; the hits are then converted into
    source entries by ``self._build_chunk_sources_from_documents`` using the
    formatting callbacks defined below.

    Args:
        user_query: The user's query text.
        search_space_id: The search space ID to search in.
        top_k: Maximum number of results requested from the search.
        start_date: Optional lower date bound, forwarded unchanged to the search.
        end_date: Optional upper date bound, forwarded unchanged to the search.

    Returns:
        tuple: ``(result_object, documents)`` where ``result_object`` is a dict
        with keys ``id``, ``name``, ``type`` and ``sources``, and ``documents``
        is whatever the search returned (an empty list when there are no hits).
    """

    def _result(sources: list) -> dict[str, Any]:
        # Single place for the result envelope so the empty and non-empty
        # paths cannot drift apart.
        return {
            "id": 53,
            "name": "Obsidian Vault",
            "type": "OBSIDIAN_CONNECTOR",
            "sources": sources,
        }

    obsidian_docs = await self._combined_rrf_search(
        query_text=user_query,
        search_space_id=search_space_id,
        document_type="OBSIDIAN_CONNECTOR",
        top_k=top_k,
        start_date=start_date,
        end_date=end_date,
    )

    # Early return if no results
    if not obsidian_docs:
        return _result([]), []

    # NOTE(review): the callbacks below are presumably invoked once per hit by
    # _build_chunk_sources_from_documents with the argument order shown in
    # their signatures -- confirm against that helper.

    def _title_fn(doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
        return doc_info.get("title", "Untitled Note")

    def _url_fn(doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
        # Obsidian URL format: obsidian://vault_name/path
        return doc_info.get("url", "")

    def _description_fn(
        chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any]
    ) -> str:
        description = self._chunk_preview(chunk.get("content", ""), limit=200)

        info_parts = []
        vault_name = metadata.get("vault_name")
        tags = metadata.get("tags", [])
        if vault_name:
            info_parts.append(f"Vault: {vault_name}")
        if tags and isinstance(tags, list):
            # Tags originate from YAML frontmatter, so an entry may be a
            # non-string scalar (e.g. the int 2024). str.join() raises
            # TypeError on non-strings, hence the explicit coercion.
            shown_tags = ", ".join(str(tag) for tag in tags[:3])
            info_parts.append(f"Tags: {shown_tags}")

        if info_parts:
            # strip(" |") removes the dangling separator when the preview
            # itself is empty.
            description = (description + " | " + " | ".join(info_parts)).strip(" |")
        return description

    def _extra_fields_fn(
        _chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any]
    ) -> dict[str, Any]:
        return {
            "vault_name": metadata.get("vault_name", ""),
            "file_path": metadata.get("file_path", ""),
            "tags": metadata.get("tags", []),
            "outgoing_links": metadata.get("outgoing_links", []),
        }

    sources_list = self._build_chunk_sources_from_documents(
        obsidian_docs,
        title_fn=_title_fn,
        url_fn=_url_fn,
        description_fn=_description_fn,
        extra_fields_fn=_extra_fields_fn,
    )

    return _result(sources_list), obsidian_docs

View file

@ -7,7 +7,7 @@ This connector is only available in self-hosted mode.
import os
import re
from datetime import datetime, UTC
from datetime import UTC, datetime
from pathlib import Path
import yaml
@ -266,17 +266,40 @@ async def index_obsidian_vault(
{"stage": "files_discovered", "file_count": len(files)},
)
# Filter by date if provided
if start_date:
start_dt = datetime.strptime(start_date, "%Y-%m-%d").replace(tzinfo=UTC)
files = [f for f in files if f["modified_at"] >= start_dt]
# Filter by date if provided (handle "undefined" string from frontend)
# Also handle inverted dates (start > end) by skipping filtering
start_dt = None
end_dt = None
if end_date:
if start_date and start_date != "undefined":
start_dt = datetime.strptime(start_date, "%Y-%m-%d").replace(tzinfo=UTC)
if end_date and end_date != "undefined":
# Make end_date inclusive (end of day)
end_dt = datetime.strptime(end_date, "%Y-%m-%d").replace(tzinfo=UTC)
files = [f for f in files if f["modified_at"] <= end_dt]
end_dt = end_dt.replace(hour=23, minute=59, second=59)
# Only apply date filtering if dates are valid and in correct order
if start_dt and end_dt and start_dt > end_dt:
logger.warning(
f"start_date ({start_date}) is after end_date ({end_date}), skipping date filter"
)
else:
if start_dt:
files = [f for f in files if f["modified_at"] >= start_dt]
logger.info(
f"After start_date filter ({start_date}): {len(files)} files"
)
if end_dt:
files = [f for f in files if f["modified_at"] <= end_dt]
logger.info(f"After end_date filter ({end_date}): {len(files)} files")
logger.info(f"Processing {len(files)} files after date filtering")
# Get LLM for summarization
long_context_llm = await get_user_long_context_llm(session, user_id)
long_context_llm = await get_user_long_context_llm(
session, user_id, search_space_id
)
indexed_count = 0
skipped_count = 0
@ -312,9 +335,9 @@ async def index_obsidian_vault(
# Also extract tags from frontmatter
fm_tags = frontmatter.get("tags", [])
if isinstance(fm_tags, list):
tags = list(set(tags + fm_tags))
tags = list({*tags, *fm_tags})
elif isinstance(fm_tags, str):
tags = list(set(tags + [fm_tags]))
tags = list({*tags, fm_tags})
# Generate unique identifier using vault name and relative path
unique_identifier = f"{vault_name}:{relative_path}"
@ -330,7 +353,7 @@ async def index_obsidian_vault(
)
# Generate content hash
content_hash = generate_content_hash(content)
content_hash = generate_content_hash(content, search_space_id)
# Build metadata
document_metadata = {
@ -372,11 +395,19 @@ async def index_obsidian_vault(
# Generate new summary if content changed
if long_context_llm:
new_summary = await generate_document_summary(
content=document_string,
llm=long_context_llm,
new_summary, _ = await generate_document_summary(
document_string,
long_context_llm,
document_metadata,
)
existing_document.summary = new_summary
# Store summary in metadata
document_metadata["summary"] = new_summary
# Add URL and connector_id to metadata
document_metadata["url"] = (
f"obsidian://{vault_name}/{relative_path}"
)
document_metadata["connector_id"] = connector_id
existing_document.content = document_string
existing_document.content_hash = content_hash
@ -387,14 +418,10 @@ async def index_obsidian_vault(
embedding = config.embedding_model_instance.embed(document_string)
existing_document.embedding = embedding
# Update chunks
await create_document_chunks(
session=session,
document=existing_document,
content=document_string,
chunker=config.chunker_instance,
embedding_model=config.embedding_model_instance,
)
# Update chunks - delete old and create new
existing_document.chunks.clear()
new_chunks = await create_document_chunks(document_string)
existing_document.chunks = new_chunks
indexed_count += 1
@ -403,42 +430,42 @@ async def index_obsidian_vault(
logger.info(f"Indexing new note: {title}")
# Generate summary
summary = ""
summary_content = ""
if long_context_llm:
summary = await generate_document_summary(
content=document_string,
llm=long_context_llm,
summary_content, _ = await generate_document_summary(
document_string,
long_context_llm,
document_metadata,
)
# Generate embedding
embedding = config.embedding_model_instance.embed(document_string)
# Add URL and summary to metadata
document_metadata["url"] = (
f"obsidian://{vault_name}/{relative_path}"
)
document_metadata["summary"] = summary_content
document_metadata["connector_id"] = connector_id
# Create chunks
chunks = await create_document_chunks(document_string)
# Create document
new_document = Document(
search_space_id=search_space_id,
title=title,
url=f"obsidian://{vault_name}/{relative_path}",
document_type=DocumentType.OBSIDIAN_CONNECTOR,
content=document_string,
content_hash=content_hash,
unique_identifier_hash=unique_identifier_hash,
document_metadata=document_metadata,
summary=summary,
embedding=embedding,
connector_id=connector_id,
chunks=chunks,
updated_at=get_current_timestamp(),
)
session.add(new_document)
await session.flush()
# Create chunks
await create_document_chunks(
session=session,
document=new_document,
content=document_string,
chunker=config.chunker_instance,
embedding_model=config.embedding_model_instance,
)
indexed_count += 1

View file

@ -362,8 +362,8 @@ export const ObsidianConnectForm: FC<ConnectFormProps> = ({ onSubmit, isSubmitti
File System Access Required
</AlertTitle>
<AlertDescription className="text-[9px] sm:text-[10px]">
The SurfSense backend must have read access to your Obsidian vault directory. For
Docker deployments, mount your vault as a volume.
The SurfSense backend must have read access to your Obsidian vault directory.
For Docker deployments, mount your vault as a volume.
</AlertDescription>
</Alert>
@ -373,18 +373,34 @@ export const ObsidianConnectForm: FC<ConnectFormProps> = ({ onSubmit, isSubmitti
Step 1: Locate your vault
</h4>
<ol className="list-decimal pl-5 space-y-2 text-[10px] sm:text-xs text-muted-foreground">
<li>Open Obsidian and go to Settings About</li>
<li>Look for "Vault path" to find the location</li>
<li>Or right-click any note and select "Reveal in Finder/Explorer"</li>
<li>
<strong>macOS/Linux:</strong> Right-click any note in Obsidian "Reveal in
Finder" to see the vault folder
</li>
<li>
<strong>Windows:</strong> Right-click any note "Show in system explorer"
</li>
<li>
<strong>Or:</strong> Click the vault switcher (bottom-left icon) "Open
folder" next to your vault name
</li>
</ol>
</div>
<div>
<h4 className="text-[10px] sm:text-xs font-medium mb-2">
Step 2: Mount vault for Docker
Step 2: Enter the path
</h4>
<p className="text-[10px] sm:text-xs text-muted-foreground mb-2">
If running SurfSense in Docker, add a volume mount:
<strong>Running locally (no Docker):</strong> Use the direct path to your
vault:
</p>
<pre className="bg-slate-800 text-slate-200 p-2 rounded text-[9px] sm:text-[10px] overflow-x-auto mb-2">
{`/Users/yourname/Documents/MyObsidianVault`}
</pre>
<p className="text-[10px] sm:text-xs text-muted-foreground mb-2">
<strong>Running in Docker:</strong> Mount your vault as a volume in
docker-compose.yml:
</p>
<pre className="bg-slate-800 text-slate-200 p-2 rounded text-[9px] sm:text-[10px] overflow-x-auto">
{`volumes:

View file

@ -57,6 +57,7 @@ export const ConnectorConnectView: FC<ConnectorConnectViewProps> = ({
LUMA_CONNECTOR: "luma-connect-form",
CIRCLEBACK_CONNECTOR: "circleback-connect-form",
MCP_CONNECTOR: "mcp-connect-form",
OBSIDIAN_CONNECTOR: "obsidian-connect-form",
};
const formId = formIdMap[connectorType];
if (formId) {
@ -141,12 +142,10 @@ export const ConnectorConnectView: FC<ConnectorConnectViewProps> = ({
<Loader2 className="mr-2 h-4 w-4 animate-spin" />
Connecting
</>
) : connectorType === "MCP_CONNECTOR" ? (
"Connect"
) : (
<>
{connectorType === "MCP_CONNECTOR"
? "Connect"
: `Connect ${getConnectorTypeDisplay(connectorType)}`}
</>
`Connect ${getConnectorTypeDisplay(connectorType)}`
)}
</Button>
</div>

View file

@ -26,6 +26,7 @@ export const CONNECTOR_TO_DOCUMENT_TYPE: Record<string, string> = {
ELASTICSEARCH_CONNECTOR: "ELASTICSEARCH_CONNECTOR",
BOOKSTACK_CONNECTOR: "BOOKSTACK_CONNECTOR",
CIRCLEBACK_CONNECTOR: "CIRCLEBACK",
OBSIDIAN_CONNECTOR: "OBSIDIAN_CONNECTOR",
// Special mappings (connector type differs from document type)
GOOGLE_DRIVE_CONNECTOR: "GOOGLE_DRIVE_FILE",

View file

@ -27,6 +27,7 @@ export const searchSourceConnectorTypeEnum = z.enum([
"BOOKSTACK_CONNECTOR",
"CIRCLEBACK_CONNECTOR",
"MCP_CONNECTOR",
"OBSIDIAN_CONNECTOR",
]);
export const searchSourceConnector = z.object({