mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-17 18:35:19 +02:00
format
This commit is contained in:
parent
8a83424be5
commit
88a9a5bef2
8 changed files with 200 additions and 54 deletions
|
|
@ -1,7 +1,7 @@
|
|||
"""Add Obsidian connector enums
|
||||
|
||||
Revision ID: 74_add_obsidian_connector
|
||||
Revises: 73_add_user_memories_table
|
||||
Revises: 73
|
||||
Create Date: 2026-01-21
|
||||
|
||||
"""
|
||||
|
|
@ -12,7 +12,7 @@ from alembic import op
|
|||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = "74_add_obsidian_connector"
|
||||
down_revision: str | None = "73_add_user_memories_table"
|
||||
down_revision: str | None = "73"
|
||||
branch_labels: str | Sequence[str] | None = None
|
||||
depends_on: str | Sequence[str] | None = None
|
||||
|
||||
|
|
|
|||
|
|
@ -49,6 +49,7 @@ _ALL_CONNECTORS: list[str] = [
|
|||
"BOOKSTACK_CONNECTOR",
|
||||
"CRAWLED_URL",
|
||||
"CIRCLEBACK",
|
||||
"OBSIDIAN_CONNECTOR",
|
||||
]
|
||||
|
||||
|
||||
|
|
@ -508,6 +509,16 @@ async def search_knowledge_base_async(
|
|||
)
|
||||
all_documents.extend(chunks)
|
||||
|
||||
elif connector == "OBSIDIAN_CONNECTOR":
|
||||
_, chunks = await connector_service.search_obsidian(
|
||||
user_query=query,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
start_date=resolved_start_date,
|
||||
end_date=resolved_end_date,
|
||||
)
|
||||
all_documents.extend(chunks)
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error searching connector {connector}: {e}")
|
||||
continue
|
||||
|
|
|
|||
|
|
@ -2780,3 +2780,94 @@ class ConnectorService:
|
|||
}
|
||||
|
||||
return result_object, circleback_docs
|
||||
|
||||
async def search_obsidian(
|
||||
self,
|
||||
user_query: str,
|
||||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
start_date: datetime | None = None,
|
||||
end_date: datetime | None = None,
|
||||
) -> tuple:
|
||||
"""
|
||||
Search for Obsidian vault notes and return both the source information and langchain documents.
|
||||
|
||||
Uses combined chunk-level and document-level hybrid search with RRF fusion.
|
||||
|
||||
Args:
|
||||
user_query: The user's query
|
||||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
start_date: Optional start date for filtering documents by updated_at
|
||||
end_date: Optional end date for filtering documents by updated_at
|
||||
|
||||
Returns:
|
||||
tuple: (sources_info, langchain_documents)
|
||||
"""
|
||||
obsidian_docs = await self._combined_rrf_search(
|
||||
query_text=user_query,
|
||||
search_space_id=search_space_id,
|
||||
document_type="OBSIDIAN_CONNECTOR",
|
||||
top_k=top_k,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
||||
# Early return if no results
|
||||
if not obsidian_docs:
|
||||
return {
|
||||
"id": 53,
|
||||
"name": "Obsidian Vault",
|
||||
"type": "OBSIDIAN_CONNECTOR",
|
||||
"sources": [],
|
||||
}, []
|
||||
|
||||
def _title_fn(doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
|
||||
return doc_info.get("title", "Untitled Note")
|
||||
|
||||
def _url_fn(doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
|
||||
# Obsidian URL format: obsidian://vault_name/path
|
||||
return doc_info.get("url", "")
|
||||
|
||||
def _description_fn(
|
||||
chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any]
|
||||
) -> str:
|
||||
description = self._chunk_preview(chunk.get("content", ""), limit=200)
|
||||
info_parts = []
|
||||
vault_name = metadata.get("vault_name")
|
||||
tags = metadata.get("tags", [])
|
||||
if vault_name:
|
||||
info_parts.append(f"Vault: {vault_name}")
|
||||
if tags and isinstance(tags, list) and len(tags) > 0:
|
||||
info_parts.append(f"Tags: {', '.join(tags[:3])}")
|
||||
if info_parts:
|
||||
description = (description + " | " + " | ".join(info_parts)).strip(" |")
|
||||
return description
|
||||
|
||||
def _extra_fields_fn(
|
||||
_chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any]
|
||||
) -> dict[str, Any]:
|
||||
return {
|
||||
"vault_name": metadata.get("vault_name", ""),
|
||||
"file_path": metadata.get("file_path", ""),
|
||||
"tags": metadata.get("tags", []),
|
||||
"outgoing_links": metadata.get("outgoing_links", []),
|
||||
}
|
||||
|
||||
sources_list = self._build_chunk_sources_from_documents(
|
||||
obsidian_docs,
|
||||
title_fn=_title_fn,
|
||||
url_fn=_url_fn,
|
||||
description_fn=_description_fn,
|
||||
extra_fields_fn=_extra_fields_fn,
|
||||
)
|
||||
|
||||
# Create result object
|
||||
result_object = {
|
||||
"id": 53,
|
||||
"name": "Obsidian Vault",
|
||||
"type": "OBSIDIAN_CONNECTOR",
|
||||
"sources": sources_list,
|
||||
}
|
||||
|
||||
return result_object, obsidian_docs
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ This connector is only available in self-hosted mode.
|
|||
|
||||
import os
|
||||
import re
|
||||
from datetime import datetime, UTC
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
|
|
@ -266,17 +266,40 @@ async def index_obsidian_vault(
|
|||
{"stage": "files_discovered", "file_count": len(files)},
|
||||
)
|
||||
|
||||
# Filter by date if provided
|
||||
if start_date:
|
||||
start_dt = datetime.strptime(start_date, "%Y-%m-%d").replace(tzinfo=UTC)
|
||||
files = [f for f in files if f["modified_at"] >= start_dt]
|
||||
# Filter by date if provided (handle "undefined" string from frontend)
|
||||
# Also handle inverted dates (start > end) by skipping filtering
|
||||
start_dt = None
|
||||
end_dt = None
|
||||
|
||||
if end_date:
|
||||
if start_date and start_date != "undefined":
|
||||
start_dt = datetime.strptime(start_date, "%Y-%m-%d").replace(tzinfo=UTC)
|
||||
|
||||
if end_date and end_date != "undefined":
|
||||
# Make end_date inclusive (end of day)
|
||||
end_dt = datetime.strptime(end_date, "%Y-%m-%d").replace(tzinfo=UTC)
|
||||
files = [f for f in files if f["modified_at"] <= end_dt]
|
||||
end_dt = end_dt.replace(hour=23, minute=59, second=59)
|
||||
|
||||
# Only apply date filtering if dates are valid and in correct order
|
||||
if start_dt and end_dt and start_dt > end_dt:
|
||||
logger.warning(
|
||||
f"start_date ({start_date}) is after end_date ({end_date}), skipping date filter"
|
||||
)
|
||||
else:
|
||||
if start_dt:
|
||||
files = [f for f in files if f["modified_at"] >= start_dt]
|
||||
logger.info(
|
||||
f"After start_date filter ({start_date}): {len(files)} files"
|
||||
)
|
||||
if end_dt:
|
||||
files = [f for f in files if f["modified_at"] <= end_dt]
|
||||
logger.info(f"After end_date filter ({end_date}): {len(files)} files")
|
||||
|
||||
logger.info(f"Processing {len(files)} files after date filtering")
|
||||
|
||||
# Get LLM for summarization
|
||||
long_context_llm = await get_user_long_context_llm(session, user_id)
|
||||
long_context_llm = await get_user_long_context_llm(
|
||||
session, user_id, search_space_id
|
||||
)
|
||||
|
||||
indexed_count = 0
|
||||
skipped_count = 0
|
||||
|
|
@ -312,9 +335,9 @@ async def index_obsidian_vault(
|
|||
# Also extract tags from frontmatter
|
||||
fm_tags = frontmatter.get("tags", [])
|
||||
if isinstance(fm_tags, list):
|
||||
tags = list(set(tags + fm_tags))
|
||||
tags = list({*tags, *fm_tags})
|
||||
elif isinstance(fm_tags, str):
|
||||
tags = list(set(tags + [fm_tags]))
|
||||
tags = list({*tags, fm_tags})
|
||||
|
||||
# Generate unique identifier using vault name and relative path
|
||||
unique_identifier = f"{vault_name}:{relative_path}"
|
||||
|
|
@ -330,7 +353,7 @@ async def index_obsidian_vault(
|
|||
)
|
||||
|
||||
# Generate content hash
|
||||
content_hash = generate_content_hash(content)
|
||||
content_hash = generate_content_hash(content, search_space_id)
|
||||
|
||||
# Build metadata
|
||||
document_metadata = {
|
||||
|
|
@ -372,11 +395,19 @@ async def index_obsidian_vault(
|
|||
|
||||
# Generate new summary if content changed
|
||||
if long_context_llm:
|
||||
new_summary = await generate_document_summary(
|
||||
content=document_string,
|
||||
llm=long_context_llm,
|
||||
new_summary, _ = await generate_document_summary(
|
||||
document_string,
|
||||
long_context_llm,
|
||||
document_metadata,
|
||||
)
|
||||
existing_document.summary = new_summary
|
||||
# Store summary in metadata
|
||||
document_metadata["summary"] = new_summary
|
||||
|
||||
# Add URL and connector_id to metadata
|
||||
document_metadata["url"] = (
|
||||
f"obsidian://{vault_name}/{relative_path}"
|
||||
)
|
||||
document_metadata["connector_id"] = connector_id
|
||||
|
||||
existing_document.content = document_string
|
||||
existing_document.content_hash = content_hash
|
||||
|
|
@ -387,14 +418,10 @@ async def index_obsidian_vault(
|
|||
embedding = config.embedding_model_instance.embed(document_string)
|
||||
existing_document.embedding = embedding
|
||||
|
||||
# Update chunks
|
||||
await create_document_chunks(
|
||||
session=session,
|
||||
document=existing_document,
|
||||
content=document_string,
|
||||
chunker=config.chunker_instance,
|
||||
embedding_model=config.embedding_model_instance,
|
||||
)
|
||||
# Update chunks - delete old and create new
|
||||
existing_document.chunks.clear()
|
||||
new_chunks = await create_document_chunks(document_string)
|
||||
existing_document.chunks = new_chunks
|
||||
|
||||
indexed_count += 1
|
||||
|
||||
|
|
@ -403,42 +430,42 @@ async def index_obsidian_vault(
|
|||
logger.info(f"Indexing new note: {title}")
|
||||
|
||||
# Generate summary
|
||||
summary = ""
|
||||
summary_content = ""
|
||||
if long_context_llm:
|
||||
summary = await generate_document_summary(
|
||||
content=document_string,
|
||||
llm=long_context_llm,
|
||||
summary_content, _ = await generate_document_summary(
|
||||
document_string,
|
||||
long_context_llm,
|
||||
document_metadata,
|
||||
)
|
||||
|
||||
# Generate embedding
|
||||
embedding = config.embedding_model_instance.embed(document_string)
|
||||
|
||||
# Add URL and summary to metadata
|
||||
document_metadata["url"] = (
|
||||
f"obsidian://{vault_name}/{relative_path}"
|
||||
)
|
||||
document_metadata["summary"] = summary_content
|
||||
document_metadata["connector_id"] = connector_id
|
||||
|
||||
# Create chunks
|
||||
chunks = await create_document_chunks(document_string)
|
||||
|
||||
# Create document
|
||||
new_document = Document(
|
||||
search_space_id=search_space_id,
|
||||
title=title,
|
||||
url=f"obsidian://{vault_name}/{relative_path}",
|
||||
document_type=DocumentType.OBSIDIAN_CONNECTOR,
|
||||
content=document_string,
|
||||
content_hash=content_hash,
|
||||
unique_identifier_hash=unique_identifier_hash,
|
||||
document_metadata=document_metadata,
|
||||
summary=summary,
|
||||
embedding=embedding,
|
||||
connector_id=connector_id,
|
||||
chunks=chunks,
|
||||
updated_at=get_current_timestamp(),
|
||||
)
|
||||
|
||||
session.add(new_document)
|
||||
await session.flush()
|
||||
|
||||
# Create chunks
|
||||
await create_document_chunks(
|
||||
session=session,
|
||||
document=new_document,
|
||||
content=document_string,
|
||||
chunker=config.chunker_instance,
|
||||
embedding_model=config.embedding_model_instance,
|
||||
)
|
||||
|
||||
indexed_count += 1
|
||||
|
||||
|
|
|
|||
|
|
@ -362,8 +362,8 @@ export const ObsidianConnectForm: FC<ConnectFormProps> = ({ onSubmit, isSubmitti
|
|||
File System Access Required
|
||||
</AlertTitle>
|
||||
<AlertDescription className="text-[9px] sm:text-[10px]">
|
||||
The SurfSense backend must have read access to your Obsidian vault directory. For
|
||||
Docker deployments, mount your vault as a volume.
|
||||
The SurfSense backend must have read access to your Obsidian vault directory.
|
||||
For Docker deployments, mount your vault as a volume.
|
||||
</AlertDescription>
|
||||
</Alert>
|
||||
|
||||
|
|
@ -373,18 +373,34 @@ export const ObsidianConnectForm: FC<ConnectFormProps> = ({ onSubmit, isSubmitti
|
|||
Step 1: Locate your vault
|
||||
</h4>
|
||||
<ol className="list-decimal pl-5 space-y-2 text-[10px] sm:text-xs text-muted-foreground">
|
||||
<li>Open Obsidian and go to Settings → About</li>
|
||||
<li>Look for "Vault path" to find the location</li>
|
||||
<li>Or right-click any note and select "Reveal in Finder/Explorer"</li>
|
||||
<li>
|
||||
<strong>macOS/Linux:</strong> Right-click any note in Obsidian → "Reveal in
|
||||
Finder" to see the vault folder
|
||||
</li>
|
||||
<li>
|
||||
<strong>Windows:</strong> Right-click any note → "Show in system explorer"
|
||||
</li>
|
||||
<li>
|
||||
<strong>Or:</strong> Click the vault switcher (bottom-left icon) → "Open
|
||||
folder" next to your vault name
|
||||
</li>
|
||||
</ol>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<h4 className="text-[10px] sm:text-xs font-medium mb-2">
|
||||
Step 2: Mount vault for Docker
|
||||
Step 2: Enter the path
|
||||
</h4>
|
||||
<p className="text-[10px] sm:text-xs text-muted-foreground mb-2">
|
||||
If running SurfSense in Docker, add a volume mount:
|
||||
<strong>Running locally (no Docker):</strong> Use the direct path to your
|
||||
vault:
|
||||
</p>
|
||||
<pre className="bg-slate-800 text-slate-200 p-2 rounded text-[9px] sm:text-[10px] overflow-x-auto mb-2">
|
||||
{`/Users/yourname/Documents/MyObsidianVault`}
|
||||
</pre>
|
||||
<p className="text-[10px] sm:text-xs text-muted-foreground mb-2">
|
||||
<strong>Running in Docker:</strong> Mount your vault as a volume in
|
||||
docker-compose.yml:
|
||||
</p>
|
||||
<pre className="bg-slate-800 text-slate-200 p-2 rounded text-[9px] sm:text-[10px] overflow-x-auto">
|
||||
{`volumes:
|
||||
|
|
|
|||
|
|
@ -57,6 +57,7 @@ export const ConnectorConnectView: FC<ConnectorConnectViewProps> = ({
|
|||
LUMA_CONNECTOR: "luma-connect-form",
|
||||
CIRCLEBACK_CONNECTOR: "circleback-connect-form",
|
||||
MCP_CONNECTOR: "mcp-connect-form",
|
||||
OBSIDIAN_CONNECTOR: "obsidian-connect-form",
|
||||
};
|
||||
const formId = formIdMap[connectorType];
|
||||
if (formId) {
|
||||
|
|
@ -141,12 +142,10 @@ export const ConnectorConnectView: FC<ConnectorConnectViewProps> = ({
|
|||
<Loader2 className="mr-2 h-4 w-4 animate-spin" />
|
||||
Connecting
|
||||
</>
|
||||
) : connectorType === "MCP_CONNECTOR" ? (
|
||||
"Connect"
|
||||
) : (
|
||||
<>
|
||||
{connectorType === "MCP_CONNECTOR"
|
||||
? "Connect"
|
||||
: `Connect ${getConnectorTypeDisplay(connectorType)}`}
|
||||
</>
|
||||
`Connect ${getConnectorTypeDisplay(connectorType)}`
|
||||
)}
|
||||
</Button>
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ export const CONNECTOR_TO_DOCUMENT_TYPE: Record<string, string> = {
|
|||
ELASTICSEARCH_CONNECTOR: "ELASTICSEARCH_CONNECTOR",
|
||||
BOOKSTACK_CONNECTOR: "BOOKSTACK_CONNECTOR",
|
||||
CIRCLEBACK_CONNECTOR: "CIRCLEBACK",
|
||||
OBSIDIAN_CONNECTOR: "OBSIDIAN_CONNECTOR",
|
||||
|
||||
// Special mappings (connector type differs from document type)
|
||||
GOOGLE_DRIVE_CONNECTOR: "GOOGLE_DRIVE_FILE",
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ export const searchSourceConnectorTypeEnum = z.enum([
|
|||
"BOOKSTACK_CONNECTOR",
|
||||
"CIRCLEBACK_CONNECTOR",
|
||||
"MCP_CONNECTOR",
|
||||
"OBSIDIAN_CONNECTOR",
|
||||
]);
|
||||
|
||||
export const searchSourceConnector = z.object({
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue