Merge remote-tracking branch 'upstream/dev' into feat/ui

Author: Anish Sarkar
Date:   2026-01-25 16:19:18 +05:30
Commit: c7c9eb3eb2
13 changed files with 825 additions and 97 deletions

View file

@@ -29,8 +29,7 @@ SurfSense is a highly customizable AI research agent, connected to external sour
# Video
-https://github.com/user-attachments/assets/42a29ea1-d4d8-4213-9c69-972b5b806d58
+https://github.com/user-attachments/assets/cc0c84d3-1f2f-4f7a-b519-2ecce22310b1
## Podcast Sample
@@ -52,8 +51,10 @@ https://github.com/user-attachments/assets/a0a16566-6967-4374-ac51-9b3e07fbecd7
- Interact in Natural Language and get cited answers.
### 📄 **Cited Answers**
- Get Cited answers just like Perplexity.
+### 🧩 **Universal Compatibility**
+- Connect virtually any inference provider via the OpenAI spec and LiteLLM.
### 🔔 **Privacy & Local LLM Support**
-- Works Flawlessly with Ollama local LLMs.
+- Works Flawlessly with local LLMs like vLLM and Ollama.
### 🏠 **Self Hostable**
- Open source and easy to deploy locally.
### 👥 **Team Collaboration with RBAC**
@@ -61,6 +62,7 @@ https://github.com/user-attachments/assets/a0a16566-6967-4374-ac51-9b3e07fbecd7
- Invite team members with customizable roles (Owner, Admin, Editor, Viewer)
- Granular permissions for documents, chats, connectors, and settings
- Share knowledge bases securely within your organization
+- Team chats update in real-time and "Chat about the chat" in comment threads
### 🎙️ Podcasts
- Blazingly fast podcast generation agent. (Creates a 3-minute podcast in under 20 seconds.)
- Convert your chat conversations into engaging audio content
@@ -237,6 +239,8 @@ Before self-hosting installation, make sure to complete the [prerequisite setup
### **BackEnd**
+- **LiteLLM**: Universal LLM integration supporting 100+ models (OpenAI, Anthropic, Ollama, etc.)
- **FastAPI**: Modern, fast web framework for building APIs with Python
- **PostgreSQL with pgvector**: Database with vector search capabilities for similarity searches
@@ -253,8 +257,6 @@ Before self-hosting installation, make sure to complete the [prerequisite setup
- **LangChain**: Framework for developing AI-powered applications.
-- **LiteLLM**: Universal LLM integration supporting 100+ models (OpenAI, Anthropic, Ollama, etc.)
- **Rerankers**: Advanced result ranking for improved search relevance
- **Hybrid Search**: Combines vector similarity and full-text search for optimal results using Reciprocal Rank Fusion (RRF)
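
For reference, Reciprocal Rank Fusion merges ranked lists by scoring each document as the sum of 1/(k + rank) across every list it appears in. A minimal sketch of the idea (illustrative only; k = 60 is the conventional constant, not necessarily what SurfSense uses):

```python
def reciprocal_rank_fusion(ranked_lists: list[list[str]], k: int = 60) -> list[str]:
    """Fuse ranked result lists: score(d) = sum over lists of 1 / (k + rank(d))."""
    scores: dict[str, float] = {}
    for results in ranked_lists:
        for rank, doc_id in enumerate(results, start=1):
            scores[doc_id] = scores.get(doc_id, 0.0) + 1.0 / (k + rank)
    return sorted(scores, key=lambda d: scores[d], reverse=True)

# Fuse vector-similarity hits with full-text hits:
print(reciprocal_rank_fusion([["d2", "d1", "d3"], ["d1", "d3"]]))  # ['d1', 'd3', 'd2']
```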

View file

@@ -7,6 +7,7 @@ via NewLLMConfig.
"""
from collections.abc import Sequence
+from typing import Any

from deepagents import create_deep_agent
from langchain_core.tools import BaseTool
@@ -23,6 +24,90 @@ from app.agents.new_chat.system_prompt import (
from app.agents.new_chat.tools.registry import build_tools_async
from app.services.connector_service import ConnectorService
+# =============================================================================
+# Connector Type Mapping
+# =============================================================================
+# Maps SearchSourceConnectorType enum values to the searchable document/connector types
+# used by the knowledge_base tool. Some connectors map to different document types.
+_CONNECTOR_TYPE_TO_SEARCHABLE: dict[str, str] = {
+    # Direct mappings (connector type == searchable type)
+    "TAVILY_API": "TAVILY_API",
+    "SEARXNG_API": "SEARXNG_API",
+    "LINKUP_API": "LINKUP_API",
+    "BAIDU_SEARCH_API": "BAIDU_SEARCH_API",
+    "SLACK_CONNECTOR": "SLACK_CONNECTOR",
+    "TEAMS_CONNECTOR": "TEAMS_CONNECTOR",
+    "NOTION_CONNECTOR": "NOTION_CONNECTOR",
+    "GITHUB_CONNECTOR": "GITHUB_CONNECTOR",
+    "LINEAR_CONNECTOR": "LINEAR_CONNECTOR",
+    "DISCORD_CONNECTOR": "DISCORD_CONNECTOR",
+    "JIRA_CONNECTOR": "JIRA_CONNECTOR",
+    "CONFLUENCE_CONNECTOR": "CONFLUENCE_CONNECTOR",
+    "CLICKUP_CONNECTOR": "CLICKUP_CONNECTOR",
+    "GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR",
+    "GOOGLE_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR",
+    "GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE",  # Connector type differs from document type
+    "AIRTABLE_CONNECTOR": "AIRTABLE_CONNECTOR",
+    "LUMA_CONNECTOR": "LUMA_CONNECTOR",
+    "ELASTICSEARCH_CONNECTOR": "ELASTICSEARCH_CONNECTOR",
+    "WEBCRAWLER_CONNECTOR": "CRAWLED_URL",  # Maps to document type
+    "BOOKSTACK_CONNECTOR": "BOOKSTACK_CONNECTOR",
+    "CIRCLEBACK_CONNECTOR": "CIRCLEBACK",  # Connector type differs from document type
+    "OBSIDIAN_CONNECTOR": "OBSIDIAN_CONNECTOR",
+    # Composio connectors
+    "COMPOSIO_GOOGLE_DRIVE_CONNECTOR": "COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
+    "COMPOSIO_GMAIL_CONNECTOR": "COMPOSIO_GMAIL_CONNECTOR",
+    "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR": "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR",
+}
+
+# Document types that don't come from SearchSourceConnector but should always be searchable
+_ALWAYS_AVAILABLE_DOC_TYPES: list[str] = [
+    "EXTENSION",  # Browser extension data
+    "FILE",  # Uploaded files
+    "NOTE",  # User notes
+    "YOUTUBE_VIDEO",  # YouTube videos
+]
+def _map_connectors_to_searchable_types(
+    connector_types: list[Any],
+) -> list[str]:
+    """
+    Map SearchSourceConnectorType enums to searchable document/connector types.
+
+    This function:
+    1. Converts connector type enums to their searchable counterparts
+    2. Includes always-available document types (EXTENSION, FILE, NOTE, YOUTUBE_VIDEO)
+    3. Deduplicates while preserving order
+
+    Args:
+        connector_types: List of SearchSourceConnectorType enum values
+
+    Returns:
+        List of searchable connector/document type strings
+    """
+    result_set: set[str] = set()
+    result_list: list[str] = []
+
+    # Add always-available document types first
+    for doc_type in _ALWAYS_AVAILABLE_DOC_TYPES:
+        if doc_type not in result_set:
+            result_set.add(doc_type)
+            result_list.append(doc_type)
+
+    # Map each connector type to its searchable equivalent
+    for ct in connector_types:
+        # Handle both enum and string types
+        ct_str = ct.value if hasattr(ct, "value") else str(ct)
+        searchable = _CONNECTOR_TYPE_TO_SEARCHABLE.get(ct_str)
+        if searchable and searchable not in result_set:
+            result_set.add(searchable)
+            result_list.append(searchable)
+
+    return result_list
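
To illustrate the mapping above (plain strings stand in for SearchSourceConnectorType enum members, which the helper also accepts via its `hasattr(ct, "value")` check):

```python
types = _map_connectors_to_searchable_types(
    ["WEBCRAWLER_CONNECTOR", "GOOGLE_DRIVE_CONNECTOR", "SLACK_CONNECTOR"]
)
# Always-available doc types come first, then the mapped connector types:
# ['EXTENSION', 'FILE', 'NOTE', 'YOUTUBE_VIDEO',
#  'CRAWLED_URL', 'GOOGLE_DRIVE_FILE', 'SLACK_CONNECTOR']
```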
# =============================================================================
# Deep Agent Factory
# =============================================================================
@@ -116,6 +201,30 @@ async def create_surfsense_deep_agent(
            additional_tools=[my_custom_tool]
        )
    """
+    # Discover available connectors and document types for this search space
+    # This enables dynamic tool docstrings that inform the LLM about what's actually available
+    available_connectors: list[str] | None = None
+    available_document_types: list[str] | None = None
+    try:
+        # Get enabled search source connectors for this search space
+        connector_types = await connector_service.get_available_connectors(
+            search_space_id
+        )
+        if connector_types:
+            # Convert enum values to strings and also include mapped document types
+            available_connectors = _map_connectors_to_searchable_types(connector_types)
+
+        # Get document types that have at least one document indexed
+        available_document_types = await connector_service.get_available_document_types(
+            search_space_id
+        )
+    except Exception as e:
+        # Log but don't fail - fall back to all connectors if discovery fails
+        import logging
+
+        logging.warning(f"Failed to discover available connectors/document types: {e}")
    # Build dependencies dict for the tools registry
    dependencies = {
        "search_space_id": search_space_id,
@@ -123,6 +232,9 @@ async def create_surfsense_deep_agent(
        "connector_service": connector_service,
        "firecrawl_api_key": firecrawl_api_key,
        "user_id": user_id,  # Required for memory tools
+        # Dynamic connector/document type discovery for knowledge base tool
+        "available_connectors": available_connectors,
+        "available_document_types": available_document_types,
    }
    # Build tools using the async registry (includes MCP tools)

View file

@@ -19,6 +19,7 @@ Available tools:
# Tool factory exports (for direct use)
from .display_image import create_display_image_tool
from .knowledge_base import (
+    CONNECTOR_DESCRIPTIONS,
    create_search_knowledge_base_tool,
    format_documents_for_context,
    search_knowledge_base_async,
@@ -40,6 +41,8 @@ from .user_memory import create_recall_memory_tool, create_save_memory_tool
__all__ = [
    # Registry
    "BUILTIN_TOOLS",
+    # Knowledge base utilities
+    "CONNECTOR_DESCRIPTIONS",
    "ToolDefinition",
    "build_tools",
    # Tool factories
@@ -51,7 +54,6 @@ __all__ = [
    "create_scrape_webpage_tool",
    "create_search_knowledge_base_tool",
    "create_search_surfsense_docs_tool",
-    # Knowledge base utilities
    "format_documents_for_context",
    "get_all_tool_names",
    "get_default_enabled_tools",

View file

@@ -12,7 +12,8 @@ import json
from datetime import datetime
from typing import Any

-from langchain_core.tools import tool
+from langchain_core.tools import StructuredTool
+from pydantic import BaseModel, Field
from sqlalchemy.ext.asyncio import AsyncSession

from app.services.connector_service import ConnectorService
@@ -22,6 +23,7 @@ from app.services.connector_service import ConnectorService
# =============================================================================
# Canonical connector values used internally by ConnectorService
+# Includes all document types and search source connectors
_ALL_CONNECTORS: list[str] = [
    "EXTENSION",
    "FILE",
@@ -50,41 +52,117 @@ _ALL_CONNECTORS: list[str] = [
    "CRAWLED_URL",
    "CIRCLEBACK",
    "OBSIDIAN_CONNECTOR",
+    # Composio connectors
+    "COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
+    "COMPOSIO_GMAIL_CONNECTOR",
+    "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR",
]
+# Human-readable descriptions for each connector type
+# Used for generating dynamic docstrings and informing the LLM
+CONNECTOR_DESCRIPTIONS: dict[str, str] = {
+    "EXTENSION": "Web content saved via SurfSense browser extension (personal browsing history)",
+    "FILE": "User-uploaded documents (PDFs, Word, etc.) (personal files)",
+    "NOTE": "SurfSense Notes (notes created inside SurfSense)",
+    "SLACK_CONNECTOR": "Slack conversations and shared content (personal workspace communications)",
+    "TEAMS_CONNECTOR": "Microsoft Teams messages and conversations (personal Teams communications)",
+    "NOTION_CONNECTOR": "Notion workspace pages and databases (personal knowledge management)",
+    "YOUTUBE_VIDEO": "YouTube video transcripts and metadata (personally saved videos)",
+    "GITHUB_CONNECTOR": "GitHub repository content and issues (personal repositories and interactions)",
+    "ELASTICSEARCH_CONNECTOR": "Elasticsearch indexed documents and data (personal Elasticsearch instances)",
+    "LINEAR_CONNECTOR": "Linear project issues and discussions (personal project management)",
+    "JIRA_CONNECTOR": "Jira project issues, tickets, and comments (personal project tracking)",
+    "CONFLUENCE_CONNECTOR": "Confluence pages and comments (personal project documentation)",
+    "CLICKUP_CONNECTOR": "ClickUp tasks and project data (personal task management)",
+    "GOOGLE_CALENDAR_CONNECTOR": "Google Calendar events, meetings, and schedules (personal calendar)",
+    "GOOGLE_GMAIL_CONNECTOR": "Google Gmail emails and conversations (personal emails)",
+    "GOOGLE_DRIVE_FILE": "Google Drive files and documents (personal cloud storage)",
+    "DISCORD_CONNECTOR": "Discord server conversations and shared content (personal community)",
+    "AIRTABLE_CONNECTOR": "Airtable records, tables, and database content (personal data)",
+    "TAVILY_API": "Tavily web search API results (real-time web search)",
+    "SEARXNG_API": "SearxNG search API results (privacy-focused web search)",
+    "LINKUP_API": "Linkup search API results (web search)",
+    "BAIDU_SEARCH_API": "Baidu search API results (Chinese web search)",
+    "LUMA_CONNECTOR": "Luma events and meetings",
+    "WEBCRAWLER_CONNECTOR": "Webpages indexed by SurfSense (personally selected websites)",
+    "CRAWLED_URL": "Webpages indexed by SurfSense (personally selected websites)",
+    "BOOKSTACK_CONNECTOR": "BookStack pages (personal documentation)",
+    "CIRCLEBACK": "Circleback meeting notes, transcripts, and action items",
+    "OBSIDIAN_CONNECTOR": "Obsidian vault notes and markdown files (personal notes)",
+    # Composio connectors
+    "COMPOSIO_GOOGLE_DRIVE_CONNECTOR": "Google Drive files via Composio (personal cloud storage)",
+    "COMPOSIO_GMAIL_CONNECTOR": "Gmail emails via Composio (personal emails)",
+    "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR": "Google Calendar events via Composio (personal calendar)",
+}
-def _normalize_connectors(connectors_to_search: list[str] | None) -> list[str]:
+def _normalize_connectors(
+    connectors_to_search: list[str] | None,
+    available_connectors: list[str] | None = None,
+) -> list[str]:
    """
    Normalize connectors provided by the model.
    - Accepts user-facing enums like WEBCRAWLER_CONNECTOR and maps them to canonical
      ConnectorService types.
    - Drops unknown values.
-    - If None/empty, defaults to searching across all known connectors.
+    - If available_connectors is provided, only includes connectors from that list.
+    - If connectors_to_search is None/empty, defaults to available_connectors or all.
+
+    Args:
+        connectors_to_search: List of connectors requested by the model
+        available_connectors: List of connectors actually available in the search space
+
+    Returns:
+        List of normalized connector strings to search
    """
+    # Determine the set of valid connectors to consider
+    valid_set = (
+        set(available_connectors) if available_connectors else set(_ALL_CONNECTORS)
+    )
+
    if not connectors_to_search:
-        return list(_ALL_CONNECTORS)
+        # Search all available connectors if none specified
+        return (
+            list(available_connectors)
+            if available_connectors
+            else list(_ALL_CONNECTORS)
+        )

    normalized: list[str] = []
    for raw in connectors_to_search:
        c = (raw or "").strip().upper()
        if not c:
            continue
+        # Map user-facing aliases to canonical names
        if c == "WEBCRAWLER_CONNECTOR":
            c = "CRAWLED_URL"
        normalized.append(c)

-    # de-dupe while preserving order + filter unknown
+    # de-dupe while preserving order + filter to valid connectors
    seen: set[str] = set()
    out: list[str] = []
    for c in normalized:
        if c in seen:
            continue
+        # Only include if it's a known connector AND available
        if c not in _ALL_CONNECTORS:
            continue
+        if c not in valid_set:
+            continue
        seen.add(c)
        out.append(c)

-    return out if out else list(_ALL_CONNECTORS)
+    # Fallback to all available if nothing matched
+    return (
+        out
+        if out
+        else (
+            list(available_connectors)
+            if available_connectors
+            else list(_ALL_CONNECTORS)
+        )
+    )
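
The behavior the new code implies, sketched with hypothetical inputs:

```python
# Model asked for web pages plus an unknown value; only NOTE and CRAWLED_URL exist here
print(_normalize_connectors(
    ["webcrawler_connector", "FOO", "NOTE"],
    available_connectors=["NOTE", "CRAWLED_URL"],
))
# -> ['CRAWLED_URL', 'NOTE']  (alias mapped, unknown dropped, availability enforced)
```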
# =============================================================================
@@ -233,6 +311,7 @@ async def search_knowledge_base_async(
    top_k: int = 10,
    start_date: datetime | None = None,
    end_date: datetime | None = None,
+    available_connectors: list[str] | None = None,
) -> str:
    """
    Search the user's knowledge base for relevant documents.
@@ -248,6 +327,8 @@ async def search_knowledge_base_async(
        top_k: Number of results per connector
        start_date: Optional start datetime (UTC) for filtering documents
        end_date: Optional end datetime (UTC) for filtering documents
+        available_connectors: Optional list of connectors actually available in the search space.
+            If provided, only these connectors will be searched.

    Returns:
        Formatted string with search results
@@ -262,7 +343,7 @@ async def search_knowledge_base_async(
        end_date=end_date,
    )

-    connectors = _normalize_connectors(connectors_to_search)
+    connectors = _normalize_connectors(connectors_to_search, available_connectors)

    for connector in connectors:
        try:
@@ -316,6 +397,16 @@ async def search_knowledge_base_async(
                )
                all_documents.extend(chunks)

+            elif connector == "TEAMS_CONNECTOR":
+                _, chunks = await connector_service.search_teams(
+                    user_query=query,
+                    search_space_id=search_space_id,
+                    top_k=top_k,
+                    start_date=resolved_start_date,
+                    end_date=resolved_end_date,
+                )
+                all_documents.extend(chunks)
+
            elif connector == "NOTION_CONNECTOR":
                _, chunks = await connector_service.search_notion(
                    user_query=query,
@@ -519,6 +610,39 @@ async def search_knowledge_base_async(
                )
                all_documents.extend(chunks)

+            # =========================================================
+            # Composio Connectors
+            # =========================================================
+            elif connector == "COMPOSIO_GOOGLE_DRIVE_CONNECTOR":
+                _, chunks = await connector_service.search_composio_google_drive(
+                    user_query=query,
+                    search_space_id=search_space_id,
+                    top_k=top_k,
+                    start_date=resolved_start_date,
+                    end_date=resolved_end_date,
+                )
+                all_documents.extend(chunks)
+
+            elif connector == "COMPOSIO_GMAIL_CONNECTOR":
+                _, chunks = await connector_service.search_composio_gmail(
+                    user_query=query,
+                    search_space_id=search_space_id,
+                    top_k=top_k,
+                    start_date=resolved_start_date,
+                    end_date=resolved_end_date,
+                )
+                all_documents.extend(chunks)
+
+            elif connector == "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR":
+                _, chunks = await connector_service.search_composio_google_calendar(
+                    user_query=query,
+                    search_space_id=search_space_id,
+                    top_k=top_k,
+                    start_date=resolved_start_date,
+                    end_date=resolved_end_date,
+                )
+                all_documents.extend(chunks)
+
        except Exception as e:
            print(f"Error searching connector {connector}: {e}")
            continue
@@ -543,11 +667,68 @@ async def search_knowledge_base_async(
    return format_documents_for_context(deduplicated)
+def _build_connector_docstring(available_connectors: list[str] | None) -> str:
+    """
+    Build the connector documentation section for the tool docstring.
+
+    Args:
+        available_connectors: List of available connector types, or None for all
+
+    Returns:
+        Formatted docstring section listing available connectors
+    """
+    connectors = available_connectors if available_connectors else list(_ALL_CONNECTORS)
+    lines = []
+    for connector in connectors:
+        # Skip internal names, prefer user-facing aliases
+        if connector == "CRAWLED_URL":
+            # Show as WEBCRAWLER_CONNECTOR for user-facing docs
+            description = CONNECTOR_DESCRIPTIONS.get(connector, connector)
+            lines.append(f"- WEBCRAWLER_CONNECTOR: {description}")
+        else:
+            description = CONNECTOR_DESCRIPTIONS.get(connector, connector)
+            lines.append(f"- {connector}: {description}")
+    return "\n".join(lines)
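
For example, with only notes and crawled pages available, the generated section would read (per the CONNECTOR_DESCRIPTIONS table above):

```python
print(_build_connector_docstring(["NOTE", "CRAWLED_URL"]))
# - NOTE: SurfSense Notes (notes created inside SurfSense)
# - WEBCRAWLER_CONNECTOR: Webpages indexed by SurfSense (personally selected websites)
```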
+# =============================================================================
+# Tool Input Schema
+# =============================================================================
+
+class SearchKnowledgeBaseInput(BaseModel):
+    """Input schema for the search_knowledge_base tool."""
+
+    query: str = Field(
+        description="The search query - be specific and include key terms"
+    )
+    top_k: int = Field(
+        default=10,
+        description="Number of results to retrieve (default: 10)",
+    )
+    start_date: str | None = Field(
+        default=None,
+        description="Optional ISO date/datetime (e.g. '2025-12-12' or '2025-12-12T00:00:00+00:00')",
+    )
+    end_date: str | None = Field(
+        default=None,
+        description="Optional ISO date/datetime (e.g. '2025-12-19' or '2025-12-19T23:59:59+00:00')",
+    )
+    connectors_to_search: list[str] | None = Field(
+        default=None,
+        description="Optional list of connector enums to search. If omitted, searches all available.",
+    )
def create_search_knowledge_base_tool(
    search_space_id: int,
    db_session: AsyncSession,
    connector_service: ConnectorService,
-):
+    available_connectors: list[str] | None = None,
+    available_document_types: list[str] | None = None,
+) -> StructuredTool:
    """
    Factory function to create the search_knowledge_base tool with injected dependencies.
@@ -555,72 +736,57 @@ def create_search_knowledge_base_tool(
        search_space_id: The user's search space ID
        db_session: Database session
        connector_service: Initialized connector service
+        available_connectors: Optional list of connector types available in the search space.
+            Used to dynamically generate the tool docstring.
+        available_document_types: Optional list of document types that have data in the search space.
+            Used to inform the LLM about what data exists.

    Returns:
-        A configured tool function
+        A configured StructuredTool instance
    """
+    # Build connector documentation dynamically
+    connector_docs = _build_connector_docstring(available_connectors)

-    @tool
-    async def search_knowledge_base(
+    # Build context about available document types
+    doc_types_info = ""
+    if available_document_types:
+        doc_types_info = f"""
+## Document types with indexed content in this search space
+The following document types have content available for search:
+{", ".join(available_document_types)}
+Focus searches on these types for best results."""
+
+    # Build the dynamic description for the tool
+    # This is what the LLM sees when deciding whether/how to use the tool
+    dynamic_description = f"""Search the user's personal knowledge base for relevant information.
+Use this tool to find documents, notes, files, web pages, and other content that may help answer the user's question.
+IMPORTANT:
+- If the user requests a specific source type (e.g. "my notes", "Slack messages"), pass `connectors_to_search=[...]` using the enums below.
+- If `connectors_to_search` is omitted/empty, the system will search broadly.
+- Only connectors that are enabled/configured for this search space are available.{doc_types_info}
+## Available connector enums for `connectors_to_search`
+{connector_docs}
+NOTE: `WEBCRAWLER_CONNECTOR` is mapped internally to the canonical document type `CRAWLED_URL`."""
+
+    # Capture for closure
+    _available_connectors = available_connectors
+
+    async def _search_knowledge_base_impl(
        query: str,
        top_k: int = 10,
        start_date: str | None = None,
        end_date: str | None = None,
        connectors_to_search: list[str] | None = None,
    ) -> str:
-        """
-        Search the user's personal knowledge base for relevant information.
-        Use this tool to find documents, notes, files, web pages, and other content
-        that may help answer the user's question.
-        IMPORTANT:
-        - If the user requests a specific source type (e.g. "my notes", "Slack messages"),
-          pass `connectors_to_search=[...]` using the enums below.
-        - If `connectors_to_search` is omitted/empty, the system will search broadly.
-        ## Available connector enums for `connectors_to_search`
-        - EXTENSION: "Web content saved via SurfSense browser extension" (personal browsing history)
-        - FILE: "User-uploaded documents (PDFs, Word, etc.)" (personal files)
-        - NOTE: "SurfSense Notes" (notes created inside SurfSense)
-        - SLACK_CONNECTOR: "Slack conversations and shared content" (personal workspace communications)
-        - TEAMS_CONNECTOR: "Microsoft Teams messages and conversations" (personal Teams communications)
-        - NOTION_CONNECTOR: "Notion workspace pages and databases" (personal knowledge management)
-        - YOUTUBE_VIDEO: "YouTube video transcripts and metadata" (personally saved videos)
-        - GITHUB_CONNECTOR: "GitHub repository content and issues" (personal repositories and interactions)
-        - ELASTICSEARCH_CONNECTOR: "Elasticsearch indexed documents and data" (personal Elasticsearch instances and custom data sources)
-        - LINEAR_CONNECTOR: "Linear project issues and discussions" (personal project management)
-        - JIRA_CONNECTOR: "Jira project issues, tickets, and comments" (personal project tracking)
-        - CONFLUENCE_CONNECTOR: "Confluence pages and comments" (personal project documentation)
-        - CLICKUP_CONNECTOR: "ClickUp tasks and project data" (personal task management)
-        - GOOGLE_CALENDAR_CONNECTOR: "Google Calendar events, meetings, and schedules" (personal calendar and time management)
-        - GOOGLE_GMAIL_CONNECTOR: "Google Gmail emails and conversations" (personal emails and communications)
-        - GOOGLE_DRIVE_FILE: "Google Drive files and documents" (personal cloud storage and file management)
-        - DISCORD_CONNECTOR: "Discord server conversations and shared content" (personal community communications)
-        - AIRTABLE_CONNECTOR: "Airtable records, tables, and database content" (personal data management and organization)
-        - TAVILY_API: "Tavily search API results" (personalized search results)
-        - SEARXNG_API: "SearxNG search API results" (personalized search results)
-        - LINKUP_API: "Linkup search API results" (personalized search results)
-        - BAIDU_SEARCH_API: "Baidu search API results" (personalized search results)
-        - LUMA_CONNECTOR: "Luma events"
-        - WEBCRAWLER_CONNECTOR: "Webpages indexed by SurfSense" (personally selected websites)
-        - BOOKSTACK_CONNECTOR: "BookStack pages" (personal documentation)
-        - CIRCLEBACK: "Circleback meeting notes, transcripts, and action items" (personal meeting records)
-        - OBSIDIAN_CONNECTOR: "Obsidian vault notes and markdown files" (personal notes and knowledge management)
-        NOTE: `WEBCRAWLER_CONNECTOR` is mapped internally to the canonical document type `CRAWLED_URL`.
-        Args:
-            query: The search query - be specific and include key terms
-            top_k: Number of results to retrieve (default: 10)
-            start_date: Optional ISO date/datetime (e.g. "2025-12-12" or "2025-12-12T00:00:00+00:00")
-            end_date: Optional ISO date/datetime (e.g. "2025-12-19" or "2025-12-19T23:59:59+00:00")
-            connectors_to_search: Optional list of connector enums to search. If omitted, searches all.
-        Returns:
-            Formatted string with relevant documents and their content
-        """
+        """Implementation function for knowledge base search."""
        from app.agents.new_chat.utils import parse_date_or_datetime

        parsed_start: datetime | None = None
@@ -640,6 +806,16 @@ def create_search_knowledge_base_tool(
            top_k=top_k,
            start_date=parsed_start,
            end_date=parsed_end,
+            available_connectors=_available_connectors,
        )

-    return search_knowledge_base
+    # Create StructuredTool with dynamic description
+    # This properly sets the description that the LLM sees
+    tool = StructuredTool(
+        name="search_knowledge_base",
+        description=dynamic_description,
+        coroutine=_search_knowledge_base_impl,
+        args_schema=SearchKnowledgeBaseInput,
+    )
+    return tool
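
A sketch of how the factory and the resulting StructuredTool would be used (the session and connector service are assumed to exist already; all values are illustrative):

```python
# Hypothetical wiring; db_session / connector_service construction elided
kb_tool = create_search_knowledge_base_tool(
    search_space_id=1,
    db_session=session,
    connector_service=service,
    available_connectors=["NOTE", "SLACK_CONNECTOR", "CRAWLED_URL"],
    available_document_types=["NOTE", "CRAWLED_URL"],
)
print(kb_tool.description)  # lists only the three available connectors

# inside an async context:
result = await kb_tool.ainvoke(
    {"query": "Q3 roadmap decisions", "connectors_to_search": ["SLACK_CONNECTOR"]}
)
```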

View file

@@ -85,6 +85,7 @@ class ToolDefinition:
# Contributors: Add your new tools here!
BUILTIN_TOOLS: list[ToolDefinition] = [
    # Core tool - searches the user's knowledge base
+    # Now supports dynamic connector/document type discovery
    ToolDefinition(
        name="search_knowledge_base",
        description="Search the user's personal knowledge base for relevant information",
@@ -92,8 +93,12 @@ BUILTIN_TOOLS: list[ToolDefinition] = [
            search_space_id=deps["search_space_id"],
            db_session=deps["db_session"],
            connector_service=deps["connector_service"],
+            # Optional: dynamically discovered connectors/document types
+            available_connectors=deps.get("available_connectors"),
+            available_document_types=deps.get("available_document_types"),
        ),
        requires=["search_space_id", "db_session", "connector_service"],
+        # Note: available_connectors and available_document_types are optional
    ),
    # Podcast generation tool
    ToolDefinition(

View file

@@ -2871,3 +2871,350 @@ class ConnectorService:
        }
        return result_object, obsidian_docs
+    # =========================================================================
+    # Composio Connector Search Methods
+    # =========================================================================
+
+    async def search_composio_google_drive(
+        self,
+        user_query: str,
+        search_space_id: int,
+        top_k: int = 20,
+        start_date: datetime | None = None,
+        end_date: datetime | None = None,
+    ) -> tuple:
+        """
+        Search for Composio Google Drive files and return both the source information
+        and langchain documents.
+
+        Uses combined chunk-level and document-level hybrid search with RRF fusion.
+
+        Args:
+            user_query: The user's query
+            search_space_id: The search space ID to search in
+            top_k: Maximum number of results to return
+            start_date: Optional start date for filtering documents by updated_at
+            end_date: Optional end date for filtering documents by updated_at
+
+        Returns:
+            tuple: (sources_info, langchain_documents)
+        """
+        composio_drive_docs = await self._combined_rrf_search(
+            query_text=user_query,
+            search_space_id=search_space_id,
+            document_type="COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
+            top_k=top_k,
+            start_date=start_date,
+            end_date=end_date,
+        )
+
+        # Early return if no results
+        if not composio_drive_docs:
+            return {
+                "id": 54,
+                "name": "Google Drive (Composio)",
+                "type": "COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
+                "sources": [],
+            }, []
+
+        def _title_fn(doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
+            return (
+                doc_info.get("title")
+                or metadata.get("title")
+                or metadata.get("file_name")
+                or "Untitled Document"
+            )
+
+        def _url_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
+            return metadata.get("url") or metadata.get("web_view_link") or ""
+
+        def _description_fn(
+            chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any]
+        ) -> str:
+            description = self._chunk_preview(chunk.get("content", ""), limit=200)
+            info_parts = []
+            mime_type = metadata.get("mime_type")
+            modified_time = metadata.get("modified_time")
+            if mime_type:
+                info_parts.append(f"Type: {mime_type}")
+            if modified_time:
+                info_parts.append(f"Modified: {modified_time}")
+            if info_parts:
+                description = (description + " | " + " | ".join(info_parts)).strip(" |")
+            return description
+
+        def _extra_fields_fn(
+            _chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any]
+        ) -> dict[str, Any]:
+            return {
+                "mime_type": metadata.get("mime_type", ""),
+                "file_id": metadata.get("file_id", ""),
+                "modified_time": metadata.get("modified_time", ""),
+            }
+
+        sources_list = self._build_chunk_sources_from_documents(
+            composio_drive_docs,
+            title_fn=_title_fn,
+            url_fn=_url_fn,
+            description_fn=_description_fn,
+            extra_fields_fn=_extra_fields_fn,
+        )
+
+        # Create result object
+        result_object = {
+            "id": 54,
+            "name": "Google Drive (Composio)",
+            "type": "COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
+            "sources": sources_list,
+        }
+
+        return result_object, composio_drive_docs
+    async def search_composio_gmail(
+        self,
+        user_query: str,
+        search_space_id: int,
+        top_k: int = 20,
+        start_date: datetime | None = None,
+        end_date: datetime | None = None,
+    ) -> tuple:
+        """
+        Search for Composio Gmail messages and return both the source information
+        and langchain documents.
+
+        Uses combined chunk-level and document-level hybrid search with RRF fusion.
+
+        Args:
+            user_query: The user's query
+            search_space_id: The search space ID to search in
+            top_k: Maximum number of results to return
+            start_date: Optional start date for filtering documents by updated_at
+            end_date: Optional end date for filtering documents by updated_at
+
+        Returns:
+            tuple: (sources_info, langchain_documents)
+        """
+        composio_gmail_docs = await self._combined_rrf_search(
+            query_text=user_query,
+            search_space_id=search_space_id,
+            document_type="COMPOSIO_GMAIL_CONNECTOR",
+            top_k=top_k,
+            start_date=start_date,
+            end_date=end_date,
+        )
+
+        # Early return if no results
+        if not composio_gmail_docs:
+            return {
+                "id": 55,
+                "name": "Gmail (Composio)",
+                "type": "COMPOSIO_GMAIL_CONNECTOR",
+                "sources": [],
+            }, []
+
+        def _title_fn(doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
+            return (
+                doc_info.get("title")
+                or metadata.get("subject")
+                or metadata.get("title")
+                or "Untitled Email"
+            )
+
+        def _url_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
+            return metadata.get("url") or ""
+
+        def _description_fn(
+            chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any]
+        ) -> str:
+            description = self._chunk_preview(chunk.get("content", ""), limit=200)
+            info_parts = []
+            sender = metadata.get("from") or metadata.get("sender")
+            date = metadata.get("date") or metadata.get("received_at")
+            if sender:
+                info_parts.append(f"From: {sender}")
+            if date:
+                info_parts.append(f"Date: {date}")
+            if info_parts:
+                description = (description + " | " + " | ".join(info_parts)).strip(" |")
+            return description
+
+        def _extra_fields_fn(
+            _chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any]
+        ) -> dict[str, Any]:
+            return {
+                "message_id": metadata.get("message_id", ""),
+                "thread_id": metadata.get("thread_id", ""),
+                "from": metadata.get("from", ""),
+                "to": metadata.get("to", ""),
+                "date": metadata.get("date", ""),
+            }
+
+        sources_list = self._build_chunk_sources_from_documents(
+            composio_gmail_docs,
+            title_fn=_title_fn,
+            url_fn=_url_fn,
+            description_fn=_description_fn,
+            extra_fields_fn=_extra_fields_fn,
+        )
+
+        # Create result object
+        result_object = {
+            "id": 55,
+            "name": "Gmail (Composio)",
+            "type": "COMPOSIO_GMAIL_CONNECTOR",
+            "sources": sources_list,
+        }
+
+        return result_object, composio_gmail_docs
+    async def search_composio_google_calendar(
+        self,
+        user_query: str,
+        search_space_id: int,
+        top_k: int = 20,
+        start_date: datetime | None = None,
+        end_date: datetime | None = None,
+    ) -> tuple:
+        """
+        Search for Composio Google Calendar events and return both the source information
+        and langchain documents.
+
+        Uses combined chunk-level and document-level hybrid search with RRF fusion.
+
+        Args:
+            user_query: The user's query
+            search_space_id: The search space ID to search in
+            top_k: Maximum number of results to return
+            start_date: Optional start date for filtering documents by updated_at
+            end_date: Optional end date for filtering documents by updated_at
+
+        Returns:
+            tuple: (sources_info, langchain_documents)
+        """
+        composio_calendar_docs = await self._combined_rrf_search(
+            query_text=user_query,
+            search_space_id=search_space_id,
+            document_type="COMPOSIO_GOOGLE_CALENDAR_CONNECTOR",
+            top_k=top_k,
+            start_date=start_date,
+            end_date=end_date,
+        )
+
+        # Early return if no results
+        if not composio_calendar_docs:
+            return {
+                "id": 56,
+                "name": "Google Calendar (Composio)",
+                "type": "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR",
+                "sources": [],
+            }, []
+
+        def _title_fn(doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
+            return (
+                doc_info.get("title")
+                or metadata.get("summary")
+                or metadata.get("title")
+                or "Untitled Event"
+            )
+
+        def _url_fn(_doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
+            return metadata.get("url") or metadata.get("html_link") or ""
+
+        def _description_fn(
+            chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any]
+        ) -> str:
+            description = self._chunk_preview(chunk.get("content", ""), limit=200)
+            info_parts = []
+            start_time = metadata.get("start_time") or metadata.get("start")
+            end_time = metadata.get("end_time") or metadata.get("end")
+            if start_time:
+                info_parts.append(f"Start: {start_time}")
+            if end_time:
+                info_parts.append(f"End: {end_time}")
+            if info_parts:
+                description = (description + " | " + " | ".join(info_parts)).strip(" |")
+            return description
+
+        def _extra_fields_fn(
+            _chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any]
+        ) -> dict[str, Any]:
+            return {
+                "event_id": metadata.get("event_id", ""),
+                "calendar_id": metadata.get("calendar_id", ""),
+                "start_time": metadata.get("start_time", ""),
+                "end_time": metadata.get("end_time", ""),
+                "location": metadata.get("location", ""),
+            }
+
+        sources_list = self._build_chunk_sources_from_documents(
+            composio_calendar_docs,
+            title_fn=_title_fn,
+            url_fn=_url_fn,
+            description_fn=_description_fn,
+            extra_fields_fn=_extra_fields_fn,
+        )
+
+        # Create result object
+        result_object = {
+            "id": 56,
+            "name": "Google Calendar (Composio)",
+            "type": "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR",
+            "sources": sources_list,
+        }
+
+        return result_object, composio_calendar_docs
+    # =========================================================================
+    # Utility Methods for Connector Discovery
+    # =========================================================================
+
+    async def get_available_connectors(
+        self,
+        search_space_id: int,
+    ) -> list[SearchSourceConnectorType]:
+        """
+        Get all available (enabled) connector types for a search space.
+
+        Args:
+            search_space_id: The search space ID
+
+        Returns:
+            List of SearchSourceConnectorType enums for enabled connectors
+        """
+        query = (
+            select(SearchSourceConnector.connector_type)
+            .filter(
+                SearchSourceConnector.search_space_id == search_space_id,
+            )
+            .distinct()
+        )
+        result = await self.session.execute(query)
+        connector_types = result.scalars().all()
+        return list(connector_types)
+
+    async def get_available_document_types(
+        self,
+        search_space_id: int,
+    ) -> list[str]:
+        """
+        Get all document types that have at least one document in the search space.
+
+        Args:
+            search_space_id: The search space ID
+
+        Returns:
+            List of document type strings that have documents indexed
+        """
+        from sqlalchemy import distinct
+
+        from app.db import Document
+
+        query = select(distinct(Document.document_type)).filter(
+            Document.search_space_id == search_space_id,
+        )
+        result = await self.session.execute(query)
+        doc_types = result.scalars().all()
+        return [str(dt) for dt in doc_types]
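
Putting the discovery pieces together, the flow looks roughly like this (a sketch; the ConnectorService constructor arguments, session setup, and cross-module imports are assumptions, not shown in this diff):

```python
# Sketch of the discovery flow feeding the dynamic tool docstring
service = ConnectorService(session)  # constructor signature assumed

enabled = await service.get_available_connectors(search_space_id=7)
indexed = await service.get_available_document_types(search_space_id=7)

# _map_connectors_to_searchable_types lives in the deep agent factory module
searchable = _map_connectors_to_searchable_types(enabled)
# `searchable` and `indexed` are then passed to create_search_knowledge_base_tool(...)
```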

View file

@@ -54,21 +54,64 @@ def format_attachments_as_context(attachments: list[ChatAttachment]) -> str:
def format_mentioned_documents_as_context(documents: list[Document]) -> str:
-    """Format mentioned documents as context for the agent."""
+    """
+    Format mentioned documents as context for the agent.
+
+    Uses the same XML structure as knowledge_base.format_documents_for_context
+    to ensure citations work properly with chunk IDs.
+    """
    if not documents:
        return ""

    context_parts = ["<mentioned_documents>"]
    context_parts.append(
        "The user has explicitly mentioned the following documents from their knowledge base. "
-        "These documents are directly relevant to the query and should be prioritized as primary sources."
+        "These documents are directly relevant to the query and should be prioritized as primary sources. "
+        "Use [citation:CHUNK_ID] format for citations (e.g., [citation:123])."
    )
+    context_parts.append("")

-    for i, doc in enumerate(documents, 1):
-        context_parts.append(
-            f"<document index='{i}' id='{doc.id}' title='{doc.title}' type='{doc.document_type.value}'>"
-        )
-        context_parts.append(f"<![CDATA[{doc.content}]]>")
+    for doc in documents:
+        # Build metadata JSON
+        metadata = doc.document_metadata or {}
+        metadata_json = json.dumps(metadata, ensure_ascii=False)
+
+        # Get URL from metadata
+        url = (
+            metadata.get("url")
+            or metadata.get("source")
+            or metadata.get("page_url")
+            or ""
+        )
+
+        context_parts.append("<document>")
+        context_parts.append("<document_metadata>")
+        context_parts.append(f" <document_id>{doc.id}</document_id>")
+        context_parts.append(f" <document_type>{doc.document_type.value}</document_type>")
+        context_parts.append(f" <title><![CDATA[{doc.title}]]></title>")
+        context_parts.append(f" <url><![CDATA[{url}]]></url>")
+        context_parts.append(f" <metadata_json><![CDATA[{metadata_json}]]></metadata_json>")
+        context_parts.append("</document_metadata>")
+        context_parts.append("")
+        context_parts.append("<document_content>")
+
+        # Use chunks if available (preferred for proper citations)
+        if hasattr(doc, "chunks") and doc.chunks:
+            for chunk in doc.chunks:
+                context_parts.append(
+                    f" <chunk id='{chunk.id}'><![CDATA[{chunk.content}]]></chunk>"
+                )
+        else:
+            # Fallback to document content if chunks not loaded
+            # Use document ID as chunk ID prefix for consistency
+            context_parts.append(
+                f" <chunk id='{doc.id}'><![CDATA[{doc.content}]]></chunk>"
+            )
+
+        context_parts.append("</document_content>")
        context_parts.append("</document>")
+        context_parts.append("")

    context_parts.append("</mentioned_documents>")
    return "\n".join(context_parts)
@@ -81,8 +124,6 @@ def format_mentioned_surfsense_docs_as_context(
    if not documents:
        return ""

-    import json
-
    context_parts = ["<mentioned_surfsense_docs>"]
    context_parts.append(
        "The user has explicitly mentioned the following SurfSense documentation pages. "
@@ -262,11 +303,15 @@ async def stream_new_chat(
    # Build input with message history from frontend
    langchain_messages = []

-    # Fetch mentioned documents if any
+    # Fetch mentioned documents if any (with chunks for proper citations)
    mentioned_documents: list[Document] = []
    if mentioned_document_ids:
+        from sqlalchemy.orm import selectinload as doc_selectinload
+
        result = await session.execute(
-            select(Document).filter(
+            select(Document)
+            .options(doc_selectinload(Document.chunks))
+            .filter(
                Document.id.in_(mentioned_document_ids),
                Document.search_space_id == search_space_id,
            )

View file

@@ -1,6 +1,6 @@
[project]
name = "surf-new-backend"
-version = "0.0.11"
+version = "0.0.12"
description = "SurfSense Backend"
requires-python = ">=3.12"
dependencies = [

View file

@@ -6545,7 +6545,7 @@ wheels = [
[[package]]
name = "surf-new-backend"
-version = "0.0.11"
+version = "0.0.12"
source = { editable = "." }
dependencies = [
    { name = "alembic" },

View file

@@ -1,7 +1,7 @@
{
  "name": "surfsense_browser_extension",
  "displayName": "Surfsense Browser Extension",
-  "version": "0.0.11",
+  "version": "0.0.12",
  "description": "Extension to collect Browsing History for SurfSense.",
  "author": "https://github.com/MODSetter",
  "engines": {

View file

@@ -24,6 +24,16 @@
      "enabled": true,
      "status": "warning",
      "statusMessage": "Some requests may be blocked if not using Firecrawl."
-    }
+    },
+    "COMPOSIO_GOOGLE_DRIVE_CONNECTOR": {
+      "enabled": false,
+      "status": "disabled",
+      "statusMessage": "Not available yet."
+    },
+    "GITHUB_CONNECTOR": {
+      "enabled": false,
+      "status": "warning",
+      "statusMessage": "Some issues with indexing repositories."
+    }
  },
  "globalSettings": {

View file

@@ -1,13 +1,14 @@
"use client";

import { useQuery, useQueryClient } from "@tanstack/react-query";
-import { useAtomValue } from "jotai";
+import { useAtomValue, useSetAtom } from "jotai";
import { Inbox, LogOut, SquareLibrary, Trash2 } from "lucide-react";
import { useParams, usePathname, useRouter } from "next/navigation";
import { useTranslations } from "next-intl";
import { useTheme } from "next-themes";
-import { useCallback, useMemo, useState } from "react";
+import { useCallback, useEffect, useMemo, useState } from "react";
import { toast } from "sonner";
+import { currentThreadAtom, resetCurrentThreadAtom } from "@/atoms/chat/current-thread.atom";
import { deleteSearchSpaceMutationAtom } from "@/atoms/search-spaces/search-space-mutation.atoms";
import { searchSpacesAtom } from "@/atoms/search-spaces/search-space-query.atoms";
import { currentUserAtom } from "@/atoms/user/user-query.atoms";
@@ -68,11 +69,16 @@ export function LayoutDataProvider({
  const { data: user } = useAtomValue(currentUserAtom);
  const { data: searchSpacesData, refetch: refetchSearchSpaces } = useAtomValue(searchSpacesAtom);
  const { mutateAsync: deleteSearchSpace } = useAtomValue(deleteSearchSpaceMutationAtom);
+  const currentThreadState = useAtomValue(currentThreadAtom);
+  const resetCurrentThread = useSetAtom(resetCurrentThreadAtom);

-  // Current IDs from URL
+  // State for handling new chat navigation when router is out of sync
+  const [pendingNewChat, setPendingNewChat] = useState(false);
+
+  // Current IDs from URL, with fallback to atom for replaceState updates
  const currentChatId = params?.chat_id
    ? Number(Array.isArray(params.chat_id) ? params.chat_id[0] : params.chat_id)
-    : null;
+    : currentThreadState.id;

  // Fetch current search space (for caching purposes)
  useQuery({
@@ -124,6 +130,17 @@ export function LayoutDataProvider({
  const [isDeletingSearchSpace, setIsDeletingSearchSpace] = useState(false);
  const [isLeavingSearchSpace, setIsLeavingSearchSpace] = useState(false);

+  // Effect to complete new chat navigation after router syncs
+  // This runs when handleNewChat detected an out-of-sync state and triggered a sync
+  useEffect(() => {
+    if (pendingNewChat && params?.chat_id) {
+      // Router is now synced (chat_id is in params), complete navigation to new-chat
+      resetCurrentThread();
+      router.push(`/dashboard/${searchSpaceId}/new-chat`);
+      setPendingNewChat(false);
+    }
+  }, [pendingNewChat, params?.chat_id, router, searchSpaceId, resetCurrentThread]);
+
  const searchSpaces: SearchSpace[] = useMemo(() => {
    if (!searchSpacesData || !Array.isArray(searchSpacesData)) return [];
    return searchSpacesData.map((space) => ({
@@ -175,12 +192,6 @@ export function LayoutDataProvider({
  // Navigation items
  const navItems: NavItem[] = useMemo(
    () => [
-      {
-        title: "Documents",
-        url: `/dashboard/${searchSpaceId}/documents`,
-        icon: SquareLibrary,
-        isActive: pathname?.includes("/documents"),
-      },
      {
        title: "Inbox",
        url: "#inbox", // Special URL to indicate this is handled differently
@@ -188,6 +199,12 @@ export function LayoutDataProvider({
        isActive: isInboxSidebarOpen,
        badge: unreadCount > 0 ? formatInboxCount(unreadCount) : undefined,
      },
+      {
+        title: "Documents",
+        url: `/dashboard/${searchSpaceId}/documents`,
+        icon: SquareLibrary,
+        isActive: pathname?.includes("/documents"),
+      },
    ],
    [searchSpaceId, pathname, isInboxSidebarOpen, unreadCount]
  );
@@ -292,8 +309,20 @@ export function LayoutDataProvider({
  );

  const handleNewChat = useCallback(() => {
-    router.push(`/dashboard/${searchSpaceId}/new-chat`);
-  }, [router, searchSpaceId]);
+    // Check if router is out of sync (thread created via replaceState but params don't have chat_id)
+    const isOutOfSync = currentThreadState.id !== null && !params?.chat_id;
+
+    if (isOutOfSync) {
+      // First sync Next.js router by navigating to the current chat's actual URL
+      // This updates the router's internal state to match the browser URL
+      router.replace(`/dashboard/${searchSpaceId}/new-chat/${currentThreadState.id}`);
+      // Set flag to trigger navigation to new-chat after params update
+      setPendingNewChat(true);
+    } else {
+      // Normal navigation - router is in sync
+      router.push(`/dashboard/${searchSpaceId}/new-chat`);
+    }
+  }, [router, searchSpaceId, currentThreadState.id, params?.chat_id]);

  const handleChatSelect = useCallback(
    (chat: ChatItem) => {

View file

@@ -1,6 +1,6 @@
{
  "name": "surfsense_web",
-  "version": "0.0.11",
+  "version": "0.0.12",
  "private": true,
  "description": "SurfSense Frontend",
  "scripts": {