mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-25 00:36:31 +02:00
merge: upstream/dev with migration renumbering
This commit is contained in:
commit
a7145b2c63
176 changed files with 8791 additions and 3608 deletions
|
|
@ -7,6 +7,7 @@ via NewLLMConfig.
|
|||
"""
|
||||
|
||||
from collections.abc import Sequence
|
||||
from typing import Any
|
||||
|
||||
from deepagents import create_deep_agent
|
||||
from langchain_core.tools import BaseTool
|
||||
|
|
@ -23,6 +24,90 @@ from app.agents.new_chat.system_prompt import (
|
|||
from app.agents.new_chat.tools.registry import build_tools_async
|
||||
from app.services.connector_service import ConnectorService
|
||||
|
||||
# =============================================================================
|
||||
# Connector Type Mapping
|
||||
# =============================================================================
|
||||
|
||||
# Maps SearchSourceConnectorType enum values to the searchable document/connector types
|
||||
# used by the knowledge_base tool. Some connectors map to different document types.
|
||||
_CONNECTOR_TYPE_TO_SEARCHABLE: dict[str, str] = {
|
||||
# Direct mappings (connector type == searchable type)
|
||||
"TAVILY_API": "TAVILY_API",
|
||||
"SEARXNG_API": "SEARXNG_API",
|
||||
"LINKUP_API": "LINKUP_API",
|
||||
"BAIDU_SEARCH_API": "BAIDU_SEARCH_API",
|
||||
"SLACK_CONNECTOR": "SLACK_CONNECTOR",
|
||||
"TEAMS_CONNECTOR": "TEAMS_CONNECTOR",
|
||||
"NOTION_CONNECTOR": "NOTION_CONNECTOR",
|
||||
"GITHUB_CONNECTOR": "GITHUB_CONNECTOR",
|
||||
"LINEAR_CONNECTOR": "LINEAR_CONNECTOR",
|
||||
"DISCORD_CONNECTOR": "DISCORD_CONNECTOR",
|
||||
"JIRA_CONNECTOR": "JIRA_CONNECTOR",
|
||||
"CONFLUENCE_CONNECTOR": "CONFLUENCE_CONNECTOR",
|
||||
"CLICKUP_CONNECTOR": "CLICKUP_CONNECTOR",
|
||||
"GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR",
|
||||
"GOOGLE_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR",
|
||||
"GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE", # Connector type differs from document type
|
||||
"AIRTABLE_CONNECTOR": "AIRTABLE_CONNECTOR",
|
||||
"LUMA_CONNECTOR": "LUMA_CONNECTOR",
|
||||
"ELASTICSEARCH_CONNECTOR": "ELASTICSEARCH_CONNECTOR",
|
||||
"WEBCRAWLER_CONNECTOR": "CRAWLED_URL", # Maps to document type
|
||||
"BOOKSTACK_CONNECTOR": "BOOKSTACK_CONNECTOR",
|
||||
"CIRCLEBACK_CONNECTOR": "CIRCLEBACK", # Connector type differs from document type
|
||||
"OBSIDIAN_CONNECTOR": "OBSIDIAN_CONNECTOR",
|
||||
# Composio connectors
|
||||
"COMPOSIO_GOOGLE_DRIVE_CONNECTOR": "COMPOSIO_GOOGLE_DRIVE_CONNECTOR",
|
||||
"COMPOSIO_GMAIL_CONNECTOR": "COMPOSIO_GMAIL_CONNECTOR",
|
||||
"COMPOSIO_GOOGLE_CALENDAR_CONNECTOR": "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR",
|
||||
}
|
||||
|
||||
# Document types that don't come from SearchSourceConnector but should always be searchable
|
||||
_ALWAYS_AVAILABLE_DOC_TYPES: list[str] = [
|
||||
"EXTENSION", # Browser extension data
|
||||
"FILE", # Uploaded files
|
||||
"NOTE", # User notes
|
||||
"YOUTUBE_VIDEO", # YouTube videos
|
||||
]
|
||||
|
||||
|
||||
def _map_connectors_to_searchable_types(
|
||||
connector_types: list[Any],
|
||||
) -> list[str]:
|
||||
"""
|
||||
Map SearchSourceConnectorType enums to searchable document/connector types.
|
||||
|
||||
This function:
|
||||
1. Converts connector type enums to their searchable counterparts
|
||||
2. Includes always-available document types (EXTENSION, FILE, NOTE, YOUTUBE_VIDEO)
|
||||
3. Deduplicates while preserving order
|
||||
|
||||
Args:
|
||||
connector_types: List of SearchSourceConnectorType enum values
|
||||
|
||||
Returns:
|
||||
List of searchable connector/document type strings
|
||||
"""
|
||||
result_set: set[str] = set()
|
||||
result_list: list[str] = []
|
||||
|
||||
# Add always-available document types first
|
||||
for doc_type in _ALWAYS_AVAILABLE_DOC_TYPES:
|
||||
if doc_type not in result_set:
|
||||
result_set.add(doc_type)
|
||||
result_list.append(doc_type)
|
||||
|
||||
# Map each connector type to its searchable equivalent
|
||||
for ct in connector_types:
|
||||
# Handle both enum and string types
|
||||
ct_str = ct.value if hasattr(ct, "value") else str(ct)
|
||||
searchable = _CONNECTOR_TYPE_TO_SEARCHABLE.get(ct_str)
|
||||
if searchable and searchable not in result_set:
|
||||
result_set.add(searchable)
|
||||
result_list.append(searchable)
|
||||
|
||||
return result_list
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Deep Agent Factory
|
||||
# =============================================================================
|
||||
|
|
@ -117,6 +202,30 @@ async def create_surfsense_deep_agent(
|
|||
additional_tools=[my_custom_tool]
|
||||
)
|
||||
"""
|
||||
# Discover available connectors and document types for this search space
|
||||
# This enables dynamic tool docstrings that inform the LLM about what's actually available
|
||||
available_connectors: list[str] | None = None
|
||||
available_document_types: list[str] | None = None
|
||||
|
||||
try:
|
||||
# Get enabled search source connectors for this search space
|
||||
connector_types = await connector_service.get_available_connectors(
|
||||
search_space_id
|
||||
)
|
||||
if connector_types:
|
||||
# Convert enum values to strings and also include mapped document types
|
||||
available_connectors = _map_connectors_to_searchable_types(connector_types)
|
||||
|
||||
# Get document types that have at least one document indexed
|
||||
available_document_types = await connector_service.get_available_document_types(
|
||||
search_space_id
|
||||
)
|
||||
except Exception as e:
|
||||
# Log but don't fail - fall back to all connectors if discovery fails
|
||||
import logging
|
||||
|
||||
logging.warning(f"Failed to discover available connectors/document types: {e}")
|
||||
|
||||
# Build dependencies dict for the tools registry
|
||||
dependencies = {
|
||||
"search_space_id": search_space_id,
|
||||
|
|
@ -125,6 +234,9 @@ async def create_surfsense_deep_agent(
|
|||
"firecrawl_api_key": firecrawl_api_key,
|
||||
"user_id": user_id, # Required for memory tools
|
||||
"thread_id": thread_id, # For podcast tool
|
||||
# Dynamic connector/document type discovery for knowledge base tool
|
||||
"available_connectors": available_connectors,
|
||||
"available_document_types": available_document_types,
|
||||
}
|
||||
|
||||
# Build tools using the async registry (includes MCP tools)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue