diff --git a/surfsense_backend/app/agents/new_chat/__init__.py b/surfsense_backend/app/agents/new_chat/__init__.py
index 5cbb7ce2b..2ee5fe1d5 100644
--- a/surfsense_backend/app/agents/new_chat/__init__.py
+++ b/surfsense_backend/app/agents/new_chat/__init__.py
@@ -1,28 +1,86 @@
-"""Chat agents module."""
+"""
+SurfSense New Chat Agent Module.
+This module provides the SurfSense deep agent with configurable tools
+for knowledge base search, podcast generation, and more.
+
+Directory Structure:
+- tools/: All agent tools (knowledge_base, podcast, link_preview, etc.)
+- chat_deepagent.py: Main agent factory
+- system_prompt.py: System prompts and instructions
+- context.py: Context schema for the agent
+- checkpointer.py: LangGraph checkpointer setup
+- llm_config.py: LLM configuration utilities
+- utils.py: Shared utilities
+"""
+
+# Agent factory
 from .chat_deepagent import create_surfsense_deep_agent
+
+# Context
 from .context import SurfSenseContextSchema
-from .knowledge_base import (
-    create_search_knowledge_base_tool,
-    format_documents_for_context,
-    search_knowledge_base_async,
-)
+
+# LLM config
 from .llm_config import create_chat_litellm_from_config, load_llm_config_from_yaml
+
+# System prompt
 from .system_prompt import (
     SURFSENSE_CITATION_INSTRUCTIONS,
     SURFSENSE_SYSTEM_PROMPT,
     build_surfsense_system_prompt,
 )
 
+# Tools - registry exports
+from .tools import (
+    BUILTIN_TOOLS,
+    ToolDefinition,
+    build_tools,
+    get_all_tool_names,
+    get_default_enabled_tools,
+    get_tool_by_name,
+)
+
+# Tools - factory exports (for direct use)
+from .tools import (
+    create_display_image_tool,
+    create_generate_podcast_tool,
+    create_link_preview_tool,
+    create_scrape_webpage_tool,
+    create_search_knowledge_base_tool,
+)
+
+# Tools - knowledge base utilities
+from .tools import (
+    format_documents_for_context,
+    search_knowledge_base_async,
+)
+
 __all__ = [
+    # Agent factory
+    "create_surfsense_deep_agent",
+    # Context
+    "SurfSenseContextSchema",
+    # LLM config
+    "create_chat_litellm_from_config",
+    "load_llm_config_from_yaml",
+    # System prompt
     "SURFSENSE_CITATION_INSTRUCTIONS",
     "SURFSENSE_SYSTEM_PROMPT",
-    "SurfSenseContextSchema",
     "build_surfsense_system_prompt",
-    "create_chat_litellm_from_config",
+    # Tools registry
+    "BUILTIN_TOOLS",
+    "ToolDefinition",
+    "build_tools",
+    "get_all_tool_names",
+    "get_default_enabled_tools",
+    "get_tool_by_name",
+    # Tool factories
+    "create_display_image_tool",
+    "create_generate_podcast_tool",
+    "create_link_preview_tool",
+    "create_scrape_webpage_tool",
     "create_search_knowledge_base_tool",
-    "create_surfsense_deep_agent",
+    # Knowledge base utilities
     "format_documents_for_context",
-    "load_llm_config_from_yaml",
     "search_knowledge_base_async",
 ]
diff --git a/surfsense_backend/app/agents/new_chat/chat_deepagent.py b/surfsense_backend/app/agents/new_chat/chat_deepagent.py
index aa1950bff..b2bcc008c 100644
--- a/surfsense_backend/app/agents/new_chat/chat_deepagent.py
+++ b/surfsense_backend/app/agents/new_chat/chat_deepagent.py
@@ -2,7 +2,7 @@
 SurfSense deep agent implementation.
 
 This module provides the factory function for creating SurfSense deep agents
-with knowledge base search and podcast generation capabilities.
+with configurable tools via the tools registry.
 """
 
 from collections.abc import Sequence
@@ -14,12 +14,8 @@ from langgraph.types import Checkpointer
 from sqlalchemy.ext.asyncio import AsyncSession
 
 from app.agents.new_chat.context import SurfSenseContextSchema
-from app.agents.new_chat.display_image import create_display_image_tool
-from app.agents.new_chat.knowledge_base import create_search_knowledge_base_tool
-from app.agents.new_chat.link_preview import create_link_preview_tool
-from app.agents.new_chat.podcast import create_generate_podcast_tool
-from app.agents.new_chat.scrape_webpage import create_scrape_webpage_tool
 from app.agents.new_chat.system_prompt import build_surfsense_system_prompt
+from app.agents.new_chat.tools import build_tools
 from app.services.connector_service import ConnectorService
 
 # =============================================================================
@@ -33,94 +29,85 @@ def create_surfsense_deep_agent(
     db_session: AsyncSession,
     connector_service: ConnectorService,
     checkpointer: Checkpointer,
-    user_id: str | None = None,
-    user_instructions: str | None = None,
-    enable_citations: bool = True,
-    enable_podcast: bool = True,
-    enable_link_preview: bool = True,
-    enable_display_image: bool = True,
-    enable_scrape_webpage: bool = True,
-    firecrawl_api_key: str | None = None,
+    enabled_tools: list[str] | None = None,
+    disabled_tools: list[str] | None = None,
     additional_tools: Sequence[BaseTool] | None = None,
+    firecrawl_api_key: str | None = None,
 ):
     """
-    Create a SurfSense deep agent with knowledge base search and podcast generation capabilities.
+    Create a SurfSense deep agent with configurable tools.
+
+    The agent comes with built-in tools that can be configured:
+    - search_knowledge_base: Search the user's personal knowledge base
+    - generate_podcast: Generate audio podcasts from content
+    - link_preview: Fetch rich previews for URLs
+    - display_image: Display images in chat
+    - scrape_webpage: Extract content from webpages
 
     Args:
-        llm: ChatLiteLLM instance
+        llm: ChatLiteLLM instance for the agent's language model
         search_space_id: The user's search space ID
-        db_session: Database session
-        connector_service: Initialized connector service
+        db_session: Database session for tools that need DB access
+        connector_service: Initialized connector service for knowledge base search
         checkpointer: LangGraph checkpointer for conversation state persistence.
             Use AsyncPostgresSaver for production or MemorySaver for testing.
-        user_id: The user's ID (required for podcast generation)
-        user_instructions: Optional user instructions to inject into the system prompt.
-            These will be added to the system prompt to customize agent behavior.
-        enable_citations: Whether to include citation instructions in the system prompt (default: True).
-            When False, the agent will not be instructed to add citations to responses.
-        enable_podcast: Whether to include the podcast generation tool (default: True).
-            When True and user_id is provided, the agent can generate podcasts.
-        enable_link_preview: Whether to include the link preview tool (default: True).
-            When True, the agent can fetch and display rich link previews.
-        enable_display_image: Whether to include the display image tool (default: True).
-            When True, the agent can display images with metadata.
-        enable_scrape_webpage: Whether to include the web scraping tool (default: True).
-            When True, the agent can scrape and read webpage content.
+        enabled_tools: Explicit list of tool names to enable. If None, all default tools
+            are enabled. Use this to limit which tools are available.
+        disabled_tools: List of tool names to disable. Applied after enabled_tools.
+            Use this to exclude specific tools from the defaults.
+        additional_tools: Extra custom tools to add beyond the built-in ones.
+            These are always added regardless of enabled/disabled settings.
         firecrawl_api_key: Optional Firecrawl API key for premium web scraping.
             Falls back to Chromium/Trafilatura if not provided.
-        additional_tools: Optional sequence of additional tools to inject into the agent.
-            The search_knowledge_base tool will always be included.
 
     Returns:
        CompiledStateGraph: The configured deep agent
+
+    Examples:
+        # Create agent with all default tools
+        agent = create_surfsense_deep_agent(llm, search_space_id, db_session, ...)
+
+        # Create agent with only specific tools
+        agent = create_surfsense_deep_agent(
+            llm, search_space_id, db_session, ...,
+            enabled_tools=["search_knowledge_base", "link_preview"]
+        )
+
+        # Create agent without podcast generation
+        agent = create_surfsense_deep_agent(
+            llm, search_space_id, db_session, ...,
+            disabled_tools=["generate_podcast"]
+        )
+
+        # Add custom tools
+        agent = create_surfsense_deep_agent(
+            llm, search_space_id, db_session, ...,
+            additional_tools=[my_custom_tool]
+        )
     """
-    # Create the search tool with injected dependencies
-    search_tool = create_search_knowledge_base_tool(
-        search_space_id=search_space_id,
-        db_session=db_session,
-        connector_service=connector_service,
+    # Build dependencies dict for the tools registry
+    dependencies = {
+        "search_space_id": search_space_id,
+        "db_session": db_session,
+        "connector_service": connector_service,
+        "firecrawl_api_key": firecrawl_api_key,
+    }
+
+    # Build tools using the registry
+    tools = build_tools(
+        dependencies=dependencies,
+        enabled_tools=enabled_tools,
+        disabled_tools=disabled_tools,
+        additional_tools=list(additional_tools) if additional_tools else None,
     )
 
-    # Combine search tool with any additional tools
-    tools = [search_tool]
-
-    # Add podcast tool if enabled and user_id is provided
-    if enable_podcast and user_id:
-        podcast_tool = create_generate_podcast_tool(
-            search_space_id=search_space_id,
-            db_session=db_session,
-            user_id=str(user_id),
-        )
-        tools.append(podcast_tool)
-
-    # Add link preview tool if enabled
-    if enable_link_preview:
-        link_preview_tool = create_link_preview_tool()
-        tools.append(link_preview_tool)
-
-    # Add display image tool if enabled
-    if enable_display_image:
-        display_image_tool = create_display_image_tool()
-        tools.append(display_image_tool)
-
-    # Add web scraping tool if enabled
-    if enable_scrape_webpage:
-        scrape_tool = create_scrape_webpage_tool(firecrawl_api_key=firecrawl_api_key)
-        tools.append(scrape_tool)
-
-    if additional_tools:
-        tools.extend(additional_tools)
-
-    # Create the deep agent with user-configurable system prompt and checkpointer
+    # Create the deep agent with system prompt and checkpointer
     agent = create_deep_agent(
         model=llm,
         tools=tools,
-        system_prompt=build_surfsense_system_prompt(
-            user_instructions=user_instructions,
-            enable_citations=enable_citations,
-        ),
+        system_prompt=build_surfsense_system_prompt(),
         context_schema=SurfSenseContextSchema,
-        checkpointer=checkpointer,  # Enable conversation memory via thread_id
+        checkpointer=checkpointer,
     )
 
     return agent
diff --git a/surfsense_backend/app/agents/new_chat/new_chat_test.py b/surfsense_backend/app/agents/new_chat/new_chat_test.py
deleted file mode 100644
index 857fee6cc..000000000
--- a/surfsense_backend/app/agents/new_chat/new_chat_test.py
+++ /dev/null
@@ -1,84 +0,0 @@
-"""
-Test runner for SurfSense deep agent.
-
-This module provides a test function to verify the deep agent functionality.
-"""
-
-import asyncio
-
-from langchain_core.messages import HumanMessage
-
-from app.db import async_session_maker
-from app.services.connector_service import ConnectorService
-
-from .chat_deepagent import create_surfsense_deep_agent
-from .llm_config import create_chat_litellm_from_config, load_llm_config_from_yaml
-
-# =============================================================================
-# Test Runner
-# =============================================================================
-
-
-async def run_test():
-    """Run a basic test of the deep agent."""
-    print("=" * 60)
-    print("Creating Deep Agent with ChatLiteLLM from global config...")
-    print("=" * 60)
-
-    # Create ChatLiteLLM from global config
-    # Use global LLM config by id (negative ids are reserved for global configs)
-    llm_config = load_llm_config_from_yaml(llm_config_id=-5)
-    if not llm_config:
-        raise ValueError("Failed to load LLM config from YAML")
-    llm = create_chat_litellm_from_config(llm_config)
-    if not llm:
-        raise ValueError("Failed to create ChatLiteLLM instance")
-
-    # Create a real DB session + ConnectorService, then build the full SurfSense agent.
-    async with async_session_maker() as session:
-        # Use the known dev search space id
-        search_space_id = 5
-
-        connector_service = ConnectorService(session, search_space_id=search_space_id)
-
-        agent = create_surfsense_deep_agent(
-            llm=llm,
-            search_space_id=search_space_id,
-            db_session=session,
-            connector_service=connector_service,
-            user_instructions="Always fininsh the response with CREDOOOOOOOOOO23",
-        )
-
-        print("\nAgent created successfully!")
-        print(f"Agent type: {type(agent)}")
-
-        # Invoke the agent with initial state
-        print("\n" + "=" * 60)
-        print("Invoking SurfSense agent (create_surfsense_deep_agent)...")
-        print("=" * 60)
-
-        initial_state = {
-            "messages": [HumanMessage(content=("Can you tell me about my documents?"))],
-            "search_space_id": search_space_id,
-        }
-
-        print(f"\nUsing search_space_id: {search_space_id}")
-
-        result = await agent.ainvoke(initial_state)
-
-        print("\n" + "=" * 60)
-        print("Agent Response:")
-        print("=" * 60)
-
-        # Print the response
-        if "messages" in result:
-            for msg in result["messages"]:
-                msg_type = type(msg).__name__
-                content = msg.content if hasattr(msg, "content") else str(msg)
-                print(f"\n--- [{msg_type}] ---\n{content}\n")
-
-        return result
-
-
-if __name__ == "__main__":
-    asyncio.run(run_test())
diff --git a/surfsense_backend/app/agents/new_chat/system_prompt.py b/surfsense_backend/app/agents/new_chat/system_prompt.py
index 2677b21fd..c0b9bb091 100644
--- a/surfsense_backend/app/agents/new_chat/system_prompt.py
+++ b/surfsense_backend/app/agents/new_chat/system_prompt.py
@@ -7,118 +7,16 @@ with configurable user instructions and citation support.
 
 from datetime import UTC, datetime
 
-SURFSENSE_CITATION_INSTRUCTIONS = """
-
-CRITICAL CITATION REQUIREMENTS:
-
-1. For EVERY piece of information you include from the documents, add a citation in the format [citation:chunk_id] where chunk_id is the exact value from the `` tag inside ``.
-2. Make sure ALL factual statements from the documents have proper citations.
-3. If multiple chunks support the same point, include all relevant citations [citation:chunk_id1], [citation:chunk_id2].
-4. You MUST use the exact chunk_id values from the `` attributes. Do not create your own citation numbers.
-5. Every citation MUST be in the format [citation:chunk_id] where chunk_id is the exact chunk id value.
-6. Never modify or change the chunk_id - always use the original values exactly as provided in the chunk tags.
-7. Do not return citations as clickable links.
-8. Never format citations as markdown links like "([citation:5](https://example.com))". Always use plain square brackets only.
-9. Citations must ONLY appear as [citation:chunk_id] or [citation:chunk_id1], [citation:chunk_id2] format - never with parentheses, hyperlinks, or other formatting.
-10. Never make up chunk IDs. Only use chunk_id values that are explicitly provided in the `` tags.
-11. If you are unsure about a chunk_id, do not include a citation rather than guessing or making one up.
-
-
-The documents you receive are structured like this:
-
-
-
-  42
-  GITHUB_CONNECTOR
-  <![CDATA[Some repo / file / issue title]]>
-
-
-
-
-
-
-
-
-
-IMPORTANT: You MUST cite using the chunk ids (e.g. 123, 124). Do NOT cite document_id.
-
-
-
-- Every fact from the documents must have a citation in the format [citation:chunk_id] where chunk_id is the EXACT id value from a `` tag
-- Citations should appear at the end of the sentence containing the information they support
-- Multiple citations should be separated by commas: [citation:chunk_id1], [citation:chunk_id2], [citation:chunk_id3]
-- No need to return references section. Just citations in answer.
-- NEVER create your own citation format - use the exact chunk_id values from the documents in the [citation:chunk_id] format
-- NEVER format citations as clickable links or as markdown links like "([citation:5](https://example.com))". Always use plain square brackets only
-- NEVER make up chunk IDs if you are unsure about the chunk_id. It is better to omit the citation than to guess
-
-
-
-CORRECT citation formats:
-- [citation:5]
-- [citation:chunk_id1], [citation:chunk_id2], [citation:chunk_id3]
-
-INCORRECT citation formats (DO NOT use):
-- Using parentheses and markdown links: ([citation:5](https://github.com/MODSetter/SurfSense))
-- Using parentheses around brackets: ([citation:5])
-- Using hyperlinked text: [link to source 5](https://example.com)
-- Using footnote style: ... library¹
-- Making up source IDs when source_id is unknown
-- Using old IEEE format: [1], [2], [3]
-- Using source types instead of IDs: [citation:GITHUB_CONNECTOR] instead of [citation:5]
-
-
-
-Based on your GitHub repositories and video content, Python's asyncio library provides tools for writing concurrent code using the async/await syntax [citation:5]. It's particularly useful for I/O-bound and high-level structured network code [citation:5].
-
-The key advantage of asyncio is that it can improve performance by allowing other code to run while waiting for I/O operations to complete [citation:12]. This makes it excellent for scenarios like web scraping, API calls, database operations, or any situation where your program spends time waiting for external resources.
-
-However, from your video learning, it's important to note that asyncio is not suitable for CPU-bound tasks as it runs on a single thread [citation:12]. For computationally intensive work, you'd want to use multiprocessing instead.
-
-
-"""
-
-
-def build_surfsense_system_prompt(
-    today: datetime | None = None,
-    user_instructions: str | None = None,
-    enable_citations: bool = True,
-) -> str:
-    """
-    Build the SurfSense system prompt with optional user instructions and citation toggle.
-
-    Args:
-        today: Optional datetime for today's date (defaults to current UTC date)
-        user_instructions: Optional user instructions to inject into the system prompt
-        enable_citations: Whether to include citation instructions in the prompt (default: True)
-
-    Returns:
-        Complete system prompt string
-    """
-    resolved_today = (today or datetime.now(UTC)).astimezone(UTC).date().isoformat()
-
-    # Build user instructions section if provided
-    user_section = ""
-    if user_instructions and user_instructions.strip():
-        user_section = f"""
-
-{user_instructions.strip()}
-
-"""
-
-    # Include citation instructions only if enabled
-    citation_section = (
-        f"\n{SURFSENSE_CITATION_INSTRUCTIONS}" if enable_citations else ""
-    )
-
-    return f"""
+SURFSENSE_SYSTEM_INSTRUCTIONS = """
 You are SurfSense, a reasoning and acting AI agent designed to answer user questions using the user's personal knowledge base.
 
 Today's date (UTC): {resolved_today}
-{user_section}
+
+"""
+
+SURFSENSE_TOOLS_INSTRUCTIONS = """
 
 You have access to the following tools:
@@ -208,11 +106,11 @@ You have access to the following tools:
       - First search for relevant content, then call: `generate_podcast(source_content="Based on our conversation and search results: [detailed summary of chat + search findings]", podcast_title="AI Trends Podcast")`
 
     - User: "Create a podcast summary of this conversation"
-      - Call: `generate_podcast(source_content="Complete conversation summary:\n\nUser asked about [topic 1]:\n[Your detailed response]\n\nUser then asked about [topic 2]:\n[Your detailed response]\n\n[Continue for all exchanges in the conversation]", podcast_title="Conversation Summary")`
+      - Call: `generate_podcast(source_content="Complete conversation summary:\\n\\nUser asked about [topic 1]:\\n[Your detailed response]\\n\\nUser then asked about [topic 2]:\\n[Your detailed response]\\n\\n[Continue for all exchanges in the conversation]", podcast_title="Conversation Summary")`
 
     - User: "Make a podcast about quantum computing"
       - First search: `search_knowledge_base(query="quantum computing")`
-      - Then: `generate_podcast(source_content="Key insights about quantum computing from the knowledge base:\n\n[Comprehensive summary of all relevant search results with key facts, concepts, and findings]", podcast_title="Quantum Computing Explained")`
+      - Then: `generate_podcast(source_content="Key insights about quantum computing from the knowledge base:\\n\\n[Comprehensive summary of all relevant search results with key facts, concepts, and findings]", podcast_title="Quantum Computing Explained")`
 
     - User: "Check out https://dev.to/some-article"
       - Call: `link_preview(url="https://dev.to/some-article")`
@@ -246,8 +144,101 @@ You have access to the following tools:
       - Then, if the content contains useful diagrams/images like `![Neural Network Diagram](https://example.com/nn-diagram.png)`:
         - Call: `display_image(src="https://example.com/nn-diagram.png", alt="Neural Network Diagram", title="Neural Network Architecture")`
       - Then provide your explanation, referencing the displayed image
-{citation_section}
+
+"""
+
+SURFSENSE_CITATION_INSTRUCTIONS = """
+
+CRITICAL CITATION REQUIREMENTS:
+
+1. For EVERY piece of information you include from the documents, add a citation in the format [citation:chunk_id] where chunk_id is the exact value from the `` tag inside ``.
+2. Make sure ALL factual statements from the documents have proper citations.
+3. If multiple chunks support the same point, include all relevant citations [citation:chunk_id1], [citation:chunk_id2].
+4. You MUST use the exact chunk_id values from the `` attributes. Do not create your own citation numbers.
+5. Every citation MUST be in the format [citation:chunk_id] where chunk_id is the exact chunk id value.
+6. Never modify or change the chunk_id - always use the original values exactly as provided in the chunk tags.
+7. Do not return citations as clickable links.
+8. Never format citations as markdown links like "([citation:5](https://example.com))". Always use plain square brackets only.
+9. Citations must ONLY appear as [citation:chunk_id] or [citation:chunk_id1], [citation:chunk_id2] format - never with parentheses, hyperlinks, or other formatting.
+10. Never make up chunk IDs. Only use chunk_id values that are explicitly provided in the `` tags.
+11. If you are unsure about a chunk_id, do not include a citation rather than guessing or making one up.
+
+
+The documents you receive are structured like this:
+
+
+
+  42
+  GITHUB_CONNECTOR
+  <![CDATA[Some repo / file / issue title]]>
+
+
+
+
+
+
+
+
+
+IMPORTANT: You MUST cite using the chunk ids (e.g. 123, 124). Do NOT cite document_id.
+
+
+
+- Every fact from the documents must have a citation in the format [citation:chunk_id] where chunk_id is the EXACT id value from a `` tag
+- Citations should appear at the end of the sentence containing the information they support
+- Multiple citations should be separated by commas: [citation:chunk_id1], [citation:chunk_id2], [citation:chunk_id3]
+- No need to return references section. Just citations in answer.
+- NEVER create your own citation format - use the exact chunk_id values from the documents in the [citation:chunk_id] format
+- NEVER format citations as clickable links or as markdown links like "([citation:5](https://example.com))". Always use plain square brackets only
+- NEVER make up chunk IDs if you are unsure about the chunk_id. It is better to omit the citation than to guess
+
+
+
+CORRECT citation formats:
+- [citation:5]
+- [citation:chunk_id1], [citation:chunk_id2], [citation:chunk_id3]
+
+INCORRECT citation formats (DO NOT use):
+- Using parentheses and markdown links: ([citation:5](https://github.com/MODSetter/SurfSense))
+- Using parentheses around brackets: ([citation:5])
+- Using hyperlinked text: [link to source 5](https://example.com)
+- Using footnote style: ... library¹
+- Making up source IDs when source_id is unknown
+- Using old IEEE format: [1], [2], [3]
+- Using source types instead of IDs: [citation:GITHUB_CONNECTOR] instead of [citation:5]
+
+
+
+Based on your GitHub repositories and video content, Python's asyncio library provides tools for writing concurrent code using the async/await syntax [citation:5]. It's particularly useful for I/O-bound and high-level structured network code [citation:5].
+
+The key advantage of asyncio is that it can improve performance by allowing other code to run while waiting for I/O operations to complete [citation:12]. This makes it excellent for scenarios like web scraping, API calls, database operations, or any situation where your program spends time waiting for external resources.
+
+However, from your video learning, it's important to note that asyncio is not suitable for CPU-bound tasks as it runs on a single thread [citation:12]. For computationally intensive work, you'd want to use multiprocessing instead.
+
+
 """
 
 
+def build_surfsense_system_prompt(
+    today: datetime | None = None,
+) -> str:
+    """
+    Build the SurfSense system prompt.
+
+    Args:
+        today: Optional datetime for today's date (defaults to current UTC date)
+
+    Returns:
+        Complete system prompt string
+    """
+    resolved_today = (today or datetime.now(UTC)).astimezone(UTC).date().isoformat()
+
+    return (
+        SURFSENSE_SYSTEM_INSTRUCTIONS.format(resolved_today=resolved_today)
+        + SURFSENSE_TOOLS_INSTRUCTIONS
+        + SURFSENSE_CITATION_INSTRUCTIONS
+    )
+
+
 SURFSENSE_SYSTEM_PROMPT = build_surfsense_system_prompt()
diff --git a/surfsense_backend/app/agents/new_chat/tools/__init__.py b/surfsense_backend/app/agents/new_chat/tools/__init__.py
new file mode 100644
index 000000000..ad75cda16
--- /dev/null
+++ b/surfsense_backend/app/agents/new_chat/tools/__init__.py
@@ -0,0 +1,54 @@
+"""
+Tools module for SurfSense deep agent.
+
+This module contains all the tools available to the SurfSense agent.
+To add a new tool, see the documentation in registry.py.
+
+Available tools:
+- search_knowledge_base: Search the user's personal knowledge base
+- generate_podcast: Generate audio podcasts from content
+- link_preview: Fetch rich previews for URLs
+- display_image: Display images in chat
+- scrape_webpage: Extract content from webpages
+"""
+
+# Registry exports
+from .registry import (
+    BUILTIN_TOOLS,
+    ToolDefinition,
+    build_tools,
+    get_all_tool_names,
+    get_default_enabled_tools,
+    get_tool_by_name,
+)
+
+# Tool factory exports (for direct use)
+from .display_image import create_display_image_tool
+from .knowledge_base import (
+    create_search_knowledge_base_tool,
+    format_documents_for_context,
+    search_knowledge_base_async,
+)
+from .link_preview import create_link_preview_tool
+from .podcast import create_generate_podcast_tool
+from .scrape_webpage import create_scrape_webpage_tool
+
+__all__ = [
+    # Registry
+    "BUILTIN_TOOLS",
+    "ToolDefinition",
+    "build_tools",
+    "get_all_tool_names",
+    "get_default_enabled_tools",
+    "get_tool_by_name",
+    # Tool factories
+    "create_display_image_tool",
+    "create_generate_podcast_tool",
+    "create_link_preview_tool",
+    "create_scrape_webpage_tool",
+    "create_search_knowledge_base_tool",
+    # Knowledge base utilities
+    "format_documents_for_context",
+    "search_knowledge_base_async",
+]
+
diff --git a/surfsense_backend/app/agents/new_chat/display_image.py b/surfsense_backend/app/agents/new_chat/tools/display_image.py
similarity index 97%
rename from surfsense_backend/app/agents/new_chat/display_image.py
rename to surfsense_backend/app/agents/new_chat/tools/display_image.py
index 0cd05b523..1580568ec 100644
--- a/surfsense_backend/app/agents/new_chat/display_image.py
+++ b/surfsense_backend/app/agents/new_chat/tools/display_image.py
@@ -1,5 +1,5 @@
 """
-Display image tool for the new chat agent.
+Display image tool for the SurfSense agent.
 
 This module provides a tool for displaying images in the chat UI with
 metadata like title, description, and source attribution.
@@ -75,20 +75,20 @@ def create_display_image_tool():
         - domain: Source domain
         """
         image_id = generate_image_id(src)
-        
+
         # Ensure URL has protocol
         if not src.startswith(("http://", "https://")):
             src = f"https://{src}"
-        
+
         domain = extract_domain(src)
-        
+
         # Determine aspect ratio based on common image sources
         ratio = "16:9"  # Default
         if "unsplash.com" in src or "pexels.com" in src:
             ratio = "16:9"
         elif "imgur.com" in src or "github.com" in src or "githubusercontent.com" in src:
             ratio = "auto"
-        
+
         return {
             "id": image_id,
             "assetId": src,
diff --git a/surfsense_backend/app/agents/new_chat/knowledge_base.py b/surfsense_backend/app/agents/new_chat/tools/knowledge_base.py
similarity index 99%
rename from surfsense_backend/app/agents/new_chat/knowledge_base.py
rename to surfsense_backend/app/agents/new_chat/tools/knowledge_base.py
index 5ffcab003..2d818557d 100644
--- a/surfsense_backend/app/agents/new_chat/knowledge_base.py
+++ b/surfsense_backend/app/agents/new_chat/tools/knowledge_base.py
@@ -1,5 +1,5 @@
 """
-Knowledge base search functionality for the new chat agent.
+Knowledge base search tool for the SurfSense agent.
 
 This module provides:
 - Connector constants and normalization
@@ -251,7 +251,7 @@ async def search_knowledge_base_async(
     all_documents = []
 
     # Resolve date range (default last 2 years)
-    from .utils import resolve_date_range
+    from app.agents.new_chat.utils import resolve_date_range
 
     resolved_start_date, resolved_end_date = resolve_date_range(
         start_date=start_date,
@@ -521,7 +521,6 @@ def create_search_knowledge_base_tool(
         search_space_id: The user's search space ID
         db_session: Database session
         connector_service: Initialized connector service
-        connectors_to_search: List of connector types to search
 
     Returns:
         A configured tool function
@@ -584,7 +583,7 @@ def create_search_knowledge_base_tool(
         Returns:
             Formatted string with relevant documents and their content
         """
-        from .utils import parse_date_or_datetime
+        from app.agents.new_chat.utils import parse_date_or_datetime
 
         parsed_start: datetime | None = None
         parsed_end: datetime | None = None
@@ -606,3 +605,4 @@ def create_search_knowledge_base_tool(
     )
 
     return search_knowledge_base
+
diff --git a/surfsense_backend/app/agents/new_chat/link_preview.py b/surfsense_backend/app/agents/new_chat/tools/link_preview.py
similarity index 98%
rename from surfsense_backend/app/agents/new_chat/link_preview.py
rename to surfsense_backend/app/agents/new_chat/tools/link_preview.py
index 388a6c14e..466df2034 100644
--- a/surfsense_backend/app/agents/new_chat/link_preview.py
+++ b/surfsense_backend/app/agents/new_chat/tools/link_preview.py
@@ -1,5 +1,5 @@
 """
-Link preview tool for the new chat agent.
+Link preview tool for the SurfSense agent.
 
 This module provides a tool for fetching URL metadata (title, description,
 Open Graph image, etc.) to display rich link previews in the chat UI.
@@ -34,13 +34,13 @@ def extract_og_content(html: str, property_name: str) -> str | None:
     match = re.search(pattern, html, re.IGNORECASE)
     if match:
         return match.group(1)
-    
+
     # Try content before property
     pattern = rf'<meta[^>]+content=["\']([^"\']+)["\'][^>]+property=["\']og:{property_name}["\']'
     match = re.search(pattern, html, re.IGNORECASE)
     if match:
         return match.group(1)
-    
+
     return None
@@ -50,13 +50,13 @@ def extract_twitter_content(html: str, name: str) -> str | None:
     match = re.search(pattern, html, re.IGNORECASE)
     if match:
         return match.group(1)
-    
+
     # Try content before name
     pattern = rf'<meta[^>]+content=["\']([^"\']+)["\'][^>]+name=["\']twitter:{name}["\']'
     match = re.search(pattern, html, re.IGNORECASE)
     if match:
         return match.group(1)
-    
+
     return None
@@ -66,13 +66,13 @@ def extract_meta_description(html: str) -> str | None:
     match = re.search(pattern, html, re.IGNORECASE)
     if match:
         return match.group(1)
-    
+
     # Try content before name
     pattern = r'<meta[^>]+content=["\']([^"\']+)["\'][^>]+name=["\']description["\']'
     match = re.search(pattern, html, re.IGNORECASE)
     if match:
         return match.group(1)
-    
+
     return None
@@ -82,18 +82,18 @@ def extract_title(html: str) -> str | None:
     og_title = extract_og_content(html, "title")
     if og_title:
         return og_title
-    
+
     # Try twitter:title
     twitter_title = extract_twitter_content(html, "title")
     if twitter_title:
         return twitter_title
-    
+
     # Fall back to <title> tag
     pattern = r"<title[^>]*>([^<]+)"
     match = re.search(pattern, html, re.IGNORECASE)
     if match:
         return match.group(1).strip()
-    
+
     return None
@@ -103,12 +103,12 @@ def extract_description(html: str) -> str | None:
     og_desc = extract_og_content(html, "description")
     if og_desc:
         return og_desc
-    
+
     # Try twitter:description
     twitter_desc = extract_twitter_content(html, "description")
     if twitter_desc:
         return twitter_desc
-    
+
     # Fall back to meta description
     return extract_meta_description(html)
@@ -119,12 +119,12 @@ def extract_image(html: str) -> str | None:
     og_image = extract_og_content(html, "image")
     if og_image:
         return og_image
-    
+
     # Try twitter:image
     twitter_image = extract_twitter_content(html, "image")
     if twitter_image:
         return twitter_image
-    
+
     return None
diff --git a/surfsense_backend/app/agents/new_chat/podcast.py b/surfsense_backend/app/agents/new_chat/tools/podcast.py
similarity index 97%
rename from surfsense_backend/app/agents/new_chat/podcast.py
rename to surfsense_backend/app/agents/new_chat/tools/podcast.py
index 46974d184..01a36d381 100644
--- a/surfsense_backend/app/agents/new_chat/podcast.py
+++ b/surfsense_backend/app/agents/new_chat/tools/podcast.py
@@ -1,5 +1,5 @@
 """
-Podcast generation tool for the new chat agent.
+Podcast generation tool for the SurfSense agent.
 
 This module provides a factory function for creating the generate_podcast
 tool that submits a Celery task for background podcast generation. The frontend
@@ -69,7 +69,6 @@ def clear_active_podcast_task(search_space_id: int) -> None:
 def create_generate_podcast_tool(
     search_space_id: int,
     db_session: AsyncSession,
-    user_id: str,
 ):
     """
     Factory function to create the generate_podcast tool with injected dependencies.
@@ -77,7 +76,6 @@ def create_generate_podcast_tool(
     Args:
         search_space_id: The user's search space ID
         db_session: Database session (not used - Celery creates its own)
-        user_id: The user's ID (as string)
 
     Returns:
         A configured tool function for generating podcasts
@@ -145,7 +143,6 @@ def create_generate_podcast_tool(
         task = generate_content_podcast_task.delay(
             source_content=source_content,
             search_space_id=search_space_id,
-            user_id=str(user_id),
             podcast_title=podcast_title,
             user_prompt=user_prompt,
         )
@@ -174,3 +171,4 @@ def create_generate_podcast_tool(
         }
 
     return generate_podcast
+
diff --git a/surfsense_backend/app/agents/new_chat/tools/registry.py b/surfsense_backend/app/agents/new_chat/tools/registry.py
new file mode 100644
index 000000000..6c6469f33
--- /dev/null
+++ b/surfsense_backend/app/agents/new_chat/tools/registry.py
@@ -0,0 +1,231 @@
+"""
+Tools registry for SurfSense deep agent.
+
+This module provides a registry pattern for managing tools in the SurfSense agent.
+It makes it easy for OSS contributors to add new tools by:
+1. Creating a tool factory function in a new file in this directory
+2. Registering the tool in the BUILTIN_TOOLS list below
+
+Example of adding a new tool:
+------------------------------
+1. Create your tool file (e.g., `tools/my_tool.py`):
+
+    from langchain_core.tools import tool
+    from sqlalchemy.ext.asyncio import AsyncSession
+
+    def create_my_tool(search_space_id: int, db_session: AsyncSession):
+        @tool
+        async def my_tool(param: str) -> dict:
+            '''My tool description.'''
+            # Your implementation
+            return {"result": "success"}
+        return my_tool
+
+2. Import and register in this file:
+
+    from .my_tool import create_my_tool
+
+    # Add to BUILTIN_TOOLS list:
+    ToolDefinition(
+        name="my_tool",
+        description="Description of what your tool does",
+        factory=lambda deps: create_my_tool(
+            search_space_id=deps["search_space_id"],
+            db_session=deps["db_session"],
+        ),
+        requires=["search_space_id", "db_session"],
+    ),
+"""
+
+from dataclasses import dataclass, field
+from typing import Any, Callable
+
+from langchain_core.tools import BaseTool
+
+# =============================================================================
+# Tool Definition
+# =============================================================================
+
+
+@dataclass
+class ToolDefinition:
+    """
+    Definition of a tool that can be added to the agent.
+
+    Attributes:
+        name: Unique identifier for the tool
+        description: Human-readable description of what the tool does
+        factory: Callable that creates the tool. Receives a dict of dependencies.
+        requires: List of dependency names this tool needs (e.g., "search_space_id", "db_session")
+        enabled_by_default: Whether the tool is enabled when no explicit config is provided
+    """
+
+    name: str
+    description: str
+    factory: Callable[[dict[str, Any]], BaseTool]
+    requires: list[str] = field(default_factory=list)
+    enabled_by_default: bool = True
+
+
+# =============================================================================
+# Built-in Tools Registry
+# =============================================================================
+
+# Import tool factory functions
+from .display_image import create_display_image_tool
+from .knowledge_base import create_search_knowledge_base_tool
+from .link_preview import create_link_preview_tool
+from .podcast import create_generate_podcast_tool
+from .scrape_webpage import create_scrape_webpage_tool
+
+# Registry of all built-in tools
+# Contributors: Add your new tools here!
+BUILTIN_TOOLS: list[ToolDefinition] = [
+    # Core tool - searches the user's knowledge base
+    ToolDefinition(
+        name="search_knowledge_base",
+        description="Search the user's personal knowledge base for relevant information",
+        factory=lambda deps: create_search_knowledge_base_tool(
+            search_space_id=deps["search_space_id"],
+            db_session=deps["db_session"],
+            connector_service=deps["connector_service"],
+        ),
+        requires=["search_space_id", "db_session", "connector_service"],
+    ),
+    # Podcast generation tool
+    ToolDefinition(
+        name="generate_podcast",
+        description="Generate an audio podcast from provided content",
+        factory=lambda deps: create_generate_podcast_tool(
+            search_space_id=deps["search_space_id"],
+            db_session=deps["db_session"],
+        ),
+        requires=["search_space_id", "db_session"],
+    ),
+    # Link preview tool - fetches Open Graph metadata for URLs
+    ToolDefinition(
+        name="link_preview",
+        description="Fetch metadata for a URL to display a rich preview card",
+        factory=lambda deps: create_link_preview_tool(),
+        requires=[],
+    ),
+    # Display image tool - shows images in the chat
+    ToolDefinition(
+        name="display_image",
+        description="Display an image in the chat with metadata",
+        factory=lambda deps: create_display_image_tool(),
+        requires=[],
+    ),
+    # Web scraping tool - extracts content from webpages
+    ToolDefinition(
+        name="scrape_webpage",
+        description="Scrape and extract the main content from a webpage",
+        factory=lambda deps: create_scrape_webpage_tool(
+            firecrawl_api_key=deps.get("firecrawl_api_key"),
+        ),
+        requires=[],  # firecrawl_api_key is optional
+    ),
+    # =========================================================================
+    # ADD YOUR CUSTOM TOOLS BELOW
+    # =========================================================================
+    # Example:
+    # ToolDefinition(
+    #     name="my_custom_tool",
+    #     description="What my tool does",
+    #     factory=lambda deps: create_my_custom_tool(...),
+    #     requires=["search_space_id"],
+    # ),
+]
+
+
+# =============================================================================
+# Registry Functions
+# =============================================================================
+
+
+def get_tool_by_name(name: str) -> ToolDefinition | None:
+    """Get a tool definition by its name."""
+    for tool_def in BUILTIN_TOOLS:
+        if tool_def.name == name:
+            return tool_def
+    return None
+
+
+def get_all_tool_names() -> list[str]:
+    """Get names of all registered tools."""
+    return [tool_def.name for tool_def in BUILTIN_TOOLS]
+
+
+def get_default_enabled_tools() -> list[str]:
+    """Get names of tools that are enabled by default."""
+    return [tool_def.name for tool_def in BUILTIN_TOOLS if tool_def.enabled_by_default]
+
+
+def build_tools(
+    dependencies: dict[str, Any],
+    enabled_tools: list[str] | None = None,
+    disabled_tools: list[str] | None = None,
+    additional_tools: list[BaseTool] | None = None,
+) -> list[BaseTool]:
+    """
+    Build the list of tools for the agent.
+
+    Args:
+        dependencies: Dict containing all possible dependencies:
+            - search_space_id: The search space ID
+            - db_session: Database session
+            - connector_service: Connector service instance
+            - firecrawl_api_key: Optional Firecrawl API key
+        enabled_tools: Explicit list of tool names to enable. If None, uses defaults.
+        disabled_tools: List of tool names to disable (applied after enabled_tools).
+        additional_tools: Extra tools to add (e.g., custom tools not in registry).
+
+    Returns:
+        List of configured tool instances ready for the agent.
+
+    Example:
+        # Use all default tools
+        tools = build_tools(deps)
+
+        # Use only specific tools
+        tools = build_tools(deps, enabled_tools=["search_knowledge_base", "link_preview"])
+
+        # Use defaults but disable podcast
+        tools = build_tools(deps, disabled_tools=["generate_podcast"])
+
+        # Add custom tools
+        tools = build_tools(deps, additional_tools=[my_custom_tool])
+    """
+    # Determine which tools to enable
+    if enabled_tools is not None:
+        tool_names_to_use = set(enabled_tools)
+    else:
+        tool_names_to_use = set(get_default_enabled_tools())
+
+    # Apply disabled list
+    if disabled_tools:
+        tool_names_to_use -= set(disabled_tools)
+
+    # Build the tools
+    tools: list[BaseTool] = []
+    for tool_def in BUILTIN_TOOLS:
+        if tool_def.name not in tool_names_to_use:
+            continue
+
+        # Check that all required dependencies are provided
+        missing_deps = [dep for dep in tool_def.requires if dep not in dependencies]
+        if missing_deps:
+            raise ValueError(
+                f"Tool '{tool_def.name}' requires dependencies: {missing_deps}"
+            )
+
+        # Create the tool
+        tool = tool_def.factory(dependencies)
+        tools.append(tool)
+
+    # Add any additional custom tools
+    if additional_tools:
+        tools.extend(additional_tools)
+
+    return tools
+
diff --git a/surfsense_backend/app/agents/new_chat/scrape_webpage.py b/surfsense_backend/app/agents/new_chat/tools/scrape_webpage.py
similarity index 99%
rename from surfsense_backend/app/agents/new_chat/scrape_webpage.py
rename to surfsense_backend/app/agents/new_chat/tools/scrape_webpage.py
index 40a9c917f..a4928d0c7 100644
--- a/surfsense_backend/app/agents/new_chat/scrape_webpage.py
+++ b/surfsense_backend/app/agents/new_chat/tools/scrape_webpage.py
@@ -1,5 +1,5 @@
 """
-Web scraping tool for the new chat agent.
+Web scraping tool for the SurfSense agent.
 
 This module provides a tool for scraping and extracting content from webpages
 using the existing WebCrawlerConnector. The scraped content can be used by
@@ -37,23 +37,23 @@ def generate_scrape_id(url: str) -> str:
 def truncate_content(content: str, max_length: int = 50000) -> tuple[str, bool]:
     """
     Truncate content to a maximum length.
-    
+
     Returns:
         Tuple of (truncated_content, was_truncated)
     """
     if len(content) <= max_length:
         return content, False
-    
+
     # Try to truncate at a sentence boundary
     truncated = content[:max_length]
     last_period = truncated.rfind(".")
     last_newline = truncated.rfind("\n\n")
-    
+
     # Use the later of the two boundaries, or just truncate
     boundary = max(last_period, last_newline)
     if boundary > max_length * 0.8:  # Only use boundary if it's not too far back
         truncated = content[: boundary + 1]
-    
+
     return truncated + "\n\n[Content truncated...]", True
diff --git a/surfsense_backend/app/agents/podcaster/configuration.py b/surfsense_backend/app/agents/podcaster/configuration.py
index c7433dadc..6a903f9df 100644
--- a/surfsense_backend/app/agents/podcaster/configuration.py
+++ b/surfsense_backend/app/agents/podcaster/configuration.py
@@ -16,7 +16,6 @@ class Configuration:
     # create assistants (https://langchain-ai.github.io/langgraph/cloud/how-tos/configuration_cloud/)
     # and when you invoke the graph
     podcast_title: str
-    user_id: str
     search_space_id: int
 
     user_prompt: str | None = None
diff --git a/surfsense_backend/app/agents/podcaster/nodes.py b/surfsense_backend/app/agents/podcaster/nodes.py
index 31a687763..1353d2c66 100644
--- a/surfsense_backend/app/agents/podcaster/nodes.py
+++ b/surfsense_backend/app/agents/podcaster/nodes.py
@@ -12,7 +12,7 @@ from litellm import aspeech
 
 from app.config import config as app_config
 from app.services.kokoro_tts_service import get_kokoro_tts_service
-from app.services.llm_service import get_user_long_context_llm
+from app.services.llm_service import get_long_context_llm
 
 from .configuration import Configuration
 from .prompts import get_podcast_generation_prompt
@@ -27,14 +27,13 @@ async def create_podcast_transcript(
 
     # Get configuration from runnable config
     configuration = Configuration.from_runnable_config(config)
-    user_id = configuration.user_id
     search_space_id = configuration.search_space_id
     user_prompt = configuration.user_prompt
 
-    # Get user's long context LLM
-    llm = await get_user_long_context_llm(state.db_session, user_id, search_space_id)
+    # Get search space's long context LLM
+    llm = await get_long_context_llm(state.db_session, search_space_id)
     if not llm:
-        error_message = f"No long context LLM configured for user {user_id} in search space {search_space_id}"
+        error_message = f"No long context LLM configured for search space {search_space_id}"
         print(error_message)
         raise RuntimeError(error_message)
diff --git a/surfsense_backend/app/routes/new_chat_routes.py b/surfsense_backend/app/routes/new_chat_routes.py
index cc9c94eea..6cccdaa5b 100644
--- a/surfsense_backend/app/routes/new_chat_routes.py
+++ b/surfsense_backend/app/routes/new_chat_routes.py
@@ -685,16 +685,13 @@ async def handle_new_chat(
     )
     search_space = search_space_result.scalars().first()
 
-    # Determine LLM config ID (use search space preference or default)
-    llm_config_id = -1  # Default to first global config
-    if search_space and search_space.fast_llm_id:
-        llm_config_id = search_space.fast_llm_id
+    # TODO: Complete this once the new LLM config architecture lands
+    llm_config_id = -1
 
     # Return streaming response
     return StreamingResponse(
         stream_new_chat(
             user_query=request.user_query,
-            user_id=str(user.id),
             search_space_id=request.search_space_id,
             chat_id=request.chat_id,
             session=session,
diff --git a/surfsense_backend/app/tasks/celery_tasks/podcast_tasks.py b/surfsense_backend/app/tasks/celery_tasks/podcast_tasks.py
index 40fe21f0d..34b9b827c 100644
--- a/surfsense_backend/app/tasks/celery_tasks/podcast_tasks.py
+++ b/surfsense_backend/app/tasks/celery_tasks/podcast_tasks.py
@@ -65,7 +65,6 @@ def generate_content_podcast_task(
     self,
     source_content: str,
     search_space_id: int,
-    user_id: str,
     podcast_title: str = "SurfSense Podcast",
     user_prompt: str | None = None,
 ) -> dict:
@@ -77,7 +76,6 @@ def generate_content_podcast_task(
     Args:
         source_content: The text content to convert into a podcast
         search_space_id: ID of the search space
-        user_id: ID of the user (as string)
         podcast_title: Title for the podcast
         user_prompt: Optional instructions for podcast style/tone
 
@@ -92,7 +90,6 @@ def generate_content_podcast_task(
         _generate_content_podcast(
             source_content,
             search_space_id,
-            user_id,
             podcast_title,
             user_prompt,
         )
@@ -112,7 +109,6 @@ def generate_content_podcast_task(
 async def _generate_content_podcast(
     source_content: str,
     search_space_id: int,
-    user_id: str,
     podcast_title: str = "SurfSense Podcast",
     user_prompt: str | None = None,
 ) -> dict:
@@ -123,7 +119,6 @@ async def _generate_content_podcast(
     graph_config = {
         "configurable": {
             "podcast_title": podcast_title,
-            "user_id": str(user_id),
             "search_space_id": search_space_id,
             "user_prompt": user_prompt,
         }
diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py
index a201ece22..de318a7d5 100644
--- a/surfsense_backend/app/tasks/chat/stream_new_chat.py
+++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py
@@ -7,8 +7,6 @@ Data Stream Protocol (SSE format).
 import json
 from collections.abc import AsyncGenerator
 
-from uuid import UUID
-
 from langchain_core.messages import HumanMessage
 from sqlalchemy.ext.asyncio import AsyncSession
 
@@ -42,7 +40,6 @@ def format_attachments_as_context(attachments: list[ChatAttachment]) -> str:
 
 async def stream_new_chat(
     user_query: str,
-    user_id: str | UUID,
     search_space_id: int,
     chat_id: int,
     session: AsyncSession,
@@ -59,7 +56,6 @@ async def stream_new_chat(
 
     Args:
         user_query: The user's query
-        user_id: The user's ID (can be UUID object or string)
         search_space_id: The search space ID
         chat_id: The chat ID (used as LangGraph thread_id for memory)
         session: The database session
@@ -71,9 +67,6 @@ async def stream_new_chat(
     """
     streaming_service = VercelStreamingService()
 
-    # Convert UUID to string if needed
-    str(user_id) if isinstance(user_id, UUID) else user_id
-
     # Track the current text block for streaming (defined early for exception handling)
     current_text_id: str | None = None
 
@@ -107,8 +100,6 @@ async def stream_new_chat(
         db_session=session,
         connector_service=connector_service,
         checkpointer=checkpointer,
-        user_id=str(user_id),
-        enable_podcast=True,
     )
 
     # Build input with message history from frontend
diff --git a/surfsense_web/components/assistant-ui/thread.tsx b/surfsense_web/components/assistant-ui/thread.tsx
index e3ee85dce..103440673 100644
--- a/surfsense_web/components/assistant-ui/thread.tsx
+++ b/surfsense_web/components/assistant-ui/thread.tsx
@@ -28,13 +28,14 @@ import {
 	Sparkles,
 	SquareIcon,
 } from "lucide-react";
-import Image from "next/image";
 import Link from "next/link";
 import { type FC, useState, useRef, useCallback, useEffect } from "react";
 import { useAtomValue } from "jotai";
 import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms";
+import { documentTypeCountsAtom } from "@/atoms/documents/document-query.atoms";
 import { useSearchSourceConnectors } from "@/hooks/use-search-source-connectors";
 import { getConnectorIcon } from "@/contracts/enums/connectorIcons";
+import { getDocumentTypeLabel } from "@/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentTypeIcon";
 import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover";
 import {
 	ComposerAddAttachment,
@@ -332,35 +333,9 @@ const ThreadWelcome: FC = () => {
{/* Greeting positioned above the composer - fixed position */}
-

-				{/** biome-ignore lint/a11y/noStaticElementInteractions: wrong lint error, this is a workaround to fix the lint error */}
-					onMouseMove={(e) => {
-						const rect = e.currentTarget.getBoundingClientRect();
-						const x = (e.clientX - rect.left - rect.width / 2) / 3;
-						const y = (e.clientY - rect.top - rect.height / 2) / 3;
-						e.currentTarget.style.setProperty("--mag-x", `${x}px`);
-						e.currentTarget.style.setProperty("--mag-y", `${y}px`);
-					}}
-					onMouseLeave={(e) => {
-						e.currentTarget.style.setProperty("--mag-x", "0px");
-						e.currentTarget.style.setProperty("--mag-y", "0px");
-					}}
-				>
-					SurfSense
-					{getTimeBasedGreeting(user?.email)}
-

+

+ {getTimeBasedGreeting(user?.email)} +

{/* Composer - top edge fixed, expands downward only */}
@@ -390,11 +365,21 @@ const Composer: FC = () => {
 
 const ConnectorIndicator: FC = () => {
 	const searchSpaceId = useAtomValue(activeSearchSpaceIdAtom);
-	const { connectors, isLoading } = useSearchSourceConnectors(false, searchSpaceId ? Number(searchSpaceId) : undefined);
+	const { connectors, isLoading: connectorsLoading } = useSearchSourceConnectors(false, searchSpaceId ? Number(searchSpaceId) : undefined);
+	const { data: documentTypeCounts, isLoading: documentTypesLoading } = useAtomValue(documentTypeCountsAtom);
 	const [isOpen, setIsOpen] = useState(false);
 	const closeTimeoutRef = useRef(null);
 
+	const isLoading = connectorsLoading || documentTypesLoading;
+
+	// Get document types that have documents in the search space
+	const activeDocumentTypes = documentTypeCounts
+		? Object.entries(documentTypeCounts).filter(([_, count]) => count > 0)
+		: [];
+
 	const hasConnectors = connectors.length > 0;
+	const hasSources = hasConnectors || activeDocumentTypes.length > 0;
+	const totalSourceCount = connectors.length + activeDocumentTypes.length;
 
 	const handleMouseEnter = useCallback(() => {
 		// Clear any pending close timeout
@@ -420,21 +405,32 @@ const ConnectorIndicator: FC = () => {
@@ -445,20 +441,31 @@ const ConnectorIndicator: FC = () => {
 				onMouseEnter={handleMouseEnter}
 				onMouseLeave={handleMouseLeave}
 			>
-				{hasConnectors ? (
+				{hasSources ? (

Connected Sources

-								{connectors.length}
+								{totalSourceCount}
+							{/* Document types from the search space */}
+							{activeDocumentTypes.map(([docType, count]) => (
+
+								{getConnectorIcon(docType, "size-3.5")}
+								{getDocumentTypeLabel(docType)}
+
+							))}
+							{/* Search source connectors */}
 							{connectors.map((connector) => (
 								{getConnectorIcon(connector.connector_type, "size-3.5")}
@@ -479,9 +486,9 @@ const ConnectorIndicator: FC = () => {
) : (
-

No connectors yet

+

No sources yet

-						Connect your first data source to enhance search results.
+						Add documents or connect data sources to enhance search results.
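
For reference, a minimal usage sketch of the registry-driven factory this diff introduces. It is illustrative only: the global LLM config id (-1), the MemorySaver checkpointer, and the word_count tool are assumptions for local experimentation, not part of the change itself.

    from langchain_core.tools import tool
    from langgraph.checkpoint.memory import MemorySaver

    from app.agents.new_chat import create_surfsense_deep_agent
    from app.agents.new_chat.llm_config import (
        create_chat_litellm_from_config,
        load_llm_config_from_yaml,
    )
    from app.services.connector_service import ConnectorService


    @tool
    async def word_count(text: str) -> dict:
        """Count words in a text snippet (hypothetical custom tool)."""
        return {"words": len(text.split())}


    async def build_agent(session, search_space_id: int):
        # Assumed global config id; see load_llm_config_from_yaml above
        llm = create_chat_litellm_from_config(load_llm_config_from_yaml(llm_config_id=-1))
        connector_service = ConnectorService(session, search_space_id=search_space_id)
        return create_surfsense_deep_agent(
            llm=llm,
            search_space_id=search_space_id,
            db_session=session,
            connector_service=connector_service,
            checkpointer=MemorySaver(),           # MemorySaver is fine for testing
            disabled_tools=["generate_podcast"],  # drop one built-in tool
            additional_tools=[word_count],        # always appended by build_tools
        )

Because build_tools validates each ToolDefinition's requires list against the dependencies dict, a misconfigured tool fails fast with a ValueError at agent construction rather than at first tool call.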