diff --git a/surfsense_backend/app/agents/new_chat/chat_deepagent.py b/surfsense_backend/app/agents/new_chat/chat_deepagent.py index af0d6bdc5..f3d988e5b 100644 --- a/surfsense_backend/app/agents/new_chat/chat_deepagent.py +++ b/surfsense_backend/app/agents/new_chat/chat_deepagent.py @@ -303,9 +303,11 @@ async def create_surfsense_deep_agent( len(tools), ) - # Build system prompt based on agent_config + # Build system prompt based on agent_config, scoped to the tools actually enabled _t0 = time.perf_counter() _sandbox_enabled = sandbox_backend is not None + _enabled_tool_names = {t.name for t in tools} + _user_disabled_tool_names = set(disabled_tools) if disabled_tools else set() if agent_config is not None: system_prompt = build_configurable_system_prompt( custom_system_instructions=agent_config.system_instructions, @@ -313,11 +315,15 @@ async def create_surfsense_deep_agent( citations_enabled=agent_config.citations_enabled, thread_visibility=thread_visibility, sandbox_enabled=_sandbox_enabled, + enabled_tool_names=_enabled_tool_names, + disabled_tool_names=_user_disabled_tool_names, ) else: system_prompt = build_surfsense_system_prompt( thread_visibility=thread_visibility, sandbox_enabled=_sandbox_enabled, + enabled_tool_names=_enabled_tool_names, + disabled_tool_names=_user_disabled_tool_names, ) _perf_log.info( "[create_agent] System prompt built in %.3fs", time.perf_counter() - _t0 diff --git a/surfsense_backend/app/agents/new_chat/system_prompt.py b/surfsense_backend/app/agents/new_chat/system_prompt.py index e9a5af341..b042f75c3 100644 --- a/surfsense_backend/app/agents/new_chat/system_prompt.py +++ b/surfsense_backend/app/agents/new_chat/system_prompt.py @@ -58,8 +58,12 @@ def _get_system_instructions( return SURFSENSE_SYSTEM_INSTRUCTIONS.format(resolved_today=resolved_today) -# Tools 0-7 (common to both private and shared prompts) -_TOOLS_INSTRUCTIONS_COMMON = """ +# ============================================================================= +# Per-tool 
prompt instructions keyed by registry tool name. +# Only tools present in the enabled set will be included in the system prompt. +# ============================================================================= + +_TOOLS_PREAMBLE = """ You have access to the following tools: @@ -72,14 +76,24 @@ Only fall back to your own general knowledge if the search returns NO relevant r Do NOT skip the search and answer directly — the user's knowledge base may contain personalized, up-to-date, or domain-specific information that is more relevant than your general training data. -0. search_surfsense_docs: Search the official SurfSense documentation. +IMPORTANT: You can ONLY use the tools listed below. If a capability is not listed here, you do NOT have it. +Do NOT claim you can do something if the corresponding tool is not listed. + +""" + +_TOOL_INSTRUCTIONS: dict[str, str] = {} + +_TOOL_INSTRUCTIONS["search_surfsense_docs"] = """ +- search_surfsense_docs: Search the official SurfSense documentation. - Use this tool when the user asks anything about SurfSense itself (the application they are using). - Args: - query: The search query about SurfSense - top_k: Number of documentation chunks to retrieve (default: 10) - Returns: Documentation content with chunk IDs for citations (prefixed with 'doc-', e.g., [citation:doc-123]) +""" -1. search_knowledge_base: Search the user's personal knowledge base for relevant information. +_TOOL_INSTRUCTIONS["search_knowledge_base"] = """ +- search_knowledge_base: Search the user's personal knowledge base for relevant information. - DEFAULT ACTION: For any user question or ambiguous query, ALWAYS call this tool first to check for relevant context before answering from general knowledge. When in doubt, search. 
- IMPORTANT: When searching for information (meetings, schedules, notes, tasks, etc.), ALWAYS search broadly @@ -105,8 +119,10 @@ up-to-date, or domain-specific information that is more relevant than your gener - end_date: Optional ISO date/datetime (e.g. "2025-12-19" or "2025-12-19T23:59:59+00:00") - connectors_to_search: Optional list of connector enums to search. If omitted, searches all. - Returns: Formatted string with relevant documents and their content +""" -2. generate_podcast: Generate an audio podcast from provided content. +_TOOL_INSTRUCTIONS["generate_podcast"] = """ +- generate_podcast: Generate an audio podcast from provided content. - Use this when the user asks to create, generate, or make a podcast. - Trigger phrases: "give me a podcast about", "create a podcast", "generate a podcast", "make a podcast", "turn this into a podcast" - Args: @@ -120,8 +136,10 @@ up-to-date, or domain-specific information that is more relevant than your gener - Returns: A task_id for tracking. The podcast will be generated in the background. - IMPORTANT: Only one podcast can be generated at a time. If a podcast is already being generated, the tool will return status "already_generating". - After calling this tool, inform the user that podcast generation has started and they will see the player when it's ready (takes 3-5 minutes). +""" -3. generate_report: Generate or revise a structured Markdown report artifact. +_TOOL_INSTRUCTIONS["generate_report"] = """ +- generate_report: Generate or revise a structured Markdown report artifact. 
- WHEN TO CALL THIS TOOL — the message must contain a creation or modification VERB directed at producing a deliverable: * Creation verbs: write, create, generate, draft, produce, summarize into, turn into, make * Modification verbs: revise, update, expand, add (a section), rewrite, make (it shorter/longer/formal) @@ -159,8 +177,10 @@ up-to-date, or domain-specific information that is more relevant than your gener * When revising an existing report (parent_report_id set) and the conversation has relevant context → use source_strategy="conversation". The revision will use the previous report content plus your source_content. * NEVER call search_knowledge_base and then pass its results to generate_report. The tool handles KB search internally. - AFTER CALLING THIS TOOL: Do NOT repeat, summarize, or reproduce the report content in the chat. The report is already displayed as an interactive card that the user can open, read, copy, and export. Simply confirm that the report was generated (e.g., "I've generated your report on [topic]. You can view the Markdown report now, and export it in various formats from the card."). NEVER write out the report text in the chat. +""" -4. link_preview: Fetch metadata for a URL to display a rich preview card. +_TOOL_INSTRUCTIONS["link_preview"] = """ +- link_preview: Fetch metadata for a URL to display a rich preview card. - IMPORTANT: Use this tool WHENEVER the user shares or mentions a URL/link in their message. - This fetches the page's Open Graph metadata (title, description, thumbnail) to show a preview card. - NOTE: This tool only fetches metadata, NOT the full page content. It cannot read the article text. @@ -172,8 +192,10 @@ up-to-date, or domain-specific information that is more relevant than your gener - url: The URL to fetch metadata for (must be a valid HTTP/HTTPS URL) - Returns: A rich preview card with title, description, thumbnail, and domain - The preview card will automatically be displayed in the chat. +""" -5. 
display_image: Display an image in the chat with metadata. +_TOOL_INSTRUCTIONS["display_image"] = """ +- display_image: Display an image in the chat with metadata. - Use this tool ONLY when you have a valid public HTTP/HTTPS image URL to show. - This displays the image with an optional title, description, and source attribution. - Valid use cases: @@ -197,8 +219,10 @@ up-to-date, or domain-specific information that is more relevant than your gener - description: Optional description providing context about the image - Returns: An image card with the image, title, and description - The image will automatically be displayed in the chat. +""" -6. generate_image: Generate images from text descriptions using AI image models. +_TOOL_INSTRUCTIONS["generate_image"] = """ +- generate_image: Generate images from text descriptions using AI image models. - Use this when the user asks you to create, generate, draw, design, or make an image. - Trigger phrases: "generate an image of", "create a picture of", "draw me", "make an image", "design a logo", "create artwork" - Args: @@ -211,8 +235,10 @@ up-to-date, or domain-specific information that is more relevant than your gener - IMPORTANT: Write a detailed, descriptive prompt for best results. Don't just pass the user's words verbatim - expand and improve the prompt with specific details about style, lighting, composition, and mood. - If the user's request is vague (e.g., "make me an image of a cat"), enhance the prompt with artistic details. +""" -7. scrape_webpage: Scrape and extract the main content from a webpage. +_TOOL_INSTRUCTIONS["scrape_webpage"] = """ +- scrape_webpage: Scrape and extract the main content from a webpage. - Use this when the user wants you to READ and UNDERSTAND the actual content of a webpage. 
- IMPORTANT: This is different from link_preview: * link_preview: Only fetches metadata (title, description, thumbnail) for display @@ -243,12 +269,14 @@ up-to-date, or domain-specific information that is more relevant than your gener * This makes your response more visual and engaging. * Prioritize showing: diagrams, charts, infographics, key illustrations, or images that help explain the content. * Don't show every image - just the most relevant 1-3 images that enhance understanding. - """ -# Private (user) memory: tools 8-9 + memory-specific examples -_TOOLS_INSTRUCTIONS_MEMORY_PRIVATE = """ -8. save_memory: Save facts, preferences, or context for personalized responses. +# Memory tool instructions have private and shared variants. +# We store them keyed as "save_memory" / "recall_memory" with sub-keys. +_MEMORY_TOOL_INSTRUCTIONS: dict[str, dict[str, str]] = { + "save_memory": { + "private": """ +- save_memory: Save facts, preferences, or context for personalized responses. - Use this when the user explicitly or implicitly shares information worth remembering. - Trigger scenarios: * User says "remember this", "keep this in mind", "note that", or similar @@ -270,8 +298,27 @@ _TOOLS_INSTRUCTIONS_MEMORY_PRIVATE = """ - Returns: Confirmation of saved memory - IMPORTANT: Only save information that would be genuinely useful for future conversations. Don't save trivial or temporary information. - -9. recall_memory: Retrieve relevant memories about the user for personalized responses. +""", + "shared": """ +- save_memory: Save a fact, preference, or context to the team's shared memory for future reference. + - Use this when the user or a team member says "remember this", "keep this in mind", or similar in this shared chat. + - Use when the team agrees on something to remember (e.g., decisions, conventions). + - Someone shares a preference or fact that should be visible to the whole team. 
+ - The saved information will be available in future shared conversations in this space. + - Args: + - content: The fact/preference/context to remember. Phrase it clearly, e.g. "API keys are stored in Vault", "The team prefers weekly demos on Fridays" + - category: Type of memory. One of: + * "preference": Team or workspace preferences + * "fact": Facts the team agreed on (e.g., processes, locations) + * "instruction": Standing instructions for the team + * "context": Current context (e.g., ongoing projects, goals) + - Returns: Confirmation of saved memory; returned context may include who added it (added_by). + - IMPORTANT: Only save information that would be genuinely useful for future team conversations in this space. +""", + }, + "recall_memory": { + "private": """ +- recall_memory: Retrieve relevant memories about the user for personalized responses. - Use this to access stored information about the user. - Trigger scenarios: * You need user context to give a better, more personalized answer @@ -286,45 +333,9 @@ _TOOLS_INSTRUCTIONS_MEMORY_PRIVATE = """ - Returns: Relevant memories formatted as context - IMPORTANT: Use the recalled memories naturally in your response without explicitly stating "Based on your memory..." - integrate the context seamlessly. - - -- User: "Remember that I prefer TypeScript over JavaScript" - - Call: `save_memory(content="User prefers TypeScript over JavaScript for development", category="preference")` - -- User: "I'm a data scientist working on ML pipelines" - - Call: `save_memory(content="User is a data scientist working on ML pipelines", category="fact")` - -- User: "Always give me code examples in Python" - - Call: `save_memory(content="User wants code examples to be written in Python", category="instruction")` - -- User: "What programming language should I use for this project?" 
- - First recall: `recall_memory(query="programming language preferences")` - - Then provide a personalized recommendation based on their preferences - -- User: "What do you know about me?" - - Call: `recall_memory(top_k=10)` - - Then summarize the stored memories - -""" - -# Shared (team) memory: tools 7-8 + team memory examples -_TOOLS_INSTRUCTIONS_MEMORY_SHARED = """ -8. save_memory: Save a fact, preference, or context to the team's shared memory for future reference. - - Use this when the user or a team member says "remember this", "keep this in mind", or similar in this shared chat. - - Use when the team agrees on something to remember (e.g., decisions, conventions). - - Someone shares a preference or fact that should be visible to the whole team. - - The saved information will be available in future shared conversations in this space. - - Args: - - content: The fact/preference/context to remember. Phrase it clearly, e.g. "API keys are stored in Vault", "The team prefers weekly demos on Fridays" - - category: Type of memory. One of: - * "preference": Team or workspace preferences - * "fact": Facts the team agreed on (e.g., processes, locations) - * "instruction": Standing instructions for the team - * "context": Current context (e.g., ongoing projects, goals) - - Returns: Confirmation of saved memory; returned context may include who added it (added_by). - - IMPORTANT: Only save information that would be genuinely useful for future team conversations in this space. - -9. recall_memory: Recall relevant team memories for this space to provide contextual responses. +""", + "shared": """ +- recall_memory: Recall relevant team memories for this space to provide contextual responses. - Use when you need team context to answer (e.g., "where do we store X?", "what did we decide about Y?"). - Use when someone asks about something the team agreed to remember. - Use when team preferences or conventions would improve the response. 
@@ -333,222 +344,220 @@ _TOOLS_INSTRUCTIONS_MEMORY_SHARED = """ - category: Optional filter by category ("preference", "fact", "instruction", "context") - top_k: Number of memories to retrieve (default: 5, max: 20) - Returns: Relevant team memories and formatted context (may include added_by). Integrate naturally without saying "Based on team memory...". - - +""", + }, +} + +_MEMORY_TOOL_EXAMPLES: dict[str, dict[str, str]] = { + "save_memory": { + "private": """ +- User: "Remember that I prefer TypeScript over JavaScript" + - Call: `save_memory(content="User prefers TypeScript over JavaScript for development", category="preference")` +- User: "I'm a data scientist working on ML pipelines" + - Call: `save_memory(content="User is a data scientist working on ML pipelines", category="fact")` +- User: "Always give me code examples in Python" + - Call: `save_memory(content="User wants code examples to be written in Python", category="instruction")` +""", + "shared": """ - User: "Remember that API keys are stored in Vault" - Call: `save_memory(content="API keys are stored in Vault", category="fact")` - - User: "Let's remember that the team prefers weekly demos on Fridays" - Call: `save_memory(content="The team prefers weekly demos on Fridays", category="preference")` - +""", + }, + "recall_memory": { + "private": """ +- User: "What programming language should I use for this project?" + - First recall: `recall_memory(query="programming language preferences")` + - Then provide a personalized recommendation based on their preferences +- User: "What do you know about me?" + - Call: `recall_memory(top_k=10)` + - Then summarize the stored memories +""", + "shared": """ - User: "What did we decide about the release date?" - First recall: `recall_memory(query="release date decision")` - Then answer based on the team memories - - User: "Where do we document onboarding?" 
- Call: `recall_memory(query="onboarding documentation")` - Then answer using the recalled team context +""", + }, +} -- User: "What have we agreed to remember about our deployment process?" - - Call: `recall_memory(query="deployment process", top_k=10)` - - Then summarize the relevant team memories +# Per-tool examples keyed by tool name. Only examples for enabled tools are included. +_TOOL_EXAMPLES: dict[str, str] = {} -""" - -# Examples shared by both private and shared prompts (knowledge base, docs, podcast, links, images, etc.) -_TOOLS_INSTRUCTIONS_EXAMPLES_COMMON = """ +_TOOL_EXAMPLES["search_knowledge_base"] = """ - User: "What time is the team meeting today?" - Call: `search_knowledge_base(query="team meeting time today")` (searches ALL sources - calendar, notes, Obsidian, etc.) - DO NOT limit to just calendar - the info might be in notes! - - User: "When is my gym session?" - Call: `search_knowledge_base(query="gym session time schedule")` (searches ALL sources) - -- User: "How do I install SurfSense?" - - Call: `search_surfsense_docs(query="installation setup")` - -- User: "What connectors does SurfSense support?" - - Call: `search_surfsense_docs(query="available connectors integrations")` - -- User: "How do I set up the Notion connector?" - - Call: `search_surfsense_docs(query="Notion connector setup configuration")` - -- User: "How do I use Docker to run SurfSense?" - - Call: `search_surfsense_docs(query="Docker installation setup")` - - User: "Fetch all my notes and what's in them?" - Call: `search_knowledge_base(query="*", top_k=50, connectors_to_search=["NOTE"])` - - User: "What did I discuss on Slack last week about the React migration?" 
- Call: `search_knowledge_base(query="React migration", connectors_to_search=["SLACK_CONNECTOR"], start_date="YYYY-MM-DD", end_date="YYYY-MM-DD")` - - User: "Check my Obsidian notes for meeting notes" - Call: `search_knowledge_base(query="meeting notes", connectors_to_search=["OBSIDIAN_CONNECTOR"])` - -- User: "What's in my Obsidian vault about project ideas?" - - Call: `search_knowledge_base(query="project ideas", connectors_to_search=["OBSIDIAN_CONNECTOR"])` - - User: "search me current usd to inr rate" - Call: `search_knowledge_base(query="current USD to INR exchange rate", connectors_to_search=["LINKUP_API", "TAVILY_API", "SEARXNG_API", "BAIDU_SEARCH_API"])` - Then answer using the returned live web results with citations. +""" -- User: "cant you search using linkup?" - - Call: `search_knowledge_base(query="", connectors_to_search=["LINKUP_API"])` - - Then answer from retrieved results (or clearly state that Linkup returned no data). +_TOOL_EXAMPLES["search_surfsense_docs"] = """ +- User: "How do I install SurfSense?" + - Call: `search_surfsense_docs(query="installation setup")` +- User: "What connectors does SurfSense support?" + - Call: `search_surfsense_docs(query="available connectors integrations")` +- User: "How do I set up the Notion connector?" + - Call: `search_surfsense_docs(query="Notion connector setup configuration")` +- User: "How do I use Docker to run SurfSense?" 
+ - Call: `search_surfsense_docs(query="Docker installation setup")` +""" +_TOOL_EXAMPLES["generate_podcast"] = """ - User: "Give me a podcast about AI trends based on what we discussed" - First search for relevant content, then call: `generate_podcast(source_content="Based on our conversation and search results: [detailed summary of chat + search findings]", podcast_title="AI Trends Podcast")` - - User: "Create a podcast summary of this conversation" - Call: `generate_podcast(source_content="Complete conversation summary:\\n\\nUser asked about [topic 1]:\\n[Your detailed response]\\n\\nUser then asked about [topic 2]:\\n[Your detailed response]\\n\\n[Continue for all exchanges in the conversation]", podcast_title="Conversation Summary")` - - User: "Make a podcast about quantum computing" - First search: `search_knowledge_base(query="quantum computing")` - Then: `generate_podcast(source_content="Key insights about quantum computing from the knowledge base:\\n\\n[Comprehensive summary of all relevant search results with key facts, concepts, and findings]", podcast_title="Quantum Computing Explained")` +""" +_TOOL_EXAMPLES["generate_report"] = """ - User: "Generate a report about AI trends" - Call: `generate_report(topic="AI Trends Report", source_strategy="kb_search", search_queries=["AI trends recent developments", "artificial intelligence industry trends", "AI market growth and predictions"], report_style="detailed")` - WHY: Has creation verb "generate" → call the tool. No prior discussion → use kb_search. 
- - User: "Write a research report from this conversation" - - Call: `generate_report(topic="Research Report", source_strategy="conversation", source_content="Complete conversation summary:\\n\\nUser asked about [topic 1]:\\n[Your detailed response]\\n\\nUser then asked about [topic 2]:\\n[Your detailed response]\\n\\n[Continue for all exchanges in the conversation]", report_style="deep_research")` + - Call: `generate_report(topic="Research Report", source_strategy="conversation", source_content="Complete conversation summary:\\n\\n...", report_style="deep_research")` - WHY: Has creation verb "write" → call the tool. Conversation has the content → use source_strategy="conversation". - -- User: "Create a brief executive summary about our project progress" - - Call: `generate_report(topic="Project Progress Executive Summary", source_strategy="kb_search", search_queries=["project progress updates", "project milestones completed", "upcoming project deadlines"], report_style="executive_summary", user_instructions="Focus on milestones achieved and upcoming deadlines")` - - WHY: Has creation verb "create" → call the tool. New topic → use kb_search. - -- User: (after extensive Q&A about React performance) "Turn this into a report" - - Call: `generate_report(topic="React Performance Optimization Guide", source_strategy="conversation", source_content="[Thorough summary of all Q&A from this conversation about React performance...]", report_style="detailed")` - - WHY: Has creation verb "turn into" → call the tool. Conversation has the content → use source_strategy="conversation". 
- - User: (after a report on Climate Change was generated) "Add a section about carbon capture technologies" - Call: `generate_report(topic="Climate Crisis: Causes, Impacts, and Solutions", source_strategy="conversation", source_content="[summary of conversation context if any]", parent_report_id=<previous_report_id>, user_instructions="Add a new section about carbon capture technologies")` - - WHY: Has modification verb "add" + specific deliverable target → call the tool with parent_report_id. Use source_strategy="conversation" since the report already exists. - + - WHY: Has modification verb "add" + specific deliverable target → call the tool with parent_report_id. - User: (after a report was generated) "What else could we add to have more depth?" - - Do NOT call generate_report. Answer in chat with suggestions, e.g.: "Here are some areas we could expand: 1. ... 2. ... 3. ... Would you like me to add any of these to the report?" - - WHY: No creation/modification verb directed at producing a deliverable. This is a question asking for suggestions. - -- User: (after a report was generated) "Is the conclusion strong enough?" - - Do NOT call generate_report. Answer in chat, e.g.: "The conclusion covers X and Y well, but could be strengthened by adding Z. Want me to revise it?" - - WHY: This is a question/critique, not a modification request. - -- User: (after a report was generated) "What's missing from this report?" - - Do NOT call generate_report. Answer in chat with analysis of gaps. - - WHY: This is a question. The user is asking you to identify gaps, not to fix them yet. + - Do NOT call generate_report. Answer in chat with suggestions. + - WHY: No creation/modification verb directed at producing a deliverable.
+""" +_TOOL_EXAMPLES["scrape_webpage"] = """ - User: "Check out https://dev.to/some-article" - Call: `link_preview(url="https://dev.to/some-article")` - Call: `scrape_webpage(url="https://dev.to/some-article")` - - After getting the content, if the content contains useful diagrams/images like `![Neural Network Diagram](https://example.com/nn-diagram.png)`: - - Call: `display_image(src="https://example.com/nn-diagram.png", alt="Neural Network Diagram", title="Neural Network Architecture")` - - Then provide your analysis, referencing the displayed image - -- User: "What's this blog post about? https://example.com/blog/post" - - Call: `link_preview(url="https://example.com/blog/post")` - - Call: `scrape_webpage(url="https://example.com/blog/post")` - - After getting the content, if the content contains useful diagrams/images like `![Neural Network Diagram](https://example.com/nn-diagram.png)`: - - Call: `display_image(src="https://example.com/nn-diagram.png", alt="Neural Network Diagram", title="Neural Network Architecture")` - - Then provide your analysis, referencing the displayed image - -- User: "https://github.com/some/repo" - - Call: `link_preview(url="https://github.com/some/repo")` - - Call: `scrape_webpage(url="https://github.com/some/repo")` - - After getting the content, if the content contains useful diagrams/images like `![Neural Network Diagram](https://example.com/nn-diagram.png)`: - - Call: `display_image(src="https://example.com/nn-diagram.png", alt="Neural Network Diagram", title="Neural Network Architecture")` - - Then provide your analysis, referencing the displayed image - -- User: "Show me this image: https://example.com/image.png" - - Call: `display_image(src="https://example.com/image.png", alt="User shared image")` - -- User uploads an image file and asks: "What is this image about?" - - DO NOT call display_image! The user's uploaded image is already visible in the chat. 
- - Simply analyze the image content (which you receive as extracted text/description) and respond. - - WRONG: `display_image(src="...", ...)` - This will fail with "Image not available" - - CORRECT: Just provide your analysis directly: "Based on the image you shared, this appears to be..." - -- User uploads a screenshot and asks: "Can you explain what's in this image?" - - DO NOT call display_image! Just analyze and respond directly. - - The user can already see their screenshot - they don't need you to display it again. - + - Then provide your analysis of the content. - User: "Read this article and summarize it for me: https://example.com/blog/ai-trends" - - Call: `link_preview(url="https://example.com/blog/ai-trends")` - Call: `scrape_webpage(url="https://example.com/blog/ai-trends")` - - After getting the content, if the content contains useful diagrams/images like `![Neural Network Diagram](https://example.com/nn-diagram.png)`: - - Call: `display_image(src="https://example.com/nn-diagram.png", alt="Neural Network Diagram", title="Neural Network Architecture")` - - Then provide a summary based on the scraped text - -- User: "What does this page say about machine learning? 
https://docs.example.com/ml-guide" - - Call: `link_preview(url="https://docs.example.com/ml-guide")` - - Call: `scrape_webpage(url="https://docs.example.com/ml-guide")` - - After getting the content, if the content contains useful diagrams/images like `![Neural Network Diagram](https://example.com/nn-diagram.png)`: - - Call: `display_image(src="https://example.com/nn-diagram.png", alt="Neural Network Diagram", title="Neural Network Architecture")` - - Then answer the question using the extracted content - -- User: "Summarize this blog post: https://medium.com/some-article" - - Call: `link_preview(url="https://medium.com/some-article")` - - Call: `scrape_webpage(url="https://medium.com/some-article")` - - After getting the content, if the content contains useful diagrams/images like `![Neural Network Diagram](https://example.com/nn-diagram.png)`: - - Call: `display_image(src="https://example.com/nn-diagram.png", alt="Neural Network Diagram", title="Neural Network Architecture")` - - Then provide a comprehensive summary of the article content - -- User: "Read this tutorial and explain it: https://example.com/ml-tutorial" - - First: `scrape_webpage(url="https://example.com/ml-tutorial")` - - Then, if the content contains useful diagrams/images like `![Neural Network Diagram](https://example.com/nn-diagram.png)`: - - Call: `display_image(src="https://example.com/nn-diagram.png", alt="Neural Network Diagram", title="Neural Network Architecture")` - - Then provide your explanation, referencing the displayed image - -- User: (after discussing https://example.com/stats in the conversation) "Can you get the live data from that page?" + - Then provide a summary based on the scraped text. +- User: (after discussing https://example.com/stats) "Can you get the live data from that page?" - Call: `scrape_webpage(url="https://example.com/stats")` - IMPORTANT: Always attempt scraping first. Never refuse before trying the tool. - - Then present the extracted data to the user. 
+""" -- User: "Pull the table from https://example.com/leaderboard" - - Call: `scrape_webpage(url="https://example.com/leaderboard")` - - Then parse and present the table data from the scraped content. +_TOOL_EXAMPLES["display_image"] = """ +- User: "Show me this image: https://example.com/image.png" + - Call: `display_image(src="https://example.com/image.png", alt="User shared image")` +- User uploads an image file and asks: "What is this image about?" + - DO NOT call display_image! The user's uploaded image is already visible in the chat. + - Simply analyze the image content and respond directly. +""" +_TOOL_EXAMPLES["generate_image"] = """ - User: "Generate an image of a cat" - Step 1: `generate_image(prompt="A fluffy orange tabby cat sitting on a windowsill, bathed in warm golden sunlight, soft bokeh background with green houseplants, photorealistic style, cozy atmosphere")` - Step 2: Use the returned "src" URL to display it: `display_image(src="", alt="A fluffy orange tabby cat on a windowsill", title="Generated Image")` - -- User: "Create a landscape painting of mountains" - - Step 1: `generate_image(prompt="Majestic snow-capped mountain range at sunset, dramatic orange and purple sky, alpine meadow with wildflowers in the foreground, oil painting style with visible brushstrokes, inspired by the Hudson River School art movement")` - - Step 2: `display_image(src="", alt="Mountain landscape painting", title="Generated Image")` - - User: "Draw me a logo for a coffee shop called Bean Dream" - Step 1: `generate_image(prompt="Minimalist modern logo design for a coffee shop called 'Bean Dream', featuring a stylized coffee bean with dream-like swirls of steam, clean vector style, warm brown and cream color palette, white background, professional branding")` - Step 2: `display_image(src="", alt="Bean Dream coffee shop logo", title="Generated Image")` - -- User: "Make a wide banner image for my blog about AI" - - Step 1: `generate_image(prompt="Wide banner illustration 
for an AI technology blog, featuring abstract neural network patterns, glowing blue and purple connections, modern futuristic aesthetic, digital art style, clean and professional")` - - Step 2: `display_image(src="", alt="AI blog banner", title="Generated Image")` - """ -# Reassemble so existing callers see no change (same full prompt) -SURFSENSE_TOOLS_INSTRUCTIONS = ( - _TOOLS_INSTRUCTIONS_COMMON - + _TOOLS_INSTRUCTIONS_MEMORY_PRIVATE - + _TOOLS_INSTRUCTIONS_EXAMPLES_COMMON -) +# All tool names that have prompt instructions (order matters for prompt readability) +_ALL_TOOL_NAMES_ORDERED = [ + "search_surfsense_docs", + "search_knowledge_base", + "generate_podcast", + "generate_report", + "link_preview", + "display_image", + "generate_image", + "scrape_webpage", + "save_memory", + "recall_memory", +] -def _get_tools_instructions(thread_visibility: ChatVisibility | None = None) -> str: - """Build tools instructions based on thread visibility (private vs shared). +def _format_tool_name(name: str) -> str: + """Convert snake_case tool name to a human-readable label.""" + return name.replace("_", " ").title() - For private chats: use user-focused memory wording and examples. - For shared chats: use team memory wording and examples. + +def _get_tools_instructions( + thread_visibility: ChatVisibility | None = None, + enabled_tool_names: set[str] | None = None, + disabled_tool_names: set[str] | None = None, +) -> str: + """Build tools instructions containing only the enabled tools. + + Args: + thread_visibility: Private vs shared — affects memory tool wording. + enabled_tool_names: Set of tool names that are actually bound to the agent. + When None, all tools are included (backward-compatible default). + disabled_tool_names: Set of tool names that the user explicitly disabled. + When provided, a note is appended telling the model about these tools + so it can inform the user they can re-enable them. 
""" visibility = thread_visibility or ChatVisibility.PRIVATE - memory_block = ( - _TOOLS_INSTRUCTIONS_MEMORY_SHARED - if visibility == ChatVisibility.SEARCH_SPACE - else _TOOLS_INSTRUCTIONS_MEMORY_PRIVATE + memory_variant = ( + "shared" if visibility == ChatVisibility.SEARCH_SPACE else "private" ) - return ( - _TOOLS_INSTRUCTIONS_COMMON + memory_block + _TOOLS_INSTRUCTIONS_EXAMPLES_COMMON + + parts: list[str] = [_TOOLS_PREAMBLE] + examples: list[str] = [] + + for tool_name in _ALL_TOOL_NAMES_ORDERED: + if enabled_tool_names is not None and tool_name not in enabled_tool_names: + continue + + if tool_name in _TOOL_INSTRUCTIONS: + parts.append(_TOOL_INSTRUCTIONS[tool_name]) + elif tool_name in _MEMORY_TOOL_INSTRUCTIONS: + parts.append(_MEMORY_TOOL_INSTRUCTIONS[tool_name][memory_variant]) + + if tool_name in _TOOL_EXAMPLES: + examples.append(_TOOL_EXAMPLES[tool_name]) + elif tool_name in _MEMORY_TOOL_EXAMPLES: + examples.append(_MEMORY_TOOL_EXAMPLES[tool_name][memory_variant]) + + # Append a note about user-disabled tools so the model can inform the user + known_disabled = ( + disabled_tool_names & set(_ALL_TOOL_NAMES_ORDERED) + if disabled_tool_names + else set() ) + if known_disabled: + disabled_list = ", ".join( + _format_tool_name(n) for n in _ALL_TOOL_NAMES_ORDERED if n in known_disabled + ) + parts.append(f""" +DISABLED TOOLS (by user): +The following tools are available in SurfSense but have been disabled by the user for this session: {disabled_list}. +You do NOT have access to these tools and MUST NOT claim you can use them. +If the user asks about a capability provided by a disabled tool, let them know the relevant tool +is currently disabled and they can re-enable it from the tools menu (wrench icon) in the composer toolbar. 
+""") + + parts.append("\n\n") + + if examples: + parts.append("") + parts.extend(examples) + parts.append("\n") + + return "".join(parts) + + +# Backward-compatible constant: all tools included (private memory variant) +SURFSENSE_TOOLS_INSTRUCTIONS = _get_tools_instructions() SURFSENSE_CITATION_INSTRUCTIONS = """ @@ -752,13 +761,15 @@ def build_surfsense_system_prompt( today: datetime | None = None, thread_visibility: ChatVisibility | None = None, sandbox_enabled: bool = False, + enabled_tool_names: set[str] | None = None, + disabled_tool_names: set[str] | None = None, ) -> str: """ Build the SurfSense system prompt with default settings. This is a convenience function that builds the prompt with: - Default system instructions - - Tools instructions (always included) + - Tools instructions (only for enabled tools) - Citation instructions enabled - Sandbox execution instructions (when sandbox_enabled=True) @@ -766,6 +777,8 @@ def build_surfsense_system_prompt( today: Optional datetime for today's date (defaults to current UTC date) thread_visibility: Optional; when provided, used for conditional prompt (e.g. private vs shared memory wording). Defaults to private behavior when None. sandbox_enabled: Whether the sandbox backend is active (adds code execution instructions). + enabled_tool_names: Set of tool names actually bound to the agent. When None all tools are included. + disabled_tool_names: Set of tool names the user explicitly disabled. Included as a note so the model can inform the user. 
Returns: Complete system prompt string @@ -773,7 +786,9 @@ def build_surfsense_system_prompt( visibility = thread_visibility or ChatVisibility.PRIVATE system_instructions = _get_system_instructions(visibility, today) - tools_instructions = _get_tools_instructions(visibility) + tools_instructions = _get_tools_instructions( + visibility, enabled_tool_names, disabled_tool_names + ) citation_instructions = SURFSENSE_CITATION_INSTRUCTIONS sandbox_instructions = SANDBOX_EXECUTION_INSTRUCTIONS if sandbox_enabled else "" return ( @@ -791,13 +806,15 @@ def build_configurable_system_prompt( today: datetime | None = None, thread_visibility: ChatVisibility | None = None, sandbox_enabled: bool = False, + enabled_tool_names: set[str] | None = None, + disabled_tool_names: set[str] | None = None, ) -> str: """ Build a configurable SurfSense system prompt based on NewLLMConfig settings. The prompt is composed of up to four parts: 1. System Instructions - either custom or default SURFSENSE_SYSTEM_INSTRUCTIONS - 2. Tools Instructions - always included (SURFSENSE_TOOLS_INSTRUCTIONS) + 2. Tools Instructions - only for enabled tools, with a note about disabled ones 3. Citation Instructions - either SURFSENSE_CITATION_INSTRUCTIONS or SURFSENSE_NO_CITATION_INSTRUCTIONS 4. Sandbox Execution Instructions - when sandbox_enabled=True @@ -812,6 +829,8 @@ def build_configurable_system_prompt( today: Optional datetime for today's date (defaults to current UTC date) thread_visibility: Optional; when provided, used for conditional prompt (e.g. private vs shared memory wording). Defaults to private behavior when None. sandbox_enabled: Whether the sandbox backend is active (adds code execution instructions). + enabled_tool_names: Set of tool names actually bound to the agent. When None all tools are included. + disabled_tool_names: Set of tool names the user explicitly disabled. Included as a note so the model can inform the user. 
Returns: Complete system prompt string @@ -829,8 +848,10 @@ def build_configurable_system_prompt( else: system_instructions = "" - # Tools instructions: conditional on thread_visibility (private vs shared memory wording) - tools_instructions = _get_tools_instructions(thread_visibility) + # Tools instructions: only include enabled tools, note disabled ones + tools_instructions = _get_tools_instructions( + thread_visibility, enabled_tool_names, disabled_tool_names + ) # Citation instructions based on toggle citation_instructions = ( diff --git a/surfsense_backend/app/agents/new_chat/tools/registry.py b/surfsense_backend/app/agents/new_chat/tools/registry.py index 99cb09b38..030cbf239 100644 --- a/surfsense_backend/app/agents/new_chat/tools/registry.py +++ b/surfsense_backend/app/agents/new_chat/tools/registry.py @@ -97,6 +97,7 @@ class ToolDefinition: factory: Callable[[dict[str, Any]], BaseTool] requires: list[str] = field(default_factory=list) enabled_by_default: bool = True + hidden: bool = False # ============================================================================= @@ -139,7 +140,7 @@ BUILTIN_TOOLS: list[ToolDefinition] = [ # need to call search_knowledge_base separately before generating. 
ToolDefinition( name="generate_report", - description="Generate a structured Markdown report from provided content", + description="Generate a structured report from provided content and export it", factory=lambda deps: create_generate_report_tool( search_space_id=deps["search_space_id"], thread_id=deps["thread_id"], @@ -234,7 +235,7 @@ BUILTIN_TOOLS: list[ToolDefinition] = [ requires=["user_id", "search_space_id", "db_session", "thread_visibility"], ), # ========================================================================= - # LINEAR TOOLS - create, update, delete issues + # LINEAR TOOLS - create, update, delete issues (WIP - hidden from UI) # ========================================================================= ToolDefinition( name="create_linear_issue", @@ -245,6 +246,8 @@ BUILTIN_TOOLS: list[ToolDefinition] = [ user_id=deps["user_id"], ), requires=["db_session", "search_space_id", "user_id"], + enabled_by_default=False, + hidden=True, ), ToolDefinition( name="update_linear_issue", @@ -255,6 +258,8 @@ BUILTIN_TOOLS: list[ToolDefinition] = [ user_id=deps["user_id"], ), requires=["db_session", "search_space_id", "user_id"], + enabled_by_default=False, + hidden=True, ), ToolDefinition( name="delete_linear_issue", @@ -265,9 +270,11 @@ BUILTIN_TOOLS: list[ToolDefinition] = [ user_id=deps["user_id"], ), requires=["db_session", "search_space_id", "user_id"], + enabled_by_default=False, + hidden=True, ), # ========================================================================= - # NOTION TOOLS - create, update, delete pages + # NOTION TOOLS - create, update, delete pages (WIP - hidden from UI) # ========================================================================= ToolDefinition( name="create_notion_page", @@ -278,6 +285,8 @@ BUILTIN_TOOLS: list[ToolDefinition] = [ user_id=deps["user_id"], ), requires=["db_session", "search_space_id", "user_id"], + enabled_by_default=False, + hidden=True, ), ToolDefinition( name="update_notion_page", @@ -288,6 +297,8 @@ 
BUILTIN_TOOLS: list[ToolDefinition] = [ user_id=deps["user_id"], ), requires=["db_session", "search_space_id", "user_id"], + enabled_by_default=False, + hidden=True, ), ToolDefinition( name="delete_notion_page", @@ -298,9 +309,11 @@ BUILTIN_TOOLS: list[ToolDefinition] = [ user_id=deps["user_id"], ), requires=["db_session", "search_space_id", "user_id"], + enabled_by_default=False, + hidden=True, ), # ========================================================================= - # GOOGLE DRIVE TOOLS - create files, delete files + # GOOGLE DRIVE TOOLS - create files, delete files (WIP - hidden from UI) # ========================================================================= ToolDefinition( name="create_google_drive_file", @@ -311,6 +324,8 @@ BUILTIN_TOOLS: list[ToolDefinition] = [ user_id=deps["user_id"], ), requires=["db_session", "search_space_id", "user_id"], + enabled_by_default=False, + hidden=True, ), ToolDefinition( name="delete_google_drive_file", @@ -321,6 +336,8 @@ BUILTIN_TOOLS: list[ToolDefinition] = [ user_id=deps["user_id"], ), requires=["db_session", "search_space_id", "user_id"], + enabled_by_default=False, + hidden=True, ), ] @@ -344,7 +361,7 @@ def get_all_tool_names() -> list[str]: def get_default_enabled_tools() -> list[str]: - """Get names of tools that are enabled by default.""" + """Get names of tools that are enabled by default (excludes hidden tools).""" return [tool_def.name for tool_def in BUILTIN_TOOLS if tool_def.enabled_by_default] @@ -393,10 +410,10 @@ def build_tools( if disabled_tools: tool_names_to_use -= set(disabled_tools) - # Build the tools + # Build the tools (skip hidden/WIP tools unconditionally) tools: list[BaseTool] = [] for tool_def in BUILTIN_TOOLS: - if tool_def.name not in tool_names_to_use: + if tool_def.hidden or tool_def.name not in tool_names_to_use: continue # Check that all required dependencies are provided diff --git a/surfsense_backend/app/connectors/composio_gmail_connector.py 
b/surfsense_backend/app/connectors/composio_gmail_connector.py index 94ee7b14c..2a382f3b8 100644 --- a/surfsense_backend/app/connectors/composio_gmail_connector.py +++ b/surfsense_backend/app/connectors/composio_gmail_connector.py @@ -12,7 +12,6 @@ from typing import Any from bs4 import BeautifulSoup from markdownify import markdownify as md - from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select from sqlalchemy.orm import selectinload @@ -139,7 +138,9 @@ class ComposioGmailConnector(ComposioConnector): soup = BeautifulSoup(html, "html.parser") for tag in soup.find_all(["style", "script", "img"]): tag.decompose() - for tag in soup.find_all(["table", "thead", "tbody", "tfoot", "tr", "td", "th"]): + for tag in soup.find_all( + ["table", "thead", "tbody", "tfoot", "tr", "td", "th"] + ): tag.unwrap() return md(str(soup)).strip() diff --git a/surfsense_backend/app/connectors/google_gmail_connector.py b/surfsense_backend/app/connectors/google_gmail_connector.py index 46b825253..e8347ea73 100644 --- a/surfsense_backend/app/connectors/google_gmail_connector.py +++ b/surfsense_backend/app/connectors/google_gmail_connector.py @@ -10,11 +10,10 @@ import logging from typing import Any from bs4 import BeautifulSoup -from markdownify import markdownify as md - from google.auth.transport.requests import Request from google.oauth2.credentials import Credentials from googleapiclient.discovery import build +from markdownify import markdownify as md from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select from sqlalchemy.orm.attributes import flag_modified @@ -356,7 +355,9 @@ class GoogleGmailConnector: soup = BeautifulSoup(html, "html.parser") for tag in soup.find_all(["style", "script", "img"]): tag.decompose() - for tag in soup.find_all(["table", "thead", "tbody", "tfoot", "tr", "td", "th"]): + for tag in soup.find_all( + ["table", "thead", "tbody", "tfoot", "tr", "td", "th"] + ): tag.unwrap() return md(str(soup)).strip() 
diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index 04d1328a6..062b11b3a 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -960,7 +960,10 @@ class Chunk(BaseModel, TimestampMixin): embedding = Column(Vector(config.embedding_model_instance.dimension)) document_id = Column( - Integer, ForeignKey("documents.id", ondelete="CASCADE"), nullable=False, index=True + Integer, + ForeignKey("documents.id", ondelete="CASCADE"), + nullable=False, + index=True, ) document = relationship("Document", back_populates="chunks") diff --git a/surfsense_backend/app/indexing_pipeline/indexing_pipeline_service.py b/surfsense_backend/app/indexing_pipeline/indexing_pipeline_service.py index 0fadfc42f..490aac782 100644 --- a/surfsense_backend/app/indexing_pipeline/indexing_pipeline_service.py +++ b/surfsense_backend/app/indexing_pipeline/indexing_pipeline_service.py @@ -211,7 +211,7 @@ class IndexingPipelineService: chunks = [ Chunk(content=text, embedding=emb) - for text, emb in zip(chunk_texts, chunk_embeddings) + for text, emb in zip(chunk_texts, chunk_embeddings, strict=False) ] perf.info( "[indexing] chunk+embed doc=%d chunks=%d in %.3fs", diff --git a/surfsense_backend/app/routes/new_chat_routes.py b/surfsense_backend/app/routes/new_chat_routes.py index 5c52ce495..10a6951fa 100644 --- a/surfsense_backend/app/routes/new_chat_routes.py +++ b/surfsense_backend/app/routes/new_chat_routes.py @@ -35,6 +35,7 @@ from app.db import ( shielded_async_session, ) from app.schemas.new_chat import ( + AgentToolInfo, NewChatMessageRead, NewChatRequest, NewChatThreadCreate, @@ -1024,6 +1025,32 @@ async def list_messages( ) from None +# ============================================================================= +# Agent Tools Endpoint +# ============================================================================= + + +@router.get("/agent/tools", response_model=list[AgentToolInfo]) +async def list_agent_tools( + _user: User = 
Depends(current_active_user), +): + """Return the list of built-in agent tools with their metadata. + + Hidden (WIP) tools are excluded from the response. + """ + from app.agents.new_chat.tools.registry import BUILTIN_TOOLS + + return [ + AgentToolInfo( + name=t.name, + description=t.description, + enabled_by_default=t.enabled_by_default, + ) + for t in BUILTIN_TOOLS + if not t.hidden + ] + + # ============================================================================= # Chat Streaming Endpoint # ============================================================================= @@ -1108,6 +1135,7 @@ async def handle_new_chat( needs_history_bootstrap=thread.needs_history_bootstrap, thread_visibility=thread.visibility, current_user_display_name=user.display_name or "A team member", + disabled_tools=request.disabled_tools, ), media_type="text/event-stream", headers={ @@ -1344,6 +1372,7 @@ async def regenerate_response( needs_history_bootstrap=thread.needs_history_bootstrap, thread_visibility=thread.visibility, current_user_display_name=user.display_name or "A team member", + disabled_tools=request.disabled_tools, ): yield chunk streaming_completed = True diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py index 800eb5629..b241aa2fb 100644 --- a/surfsense_backend/app/routes/search_source_connectors_routes.py +++ b/surfsense_backend/app/routes/search_source_connectors_routes.py @@ -18,6 +18,7 @@ Note: OAuth connectors (Gmail, Drive, Slack, etc.) support multiple accounts per Non-OAuth connectors (BookStack, GitHub, etc.) are limited to one per search space. 
""" +import asyncio import logging from contextlib import suppress from datetime import UTC, datetime, timedelta @@ -52,8 +53,6 @@ from app.schemas import ( SearchSourceConnectorRead, SearchSourceConnectorUpdate, ) -import asyncio - from app.services.composio_service import ComposioService, get_composio_service from app.services.notification_service import NotificationService from app.tasks.connector_indexers import ( @@ -3124,7 +3123,9 @@ async def get_drive_picker_token( detail="Composio connected account not found. Please reconnect.", ) service = get_composio_service() - access_token = await asyncio.to_thread(service.get_access_token, composio_account_id) + access_token = await asyncio.to_thread( + service.get_access_token, composio_account_id + ) return { "access_token": access_token, "client_id": config.GOOGLE_OAUTH_CLIENT_ID, diff --git a/surfsense_backend/app/schemas/new_chat.py b/surfsense_backend/app/schemas/new_chat.py index 916f42024..5d8ae207e 100644 --- a/surfsense_backend/app/schemas/new_chat.py +++ b/surfsense_backend/app/schemas/new_chat.py @@ -172,6 +172,9 @@ class NewChatRequest(BaseModel): mentioned_surfsense_doc_ids: list[int] | None = ( None # Optional SurfSense documentation IDs mentioned with @ in the chat ) + disabled_tools: list[str] | None = ( + None # Optional list of tool names the user has disabled from the UI + ) class RegenerateRequest(BaseModel): @@ -191,6 +194,20 @@ class RegenerateRequest(BaseModel): ) mentioned_document_ids: list[int] | None = None mentioned_surfsense_doc_ids: list[int] | None = None + disabled_tools: list[str] | None = None + + +# ============================================================================= +# Agent Tools Schemas +# ============================================================================= + + +class AgentToolInfo(BaseModel): + """Schema for a single agent tool's public metadata.""" + + name: str + description: str + enabled_by_default: bool class ResumeDecision(BaseModel): diff --git 
a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index 3f0cee145..7abd22d4a 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -1022,6 +1022,7 @@ async def stream_new_chat( needs_history_bootstrap: bool = False, thread_visibility: ChatVisibility | None = None, current_user_display_name: str | None = None, + disabled_tools: list[str] | None = None, ) -> AsyncGenerator[str, None]: """ Stream chat responses from the new SurfSense deep agent. @@ -1153,6 +1154,7 @@ async def stream_new_chat( firecrawl_api_key=firecrawl_api_key, thread_visibility=visibility, sandbox_backend=sandbox_backend, + disabled_tools=disabled_tools, ) _perf_log.info( "[stream_new_chat] Agent created in %.3fs", time.perf_counter() - _t0 diff --git a/surfsense_backend/app/utils/document_converters.py b/surfsense_backend/app/utils/document_converters.py index 6a59990f5..0cacdd1d3 100644 --- a/surfsense_backend/app/utils/document_converters.py +++ b/surfsense_backend/app/utils/document_converters.py @@ -230,7 +230,7 @@ async def create_document_chunks(content: str) -> list[Chunk]: chunk_embeddings = embed_texts(chunk_texts) return [ Chunk(content=text, embedding=emb) - for text, emb in zip(chunk_texts, chunk_embeddings) + for text, emb in zip(chunk_texts, chunk_embeddings, strict=False) ] diff --git a/surfsense_backend/tests/integration/conftest.py b/surfsense_backend/tests/integration/conftest.py index 4e43ea302..d9d7cacae 100644 --- a/surfsense_backend/tests/integration/conftest.py +++ b/surfsense_backend/tests/integration/conftest.py @@ -130,9 +130,7 @@ def patched_summarize_raises(monkeypatch) -> AsyncMock: @pytest.fixture def patched_embed_texts(monkeypatch) -> MagicMock: - mock = MagicMock( - side_effect=lambda texts: [[0.1] * _EMBEDDING_DIM for _ in texts] - ) + mock = MagicMock(side_effect=lambda texts: [[0.1] * _EMBEDDING_DIM for _ in texts]) 
monkeypatch.setattr( "app.indexing_pipeline.indexing_pipeline_service.embed_texts", mock, diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx index 2a0428209..0f0eaed96 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx @@ -12,6 +12,7 @@ import { useParams, useSearchParams } from "next/navigation"; import { useCallback, useEffect, useMemo, useRef, useState } from "react"; import { toast } from "sonner"; import { z } from "zod"; +import { disabledToolsAtom } from "@/atoms/agent-tools/agent-tools.atoms"; import { clearTargetCommentIdAtom, currentThreadAtom, @@ -180,6 +181,9 @@ export default function NewChatPage() { interruptData: Record; } | null>(null); + // Get disabled tools from the tool toggle UI + const disabledTools = useAtomValue(disabledToolsAtom); + // Get mentioned document IDs from the composer (derived from @ mentions + sidebar selections) const mentionedDocumentIds = useAtomValue(mentionedDocumentIdsAtom); const mentionedDocuments = useAtomValue(mentionedDocumentsAtom); @@ -640,6 +644,7 @@ export default function NewChatPage() { mentioned_surfsense_doc_ids: hasSurfsenseDocIds ? mentionedDocumentIds.surfsense_doc_ids : undefined, + disabled_tools: disabledTools.length > 0 ? disabledTools : undefined, }), signal: controller.signal, }); @@ -918,6 +923,7 @@ export default function NewChatPage() { queryClient, currentThread, currentUser, + disabledTools, ] ); @@ -1371,6 +1377,7 @@ export default function NewChatPage() { body: JSON.stringify({ search_space_id: searchSpaceId, user_query: newUserQuery || null, + disabled_tools: disabledTools.length > 0 ? 
disabledTools : undefined, }), signal: controller.signal, }); @@ -1542,7 +1549,7 @@ export default function NewChatPage() { abortControllerRef.current = null; } }, - [threadId, searchSpaceId, messages, setMessageThinkingSteps] + [threadId, searchSpaceId, messages, setMessageThinkingSteps, disabledTools] ); // Handle editing a message - truncates history and regenerates with new query diff --git a/surfsense_web/atoms/agent-tools/agent-tools.atoms.ts b/surfsense_web/atoms/agent-tools/agent-tools.atoms.ts new file mode 100644 index 000000000..3e05fc006 --- /dev/null +++ b/surfsense_web/atoms/agent-tools/agent-tools.atoms.ts @@ -0,0 +1,96 @@ +import { atom } from "jotai"; +import { atomWithQuery } from "jotai-tanstack-query"; +import { agentToolsApiService, type AgentToolInfo } from "@/lib/apis/agent-tools-api.service"; +import { cacheKeys } from "@/lib/query-client/cache-keys"; +import { activeSearchSpaceIdAtom } from "../search-spaces/search-space-query.atoms"; + +export const agentToolsAtom = atomWithQuery((_get) => ({ + queryKey: cacheKeys.agentTools.all(), + staleTime: 30 * 60 * 1000, // 30 min – tool list rarely changes + queryFn: async () => agentToolsApiService.getTools(), +})); + +const STORAGE_PREFIX = "surfsense-disabled-tools-"; + +function loadDisabledTools(searchSpaceId: string): string[] { + if (typeof window === "undefined") return []; + try { + const raw = localStorage.getItem(`${STORAGE_PREFIX}${searchSpaceId}`); + return raw ? 
(JSON.parse(raw) as string[]) : []; + } catch { + return []; + } +} + +function saveDisabledTools(searchSpaceId: string, tools: string[]) { + if (typeof window === "undefined") return; + if (tools.length === 0) { + localStorage.removeItem(`${STORAGE_PREFIX}${searchSpaceId}`); + } else { + localStorage.setItem(`${STORAGE_PREFIX}${searchSpaceId}`, JSON.stringify(tools)); + } +} + +const disabledToolsBaseAtom = atom([]); + +/** Tracks whether the atom has been hydrated from localStorage for the current search space */ +const hydratedForAtom = atom(null); + +/** + * Read/write atom for the set of disabled tool names. + * Persists to localStorage keyed by search space ID. + */ +export const disabledToolsAtom = atom( + (get) => { + const searchSpaceId = get(activeSearchSpaceIdAtom); + const hydratedFor = get(hydratedForAtom); + if (searchSpaceId && hydratedFor !== searchSpaceId) { + return loadDisabledTools(searchSpaceId); + } + return get(disabledToolsBaseAtom); + }, + (get, set, update: string[] | ((prev: string[]) => string[])) => { + const searchSpaceId = get(activeSearchSpaceIdAtom); + const prev = get(disabledToolsBaseAtom); + const next = typeof update === "function" ? update(prev) : update; + set(disabledToolsBaseAtom, next); + set(hydratedForAtom, searchSpaceId); + if (searchSpaceId) { + saveDisabledTools(searchSpaceId, next); + } + } +); + +/** + * Hydrate disabled tools from localStorage when search space changes. + * Call this from a useEffect in a component that has access to the search space. 
+ */ +export const hydrateDisabledToolsAtom = atom(null, (get, set) => { + const searchSpaceId = get(activeSearchSpaceIdAtom); + if (!searchSpaceId) return; + const stored = loadDisabledTools(searchSpaceId); + set(disabledToolsBaseAtom, stored); + set(hydratedForAtom, searchSpaceId); +}); + +/** Toggle a single tool's enabled/disabled state */ +export const toggleToolAtom = atom(null, (get, set, toolName: string) => { + const searchSpaceId = get(activeSearchSpaceIdAtom); + const current = get(disabledToolsBaseAtom); + const next = current.includes(toolName) + ? current.filter((t) => t !== toolName) + : [...current, toolName]; + set(disabledToolsBaseAtom, next); + set(hydratedForAtom, searchSpaceId); + if (searchSpaceId) { + saveDisabledTools(searchSpaceId, next); + } +}); + +/** Derive the count of currently enabled tools */ +export const enabledToolCountAtom = atom((get) => { + const { data: tools } = get(agentToolsAtom); + const disabled = get(disabledToolsAtom); + if (!tools) return 0; + return tools.length - disabled.filter((d) => tools.some((t) => t.name === d)).length; +}); diff --git a/surfsense_web/components/assistant-ui/thread.tsx b/surfsense_web/components/assistant-ui/thread.tsx index 51a9b9275..d25b5fe9e 100644 --- a/surfsense_web/components/assistant-ui/thread.tsx +++ b/surfsense_web/components/assistant-ui/thread.tsx @@ -19,11 +19,10 @@ import { ChevronRightIcon, CopyIcon, DownloadIcon, - PlusIcon, RefreshCwIcon, SquareIcon, Unplug, - Upload, + Wrench, X, } from "lucide-react"; import { useParams } from "next/navigation"; @@ -46,11 +45,7 @@ import { import { currentUserAtom } from "@/atoms/user/user-query.atoms"; import { AssistantMessage } from "@/components/assistant-ui/assistant-message"; import { ChatSessionStatus } from "@/components/assistant-ui/chat-session-status"; -import { - ConnectorIndicator, - type ConnectorIndicatorHandle, -} from "@/components/assistant-ui/connector-popup"; -import { useDocumentUploadDialog } from 
"@/components/assistant-ui/document-upload-popup"; +import { ConnectorIndicator } from "@/components/assistant-ui/connector-popup"; import { InlineMentionEditor, type InlineMentionEditorRef, @@ -71,16 +66,20 @@ import { import type { ThinkingStep } from "@/components/tool-ui/deepagent-thinking"; import { Avatar, AvatarFallback, AvatarGroup } from "@/components/ui/avatar"; import { Button } from "@/components/ui/button"; -import { - DropdownMenu, - DropdownMenuContent, - DropdownMenuItem, - DropdownMenuTrigger, -} from "@/components/ui/dropdown-menu"; import { getConnectorIcon } from "@/contracts/enums/connectorIcons"; import type { Document } from "@/contracts/types/document.types"; import { useBatchCommentsPreload } from "@/hooks/use-comments"; import { useCommentsElectric } from "@/hooks/use-comments-electric"; +import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover"; +import { Switch } from "@/components/ui/switch"; +import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"; +import { + agentToolsAtom, + disabledToolsAtom, + enabledToolCountAtom, + hydrateDisabledToolsAtom, + toggleToolAtom, +} from "@/atoms/agent-tools/agent-tools.atoms"; import { cn } from "@/lib/utils"; /** Placeholder texts that cycle in new chats when input is empty */ @@ -548,6 +547,7 @@ const Composer: FC = () => { document.body )} + @@ -562,11 +562,7 @@ const ComposerAction: FC = ({ isBlockedByOtherUser = false const mentionedDocuments = useAtomValue(mentionedDocumentsAtom); const sidebarDocs = useAtomValue(sidebarSelectedDocumentsAtom); const setDocumentsSidebarOpen = useSetAtom(documentsSidebarOpenAtom); - const connectorRef = useRef(null); - const [addMenuOpen, setAddMenuOpen] = useState(false); - const { openDialog: openUploadDialog } = useDocumentUploadDialog(); - const { data: connectors } = useAtomValue(connectorsAtom); - const connectorCount = connectors?.length ?? 
0; + const [toolsPopoverOpen, setToolsPopoverOpen] = useState(false); const isComposerTextEmpty = useAssistantState(({ composer }) => { const text = composer.text?.trim() || ""; return text.length === 0; @@ -577,6 +573,16 @@ const ComposerAction: FC = ({ isBlockedByOtherUser = false const { data: globalConfigs } = useAtomValue(globalNewLLMConfigsAtom); const { data: preferences } = useAtomValue(llmPreferencesAtom); + const { data: agentTools } = useAtomValue(agentToolsAtom); + const disabledTools = useAtomValue(disabledToolsAtom); + const toggleTool = useSetAtom(toggleToolAtom); + const hydrateDisabled = useSetAtom(hydrateDisabledToolsAtom); + const enabledCount = useAtomValue(enabledToolCountAtom); + + useEffect(() => { + hydrateDisabled(); + }, [hydrateDisabled]); + const hasModelConfigured = useMemo(() => { if (!preferences) return false; const agentLlmId = preferences.agent_llm_id; @@ -593,50 +599,61 @@ const ComposerAction: FC = ({ isBlockedByOtherUser = false return (
- - + + - + - - + - { - setAddMenuOpen(false); - openUploadDialog(); - }} - > - - Upload files - - { - setAddMenuOpen(false); - connectorRef.current?.open(); - }} - > - - {connectorCount > 0 ? "Manage tools" : "Connect your tools"} - {connectorCount > 0 && ( - {connectorCount} +
+ Agent Tools + + {enabledCount}/{agentTools?.length ?? 0} enabled + +
+
+ {agentTools?.map((tool) => { + const isDisabled = disabledTools.includes(tool.name); + return ( + + + + + + {tool.description} + + + ); + })} + {!agentTools?.length && ( +
+ Loading tools... +
)} - - - - +
+
+ {sidebarDocs.length > 0 && (