refactor: remove search_surfsense_docs tool and related references

- Deleted the `search_surfsense_docs` tool and its associated files, streamlining the agent's toolset.
- Updated various components and prompts to remove references to the now-removed tool, ensuring consistency across the codebase.
- Adjusted documentation to direct users to the SurfSense documentation link for product-related queries instead.
This commit is contained in:
DESKTOP-RTLN3BA\$punk 2026-05-28 22:35:14 -07:00
parent 9b9e6828c7
commit 40ca9e6ed2
71 changed files with 232 additions and 1676 deletions

View file

@ -5,7 +5,6 @@ This module contains all the tools available to the SurfSense agent.
To add a new tool, see the documentation in registry.py.
Available tools:
- search_surfsense_docs: Search Surfsense documentation for usage help
- generate_podcast: Generate audio podcasts from content
- generate_video_presentation: Generate video presentations with slides and narration
- generate_image: Generate images from text descriptions using AI models
@ -31,7 +30,6 @@ from .registry import (
get_tool_by_name,
)
from .scrape_webpage import create_scrape_webpage_tool
from .search_surfsense_docs import create_search_surfsense_docs_tool
from .update_memory import create_update_memory_tool, create_update_team_memory_tool
from .video_presentation import create_generate_video_presentation_tool
@ -47,7 +45,6 @@ __all__ = [
"create_generate_podcast_tool",
"create_generate_video_presentation_tool",
"create_scrape_webpage_tool",
"create_search_surfsense_docs_tool",
"create_update_memory_tool",
"create_update_team_memory_tool",
"format_documents_for_context",

View file

@ -101,7 +101,6 @@ from .podcast import create_generate_podcast_tool
from .report import create_generate_report_tool
from .resume import create_generate_resume_tool
from .scrape_webpage import create_scrape_webpage_tool
from .search_surfsense_docs import create_search_surfsense_docs_tool
from .teams import (
create_list_teams_channels_tool,
create_read_teams_messages_tool,
@ -258,15 +257,6 @@ BUILTIN_TOOLS: list[ToolDefinition] = [
),
requires=[],
),
# Surfsense documentation search tool
ToolDefinition(
name="search_surfsense_docs",
description="Search Surfsense documentation for help with using the application",
factory=lambda deps: create_search_surfsense_docs_tool(
db_session=deps["db_session"],
),
requires=["db_session"],
),
# =========================================================================
# SERVICE ACCOUNT DISCOVERY
# Generic tool for the LLM to discover connected accounts and resolve

View file

@ -1,174 +0,0 @@
"""
Surfsense documentation search tool.
This tool allows the agent to search the pre-indexed Surfsense documentation
to help users with questions about how to use the application.
The documentation is indexed at deployment time from MDX files and stored
in dedicated tables (surfsense_docs_documents, surfsense_docs_chunks).
"""
import asyncio
import json
from langchain_core.tools import tool
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument, async_session_maker
from app.utils.document_converters import embed_text
from app.utils.surfsense_docs import surfsense_docs_public_url
def format_surfsense_docs_results(results: list[tuple]) -> str:
"""
Format search results into XML structure for the LLM context.
Uses the same XML structure as format_documents_for_context from knowledge_base.py
but with 'doc-' prefix on chunk IDs. This allows:
- LLM to use consistent [citation:doc-XXX] format
- Frontend to detect 'doc-' prefix and route to surfsense docs endpoint
Args:
results: List of (chunk, document) tuples from the database query
Returns:
Formatted XML string with documentation content and citation-ready chunks
"""
if not results:
return "No relevant Surfsense documentation found for your query."
# Group chunks by document
grouped: dict[int, dict] = {}
for chunk, doc in results:
public_url = surfsense_docs_public_url(doc.source)
if doc.id not in grouped:
grouped[doc.id] = {
"document_id": f"doc-{doc.id}",
"document_type": "SURFSENSE_DOCS",
"title": doc.title,
"url": public_url,
"metadata": {"source": doc.source, "public_url": public_url},
"chunks": [],
}
grouped[doc.id]["chunks"].append(
{
"chunk_id": f"doc-{chunk.id}",
"content": chunk.content,
}
)
# Render XML matching format_documents_for_context structure
parts: list[str] = []
for g in grouped.values():
metadata_json = json.dumps(g["metadata"], ensure_ascii=False)
parts.append("<document>")
parts.append("<document_metadata>")
parts.append(f" <document_id>{g['document_id']}</document_id>")
parts.append(f" <document_type>{g['document_type']}</document_type>")
parts.append(f" <title><![CDATA[{g['title']}]]></title>")
parts.append(f" <url><![CDATA[{g['url']}]]></url>")
parts.append(f" <metadata_json><![CDATA[{metadata_json}]]></metadata_json>")
parts.append("</document_metadata>")
parts.append("")
parts.append("<document_content>")
for ch in g["chunks"]:
parts.append(
f" <chunk id='{ch['chunk_id']}'><![CDATA[{ch['content']}]]></chunk>"
)
parts.append("</document_content>")
parts.append("</document>")
parts.append("")
return "\n".join(parts).strip()
async def search_surfsense_docs_async(
query: str,
db_session: AsyncSession,
top_k: int = 10,
) -> str:
"""
Search Surfsense documentation using vector similarity.
Args:
query: The search query about Surfsense usage
db_session: Database session for executing queries
top_k: Number of results to return
Returns:
Formatted string with relevant documentation content
"""
# Get embedding for the query
query_embedding = await asyncio.to_thread(embed_text, query)
# Vector similarity search on chunks, joining with documents
stmt = (
select(SurfsenseDocsChunk, SurfsenseDocsDocument)
.join(
SurfsenseDocsDocument,
SurfsenseDocsChunk.document_id == SurfsenseDocsDocument.id,
)
.order_by(SurfsenseDocsChunk.embedding.op("<=>")(query_embedding))
.limit(top_k)
)
result = await db_session.execute(stmt)
rows = result.all()
return format_surfsense_docs_results(rows)
def create_search_surfsense_docs_tool(db_session: AsyncSession):
"""
Factory function to create the search_surfsense_docs tool.
The tool acquires its own short-lived ``AsyncSession`` per call via
:data:`async_session_maker` so the closure is safe to share across
HTTP requests by the compiled-agent cache. Capturing a per-request
session here would surface stale/closed sessions on cache hits.
Args:
db_session: Reserved for registry compatibility. Per-call sessions
are opened via :data:`async_session_maker` inside the tool body.
Returns:
A configured tool function for searching Surfsense documentation
"""
del db_session # per-call session — see docstring
@tool
async def search_surfsense_docs(query: str, top_k: int = 10) -> str:
"""
Search Surfsense documentation for help with using the application.
Use this tool when the user asks questions about:
- How to use Surfsense features
- Installation and setup instructions
- Configuration options and settings
- Troubleshooting common issues
- Available connectors and integrations
- Browser extension usage
- API documentation
This searches the official Surfsense documentation that was indexed
at deployment time. It does NOT search the user's personal knowledge base.
Args:
query: The search query about Surfsense usage or features
top_k: Number of documentation chunks to retrieve (default: 10)
Returns:
Relevant documentation content formatted with chunk IDs for citations
"""
async with async_session_maker() as db_session:
return await search_surfsense_docs_async(
query=query,
db_session=db_session,
top_k=top_k,
)
return search_surfsense_docs