mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-11 08:42:39 +02:00
feat: made agent file sytem optimized
This commit is contained in:
parent
ee0b59c0fa
commit
2cc2d339e6
67 changed files with 8011 additions and 5591 deletions
|
|
@ -1,11 +1,12 @@
|
|||
"""
|
||||
SurfSense New Chat Agent Module.
|
||||
|
||||
This module provides the SurfSense deep agent with configurable tools
|
||||
for knowledge base search, podcast generation, and more.
|
||||
This module provides the SurfSense deep agent with configurable tools,
|
||||
middleware, and preloaded knowledge-base filesystem behavior.
|
||||
|
||||
Directory Structure:
|
||||
- tools/: All agent tools (knowledge_base, podcast, generate_image, etc.)
|
||||
- tools/: All agent tools (podcast, generate_image, web, memory, etc.)
|
||||
- middleware/: Custom middleware (knowledge search, filesystem, dedup, etc.)
|
||||
- chat_deepagent.py: Main agent factory
|
||||
- system_prompt.py: System prompts and instructions
|
||||
- context.py: Context schema for the agent
|
||||
|
|
@ -23,6 +24,13 @@ from .context import SurfSenseContextSchema
|
|||
# LLM config
|
||||
from .llm_config import create_chat_litellm_from_config, load_llm_config_from_yaml
|
||||
|
||||
# Middleware
|
||||
from .middleware import (
|
||||
DedupHITLToolCallsMiddleware,
|
||||
KnowledgeBaseSearchMiddleware,
|
||||
SurfSenseFilesystemMiddleware,
|
||||
)
|
||||
|
||||
# System prompt
|
||||
from .system_prompt import (
|
||||
SURFSENSE_CITATION_INSTRUCTIONS,
|
||||
|
|
@ -39,7 +47,6 @@ from .tools import (
|
|||
build_tools,
|
||||
create_generate_podcast_tool,
|
||||
create_scrape_webpage_tool,
|
||||
create_search_knowledge_base_tool,
|
||||
format_documents_for_context,
|
||||
get_all_tool_names,
|
||||
get_default_enabled_tools,
|
||||
|
|
@ -53,8 +60,12 @@ __all__ = [
|
|||
# System prompt
|
||||
"SURFSENSE_CITATION_INSTRUCTIONS",
|
||||
"SURFSENSE_SYSTEM_PROMPT",
|
||||
# Middleware
|
||||
"DedupHITLToolCallsMiddleware",
|
||||
"KnowledgeBaseSearchMiddleware",
|
||||
# Context
|
||||
"SurfSenseContextSchema",
|
||||
"SurfSenseFilesystemMiddleware",
|
||||
"ToolDefinition",
|
||||
"build_surfsense_system_prompt",
|
||||
"build_tools",
|
||||
|
|
@ -63,7 +74,6 @@ __all__ = [
|
|||
# Tool factories
|
||||
"create_generate_podcast_tool",
|
||||
"create_scrape_webpage_tool",
|
||||
"create_search_knowledge_base_tool",
|
||||
# Agent factory
|
||||
"create_surfsense_deep_agent",
|
||||
# Knowledge base utilities
|
||||
|
|
|
|||
|
|
@ -4,6 +4,13 @@ SurfSense deep agent implementation.
|
|||
This module provides the factory function for creating SurfSense deep agents
|
||||
with configurable tools via the tools registry and configurable prompts
|
||||
via NewLLMConfig.
|
||||
|
||||
We use ``create_agent`` (from langchain) rather than ``create_deep_agent``
|
||||
(from deepagents) so that the middleware stack is fully under our control.
|
||||
This lets us swap in ``SurfSenseFilesystemMiddleware`` — a customisable
|
||||
subclass of the default ``FilesystemMiddleware`` — while preserving every
|
||||
other behaviour that ``create_deep_agent`` provides (todo-list, subagents,
|
||||
summarisation, prompt-caching, etc.).
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
|
|
@ -12,8 +19,15 @@ import time
|
|||
from collections.abc import Sequence
|
||||
from typing import Any
|
||||
|
||||
from deepagents import create_deep_agent
|
||||
from deepagents.backends.protocol import SandboxBackendProtocol
|
||||
from deepagents import SubAgent, SubAgentMiddleware, __version__ as deepagents_version
|
||||
from deepagents.backends import StateBackend
|
||||
from deepagents.graph import BASE_AGENT_PROMPT
|
||||
from deepagents.middleware.patch_tool_calls import PatchToolCallsMiddleware
|
||||
from deepagents.middleware.subagents import GENERAL_PURPOSE_SUBAGENT
|
||||
from deepagents.middleware.summarization import create_summarization_middleware
|
||||
from langchain.agents import create_agent
|
||||
from langchain.agents.middleware import TodoListMiddleware
|
||||
from langchain_anthropic.middleware import AnthropicPromptCachingMiddleware
|
||||
from langchain_core.language_models import BaseChatModel
|
||||
from langchain_core.tools import BaseTool
|
||||
from langgraph.types import Checkpointer
|
||||
|
|
@ -21,8 +35,10 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
|||
|
||||
from app.agents.new_chat.context import SurfSenseContextSchema
|
||||
from app.agents.new_chat.llm_config import AgentConfig
|
||||
from app.agents.new_chat.middleware.dedup_tool_calls import (
|
||||
from app.agents.new_chat.middleware import (
|
||||
DedupHITLToolCallsMiddleware,
|
||||
KnowledgeBaseSearchMiddleware,
|
||||
SurfSenseFilesystemMiddleware,
|
||||
)
|
||||
from app.agents.new_chat.system_prompt import (
|
||||
build_configurable_system_prompt,
|
||||
|
|
@ -40,15 +56,15 @@ _perf_log = get_perf_logger()
|
|||
# =============================================================================
|
||||
|
||||
# Maps SearchSourceConnectorType enum values to the searchable document/connector types
|
||||
# used by the knowledge_base and web_search tools.
|
||||
# used by pre-search middleware and web_search.
|
||||
# Live search connectors (TAVILY_API, LINKUP_API, BAIDU_SEARCH_API) are routed to
|
||||
# the web_search tool; all others go to search_knowledge_base.
|
||||
# the web_search tool; all others are considered local/indexed data.
|
||||
_CONNECTOR_TYPE_TO_SEARCHABLE: dict[str, str] = {
|
||||
# Live search connectors (handled by web_search tool)
|
||||
"TAVILY_API": "TAVILY_API",
|
||||
"LINKUP_API": "LINKUP_API",
|
||||
"BAIDU_SEARCH_API": "BAIDU_SEARCH_API",
|
||||
# Local/indexed connectors (handled by search_knowledge_base tool)
|
||||
# Local/indexed connectors (handled by KB pre-search middleware)
|
||||
"SLACK_CONNECTOR": "SLACK_CONNECTOR",
|
||||
"TEAMS_CONNECTOR": "TEAMS_CONNECTOR",
|
||||
"NOTION_CONNECTOR": "NOTION_CONNECTOR",
|
||||
|
|
@ -141,13 +157,11 @@ async def create_surfsense_deep_agent(
|
|||
additional_tools: Sequence[BaseTool] | None = None,
|
||||
firecrawl_api_key: str | None = None,
|
||||
thread_visibility: ChatVisibility | None = None,
|
||||
sandbox_backend: SandboxBackendProtocol | None = None,
|
||||
):
|
||||
"""
|
||||
Create a SurfSense deep agent with configurable tools and prompts.
|
||||
|
||||
The agent comes with built-in tools that can be configured:
|
||||
- search_knowledge_base: Search the user's personal knowledge base
|
||||
- generate_podcast: Generate audio podcasts from content
|
||||
- generate_image: Generate images from text descriptions using AI models
|
||||
- scrape_webpage: Extract content from webpages
|
||||
|
|
@ -179,9 +193,6 @@ async def create_surfsense_deep_agent(
|
|||
These are always added regardless of enabled/disabled settings.
|
||||
firecrawl_api_key: Optional Firecrawl API key for premium web scraping.
|
||||
Falls back to Chromium/Trafilatura if not provided.
|
||||
sandbox_backend: Optional sandbox backend (e.g. DaytonaSandbox) for
|
||||
secure code execution. When provided, the agent gets an
|
||||
isolated ``execute`` tool for running shell commands.
|
||||
|
||||
Returns:
|
||||
CompiledStateGraph: The configured deep agent
|
||||
|
|
@ -205,7 +216,7 @@ async def create_surfsense_deep_agent(
|
|||
# Create agent with only specific tools
|
||||
agent = create_surfsense_deep_agent(
|
||||
llm, search_space_id, db_session, ...,
|
||||
enabled_tools=["search_knowledge_base", "scrape_webpage"]
|
||||
enabled_tools=["scrape_webpage"]
|
||||
)
|
||||
|
||||
# Create agent without podcast generation
|
||||
|
|
@ -357,6 +368,10 @@ async def create_surfsense_deep_agent(
|
|||
]
|
||||
modified_disabled_tools.extend(confluence_tools)
|
||||
|
||||
# Remove direct KB search tool; we now pre-seed a scoped filesystem via middleware.
|
||||
if "search_knowledge_base" not in modified_disabled_tools:
|
||||
modified_disabled_tools.append("search_knowledge_base")
|
||||
|
||||
# Build tools using the async registry (includes MCP tools)
|
||||
_t0 = time.perf_counter()
|
||||
tools = await build_tools_async(
|
||||
|
|
@ -373,7 +388,6 @@ async def create_surfsense_deep_agent(
|
|||
|
||||
# Build system prompt based on agent_config, scoped to the tools actually enabled
|
||||
_t0 = time.perf_counter()
|
||||
_sandbox_enabled = sandbox_backend is not None
|
||||
_enabled_tool_names = {t.name for t in tools}
|
||||
_user_disabled_tool_names = set(disabled_tools) if disabled_tools else set()
|
||||
if agent_config is not None:
|
||||
|
|
@ -382,14 +396,12 @@ async def create_surfsense_deep_agent(
|
|||
use_default_system_instructions=agent_config.use_default_system_instructions,
|
||||
citations_enabled=agent_config.citations_enabled,
|
||||
thread_visibility=thread_visibility,
|
||||
sandbox_enabled=_sandbox_enabled,
|
||||
enabled_tool_names=_enabled_tool_names,
|
||||
disabled_tool_names=_user_disabled_tool_names,
|
||||
)
|
||||
else:
|
||||
system_prompt = build_surfsense_system_prompt(
|
||||
thread_visibility=thread_visibility,
|
||||
sandbox_enabled=_sandbox_enabled,
|
||||
enabled_tool_names=_enabled_tool_names,
|
||||
disabled_tool_names=_user_disabled_tool_names,
|
||||
)
|
||||
|
|
@ -397,24 +409,69 @@ async def create_surfsense_deep_agent(
|
|||
"[create_agent] System prompt built in %.3fs", time.perf_counter() - _t0
|
||||
)
|
||||
|
||||
# Build optional kwargs for the deep agent
|
||||
deep_agent_kwargs: dict[str, Any] = {}
|
||||
if sandbox_backend is not None:
|
||||
deep_agent_kwargs["backend"] = sandbox_backend
|
||||
# -- Build the middleware stack (mirrors create_deep_agent internals) ------
|
||||
# General-purpose subagent middleware
|
||||
gp_middleware = [
|
||||
TodoListMiddleware(),
|
||||
SurfSenseFilesystemMiddleware(
|
||||
search_space_id=search_space_id,
|
||||
created_by_id=user_id,
|
||||
),
|
||||
create_summarization_middleware(llm, StateBackend),
|
||||
PatchToolCallsMiddleware(),
|
||||
AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"),
|
||||
]
|
||||
|
||||
general_purpose_spec: SubAgent = { # type: ignore[typeddict-unknown-key]
|
||||
**GENERAL_PURPOSE_SUBAGENT,
|
||||
"model": llm,
|
||||
"tools": tools,
|
||||
"middleware": gp_middleware,
|
||||
}
|
||||
|
||||
# Main agent middleware
|
||||
deepagent_middleware = [
|
||||
TodoListMiddleware(),
|
||||
KnowledgeBaseSearchMiddleware(
|
||||
search_space_id=search_space_id,
|
||||
available_connectors=available_connectors,
|
||||
available_document_types=available_document_types,
|
||||
),
|
||||
SurfSenseFilesystemMiddleware(
|
||||
search_space_id=search_space_id,
|
||||
created_by_id=user_id,
|
||||
),
|
||||
SubAgentMiddleware(backend=StateBackend, subagents=[general_purpose_spec]),
|
||||
create_summarization_middleware(llm, StateBackend),
|
||||
PatchToolCallsMiddleware(),
|
||||
DedupHITLToolCallsMiddleware(),
|
||||
AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"),
|
||||
]
|
||||
|
||||
# Combine system_prompt with BASE_AGENT_PROMPT (same as create_deep_agent)
|
||||
final_system_prompt = system_prompt + "\n\n" + BASE_AGENT_PROMPT
|
||||
|
||||
_t0 = time.perf_counter()
|
||||
agent = await asyncio.to_thread(
|
||||
create_deep_agent,
|
||||
model=llm,
|
||||
create_agent,
|
||||
llm,
|
||||
system_prompt=final_system_prompt,
|
||||
tools=tools,
|
||||
system_prompt=system_prompt,
|
||||
middleware=deepagent_middleware,
|
||||
context_schema=SurfSenseContextSchema,
|
||||
checkpointer=checkpointer,
|
||||
middleware=[DedupHITLToolCallsMiddleware()],
|
||||
**deep_agent_kwargs,
|
||||
)
|
||||
agent = agent.with_config(
|
||||
{
|
||||
"recursion_limit": 10_000,
|
||||
"metadata": {
|
||||
"ls_integration": "deepagents",
|
||||
"versions": {"deepagents": deepagents_version},
|
||||
},
|
||||
}
|
||||
)
|
||||
_perf_log.info(
|
||||
"[create_agent] Graph compiled (create_deep_agent) in %.3fs",
|
||||
"[create_agent] Graph compiled (create_agent) in %.3fs",
|
||||
time.perf_counter() - _t0,
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,17 @@
|
|||
"""Middleware components for the SurfSense new chat agent."""
|
||||
|
||||
from app.agents.new_chat.middleware.dedup_tool_calls import (
|
||||
DedupHITLToolCallsMiddleware,
|
||||
)
|
||||
from app.agents.new_chat.middleware.filesystem import (
|
||||
SurfSenseFilesystemMiddleware,
|
||||
)
|
||||
from app.agents.new_chat.middleware.knowledge_search import (
|
||||
KnowledgeBaseSearchMiddleware,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"DedupHITLToolCallsMiddleware",
|
||||
"KnowledgeBaseSearchMiddleware",
|
||||
"SurfSenseFilesystemMiddleware",
|
||||
]
|
||||
694
surfsense_backend/app/agents/new_chat/middleware/filesystem.py
Normal file
694
surfsense_backend/app/agents/new_chat/middleware/filesystem.py
Normal file
|
|
@ -0,0 +1,694 @@
|
|||
"""Custom filesystem middleware for the SurfSense agent.
|
||||
|
||||
This middleware customizes prompts and persists write/edit operations for
|
||||
`/documents/*` files into SurfSense's `Document`/`Chunk` tables.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import re
|
||||
from datetime import UTC, datetime
|
||||
from typing import Annotated, Any
|
||||
|
||||
from deepagents import FilesystemMiddleware
|
||||
from deepagents.backends.protocol import EditResult, WriteResult
|
||||
from deepagents.backends.utils import validate_path
|
||||
from deepagents.middleware.filesystem import FilesystemState
|
||||
from fractional_indexing import generate_key_between
|
||||
from langchain.tools import ToolRuntime
|
||||
from langchain_core.callbacks import dispatch_custom_event
|
||||
from langchain_core.messages import ToolMessage
|
||||
from langchain_core.tools import BaseTool, StructuredTool
|
||||
from langgraph.types import Command
|
||||
from sqlalchemy import delete, select
|
||||
|
||||
from app.db import Chunk, Document, DocumentType, Folder, shielded_async_session
|
||||
from app.indexing_pipeline.document_chunker import chunk_text
|
||||
from app.utils.document_converters import (
|
||||
embed_texts,
|
||||
generate_content_hash,
|
||||
generate_unique_identifier_hash,
|
||||
)
|
||||
|
||||
# =============================================================================
|
||||
# System Prompt (injected into every model call by wrap_model_call)
|
||||
# =============================================================================
|
||||
|
||||
SURFSENSE_FILESYSTEM_SYSTEM_PROMPT = """## Following Conventions
|
||||
|
||||
- Read files before editing — understand existing content before making changes.
|
||||
- Mimic existing style, naming conventions, and patterns.
|
||||
|
||||
## Filesystem Tools `ls`, `read_file`, `write_file`, `edit_file`, `glob`, `grep`, `save_document`
|
||||
|
||||
All file paths must start with a `/`.
|
||||
- ls: list files and directories at a given path.
|
||||
- read_file: read a file from the filesystem.
|
||||
- write_file: create a temporary file in the session (not persisted).
|
||||
- edit_file: edit a file in the session (not persisted for /documents/ files).
|
||||
- glob: find files matching a pattern (e.g., "**/*.xml").
|
||||
- grep: search for text within files.
|
||||
- save_document: **permanently** save a new document to the user's knowledge
|
||||
base. Use only when the user explicitly asks to save/create a document.
|
||||
|
||||
## Reading Documents Efficiently
|
||||
|
||||
Documents are formatted as XML. Each document contains:
|
||||
- `<document_metadata>` — title, type, URL, etc.
|
||||
- `<chunk_index>` — a table of every chunk with its **line range** and a
|
||||
`matched="true"` flag for chunks that matched the search query.
|
||||
- `<document_content>` — the actual chunks in original document order.
|
||||
|
||||
**Workflow**: when reading a large document, read the first ~20 lines to see
|
||||
the `<chunk_index>`, identify chunks marked `matched="true"`, then use
|
||||
`read_file(path, offset=<start_line>, limit=<lines>)` to jump directly to
|
||||
those sections instead of reading the entire file sequentially.
|
||||
|
||||
Use `<chunk id='...'>` values as citation IDs in your answers.
|
||||
"""
|
||||
|
||||
# =============================================================================
|
||||
# Per-Tool Descriptions (shown to the LLM as the tool's docstring)
|
||||
# =============================================================================
|
||||
|
||||
SURFSENSE_LIST_FILES_TOOL_DESCRIPTION = """Lists files and directories at the given path.
|
||||
"""
|
||||
|
||||
SURFSENSE_READ_FILE_TOOL_DESCRIPTION = """Reads a file from the filesystem.
|
||||
|
||||
Usage:
|
||||
- By default, reads up to 100 lines from the beginning.
|
||||
- Use `offset` and `limit` for pagination when files are large.
|
||||
- Results include line numbers.
|
||||
- Documents contain a `<chunk_index>` near the top listing every chunk with
|
||||
its line range and a `matched="true"` flag for search-relevant chunks.
|
||||
Read the index first, then jump to matched chunks with
|
||||
`read_file(path, offset=<start_line>, limit=<num_lines>)`.
|
||||
- Use chunk IDs (`<chunk id='...'>`) as citations in answers.
|
||||
"""
|
||||
|
||||
SURFSENSE_WRITE_FILE_TOOL_DESCRIPTION = """Writes a new file to the in-memory filesystem (session-only).
|
||||
|
||||
Use this to create scratch/working files during the conversation. Files created
|
||||
here are ephemeral and will not be saved to the user's knowledge base.
|
||||
|
||||
To permanently save a document to the user's knowledge base, use the
|
||||
`save_document` tool instead.
|
||||
"""
|
||||
|
||||
SURFSENSE_EDIT_FILE_TOOL_DESCRIPTION = """Performs exact string replacements in files.
|
||||
|
||||
IMPORTANT:
|
||||
- Read the file before editing.
|
||||
- Preserve exact indentation and formatting.
|
||||
- Edits to documents under `/documents/` are session-only (not persisted to the
|
||||
database) because those files use an XML citation wrapper around the original
|
||||
content.
|
||||
"""
|
||||
|
||||
SURFSENSE_GLOB_TOOL_DESCRIPTION = """Find files matching a glob pattern.
|
||||
|
||||
Supports standard glob patterns: `*`, `**`, `?`.
|
||||
Returns absolute file paths.
|
||||
"""
|
||||
|
||||
SURFSENSE_GREP_TOOL_DESCRIPTION = """Search for a literal text pattern across files.
|
||||
|
||||
Use this to locate relevant document files/chunks before reading full files.
|
||||
"""
|
||||
|
||||
SURFSENSE_SAVE_DOCUMENT_TOOL_DESCRIPTION = """Permanently saves a document to the user's knowledge base.
|
||||
|
||||
This is an expensive operation — it creates a new Document record in the
|
||||
database, chunks the content, and generates embeddings for search.
|
||||
|
||||
Use ONLY when the user explicitly asks to save/create/store a document.
|
||||
Do NOT use this for scratch work; use `write_file` for temporary files.
|
||||
|
||||
Args:
|
||||
title: The document title (e.g., "Meeting Notes 2025-06-01").
|
||||
content: The plain-text or markdown content to save. Do NOT include XML
|
||||
citation wrappers — pass only the actual document text.
|
||||
folder_path: Optional folder path under /documents/ (e.g., "Work/Notes").
|
||||
Folders are created automatically if they don't exist.
|
||||
"""
|
||||
|
||||
|
||||
class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
|
||||
"""SurfSense-specific filesystem middleware with DB persistence for docs."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
search_space_id: int | None = None,
|
||||
created_by_id: str | None = None,
|
||||
tool_token_limit_before_evict: int | None = 20000,
|
||||
) -> None:
|
||||
self._search_space_id = search_space_id
|
||||
self._created_by_id = created_by_id
|
||||
super().__init__(
|
||||
system_prompt=SURFSENSE_FILESYSTEM_SYSTEM_PROMPT,
|
||||
custom_tool_descriptions={
|
||||
"ls": SURFSENSE_LIST_FILES_TOOL_DESCRIPTION,
|
||||
"read_file": SURFSENSE_READ_FILE_TOOL_DESCRIPTION,
|
||||
"write_file": SURFSENSE_WRITE_FILE_TOOL_DESCRIPTION,
|
||||
"edit_file": SURFSENSE_EDIT_FILE_TOOL_DESCRIPTION,
|
||||
"glob": SURFSENSE_GLOB_TOOL_DESCRIPTION,
|
||||
"grep": SURFSENSE_GREP_TOOL_DESCRIPTION,
|
||||
},
|
||||
tool_token_limit_before_evict=tool_token_limit_before_evict,
|
||||
)
|
||||
# Remove the execute tool (no sandbox backend)
|
||||
self.tools = [t for t in self.tools if t.name != "execute"]
|
||||
self.tools.append(self._create_save_document_tool())
|
||||
|
||||
@staticmethod
|
||||
def _run_async_blocking(coro: Any) -> Any:
|
||||
"""Run async coroutine from sync code path when no event loop is running."""
|
||||
try:
|
||||
loop = asyncio.get_running_loop()
|
||||
if loop.is_running():
|
||||
return "Error: sync filesystem persistence not supported inside an active event loop."
|
||||
except RuntimeError:
|
||||
pass
|
||||
return asyncio.run(coro)
|
||||
|
||||
@staticmethod
|
||||
def _parse_virtual_path(file_path: str) -> tuple[list[str], str]:
|
||||
"""Parse /documents/... path into folder parts and a document title."""
|
||||
if not file_path.startswith("/documents/"):
|
||||
return [], ""
|
||||
rel = file_path[len("/documents/") :].strip("/")
|
||||
if not rel:
|
||||
return [], ""
|
||||
parts = [part for part in rel.split("/") if part]
|
||||
file_name = parts[-1]
|
||||
title = file_name[:-4] if file_name.lower().endswith(".xml") else file_name
|
||||
return parts[:-1], title
|
||||
|
||||
async def _ensure_folder_hierarchy(
|
||||
self,
|
||||
*,
|
||||
folder_parts: list[str],
|
||||
search_space_id: int,
|
||||
) -> int | None:
|
||||
"""Ensure folder hierarchy exists and return leaf folder ID."""
|
||||
if not folder_parts:
|
||||
return None
|
||||
async with shielded_async_session() as session:
|
||||
parent_id: int | None = None
|
||||
for name in folder_parts:
|
||||
result = await session.execute(
|
||||
select(Folder).where(
|
||||
Folder.search_space_id == search_space_id,
|
||||
Folder.parent_id == parent_id
|
||||
if parent_id is not None
|
||||
else Folder.parent_id.is_(None),
|
||||
Folder.name == name,
|
||||
)
|
||||
)
|
||||
folder = result.scalar_one_or_none()
|
||||
if folder is None:
|
||||
sibling_result = await session.execute(
|
||||
select(Folder.position)
|
||||
.where(
|
||||
Folder.search_space_id == search_space_id,
|
||||
Folder.parent_id == parent_id
|
||||
if parent_id is not None
|
||||
else Folder.parent_id.is_(None),
|
||||
)
|
||||
.order_by(Folder.position.desc())
|
||||
.limit(1)
|
||||
)
|
||||
last_position = sibling_result.scalar_one_or_none()
|
||||
folder = Folder(
|
||||
name=name,
|
||||
position=generate_key_between(last_position, None),
|
||||
parent_id=parent_id,
|
||||
search_space_id=search_space_id,
|
||||
created_by_id=self._created_by_id,
|
||||
updated_at=datetime.now(UTC),
|
||||
)
|
||||
session.add(folder)
|
||||
await session.flush()
|
||||
parent_id = folder.id
|
||||
await session.commit()
|
||||
return parent_id
|
||||
|
||||
async def _persist_new_document(
|
||||
self, *, file_path: str, content: str
|
||||
) -> dict[str, Any] | str:
|
||||
"""Persist a new NOTE document from a newly written file.
|
||||
|
||||
Returns a dict with document metadata on success, or an error string.
|
||||
"""
|
||||
if self._search_space_id is None:
|
||||
return {}
|
||||
folder_parts, title = self._parse_virtual_path(file_path)
|
||||
if not title:
|
||||
return "Error: write_file for document persistence requires path under /documents/<name>.xml"
|
||||
folder_id = await self._ensure_folder_hierarchy(
|
||||
folder_parts=folder_parts,
|
||||
search_space_id=self._search_space_id,
|
||||
)
|
||||
async with shielded_async_session() as session:
|
||||
content_hash = generate_content_hash(content, self._search_space_id)
|
||||
existing = await session.execute(
|
||||
select(Document.id).where(Document.content_hash == content_hash)
|
||||
)
|
||||
if existing.scalar_one_or_none() is not None:
|
||||
return "Error: A document with identical content already exists."
|
||||
unique_identifier_hash = generate_unique_identifier_hash(
|
||||
DocumentType.NOTE,
|
||||
file_path,
|
||||
self._search_space_id,
|
||||
)
|
||||
doc = Document(
|
||||
title=title,
|
||||
document_type=DocumentType.NOTE,
|
||||
document_metadata={"virtual_path": file_path},
|
||||
content=content,
|
||||
content_hash=content_hash,
|
||||
unique_identifier_hash=unique_identifier_hash,
|
||||
source_markdown=content,
|
||||
search_space_id=self._search_space_id,
|
||||
folder_id=folder_id,
|
||||
created_by_id=self._created_by_id,
|
||||
updated_at=datetime.now(UTC),
|
||||
)
|
||||
session.add(doc)
|
||||
await session.flush()
|
||||
|
||||
summary_embedding = embed_texts([content])[0]
|
||||
doc.embedding = summary_embedding
|
||||
chunk_texts = chunk_text(content)
|
||||
if chunk_texts:
|
||||
chunk_embeddings = embed_texts(chunk_texts)
|
||||
chunks = [
|
||||
Chunk(document_id=doc.id, content=text, embedding=embedding)
|
||||
for text, embedding in zip(
|
||||
chunk_texts, chunk_embeddings, strict=True
|
||||
)
|
||||
]
|
||||
session.add_all(chunks)
|
||||
await session.commit()
|
||||
|
||||
return {
|
||||
"id": doc.id,
|
||||
"title": title,
|
||||
"documentType": DocumentType.NOTE.value,
|
||||
"searchSpaceId": self._search_space_id,
|
||||
"folderId": folder_id,
|
||||
"createdById": str(self._created_by_id)
|
||||
if self._created_by_id
|
||||
else None,
|
||||
}
|
||||
|
||||
async def _persist_edited_document(
|
||||
self, *, file_path: str, updated_content: str
|
||||
) -> str | None:
|
||||
"""Persist edits for an existing NOTE document and recreate chunks."""
|
||||
if self._search_space_id is None:
|
||||
return None
|
||||
unique_identifier_hash = generate_unique_identifier_hash(
|
||||
DocumentType.NOTE,
|
||||
file_path,
|
||||
self._search_space_id,
|
||||
)
|
||||
doc_id_from_xml: int | None = None
|
||||
match = re.search(r"<document_id>\s*(\d+)\s*</document_id>", updated_content)
|
||||
if match:
|
||||
doc_id_from_xml = int(match.group(1))
|
||||
async with shielded_async_session() as session:
|
||||
doc_result = await session.execute(
|
||||
select(Document).where(
|
||||
Document.search_space_id == self._search_space_id,
|
||||
Document.unique_identifier_hash == unique_identifier_hash,
|
||||
)
|
||||
)
|
||||
document = doc_result.scalar_one_or_none()
|
||||
if document is None and doc_id_from_xml is not None:
|
||||
by_id_result = await session.execute(
|
||||
select(Document).where(
|
||||
Document.search_space_id == self._search_space_id,
|
||||
Document.id == doc_id_from_xml,
|
||||
)
|
||||
)
|
||||
document = by_id_result.scalar_one_or_none()
|
||||
if document is None:
|
||||
return "Error: Could not map edited file to an existing document."
|
||||
|
||||
document.content = updated_content
|
||||
document.source_markdown = updated_content
|
||||
document.content_hash = generate_content_hash(
|
||||
updated_content, self._search_space_id
|
||||
)
|
||||
document.updated_at = datetime.now(UTC)
|
||||
if not document.document_metadata:
|
||||
document.document_metadata = {}
|
||||
document.document_metadata["virtual_path"] = file_path
|
||||
|
||||
summary_embedding = embed_texts([updated_content])[0]
|
||||
document.embedding = summary_embedding
|
||||
|
||||
await session.execute(delete(Chunk).where(Chunk.document_id == document.id))
|
||||
chunk_texts = chunk_text(updated_content)
|
||||
if chunk_texts:
|
||||
chunk_embeddings = embed_texts(chunk_texts)
|
||||
session.add_all(
|
||||
[
|
||||
Chunk(
|
||||
document_id=document.id, content=text, embedding=embedding
|
||||
)
|
||||
for text, embedding in zip(
|
||||
chunk_texts, chunk_embeddings, strict=True
|
||||
)
|
||||
]
|
||||
)
|
||||
await session.commit()
|
||||
return None
|
||||
|
||||
def _create_save_document_tool(self) -> BaseTool:
|
||||
"""Create save_document tool that persists a new document to the KB."""
|
||||
|
||||
def sync_save_document(
|
||||
title: Annotated[str, "Title for the new document."],
|
||||
content: Annotated[
|
||||
str,
|
||||
"Plain-text or markdown content to save. Do NOT include XML wrappers.",
|
||||
],
|
||||
runtime: ToolRuntime[None, FilesystemState],
|
||||
folder_path: Annotated[
|
||||
str,
|
||||
"Optional folder path under /documents/ (e.g. 'Work/Notes'). Created automatically.",
|
||||
] = "",
|
||||
) -> Command | str:
|
||||
if not content.strip():
|
||||
return "Error: content cannot be empty."
|
||||
file_name = re.sub(r'[\\/:*?"<>|]+', "_", title).strip() or "untitled"
|
||||
if not file_name.lower().endswith(".xml"):
|
||||
file_name = f"{file_name}.xml"
|
||||
folder = folder_path.strip().strip("/") if folder_path else ""
|
||||
virtual_path = (
|
||||
f"/documents/{folder}/{file_name}"
|
||||
if folder
|
||||
else f"/documents/{file_name}"
|
||||
)
|
||||
|
||||
persist_result = self._run_async_blocking(
|
||||
self._persist_new_document(file_path=virtual_path, content=content)
|
||||
)
|
||||
if isinstance(persist_result, str):
|
||||
return persist_result
|
||||
if isinstance(persist_result, dict) and persist_result.get("id"):
|
||||
dispatch_custom_event("document_created", persist_result)
|
||||
return f"Document '{title}' saved to knowledge base (path: {virtual_path})."
|
||||
|
||||
async def async_save_document(
|
||||
title: Annotated[str, "Title for the new document."],
|
||||
content: Annotated[
|
||||
str,
|
||||
"Plain-text or markdown content to save. Do NOT include XML wrappers.",
|
||||
],
|
||||
runtime: ToolRuntime[None, FilesystemState],
|
||||
folder_path: Annotated[
|
||||
str,
|
||||
"Optional folder path under /documents/ (e.g. 'Work/Notes'). Created automatically.",
|
||||
] = "",
|
||||
) -> Command | str:
|
||||
if not content.strip():
|
||||
return "Error: content cannot be empty."
|
||||
file_name = re.sub(r'[\\/:*?"<>|]+', "_", title).strip() or "untitled"
|
||||
if not file_name.lower().endswith(".xml"):
|
||||
file_name = f"{file_name}.xml"
|
||||
folder = folder_path.strip().strip("/") if folder_path else ""
|
||||
virtual_path = (
|
||||
f"/documents/{folder}/{file_name}"
|
||||
if folder
|
||||
else f"/documents/{file_name}"
|
||||
)
|
||||
|
||||
persist_result = await self._persist_new_document(
|
||||
file_path=virtual_path, content=content
|
||||
)
|
||||
if isinstance(persist_result, str):
|
||||
return persist_result
|
||||
if isinstance(persist_result, dict) and persist_result.get("id"):
|
||||
dispatch_custom_event("document_created", persist_result)
|
||||
return f"Document '{title}' saved to knowledge base (path: {virtual_path})."
|
||||
|
||||
return StructuredTool.from_function(
|
||||
name="save_document",
|
||||
description=SURFSENSE_SAVE_DOCUMENT_TOOL_DESCRIPTION,
|
||||
func=sync_save_document,
|
||||
coroutine=async_save_document,
|
||||
)
|
||||
|
||||
def _create_write_file_tool(self) -> BaseTool:
|
||||
"""Create write_file — ephemeral for /documents/*, persisted otherwise."""
|
||||
tool_description = (
|
||||
self._custom_tool_descriptions.get("write_file")
|
||||
or SURFSENSE_WRITE_FILE_TOOL_DESCRIPTION
|
||||
)
|
||||
|
||||
def sync_write_file(
|
||||
file_path: Annotated[
|
||||
str,
|
||||
"Absolute path where the file should be created. Must be absolute, not relative.",
|
||||
],
|
||||
content: Annotated[
|
||||
str,
|
||||
"The text content to write to the file. This parameter is required.",
|
||||
],
|
||||
runtime: ToolRuntime[None, FilesystemState],
|
||||
) -> Command | str:
|
||||
resolved_backend = self._get_backend(runtime)
|
||||
try:
|
||||
validated_path = validate_path(file_path)
|
||||
except ValueError as exc:
|
||||
return f"Error: {exc}"
|
||||
res: WriteResult = resolved_backend.write(validated_path, content)
|
||||
if res.error:
|
||||
return res.error
|
||||
|
||||
if not self._is_kb_document(validated_path):
|
||||
persist_result = self._run_async_blocking(
|
||||
self._persist_new_document(
|
||||
file_path=validated_path, content=content
|
||||
)
|
||||
)
|
||||
if isinstance(persist_result, str):
|
||||
return persist_result
|
||||
if isinstance(persist_result, dict) and persist_result.get("id"):
|
||||
dispatch_custom_event("document_created", persist_result)
|
||||
|
||||
if res.files_update is not None:
|
||||
return Command(
|
||||
update={
|
||||
"files": res.files_update,
|
||||
"messages": [
|
||||
ToolMessage(
|
||||
content=f"Updated file {res.path}",
|
||||
tool_call_id=runtime.tool_call_id,
|
||||
)
|
||||
],
|
||||
}
|
||||
)
|
||||
return f"Updated file {res.path}"
|
||||
|
||||
async def async_write_file(
|
||||
file_path: Annotated[
|
||||
str,
|
||||
"Absolute path where the file should be created. Must be absolute, not relative.",
|
||||
],
|
||||
content: Annotated[
|
||||
str,
|
||||
"The text content to write to the file. This parameter is required.",
|
||||
],
|
||||
runtime: ToolRuntime[None, FilesystemState],
|
||||
) -> Command | str:
|
||||
resolved_backend = self._get_backend(runtime)
|
||||
try:
|
||||
validated_path = validate_path(file_path)
|
||||
except ValueError as exc:
|
||||
return f"Error: {exc}"
|
||||
res: WriteResult = await resolved_backend.awrite(validated_path, content)
|
||||
if res.error:
|
||||
return res.error
|
||||
|
||||
if not self._is_kb_document(validated_path):
|
||||
persist_result = await self._persist_new_document(
|
||||
file_path=validated_path,
|
||||
content=content,
|
||||
)
|
||||
if isinstance(persist_result, str):
|
||||
return persist_result
|
||||
if isinstance(persist_result, dict) and persist_result.get("id"):
|
||||
dispatch_custom_event("document_created", persist_result)
|
||||
|
||||
if res.files_update is not None:
|
||||
return Command(
|
||||
update={
|
||||
"files": res.files_update,
|
||||
"messages": [
|
||||
ToolMessage(
|
||||
content=f"Updated file {res.path}",
|
||||
tool_call_id=runtime.tool_call_id,
|
||||
)
|
||||
],
|
||||
}
|
||||
)
|
||||
return f"Updated file {res.path}"
|
||||
|
||||
return StructuredTool.from_function(
|
||||
name="write_file",
|
||||
description=tool_description,
|
||||
func=sync_write_file,
|
||||
coroutine=async_write_file,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _is_kb_document(path: str) -> bool:
|
||||
"""Return True for paths under /documents/ (KB-sourced, XML-wrapped)."""
|
||||
return path.startswith("/documents/")
|
||||
|
||||
def _create_edit_file_tool(self) -> BaseTool:
|
||||
"""Create edit_file with DB persistence (skipped for KB documents)."""
|
||||
tool_description = (
|
||||
self._custom_tool_descriptions.get("edit_file")
|
||||
or SURFSENSE_EDIT_FILE_TOOL_DESCRIPTION
|
||||
)
|
||||
|
||||
def sync_edit_file(
|
||||
file_path: Annotated[
|
||||
str,
|
||||
"Absolute path to the file to edit. Must be absolute, not relative.",
|
||||
],
|
||||
old_string: Annotated[
|
||||
str,
|
||||
"The exact text to find and replace. Must be unique in the file unless replace_all is True.",
|
||||
],
|
||||
new_string: Annotated[
|
||||
str,
|
||||
"The text to replace old_string with. Must be different from old_string.",
|
||||
],
|
||||
runtime: ToolRuntime[None, FilesystemState],
|
||||
*,
|
||||
replace_all: Annotated[
|
||||
bool,
|
||||
"If True, replace all occurrences of old_string. If False (default), old_string must be unique.",
|
||||
] = False,
|
||||
) -> Command | str:
|
||||
resolved_backend = self._get_backend(runtime)
|
||||
try:
|
||||
validated_path = validate_path(file_path)
|
||||
except ValueError as exc:
|
||||
return f"Error: {exc}"
|
||||
res: EditResult = resolved_backend.edit(
|
||||
validated_path,
|
||||
old_string,
|
||||
new_string,
|
||||
replace_all=replace_all,
|
||||
)
|
||||
if res.error:
|
||||
return res.error
|
||||
|
||||
if not self._is_kb_document(validated_path):
|
||||
read_result = resolved_backend.read(
|
||||
validated_path, offset=0, limit=200000
|
||||
)
|
||||
if read_result.error or read_result.file_data is None:
|
||||
return f"Error: could not reload edited file '{validated_path}' for persistence."
|
||||
updated_content = read_result.file_data["content"]
|
||||
persist_result = self._run_async_blocking(
|
||||
self._persist_edited_document(
|
||||
file_path=validated_path,
|
||||
updated_content=updated_content,
|
||||
)
|
||||
)
|
||||
if isinstance(persist_result, str):
|
||||
return persist_result
|
||||
|
||||
if res.files_update is not None:
|
||||
return Command(
|
||||
update={
|
||||
"files": res.files_update,
|
||||
"messages": [
|
||||
ToolMessage(
|
||||
content=f"Successfully replaced {res.occurrences} instance(s) of the string in '{res.path}'",
|
||||
tool_call_id=runtime.tool_call_id,
|
||||
)
|
||||
],
|
||||
}
|
||||
)
|
||||
return f"Successfully replaced {res.occurrences} instance(s) of the string in '{res.path}'"
|
||||
|
||||
async def async_edit_file(
|
||||
file_path: Annotated[
|
||||
str,
|
||||
"Absolute path to the file to edit. Must be absolute, not relative.",
|
||||
],
|
||||
old_string: Annotated[
|
||||
str,
|
||||
"The exact text to find and replace. Must be unique in the file unless replace_all is True.",
|
||||
],
|
||||
new_string: Annotated[
|
||||
str,
|
||||
"The text to replace old_string with. Must be different from old_string.",
|
||||
],
|
||||
runtime: ToolRuntime[None, FilesystemState],
|
||||
*,
|
||||
replace_all: Annotated[
|
||||
bool,
|
||||
"If True, replace all occurrences of old_string. If False (default), old_string must be unique.",
|
||||
] = False,
|
||||
) -> Command | str:
|
||||
resolved_backend = self._get_backend(runtime)
|
||||
try:
|
||||
validated_path = validate_path(file_path)
|
||||
except ValueError as exc:
|
||||
return f"Error: {exc}"
|
||||
res: EditResult = await resolved_backend.aedit(
|
||||
validated_path,
|
||||
old_string,
|
||||
new_string,
|
||||
replace_all=replace_all,
|
||||
)
|
||||
if res.error:
|
||||
return res.error
|
||||
|
||||
if not self._is_kb_document(validated_path):
|
||||
read_result = await resolved_backend.aread(
|
||||
validated_path, offset=0, limit=200000
|
||||
)
|
||||
if read_result.error or read_result.file_data is None:
|
||||
return f"Error: could not reload edited file '{validated_path}' for persistence."
|
||||
updated_content = read_result.file_data["content"]
|
||||
persist_error = await self._persist_edited_document(
|
||||
file_path=validated_path,
|
||||
updated_content=updated_content,
|
||||
)
|
||||
if persist_error:
|
||||
return persist_error
|
||||
|
||||
if res.files_update is not None:
|
||||
return Command(
|
||||
update={
|
||||
"files": res.files_update,
|
||||
"messages": [
|
||||
ToolMessage(
|
||||
content=f"Successfully replaced {res.occurrences} instance(s) of the string in '{res.path}'",
|
||||
tool_call_id=runtime.tool_call_id,
|
||||
)
|
||||
],
|
||||
}
|
||||
)
|
||||
return f"Successfully replaced {res.occurrences} instance(s) of the string in '{res.path}'"
|
||||
|
||||
return StructuredTool.from_function(
|
||||
name="edit_file",
|
||||
description=tool_description,
|
||||
func=sync_edit_file,
|
||||
coroutine=async_edit_file,
|
||||
)
|
||||
|
|
@ -0,0 +1,414 @@
|
|||
"""Knowledge-base pre-search middleware for the SurfSense new chat agent.
|
||||
|
||||
This middleware runs before the main agent loop and seeds a virtual filesystem
|
||||
(`files` state) with relevant documents retrieved via hybrid search. On each
|
||||
turn the filesystem is *expanded* — new results merge with documents loaded
|
||||
during prior turns — and a synthetic ``ls`` result is injected into the message
|
||||
history so the LLM is immediately aware of the current filesystem structure.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import uuid
|
||||
from collections.abc import Sequence
|
||||
from typing import Any
|
||||
|
||||
from langchain.agents.middleware import AgentMiddleware, AgentState
|
||||
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, ToolMessage
|
||||
from langgraph.runtime import Runtime
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.db import NATIVE_TO_LEGACY_DOCTYPE, Document, Folder, shielded_async_session
|
||||
from app.retriever.chunks_hybrid_search import ChucksHybridSearchRetriever
|
||||
from app.utils.document_converters import embed_texts
|
||||
from app.utils.perf import get_perf_logger
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
_perf_log = get_perf_logger()
|
||||
|
||||
|
||||
def _extract_text_from_message(message: BaseMessage) -> str:
|
||||
"""Extract plain text from a message content."""
|
||||
content = getattr(message, "content", "")
|
||||
if isinstance(content, str):
|
||||
return content
|
||||
if isinstance(content, list):
|
||||
parts: list[str] = []
|
||||
for item in content:
|
||||
if isinstance(item, str):
|
||||
parts.append(item)
|
||||
elif isinstance(item, dict) and item.get("type") == "text":
|
||||
parts.append(str(item.get("text", "")))
|
||||
return "\n".join(p for p in parts if p)
|
||||
return str(content)
|
||||
|
||||
|
||||
def _safe_filename(value: str, *, fallback: str = "untitled.xml") -> str:
|
||||
"""Convert arbitrary text into a filesystem-safe filename."""
|
||||
name = re.sub(r"[\\/:*?\"<>|]+", "_", value).strip()
|
||||
name = re.sub(r"\s+", " ", name)
|
||||
if not name:
|
||||
name = fallback
|
||||
if len(name) > 180:
|
||||
name = name[:180].rstrip()
|
||||
if not name.lower().endswith(".xml"):
|
||||
name = f"{name}.xml"
|
||||
return name
|
||||
|
||||
|
||||
def _build_document_xml(
|
||||
document: dict[str, Any],
|
||||
matched_chunk_ids: set[int] | None = None,
|
||||
) -> str:
|
||||
"""Build citation-friendly XML with a ``<chunk_index>`` for smart seeking.
|
||||
|
||||
The ``<chunk_index>`` at the top of each document lists every chunk with its
|
||||
line range inside ``<document_content>`` and flags chunks that directly
|
||||
matched the search query (``matched="true"``). This lets the LLM jump
|
||||
straight to the most relevant section via ``read_file(offset=…, limit=…)``
|
||||
instead of reading sequentially from the start.
|
||||
"""
|
||||
matched = matched_chunk_ids or set()
|
||||
|
||||
doc_meta = document.get("document") or {}
|
||||
metadata = (doc_meta.get("metadata") or {}) if isinstance(doc_meta, dict) else {}
|
||||
document_id = doc_meta.get("id", document.get("document_id", "unknown"))
|
||||
document_type = doc_meta.get("document_type", document.get("source", "UNKNOWN"))
|
||||
title = doc_meta.get("title") or metadata.get("title") or "Untitled Document"
|
||||
url = (
|
||||
metadata.get("url") or metadata.get("source") or metadata.get("page_url") or ""
|
||||
)
|
||||
metadata_json = json.dumps(metadata, ensure_ascii=False)
|
||||
|
||||
# --- 1. Metadata header (fixed structure) ---
|
||||
metadata_lines: list[str] = [
|
||||
"<document>",
|
||||
"<document_metadata>",
|
||||
f" <document_id>{document_id}</document_id>",
|
||||
f" <document_type>{document_type}</document_type>",
|
||||
f" <title><![CDATA[{title}]]></title>",
|
||||
f" <url><![CDATA[{url}]]></url>",
|
||||
f" <metadata_json><![CDATA[{metadata_json}]]></metadata_json>",
|
||||
"</document_metadata>",
|
||||
"",
|
||||
]
|
||||
|
||||
# --- 2. Pre-build chunk XML strings to compute line counts ---
|
||||
chunks = document.get("chunks") or []
|
||||
chunk_entries: list[tuple[int | None, str]] = [] # (chunk_id, xml_string)
|
||||
if isinstance(chunks, list):
|
||||
for chunk in chunks:
|
||||
if not isinstance(chunk, dict):
|
||||
continue
|
||||
chunk_id = chunk.get("chunk_id") or chunk.get("id")
|
||||
chunk_content = str(chunk.get("content", "")).strip()
|
||||
if not chunk_content:
|
||||
continue
|
||||
if chunk_id is None:
|
||||
xml = f" <chunk><![CDATA[{chunk_content}]]></chunk>"
|
||||
else:
|
||||
xml = f" <chunk id='{chunk_id}'><![CDATA[{chunk_content}]]></chunk>"
|
||||
chunk_entries.append((chunk_id, xml))
|
||||
|
||||
# --- 3. Compute line numbers for every chunk ---
|
||||
# Layout (1-indexed lines for read_file):
|
||||
# metadata_lines -> len(metadata_lines) lines
|
||||
# <chunk_index> -> 1 line
|
||||
# index entries -> len(chunk_entries) lines
|
||||
# </chunk_index> -> 1 line
|
||||
# (empty line) -> 1 line
|
||||
# <document_content> -> 1 line
|
||||
# chunk xml lines…
|
||||
# </document_content> -> 1 line
|
||||
# </document> -> 1 line
|
||||
index_overhead = (
|
||||
1 + len(chunk_entries) + 1 + 1 + 1
|
||||
) # tags + empty + <document_content>
|
||||
first_chunk_line = len(metadata_lines) + index_overhead + 1 # 1-indexed
|
||||
|
||||
current_line = first_chunk_line
|
||||
index_entry_lines: list[str] = []
|
||||
for cid, xml_str in chunk_entries:
|
||||
num_lines = xml_str.count("\n") + 1
|
||||
end_line = current_line + num_lines - 1
|
||||
matched_attr = ' matched="true"' if cid is not None and cid in matched else ""
|
||||
if cid is not None:
|
||||
index_entry_lines.append(
|
||||
f' <entry chunk_id="{cid}" lines="{current_line}-{end_line}"{matched_attr}/>'
|
||||
)
|
||||
else:
|
||||
index_entry_lines.append(
|
||||
f' <entry lines="{current_line}-{end_line}"{matched_attr}/>'
|
||||
)
|
||||
current_line = end_line + 1
|
||||
|
||||
# --- 4. Assemble final XML ---
|
||||
lines = metadata_lines.copy()
|
||||
lines.append("<chunk_index>")
|
||||
lines.extend(index_entry_lines)
|
||||
lines.append("</chunk_index>")
|
||||
lines.append("")
|
||||
lines.append("<document_content>")
|
||||
for _, xml_str in chunk_entries:
|
||||
lines.append(xml_str)
|
||||
lines.extend(["</document_content>", "</document>"])
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
async def _get_folder_paths(
|
||||
session: AsyncSession, search_space_id: int
|
||||
) -> dict[int, str]:
|
||||
"""Return a map of folder_id -> virtual folder path under /documents."""
|
||||
result = await session.execute(
|
||||
select(Folder.id, Folder.name, Folder.parent_id).where(
|
||||
Folder.search_space_id == search_space_id
|
||||
)
|
||||
)
|
||||
rows = result.all()
|
||||
by_id = {row.id: {"name": row.name, "parent_id": row.parent_id} for row in rows}
|
||||
|
||||
cache: dict[int, str] = {}
|
||||
|
||||
def resolve_path(folder_id: int) -> str:
|
||||
if folder_id in cache:
|
||||
return cache[folder_id]
|
||||
parts: list[str] = []
|
||||
cursor: int | None = folder_id
|
||||
visited: set[int] = set()
|
||||
while cursor is not None and cursor in by_id and cursor not in visited:
|
||||
visited.add(cursor)
|
||||
entry = by_id[cursor]
|
||||
parts.append(
|
||||
_safe_filename(str(entry["name"]), fallback="folder").removesuffix(
|
||||
".xml"
|
||||
)
|
||||
)
|
||||
cursor = entry["parent_id"]
|
||||
parts.reverse()
|
||||
path = "/documents/" + "/".join(parts) if parts else "/documents"
|
||||
cache[folder_id] = path
|
||||
return path
|
||||
|
||||
for folder_id in by_id:
|
||||
resolve_path(folder_id)
|
||||
return cache
|
||||
|
||||
|
||||
def _build_synthetic_ls(
|
||||
existing_files: dict[str, Any] | None,
|
||||
new_files: dict[str, Any],
|
||||
) -> tuple[AIMessage, ToolMessage]:
|
||||
"""Build a synthetic ls("/documents") tool-call + result for the LLM context.
|
||||
|
||||
Paths are listed with *new* (rank-ordered) files first, then existing files
|
||||
that were already in state from prior turns.
|
||||
"""
|
||||
merged: dict[str, Any] = {**(existing_files or {}), **new_files}
|
||||
doc_paths = [
|
||||
p for p, v in merged.items() if p.startswith("/documents/") and v is not None
|
||||
]
|
||||
|
||||
new_set = set(new_files)
|
||||
new_paths = [p for p in doc_paths if p in new_set]
|
||||
old_paths = [p for p in doc_paths if p not in new_set]
|
||||
ordered = new_paths + old_paths
|
||||
|
||||
tool_call_id = f"auto_ls_{uuid.uuid4().hex[:12]}"
|
||||
ai_msg = AIMessage(
|
||||
content="",
|
||||
tool_calls=[{"name": "ls", "args": {"path": "/documents"}, "id": tool_call_id}],
|
||||
)
|
||||
tool_msg = ToolMessage(
|
||||
content=str(ordered) if ordered else "No documents found.",
|
||||
tool_call_id=tool_call_id,
|
||||
)
|
||||
return ai_msg, tool_msg
|
||||
|
||||
|
||||
def _resolve_search_types(
|
||||
available_connectors: list[str] | None,
|
||||
available_document_types: list[str] | None,
|
||||
) -> list[str] | None:
|
||||
"""Build a flat list of document-type strings for the chunk retriever.
|
||||
|
||||
Includes legacy equivalents from ``NATIVE_TO_LEGACY_DOCTYPE`` so that
|
||||
old documents indexed under Composio names are still found.
|
||||
|
||||
Returns ``None`` when no filtering is desired (search all types).
|
||||
"""
|
||||
types: set[str] = set()
|
||||
if available_document_types:
|
||||
types.update(available_document_types)
|
||||
if available_connectors:
|
||||
types.update(available_connectors)
|
||||
if not types:
|
||||
return None
|
||||
|
||||
expanded: set[str] = set(types)
|
||||
for t in types:
|
||||
legacy = NATIVE_TO_LEGACY_DOCTYPE.get(t)
|
||||
if legacy:
|
||||
expanded.add(legacy)
|
||||
return list(expanded) if expanded else None
|
||||
|
||||
|
||||
async def search_knowledge_base(
|
||||
*,
|
||||
query: str,
|
||||
search_space_id: int,
|
||||
available_connectors: list[str] | None = None,
|
||||
available_document_types: list[str] | None = None,
|
||||
top_k: int = 10,
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Run a single unified hybrid search against the knowledge base.
|
||||
|
||||
Uses one ``ChucksHybridSearchRetriever`` call across all document types
|
||||
instead of fanning out per-connector. This reduces the number of DB
|
||||
queries from ~10 to 2 (one RRF query + one chunk fetch).
|
||||
"""
|
||||
if not query:
|
||||
return []
|
||||
|
||||
[embedding] = embed_texts([query])
|
||||
|
||||
doc_types = _resolve_search_types(available_connectors, available_document_types)
|
||||
retriever_top_k = min(top_k * 3, 30)
|
||||
|
||||
async with shielded_async_session() as session:
|
||||
retriever = ChucksHybridSearchRetriever(session)
|
||||
results = await retriever.hybrid_search(
|
||||
query_text=query,
|
||||
top_k=retriever_top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type=doc_types,
|
||||
query_embedding=embedding.tolist(),
|
||||
)
|
||||
|
||||
return results[:top_k]
|
||||
|
||||
|
||||
async def build_scoped_filesystem(
|
||||
*,
|
||||
documents: Sequence[dict[str, Any]],
|
||||
search_space_id: int,
|
||||
) -> dict[str, dict[str, str]]:
|
||||
"""Build a StateBackend-compatible files dict from search results."""
|
||||
async with shielded_async_session() as session:
|
||||
folder_paths = await _get_folder_paths(session, search_space_id)
|
||||
doc_ids = [
|
||||
(doc.get("document") or {}).get("id")
|
||||
for doc in documents
|
||||
if isinstance(doc, dict)
|
||||
]
|
||||
doc_ids = [doc_id for doc_id in doc_ids if isinstance(doc_id, int)]
|
||||
folder_by_doc_id: dict[int, int | None] = {}
|
||||
if doc_ids:
|
||||
doc_rows = await session.execute(
|
||||
select(Document.id, Document.folder_id).where(
|
||||
Document.search_space_id == search_space_id,
|
||||
Document.id.in_(doc_ids),
|
||||
)
|
||||
)
|
||||
folder_by_doc_id = {
|
||||
row.id: row.folder_id for row in doc_rows.all() if row.id is not None
|
||||
}
|
||||
|
||||
files: dict[str, dict[str, str]] = {}
|
||||
for document in documents:
|
||||
doc_meta = document.get("document") or {}
|
||||
title = str(doc_meta.get("title") or "untitled")
|
||||
doc_id = doc_meta.get("id")
|
||||
folder_id = folder_by_doc_id.get(doc_id) if isinstance(doc_id, int) else None
|
||||
base_folder = folder_paths.get(folder_id, "/documents")
|
||||
file_name = _safe_filename(title)
|
||||
path = f"{base_folder}/{file_name}"
|
||||
matched_ids = set(document.get("matched_chunk_ids") or [])
|
||||
xml_content = _build_document_xml(document, matched_chunk_ids=matched_ids)
|
||||
files[path] = {
|
||||
"content": xml_content.split("\n"),
|
||||
"encoding": "utf-8",
|
||||
"created_at": "",
|
||||
"modified_at": "",
|
||||
}
|
||||
return files
|
||||
|
||||
|
||||
class KnowledgeBaseSearchMiddleware(AgentMiddleware): # type: ignore[type-arg]
|
||||
"""Pre-agent middleware that always searches the KB and seeds a scoped filesystem."""
|
||||
|
||||
tools = ()
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
search_space_id: int,
|
||||
available_connectors: list[str] | None = None,
|
||||
available_document_types: list[str] | None = None,
|
||||
top_k: int = 10,
|
||||
) -> None:
|
||||
self.search_space_id = search_space_id
|
||||
self.available_connectors = available_connectors
|
||||
self.available_document_types = available_document_types
|
||||
self.top_k = top_k
|
||||
|
||||
def before_agent( # type: ignore[override]
|
||||
self,
|
||||
state: AgentState,
|
||||
runtime: Runtime[Any],
|
||||
) -> dict[str, Any] | None:
|
||||
try:
|
||||
loop = asyncio.get_running_loop()
|
||||
if loop.is_running():
|
||||
return None
|
||||
except RuntimeError:
|
||||
pass
|
||||
return asyncio.run(self.abefore_agent(state, runtime))
|
||||
|
||||
async def abefore_agent( # type: ignore[override]
|
||||
self,
|
||||
state: AgentState,
|
||||
runtime: Runtime[Any],
|
||||
) -> dict[str, Any] | None:
|
||||
del runtime
|
||||
messages = state.get("messages") or []
|
||||
if not messages:
|
||||
return None
|
||||
last_message = messages[-1]
|
||||
if not isinstance(last_message, HumanMessage):
|
||||
return None
|
||||
|
||||
user_text = _extract_text_from_message(last_message).strip()
|
||||
if not user_text:
|
||||
return None
|
||||
|
||||
t0 = _perf_log and asyncio.get_event_loop().time()
|
||||
existing_files = state.get("files")
|
||||
|
||||
search_results = await search_knowledge_base(
|
||||
query=user_text,
|
||||
search_space_id=self.search_space_id,
|
||||
available_connectors=self.available_connectors,
|
||||
available_document_types=self.available_document_types,
|
||||
top_k=self.top_k,
|
||||
)
|
||||
new_files = await build_scoped_filesystem(
|
||||
documents=search_results,
|
||||
search_space_id=self.search_space_id,
|
||||
)
|
||||
|
||||
ai_msg, tool_msg = _build_synthetic_ls(existing_files, new_files)
|
||||
|
||||
if t0 is not None:
|
||||
_perf_log.info(
|
||||
"[kb_fs_middleware] completed in %.3fs query=%r new_files=%d total=%d",
|
||||
asyncio.get_event_loop().time() - t0,
|
||||
user_text[:80],
|
||||
len(new_files),
|
||||
len(new_files) + len(existing_files or {}),
|
||||
)
|
||||
return {"files": new_files, "messages": [ai_msg, tool_msg]}
@@ -25,6 +25,21 @@ When writing mathematical formulas or equations, ALWAYS use LaTeX notation. NEVE
NEVER expose internal tool parameter names, backend IDs, or implementation details to the user. Always use natural, user-friendly language instead.

<knowledge_base_only_policy>
CRITICAL RULE — KNOWLEDGE BASE FIRST, NEVER DEFAULT TO GENERAL KNOWLEDGE:
- You MUST answer questions ONLY using information retrieved from the user's knowledge base, web search results, scraped webpages, or other tool outputs.
- You MUST NOT answer factual or informational questions from your own training data or general knowledge unless the user explicitly grants permission.
- If the knowledge base search returns no relevant results AND no other tool provides the answer, you MUST:
  1. Inform the user that you could not find relevant information in their knowledge base.
  2. Ask the user: "Would you like me to answer from my general knowledge instead?"
  3. ONLY provide a general-knowledge answer AFTER the user explicitly says yes.
- This policy does NOT apply to:
  * Casual conversation, greetings, or meta-questions about SurfSense itself (e.g., "what can you do?")
  * Formatting, summarization, or analysis of content already present in the conversation
  * Following user instructions that are clearly task-oriented (e.g., "rewrite this in bullet points")
  * Tool-usage actions like generating reports, podcasts, images, or scraping webpages
</knowledge_base_only_policy>

</system_instruction>
"""

@@ -41,6 +56,21 @@ When writing mathematical formulas or equations, ALWAYS use LaTeX notation. NEVE
NEVER expose internal tool parameter names, backend IDs, or implementation details to the user. Always use natural, user-friendly language instead.

<knowledge_base_only_policy>
CRITICAL RULE — KNOWLEDGE BASE FIRST, NEVER DEFAULT TO GENERAL KNOWLEDGE:
- You MUST answer questions ONLY using information retrieved from the team's shared knowledge base, web search results, scraped webpages, or other tool outputs.
- You MUST NOT answer factual or informational questions from your own training data or general knowledge unless a team member explicitly grants permission.
- If the knowledge base search returns no relevant results AND no other tool provides the answer, you MUST:
  1. Inform the team that you could not find relevant information in the shared knowledge base.
  2. Ask: "Would you like me to answer from my general knowledge instead?"
  3. ONLY provide a general-knowledge answer AFTER a team member explicitly says yes.
- This policy does NOT apply to:
  * Casual conversation, greetings, or meta-questions about SurfSense itself (e.g., "what can you do?")
  * Formatting, summarization, or analysis of content already present in the conversation
  * Following user instructions that are clearly task-oriented (e.g., "rewrite this in bullet points")
  * Tool-usage actions like generating reports, podcasts, images, or scraping webpages
</knowledge_base_only_policy>

</system_instruction>
"""

@@ -67,15 +97,6 @@ _TOOLS_PREAMBLE = """
<tools>
You have access to the following tools:

CRITICAL BEHAVIORAL RULE — SEARCH FIRST, ANSWER LATER:
For ANY user query that is ambiguous, open-ended, or could potentially have relevant context in the
knowledge base, you MUST call `search_knowledge_base` BEFORE attempting to answer from your own
general knowledge. This includes (but is not limited to) questions about concepts, topics, projects,
people, events, recommendations, or anything the user might have stored notes/documents about.
Only fall back to your own general knowledge if the search returns NO relevant results.
Do NOT skip the search and answer directly — the user's knowledge base may contain personalized,
up-to-date, or domain-specific information that is more relevant than your general training data.

IMPORTANT: You can ONLY use the tools listed below. If a capability is not listed here, you do NOT have it.
Do NOT claim you can do something if the corresponding tool is not listed.

@@ -92,29 +113,6 @@ _TOOL_INSTRUCTIONS["search_surfsense_docs"] = """
- Returns: Documentation content with chunk IDs for citations (prefixed with 'doc-', e.g., [citation:doc-123])
"""

_TOOL_INSTRUCTIONS["search_knowledge_base"] = """
- search_knowledge_base: Search the user's personal knowledge base for relevant information.
  - DEFAULT ACTION: For any user question or ambiguous query, ALWAYS call this tool first to check
    for relevant context before answering from general knowledge. When in doubt, search.
  - IMPORTANT: When searching for information (meetings, schedules, notes, tasks, etc.), ALWAYS search broadly
    across ALL sources first by omitting connectors_to_search. The user may store information in various places
    including calendar apps, note-taking apps (Obsidian, Notion), chat apps (Slack, Discord), and more.
  - This tool searches ONLY local/indexed data (uploaded files, Notion, Slack, browser extension captures, etc.).
    For real-time web search (current events, news, live data), use the `web_search` tool instead.
  - FALLBACK BEHAVIOR: If the search returns no relevant results, you MAY then answer using your own
    general knowledge, but clearly indicate that no matching information was found in the knowledge base.
  - Only narrow to specific connectors if the user explicitly asks (e.g., "check my Slack" or "in my calendar").
  - Personal notes in Obsidian, Notion, or NOTE often contain schedules, meeting times, reminders, and other
    important information that may not be in calendars.
  - Args:
    - query: The search query - be specific and include key terms
    - top_k: Number of results to retrieve (default: 10)
    - start_date: Optional ISO date/datetime (e.g. "2025-12-12" or "2025-12-12T00:00:00+00:00")
    - end_date: Optional ISO date/datetime (e.g. "2025-12-19" or "2025-12-19T23:59:59+00:00")
    - connectors_to_search: Optional list of connector enums to search. If omitted, searches all.
  - Returns: Formatted string with relevant documents and their content
"""

_TOOL_INSTRUCTIONS["generate_podcast"] = """
- generate_podcast: Generate an audio podcast from provided content.
  - Use this when the user asks to create, generate, or make a podcast.

@@ -163,8 +161,8 @@ _TOOL_INSTRUCTIONS["generate_report"] = """
    * For source_strategy="kb_search": Can be empty or minimal — the tool handles searching internally.
    * For source_strategy="auto": Include what you have; the tool searches KB if it's not enough.
  - source_strategy: Controls how the tool collects source material. One of:
    * "conversation" — The conversation already contains enough context (prior Q&A, discussion, pasted text, scraped pages). Pass a thorough summary as source_content. Do NOT call search_knowledge_base separately.
    * "kb_search" — The tool will search the knowledge base internally. Provide search_queries with 1-5 targeted queries. Do NOT call search_knowledge_base separately.
    * "conversation" — The conversation already contains enough context (prior Q&A, discussion, pasted text, scraped pages). Pass a thorough summary as source_content.
    * "kb_search" — The tool will search the knowledge base internally. Provide search_queries with 1-5 targeted queries.
    * "auto" — Use source_content if sufficient, otherwise fall back to internal KB search using search_queries.
    * "provided" — Use only what is in source_content (default, backward-compatible).
  - search_queries: When source_strategy is "kb_search" or "auto", provide 1-5 specific search queries for the knowledge base. These should be precise, not just the topic name repeated.

@@ -176,11 +174,11 @@ _TOOL_INSTRUCTIONS["generate_report"] = """
  - The report is generated immediately in Markdown and displayed inline in the chat.
  - Export/download formats (PDF, DOCX, HTML, LaTeX, EPUB, ODT, plain text) are produced from the generated Markdown report.
  - SOURCE STRATEGY DECISION (HIGH PRIORITY — follow this exactly):
    * If the conversation already has substantive Q&A / discussion on the topic → use source_strategy="conversation" with a comprehensive summary as source_content. Do NOT call search_knowledge_base first.
    * If the user wants a report on a topic not yet discussed → use source_strategy="kb_search" with targeted search_queries. Do NOT call search_knowledge_base first.
    * If the conversation already has substantive Q&A / discussion on the topic → use source_strategy="conversation" with a comprehensive summary as source_content.
    * If the user wants a report on a topic not yet discussed → use source_strategy="kb_search" with targeted search_queries.
    * If you have some content but might need more → use source_strategy="auto" with both source_content and search_queries.
    * When revising an existing report (parent_report_id set) and the conversation has relevant context → use source_strategy="conversation". The revision will use the previous report content plus your source_content.
    * NEVER call search_knowledge_base and then pass its results to generate_report. The tool handles KB search internally.
    * NEVER run a separate KB lookup step and then pass those results to generate_report. The tool handles KB search internally.
  - AFTER CALLING THIS TOOL: Do NOT repeat, summarize, or reproduce the report content in the chat. The report is already displayed as an interactive card that the user can open, read, copy, and export. Simply confirm that the report was generated (e.g., "I've generated your report on [topic]. You can view the Markdown report now, and export it in various formats from the card."). NEVER write out the report text in the chat.
"""
@@ -204,7 +202,7 @@ _TOOL_INSTRUCTIONS["scrape_webpage"] = """
    * When a user asks to "get", "fetch", "pull", "grab", "scrape", or "read" content from a URL
    * When the user wants live/dynamic data from a specific webpage (e.g., tables, scores, stats, prices)
    * When a URL was mentioned earlier in the conversation and the user asks for its actual content
    * When search_knowledge_base returned insufficient data and the user wants more
    * When preloaded `/documents/` data is insufficient and the user wants more
  - Trigger scenarios:
    * "Read this article and summarize it"
    * "What does this page say about X?"

@@ -366,23 +364,6 @@ _MEMORY_TOOL_EXAMPLES: dict[str, dict[str, str]] = {
# Per-tool examples keyed by tool name. Only examples for enabled tools are included.
_TOOL_EXAMPLES: dict[str, str] = {}

_TOOL_EXAMPLES["search_knowledge_base"] = """
- User: "What time is the team meeting today?"
  - Call: `search_knowledge_base(query="team meeting time today")` (searches ALL sources - calendar, notes, Obsidian, etc.)
  - DO NOT limit to just calendar - the info might be in notes!
- User: "When is my gym session?"
  - Call: `search_knowledge_base(query="gym session time schedule")` (searches ALL sources)
- User: "Fetch all my notes and what's in them?"
  - Call: `search_knowledge_base(query="*", top_k=50, connectors_to_search=["NOTE"])`
- User: "What did I discuss on Slack last week about the React migration?"
  - Call: `search_knowledge_base(query="React migration", connectors_to_search=["SLACK_CONNECTOR"], start_date="YYYY-MM-DD", end_date="YYYY-MM-DD")`
- User: "Check my Obsidian notes for meeting notes"
  - Call: `search_knowledge_base(query="meeting notes", connectors_to_search=["OBSIDIAN_CONNECTOR"])`
- User: "search me current usd to inr rate"
  - Call: `web_search(query="current USD to INR exchange rate")`
  - Then answer using the returned live web results with citations.
"""

_TOOL_EXAMPLES["search_surfsense_docs"] = """
- User: "How do I install SurfSense?"
  - Call: `search_surfsense_docs(query="installation setup")`

@@ -400,8 +381,7 @@ _TOOL_EXAMPLES["generate_podcast"] = """
- User: "Create a podcast summary of this conversation"
  - Call: `generate_podcast(source_content="Complete conversation summary:\\n\\nUser asked about [topic 1]:\\n[Your detailed response]\\n\\nUser then asked about [topic 2]:\\n[Your detailed response]\\n\\n[Continue for all exchanges in the conversation]", podcast_title="Conversation Summary")`
- User: "Make a podcast about quantum computing"
  - First search: `search_knowledge_base(query="quantum computing")`
  - Then: `generate_podcast(source_content="Key insights about quantum computing from the knowledge base:\\n\\n[Comprehensive summary of all relevant search results with key facts, concepts, and findings]", podcast_title="Quantum Computing Explained")`
  - First explore `/documents/` (ls/glob/grep/read_file), then: `generate_podcast(source_content="Key insights about quantum computing from retrieved files:\\n\\n[Comprehensive summary of findings]", podcast_title="Quantum Computing Explained")`
"""

_TOOL_EXAMPLES["generate_video_presentation"] = """

@@ -410,8 +390,7 @@ _TOOL_EXAMPLES["generate_video_presentation"] = """
- User: "Create slides summarizing this conversation"
  - Call: `generate_video_presentation(source_content="Complete conversation summary:\\n\\nUser asked about [topic 1]:\\n[Your detailed response]\\n\\nUser then asked about [topic 2]:\\n[Your detailed response]\\n\\n[Continue for all exchanges in the conversation]", video_title="Conversation Summary")`
- User: "Make a video presentation about quantum computing"
  - First search: `search_knowledge_base(query="quantum computing")`
  - Then: `generate_video_presentation(source_content="Key insights about quantum computing from the knowledge base:\\n\\n[Comprehensive summary of all relevant search results with key facts, concepts, and findings]", video_title="Quantum Computing Explained")`
  - First explore `/documents/` (ls/glob/grep/read_file), then: `generate_video_presentation(source_content="Key insights about quantum computing from retrieved files:\\n\\n[Comprehensive summary of findings]", video_title="Quantum Computing Explained")`
"""

_TOOL_EXAMPLES["generate_report"] = """

@@ -471,7 +450,6 @@ _TOOL_EXAMPLES["web_search"] = """
# All tool names that have prompt instructions (order matters for prompt readability)
_ALL_TOOL_NAMES_ORDERED = [
    "search_surfsense_docs",
    "search_knowledge_base",
    "web_search",
    "generate_podcast",
    "generate_video_presentation",

@@ -650,87 +628,6 @@ However, from your video learning, it's important to note that asyncio is not su
</citation_instructions>
"""

# Sandbox / code execution instructions — appended when sandbox backend is enabled.
# Inspired by Claude's computer-use prompt, scoped to code execution & data analytics.
SANDBOX_EXECUTION_INSTRUCTIONS = """
<code_execution>
You have access to a secure, isolated Linux sandbox environment for running code and shell commands.
This gives you the `execute` tool alongside the standard filesystem tools (`ls`, `read_file`, `write_file`, `edit_file`, `glob`, `grep`).

## CRITICAL — CODE-FIRST RULE

ALWAYS prefer executing code over giving a text-only response when the user's request involves ANY of the following:
- **Creating a chart, plot, graph, or visualization** → Write Python code and generate the actual file. NEVER describe percentages or data in text and offer to "paste into Excel". Just produce the chart.
- **Data analysis, statistics, or computation** → Write code to compute the answer. Do not do math by hand in text.
- **Generating or transforming files** (CSV, PDF, images, etc.) → Write code to create the file.
- **Running, testing, or debugging code** → Execute it in the sandbox.

This applies even when you first retrieve data from the knowledge base. After `search_knowledge_base` returns relevant data, **immediately proceed to write and execute code** if the user's request matches any of the categories above. Do NOT stop at a text summary and wait for the user to ask you to "use Python" — that extra round-trip is a poor experience.

Example (CORRECT):
User: "Create a pie chart of my benefits"
→ 1. search_knowledge_base → retrieve benefits data
→ 2. Immediately execute Python code (matplotlib) to generate the pie chart
→ 3. Return the downloadable file + brief description

Example (WRONG):
User: "Create a pie chart of my benefits"
→ 1. search_knowledge_base → retrieve benefits data
→ 2. Print a text table with percentages and ask the user if they want a chart ← NEVER do this

## When to Use Code Execution

Use the sandbox when the task benefits from actually running code rather than just describing it:
- **Data analysis**: Load CSVs/JSON, compute statistics, filter/aggregate data, pivot tables
- **Visualization**: Generate charts and plots (matplotlib, plotly, seaborn)
- **Calculations**: Math, financial modeling, unit conversions, simulations
- **Code validation**: Run and test code snippets the user provides or asks about
- **File processing**: Parse, transform, or convert data files
- **Quick prototyping**: Demonstrate working code for the user's problem
- **Package exploration**: Install and test libraries the user is evaluating

## When NOT to Use Code Execution

Do not use the sandbox for:
- Answering factual questions from your own knowledge
- Summarizing or explaining concepts
- Simple formatting or text generation tasks
- Tasks that don't require running code to answer

## Package Management

- Use `pip install <package>` to install Python packages as needed
- Common data/analytics packages (pandas, numpy, matplotlib, scipy, scikit-learn) may need to be installed on first use
- Always verify a package installed successfully before using it

## Working Guidelines

- **Working directory**: The shell starts in the sandbox user's home directory (e.g. `/home/daytona`). Use **relative paths** or `/tmp/` for all files you create. NEVER write directly to `/home/` — that is the parent directory and is not writable. Use `pwd` if you need to discover the current working directory.
- **Iterative approach**: For complex tasks, break work into steps — write code, run it, check output, refine
- **Error handling**: If code fails, read the error, fix the issue, and retry. Don't just report the error without attempting a fix.
- **Show results**: When generating plots or outputs, present the key findings directly in your response. For plots, save to a file and describe the results.
- **Be efficient**: Install packages once per session. Combine related commands when possible.
- **Large outputs**: If command output is very large, use `head`, `tail`, or save to a file and read selectively.

## Sharing Generated Files

When your code creates output files (images, CSVs, PDFs, etc.) in the sandbox:
- **Print the absolute path** at the end of your script so the user can download the file. Example: `print("SANDBOX_FILE: /tmp/chart.png")`
- **DO NOT use markdown image syntax** for files created inside the sandbox. Sandbox files are not accessible via public URLs and will show "Image not available". The frontend automatically renders a download button from the `SANDBOX_FILE:` marker.
- You can output multiple files, one per line: `print("SANDBOX_FILE: /tmp/report.csv")`, `print("SANDBOX_FILE: /tmp/chart.png")`
- Always describe what the file contains in your response text so the user knows what they are downloading.
- IMPORTANT: Every `execute` call that saves a file MUST print the `SANDBOX_FILE: <path>` marker. Without it the user cannot download the file.

## Data Analytics Best Practices

When the user asks you to analyze data:
1. First, inspect the data structure (`head`, `shape`, `dtypes`, `describe()`)
2. Clean and validate before computing (handle nulls, check types)
3. Perform the analysis and present results clearly
4. Offer follow-up insights or visualizations when appropriate
</code_execution>
"""
# Anti-citation prompt - used when citations are disabled
# This explicitly tells the model NOT to include citations
SURFSENSE_NO_CITATION_INSTRUCTIONS = """

@@ -756,7 +653,6 @@ Your goal is to provide helpful, informative answers in a clean, readable format
def build_surfsense_system_prompt(
    today: datetime | None = None,
    thread_visibility: ChatVisibility | None = None,
    sandbox_enabled: bool = False,
    enabled_tool_names: set[str] | None = None,
    disabled_tool_names: set[str] | None = None,
) -> str:

@@ -767,12 +663,10 @@ def build_surfsense_system_prompt(
    - Default system instructions
    - Tools instructions (only for enabled tools)
    - Citation instructions enabled
    - Sandbox execution instructions (when sandbox_enabled=True)

    Args:
        today: Optional datetime for today's date (defaults to current UTC date)
        thread_visibility: Optional; when provided, used for conditional prompt (e.g. private vs shared memory wording). Defaults to private behavior when None.
        sandbox_enabled: Whether the sandbox backend is active (adds code execution instructions).
        enabled_tool_names: Set of tool names actually bound to the agent. When None all tools are included.
        disabled_tool_names: Set of tool names the user explicitly disabled. Included as a note so the model can inform the user.

@@ -786,13 +680,7 @@ def build_surfsense_system_prompt(
        visibility, enabled_tool_names, disabled_tool_names
    )
    citation_instructions = SURFSENSE_CITATION_INSTRUCTIONS
    sandbox_instructions = SANDBOX_EXECUTION_INSTRUCTIONS if sandbox_enabled else ""
    return (
        system_instructions
        + tools_instructions
        + citation_instructions
        + sandbox_instructions
    )
    return system_instructions + tools_instructions + citation_instructions


def build_configurable_system_prompt(

@@ -801,18 +689,16 @@ def build_configurable_system_prompt(
    citations_enabled: bool = True,
    today: datetime | None = None,
    thread_visibility: ChatVisibility | None = None,
    sandbox_enabled: bool = False,
    enabled_tool_names: set[str] | None = None,
    disabled_tool_names: set[str] | None = None,
) -> str:
    """
    Build a configurable SurfSense system prompt based on NewLLMConfig settings.

    The prompt is composed of up to four parts:
    The prompt is composed of three parts:
    1. System Instructions - either custom or default SURFSENSE_SYSTEM_INSTRUCTIONS
    2. Tools Instructions - only for enabled tools, with a note about disabled ones
    3. Citation Instructions - either SURFSENSE_CITATION_INSTRUCTIONS or SURFSENSE_NO_CITATION_INSTRUCTIONS
    4. Sandbox Execution Instructions - when sandbox_enabled=True

    Args:
        custom_system_instructions: Custom system instructions to use. If empty/None and

@@ -824,7 +710,6 @@ def build_configurable_system_prompt(
            anti-citation instructions (False).
        today: Optional datetime for today's date (defaults to current UTC date)
        thread_visibility: Optional; when provided, used for conditional prompt (e.g. private vs shared memory wording). Defaults to private behavior when None.
        sandbox_enabled: Whether the sandbox backend is active (adds code execution instructions).
        enabled_tool_names: Set of tool names actually bound to the agent. When None all tools are included.
        disabled_tool_names: Set of tool names the user explicitly disabled. Included as a note so the model can inform the user.

@@ -856,14 +741,7 @@ def build_configurable_system_prompt(
        else SURFSENSE_NO_CITATION_INSTRUCTIONS
    )

    sandbox_instructions = SANDBOX_EXECUTION_INSTRUCTIONS if sandbox_enabled else ""

    return (
        system_instructions
        + tools_instructions
        + citation_instructions
        + sandbox_instructions
    )
    return system_instructions + tools_instructions + citation_instructions
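(A hypothetical invocation of the builder, with invented argument values, showing how the parts are assembled; only the parameter names come from the signatures above:

    from datetime import datetime, timezone

    prompt = build_configurable_system_prompt(
        custom_system_instructions=None,  # fall back to the default instructions
        citations_enabled=True,           # picks SURFSENSE_CITATION_INSTRUCTIONS
        today=datetime.now(timezone.utc),
        enabled_tool_names={"generate_report", "web_search"},
        disabled_tool_names={"generate_podcast"},
    )
)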
def get_default_system_instructions() -> str:

@@ -5,7 +5,6 @@ This module contains all the tools available to the SurfSense agent.
To add a new tool, see the documentation in registry.py.

Available tools:
- search_knowledge_base: Search the user's personal knowledge base
- search_surfsense_docs: Search Surfsense documentation for usage help
- generate_podcast: Generate audio podcasts from content
- generate_video_presentation: Generate video presentations with slides and narration

@@ -20,7 +19,6 @@ Available tools:
from .generate_image import create_generate_image_tool
from .knowledge_base import (
    CONNECTOR_DESCRIPTIONS,
    create_search_knowledge_base_tool,
    format_documents_for_context,
    search_knowledge_base_async,
)

@@ -52,7 +50,6 @@ __all__ = [
    "create_recall_memory_tool",
    "create_save_memory_tool",
    "create_scrape_webpage_tool",
    "create_search_knowledge_base_tool",
    "create_search_surfsense_docs_tool",
    "format_documents_for_context",
    "get_all_tool_names",

@@ -273,9 +273,7 @@ def create_update_calendar_event_tool(
                final_new_start_datetime, context
            )
            if final_new_end_datetime is not None:
                update_body["end"] = _build_time_body(
                    final_new_end_datetime, context
                )
                update_body["end"] = _build_time_body(final_new_end_datetime, context)
            if final_new_description is not None:
                update_body["description"] = final_new_description
            if final_new_location is not None:

@@ -5,7 +5,6 @@ This module provides:
- Connector constants and normalization
- Async knowledge base search across multiple connectors
- Document formatting for LLM context
- Tool factory for creating search_knowledge_base tools
"""

import asyncio

@@ -16,8 +15,6 @@ import time
from datetime import datetime
from typing import Any

from langchain_core.tools import StructuredTool
from pydantic import BaseModel, Field
from sqlalchemy.ext.asyncio import AsyncSession

from app.db import NATIVE_TO_LEGACY_DOCTYPE, shielded_async_session

@@ -619,9 +616,76 @@ async def search_knowledge_base_async(
    perf = get_perf_logger()
    t0 = time.perf_counter()

    deduplicated = await search_knowledge_base_raw_async(
        query=query,
        search_space_id=search_space_id,
        db_session=db_session,
        connector_service=connector_service,
        connectors_to_search=connectors_to_search,
        top_k=top_k,
        start_date=start_date,
        end_date=end_date,
        available_connectors=available_connectors,
        available_document_types=available_document_types,
    )

    if not deduplicated:
        return "No documents found in the knowledge base. The search space has no indexed content yet."

    # Use browse chunk cap for degenerate queries, otherwise adaptive chunking.
    max_chunks_per_doc = (
        _BROWSE_MAX_CHUNKS_PER_DOC if _is_degenerate_query(query) else 0
    )
    output_budget = _compute_tool_output_budget(max_input_tokens)
    result = format_documents_for_context(
        deduplicated,
        max_chars=output_budget,
        max_chunks_per_doc=max_chunks_per_doc,
    )

    if len(result) > output_budget:
        perf.warning(
            "[kb_search] output STILL exceeds budget after format (%d > %d), "
            "hard truncation should have fired",
            len(result),
            output_budget,
        )

    perf.info(
        "[kb_search] TOTAL in %.3fs total_docs=%d deduped=%d output_chars=%d "
        "budget=%d max_input_tokens=%s space=%d",
        time.perf_counter() - t0,
        len(deduplicated),
        len(deduplicated),
        len(result),
        output_budget,
        max_input_tokens,
        search_space_id,
    )
    return result
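(The refactor separates retrieval from presentation. A sketch of the two consumers, where `session` and `svc` stand in for a real AsyncSession and ConnectorService, and the query and search-space values are invented:

    # Middleware path: raw dicts feed the scoped /documents/ filesystem.
    raw_docs = await search_knowledge_base_raw_async(
        query="project architecture overview",
        search_space_id=42,
        db_session=session,
        connector_service=svc,
    )
    files = await build_scoped_filesystem(documents=raw_docs, search_space_id=42)

    # Tool path: the wrapper formats the same raw results for the LLM.
    formatted = await search_knowledge_base_async(
        query="project architecture overview",
        search_space_id=42,
        db_session=session,
        connector_service=svc,
    )
)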
async def search_knowledge_base_raw_async(
    query: str,
    search_space_id: int,
    db_session: AsyncSession,
    connector_service: ConnectorService,
    connectors_to_search: list[str] | None = None,
    top_k: int = 10,
    start_date: datetime | None = None,
    end_date: datetime | None = None,
    available_connectors: list[str] | None = None,
    available_document_types: list[str] | None = None,
    query_embedding: list[float] | None = None,
) -> list[dict[str, Any]]:
    """Search knowledge base and return raw document dicts (no XML formatting)."""
    perf = get_perf_logger()
    t0 = time.perf_counter()
    all_documents: list[dict[str, Any]] = []

    # Resolve date range (default last 2 years)
    # Preserve the public signature for compatibility even if values are unused.
    _ = (db_session, connector_service)

    from app.agents.new_chat.utils import resolve_date_range

    resolved_start_date, resolved_end_date = resolve_date_range(

@@ -631,144 +695,76 @@ async def search_knowledge_base_async(

    connectors = _normalize_connectors(connectors_to_search, available_connectors)

    # --- Optimization 1: skip connectors that have zero indexed documents ---
    if available_document_types:
        doc_types_set = set(available_document_types)
        before_count = len(connectors)
        connectors = [
            c
            for c in connectors
            if c in doc_types_set
            or NATIVE_TO_LEGACY_DOCTYPE.get(c, "") in doc_types_set
        ]
        skipped = before_count - len(connectors)
        if skipped:
            perf.info(
                "[kb_search] skipped %d empty connectors (had %d, now %d)",
                skipped,
                before_count,
                len(connectors),
            )

    perf.info(
        "[kb_search] searching %d connectors: %s (space=%d, top_k=%d)",
        len(connectors),
        connectors[:5],
        search_space_id,
        top_k,
    )

    # --- Fast-path: no connectors left after filtering ---
    if not connectors:
        perf.info(
            "[kb_search] TOTAL in %.3fs — no connectors to search, returning empty",
            time.perf_counter() - t0,
        )
        return "No documents found in the knowledge base. The search space has no indexed content yet."
        return []

    # --- Fast-path: degenerate queries (*, **, empty, etc.) ---
    # Semantic embedding of '*' is noise and plainto_tsquery('english', '*')
    # yields an empty tsquery, so both retrieval signals are useless.
    # Fall back to a recency-ordered browse that returns diverse results.
    if _is_degenerate_query(query):
        perf.info(
            "[kb_search] degenerate query %r detected - falling back to recency browse",
            "[kb_search_raw] degenerate query %r detected - recency browse",
            query,
        )
        browse_connectors = connectors if connectors else [None]  # type: ignore[list-item]

        expanded_browse = []
        for c in browse_connectors:
            if c is not None and c in NATIVE_TO_LEGACY_DOCTYPE:
                expanded_browse.append([c, NATIVE_TO_LEGACY_DOCTYPE[c]])
        for connector in browse_connectors:
            if connector is not None and connector in NATIVE_TO_LEGACY_DOCTYPE:
                expanded_browse.append([connector, NATIVE_TO_LEGACY_DOCTYPE[connector]])
            else:
                expanded_browse.append(c)
                expanded_browse.append(connector)
        browse_results = await asyncio.gather(
            *[
                _browse_recent_documents(
                    search_space_id=search_space_id,
                    document_type=c,
                    top_k=top_k,
                    start_date=resolved_start_date,
                    end_date=resolved_end_date,
                )
                for c in expanded_browse
            ]
        )
        for docs in browse_results:
            all_documents.extend(docs)

        # Skip dedup + formatting below (browse already returns unique docs)
        # but still cap output budget.
        output_budget = _compute_tool_output_budget(max_input_tokens)
        result = format_documents_for_context(
            all_documents,
            max_chars=output_budget,
            max_chunks_per_doc=_BROWSE_MAX_CHUNKS_PER_DOC,
        )
        perf.info(
            "[kb_search] TOTAL (browse) in %.3fs total_docs=%d output_chars=%d "
            "budget=%d space=%d",
            time.perf_counter() - t0,
            len(all_documents),
            len(result),
            output_budget,
            search_space_id,
        )
        return result

    # --- Optimization 2: compute the query embedding once, share across all local searches ---
    from app.config import config as app_config

    t_embed = time.perf_counter()
    precomputed_embedding = app_config.embedding_model_instance.embed(query)
    perf.info(
        "[kb_search] shared embedding computed in %.3fs",
        time.perf_counter() - t_embed,
    )

    max_parallel_searches = 4
    semaphore = asyncio.Semaphore(max_parallel_searches)

    async def _search_one_connector(connector: str) -> list[dict[str, Any]]:
        try:
            t_conn = time.perf_counter()
            async with semaphore, shielded_async_session() as isolated_session:
                svc = ConnectorService(isolated_session, search_space_id)
                chunks = await svc._combined_rrf_search(
                    query_text=query,
                    search_space_id=search_space_id,
                    document_type=connector,
                    top_k=top_k,
                    start_date=resolved_start_date,
                    end_date=resolved_end_date,
                    query_embedding=precomputed_embedding,
                )
            perf.info(
                "[kb_search] connector=%s results=%d in %.3fs",
                connector,
                len(chunks),
                time.perf_counter() - t_conn,
            )
            return chunks
        except Exception as e:
            perf.warning("[kb_search] connector=%s FAILED: %s", connector, e)
            return []
                for connector in expanded_browse
            ]
        )
        for docs in browse_results:
            all_documents.extend(docs)
    else:
        if query_embedding is None:
            from app.config import config as app_config

    t_gather = time.perf_counter()
    connector_results = await asyncio.gather(
        *[_search_one_connector(connector) for connector in connectors]
    )
    perf.info(
        "[kb_search] all connectors gathered in %.3fs",
        time.perf_counter() - t_gather,
    )
    for chunks in connector_results:
        all_documents.extend(chunks)
            query_embedding = app_config.embedding_model_instance.embed(query)

        max_parallel_searches = 4
        semaphore = asyncio.Semaphore(max_parallel_searches)

        async def _search_one_connector(connector: str) -> list[dict[str, Any]]:
            try:
                async with semaphore, shielded_async_session() as isolated_session:
                    svc = ConnectorService(isolated_session, search_space_id)
                    return await svc._combined_rrf_search(
                        query_text=query,
                        search_space_id=search_space_id,
                        document_type=connector,
                        top_k=top_k,
                        start_date=resolved_start_date,
                        end_date=resolved_end_date,
                        query_embedding=query_embedding,
                    )
            except Exception as exc:
                perf.warning("[kb_search_raw] connector=%s FAILED: %s", connector, exc)
                return []

        connector_results = await asyncio.gather(
            *[_search_one_connector(connector) for connector in connectors]
        )
        for docs in connector_results:
            all_documents.extend(docs)

    # Deduplicate primarily by document ID. Only fall back to content hashing
    # when a document has no ID.
    seen_doc_ids: set[Any] = set()
    seen_content_hashes: set[int] = set()
    deduplicated: list[dict[str, Any]] = []

@@ -785,7 +781,6 @@ async def search_knowledge_base_async(
            chunk_texts.append(chunk_content)
        if chunk_texts:
            return hash("||".join(chunk_texts))

        flat_content = (document.get("content") or "").strip()
        if flat_content:
            return hash(flat_content)

@@ -793,216 +788,24 @@ async def search_knowledge_base_async(

    for doc in all_documents:
        doc_id = (doc.get("document", {}) or {}).get("id")

        if doc_id is not None:
            if doc_id in seen_doc_ids:
                continue
            seen_doc_ids.add(doc_id)
            deduplicated.append(doc)
            continue

        content_hash = _content_fingerprint(doc)
        if content_hash is not None and content_hash in seen_content_hashes:
            continue
        if content_hash is not None:
            if content_hash in seen_content_hashes:
                continue
            seen_content_hashes.add(content_hash)

        deduplicated.append(doc)

    # Sort by RRF score so the most relevant documents from ANY connector
    # appear first, preventing budget truncation from hiding top results.
    deduplicated.sort(key=lambda d: d.get("score", 0), reverse=True)

    output_budget = _compute_tool_output_budget(max_input_tokens)
    result = format_documents_for_context(deduplicated, max_chars=output_budget)

    if len(result) > output_budget:
        perf.warning(
            "[kb_search] output STILL exceeds budget after format (%d > %d), "
            "hard truncation should have fired",
            len(result),
            output_budget,
        )

    deduplicated.sort(key=lambda doc: doc.get("score", 0), reverse=True)
    perf.info(
        "[kb_search] TOTAL in %.3fs total_docs=%d deduped=%d output_chars=%d "
        "budget=%d max_input_tokens=%s space=%d",
        "[kb_search_raw] done in %.3fs total=%d deduped=%d",
        time.perf_counter() - t0,
        len(all_documents),
        len(deduplicated),
        len(result),
        output_budget,
        max_input_tokens,
        search_space_id,
    )
    return result

def _build_connector_docstring(available_connectors: list[str] | None) -> str:
    """
    Build the connector documentation section for the tool docstring.

    Args:
        available_connectors: List of available connector types, or None for all

    Returns:
        Formatted docstring section listing available connectors
    """
    connectors = available_connectors if available_connectors else list(_ALL_CONNECTORS)

    lines = []
    for connector in connectors:
        # Skip internal names, prefer user-facing aliases
        if connector == "CRAWLED_URL":
            # Show as WEBCRAWLER_CONNECTOR for user-facing docs
            description = CONNECTOR_DESCRIPTIONS.get(connector, connector)
            lines.append(f"- WEBCRAWLER_CONNECTOR: {description}")
        else:
            description = CONNECTOR_DESCRIPTIONS.get(connector, connector)
            lines.append(f"- {connector}: {description}")

    return "\n".join(lines)


# =============================================================================
# Tool Input Schema
# =============================================================================


class SearchKnowledgeBaseInput(BaseModel):
    """Input schema for the search_knowledge_base tool."""

    query: str = Field(
        description=(
            "The search query - use specific natural language terms. "
            "NEVER use wildcards like '*' or '**'; instead describe what you want "
            "(e.g. 'recent meeting notes' or 'project architecture overview')."
        ),
    )
    top_k: int = Field(
        default=10,
        description="Number of results to retrieve (default: 10). Keep ≤20 for focused searches.",
    )
    start_date: str | None = Field(
        default=None,
        description="Optional ISO date/datetime (e.g. '2025-12-12' or '2025-12-12T00:00:00+00:00')",
    )
    end_date: str | None = Field(
        default=None,
        description="Optional ISO date/datetime (e.g. '2025-12-19' or '2025-12-19T23:59:59+00:00')",
    )
    connectors_to_search: list[str] | None = Field(
        default=None,
        description="Optional list of connector enums to search. If omitted, searches all available.",
    )


def create_search_knowledge_base_tool(
    search_space_id: int,
    db_session: AsyncSession,
    connector_service: ConnectorService,
    available_connectors: list[str] | None = None,
    available_document_types: list[str] | None = None,
    max_input_tokens: int | None = None,
) -> StructuredTool:
    """
    Factory function to create the search_knowledge_base tool with injected dependencies.

    Args:
        search_space_id: The user's search space ID
        db_session: Database session
        connector_service: Initialized connector service
        available_connectors: Optional list of connector types available in the search space.
            Used to dynamically generate the tool docstring.
        available_document_types: Optional list of document types that have data in the search space.
            Used to inform the LLM about what data exists.
        max_input_tokens: Model context window (tokens) from litellm model info.
            Used to dynamically size tool output.

    Returns:
        A configured StructuredTool instance
    """
    # Build connector documentation dynamically
    connector_docs = _build_connector_docstring(available_connectors)

    # Build context about available document types
    doc_types_info = ""
    if available_document_types:
        doc_types_info = f"""

## Document types with indexed content in this search space

The following document types have content available for search:
{", ".join(available_document_types)}

Focus searches on these types for best results."""

    # Build the dynamic description for the tool
    # This is what the LLM sees when deciding whether/how to use the tool
    dynamic_description = f"""Search the user's personal knowledge base for relevant information.

Use this tool to find documents, notes, files, web pages, and other content the user has indexed.
This searches ONLY local/indexed data (uploaded files, Notion, Slack, browser extension captures, etc.).
For real-time web search (current events, news, live data), use the `web_search` tool instead.

IMPORTANT:
- Always craft specific, descriptive search queries using natural language keywords.
  Good: "quarterly sales report Q3", "Python API authentication design".
  Bad: "*", "**", "everything", single characters. Wildcard/empty queries yield poor results.
- Prefer multiple focused searches over a single broad one with high top_k.
- If the user requests a specific source type (e.g. "my notes", "Slack messages"), pass `connectors_to_search=[...]` using the enums below.
- If `connectors_to_search` is omitted/empty, the system will search broadly.
- Only connectors that are enabled/configured for this search space are available.{doc_types_info}

## Available connector enums for `connectors_to_search`

{connector_docs}

NOTE: `WEBCRAWLER_CONNECTOR` is mapped internally to the canonical document type `CRAWLED_URL`."""

    # Capture for closure
    _available_connectors = available_connectors
    _available_document_types = available_document_types

    async def _search_knowledge_base_impl(
        query: str,
        top_k: int = 10,
        start_date: str | None = None,
        end_date: str | None = None,
        connectors_to_search: list[str] | None = None,
    ) -> str:
        """Implementation function for knowledge base search."""
        from app.agents.new_chat.utils import parse_date_or_datetime

        parsed_start: datetime | None = None
        parsed_end: datetime | None = None

        if start_date:
            parsed_start = parse_date_or_datetime(start_date)
        if end_date:
            parsed_end = parse_date_or_datetime(end_date)

        return await search_knowledge_base_async(
            query=query,
            search_space_id=search_space_id,
            db_session=db_session,
            connector_service=connector_service,
            connectors_to_search=connectors_to_search,
            top_k=top_k,
            start_date=parsed_start,
            end_date=parsed_end,
            available_connectors=_available_connectors,
            available_document_types=_available_document_types,
            max_input_tokens=max_input_tokens,
        )

    # Create StructuredTool with dynamic description
    # This properly sets the description that the LLM sees
    tool = StructuredTool(
        name="search_knowledge_base",
        description=dynamic_description,
        coroutine=_search_knowledge_base_impl,
        args_schema=SearchKnowledgeBaseInput,
    )

    return tool
    return deduplicated

@@ -71,7 +71,6 @@ from .jira import (
    create_delete_jira_issue_tool,
    create_update_jira_issue_tool,
)
from .knowledge_base import create_search_knowledge_base_tool
from .linear import (
    create_create_linear_issue_tool,
    create_delete_linear_issue_tool,

@@ -128,23 +127,6 @@ class ToolDefinition:
# Registry of all built-in tools
# Contributors: Add your new tools here!
BUILTIN_TOOLS: list[ToolDefinition] = [
    # Core tool - searches the user's knowledge base
    # Now supports dynamic connector/document type discovery
    ToolDefinition(
        name="search_knowledge_base",
        description="Search the user's personal knowledge base for relevant information",
        factory=lambda deps: create_search_knowledge_base_tool(
            search_space_id=deps["search_space_id"],
            db_session=deps["db_session"],
            connector_service=deps["connector_service"],
            # Optional: dynamically discovered connectors/document types
            available_connectors=deps.get("available_connectors"),
            available_document_types=deps.get("available_document_types"),
            max_input_tokens=deps.get("max_input_tokens"),
        ),
        requires=["search_space_id", "db_session", "connector_service"],
        # Note: available_connectors and available_document_types are optional
    ),
    # Podcast generation tool
    ToolDefinition(
        name="generate_podcast",

@@ -168,8 +150,8 @@ BUILTIN_TOOLS: list[ToolDefinition] = [
        requires=["search_space_id", "db_session", "thread_id"],
    ),
    # Report generation tool (inline, short-lived sessions for DB ops)
    # Supports internal KB search via source_strategy so the agent doesn't
    # need to call search_knowledge_base separately before generating.
    # Supports internal KB search via source_strategy so the agent does not
    # need a separate search step before generating.
    ToolDefinition(
        name="generate_report",
        description="Generate a structured report from provided content and export it",

@@ -551,7 +533,7 @@ def build_tools(
        tools = build_tools(deps)

        # Use only specific tools
        tools = build_tools(deps, enabled_tools=["search_knowledge_base"])
        tools = build_tools(deps, enabled_tools=["generate_report"])

        # Use defaults but disable podcast
        tools = build_tools(deps, disabled_tools=["generate_podcast"])
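(For contributors extending the registry above, a hypothetical entry sketched from the ToolDefinition fields this diff uses; the summarize tool and its factory are invented:

    ToolDefinition(
        name="summarize_document",
        description="Summarize a single document from the user's search space",
        factory=lambda deps: create_summarize_document_tool(  # invented factory
            search_space_id=deps["search_space_id"],
            db_session=deps["db_session"],
        ),
        requires=["search_space_id", "db_session"],
    )
)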

@@ -584,8 +584,8 @@ def create_generate_report_tool(
        search_space_id: The user's search space ID
        thread_id: The chat thread ID for associating the report
        connector_service: Optional connector service for internal KB search.
            When provided, the tool can search the knowledge base without the
            agent having to call search_knowledge_base separately.
            When provided, the tool can search the knowledge base internally
            (used by the "kb_search" and "auto" source strategies).
        available_connectors: Optional list of connector types available in the
            search space (used to scope internal KB searches).

@@ -639,12 +639,13 @@ def create_generate_report_tool(

        SOURCE STRATEGY (how to collect source material):
        - source_strategy="conversation" — The conversation already has
          enough context (prior Q&A, pasted text, uploaded files, scraped
          webpages). Pass a thorough summary as source_content.
          NEVER call search_knowledge_base separately first.
          enough context (prior Q&A, filesystem exploration, pasted text,
          uploaded files, scraped webpages). Pass a thorough summary as
          source_content.
        - source_strategy="kb_search" — Search the knowledge base
          internally. Provide 1-5 targeted search_queries. The tool
          handles searching — do NOT call search_knowledge_base first.
          handles searching internally — do NOT manually read and dump
          /documents/ files into source_content.
        - source_strategy="provided" — Use only what is in source_content
          (default, backward-compatible).
        - source_strategy="auto" — Use source_content if it has enough

@@ -1064,6 +1065,7 @@ def create_generate_report_tool(
            "title": topic,
            "word_count": metadata.get("word_count", 0),
            "is_revision": bool(parent_report_content),
            "report_markdown": report_content,
            "message": f"Report generated successfully: {topic}",
        }