diff --git a/docker/.env.example b/docker/.env.example
index 63308bc9e..3ccf86431 100644
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -394,7 +394,6 @@ SURFSENSE_ENABLE_TOOL_CALL_REPAIR=true
 SURFSENSE_ENABLE_BUSY_MUTEX=true
 SURFSENSE_ENABLE_SKILLS=true
 SURFSENSE_ENABLE_SPECIALIZED_SUBAGENTS=true
-SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE=true
 SURFSENSE_ENABLE_ACTION_LOG=true
 SURFSENSE_ENABLE_REVERT_ROUTE=true
 SURFSENSE_ENABLE_PERMISSION=true
diff --git a/docs/adr/0001-rag-citation-and-context-architecture.md b/docs/adr/0001-rag-citation-and-context-architecture.md
index 688df2d1a..c377880eb 100644
--- a/docs/adr/0001-rag-citation-and-context-architecture.md
+++ b/docs/adr/0001-rag-citation-and-context-architecture.md
@@ -379,13 +379,18 @@ the ambient plane.
 Remove from the hot path:
 
 - `KnowledgePriorityMiddleware` search branch (planner LLM, embedding, hybrid
-  search in `before_agent`).
+  search in `before_agent`). ✅ **Done** — the whole `knowledge_search.py`
+  module is deleted.
 - `fetch_mentioned_documents` eager chunk pull.
 - `<priority_documents>` pre-injection and `KbContextProjectionMiddleware`
-  priority projection.
+  priority projection. ✅ **Done** — `<priority_documents>` is no longer
+  produced anywhere; `KbContextProjectionMiddleware` is trimmed to a pure
+  `<workspace_tree>` projector. The `enable_kb_priority_preinjection` flag and
+  every `<priority_documents>` prompt reference are removed.
 - `kb_priority` state plumbing (deleted per §8.10; add a dedicated
-  `citation_registry` field instead). `kb_matched_chunk_ids` is already gone
-  (build-order Step 5).
+  `citation_registry` field instead). ✅ **Done** — `kb_priority` /
+  `KbPriorityEntry` are removed from state + reducers. `kb_matched_chunk_ids`
+  is already gone (build-order Step 5).
 
 Keep / add:
 
@@ -486,11 +491,15 @@ behavior tests, and the on-contract prompt `base/citation_contract.md`
 4. **Mentions → scope.** Map `@document`/`@folder` mentions to
    `SearchScope(document_ids=…)` for the tool; retire `kb_priority` mention
    surfacing.
-5. **Remove the old eager path.** Retire `KnowledgePriorityMiddleware`,
-   `kb_context_projection`, and the old `search_knowledge_base` hybrid helper in
-   `knowledge_search.py`; later `ChucksHybridSearchRetriever` (after migrating
-   `ConnectorService`). Migrate `web_search` to register `WEB_RESULT` so all
-   citations unify on `[n]` — **done**, see §12 build-order Step 6.
+5. **Remove the old eager path.** ✅ **Done** — `KnowledgePriorityMiddleware`
+   and the old `search_knowledge_base` hybrid helper in `knowledge_search.py`
+   are deleted (the whole module is gone); `kb_context_projection` is trimmed to
+   a tree-only projector (kept because it still projects `<workspace_tree>` to
+   subagents); `kb_priority` state + the `enable_kb_priority_preinjection` flag +
+   all `<priority_documents>` prompt references are removed. Still pending:
+   `ChucksHybridSearchRetriever` (after migrating `ConnectorService`). Migrate
+   `web_search` to register `WEB_RESULT` so all citations unify on `[n]` —
+   **done**, see §12 build-order Step 6.
 
 ---
 
diff --git a/surfsense_backend/.env.example b/surfsense_backend/.env.example
index 6a8f991e4..e49da8b87 100644
--- a/surfsense_backend/.env.example
+++ b/surfsense_backend/.env.example
@@ -416,14 +416,6 @@ LANGSMITH_PROJECT=surfsense
 # Skills + subagents
 # SURFSENSE_ENABLE_SKILLS=false
 # SURFSENSE_ENABLE_SPECIALIZED_SUBAGENTS=false
-# SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE=false
-
-# KB retrieval mode (default OFF = lazy). When OFF, the main agent retrieves
-# KB content on demand via the `search_knowledge_base` tool and skips the
-# expensive per-turn pre-injection (planner LLM + embed + hybrid search,
-# ~2.3s); explicit @-mentions are still surfaced cheaply. Set to true to
-# restore the original eager `<priority_documents>` pre-injection.
-# SURFSENSE_ENABLE_KB_PRIORITY_PREINJECTION=false
 
 # Snapshot / revert
 # SURFSENSE_ENABLE_ACTION_LOG=false
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/anonymous_document/middleware.py b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/anonymous_document/middleware.py
index d29c31230..2bae0742a 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/anonymous_document/middleware.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/middleware/anonymous_document/middleware.py
@@ -6,8 +6,6 @@ read-only). This middleware loads it once on the first turn into
 
 * :class:`KnowledgeTreeMiddleware` can render the synthetic ``/documents``
   view without touching the DB.
-* :class:`KnowledgePriorityMiddleware` skips hybrid search and emits a
-  degenerate priority list.
 * :class:`KBPostgresBackend` (``als_info`` / ``aread`` / ``_load_file_data``)
   recognises the synthetic path.
 
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/private.md b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/private.md
index 3dce76981..6c47b03a9 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/private.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/private.md
@@ -8,11 +8,6 @@ standing instructions. It also reports current character usage versus the
 hard limit so you can manage the budget. Treat it as background colour for
 your answer, not as the task itself.
 
-`<priority_documents>` lists the workspace documents most relevant to the
-latest user message, ranked by relevance score, with `[USER-MENTIONED]`
-flagged on anything the user explicitly referenced. When the task is about
-workspace content, read these first.
-
 `<workspace_tree>` shows the full `/documents/` folder and file layout. Use
 it to resolve paths the user describes in natural language ("my Q2 roadmap",
 "last week's meeting notes") into concrete document references before
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/team.md b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/team.md
index 7657af663..fcce98fd0 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/team.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/dynamic_context/team.md
@@ -7,11 +7,6 @@ decisions, conventions, architecture notes, processes, key facts. It also
 reports current character usage versus the hard limit so you can manage the
 budget. Treat it as background colour for your answer, not as the task itself.
 
-`<priority_documents>` lists the workspace documents most relevant to the
-latest user message, ranked by relevance score, with `[USER-MENTIONED]`
-flagged on anything someone in the thread explicitly referenced. When the
-task is about workspace content, read these first.
-
 `<workspace_tree>` shows the full `/documents/` folder and file layout. Use
 it to resolve paths described in natural language ("the Q2 roadmap", "last
 week's planning notes") into concrete document references before delegating
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/providers/google.md b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/providers/google.md
index 32ed959c1..2539becce 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/providers/google.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/main_agent/system_prompt/prompts/providers/google.md
@@ -14,5 +14,5 @@ Workflow (Understand → Plan → Act → Verify):
 
 Discipline:
 - Do not imply access to connectors, MCP tools, or deliverable generators except via **task**.
-- Pass paths to **task(knowledge_base, …)** only when you saw them in `<workspace_tree>` or `<priority_documents>`. Otherwise describe the document in natural language and let the subagent resolve it.
+- Pass paths to **task(knowledge_base, …)** only when you saw them in `<workspace_tree>`. Otherwise describe the document in natural language and let the subagent resolve it.
 </provider_hints>
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/feature_flags.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/feature_flags.py
index f5233c7d3..91ee2a4c6 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/feature_flags.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/feature_flags.py
@@ -53,14 +53,6 @@ class AgentFeatureFlags:
     # Skills + subagents
     enable_skills: bool = True
     enable_specialized_subagents: bool = True
-    enable_kb_planner_runnable: bool = True
-
-    # KB retrieval mode — when False (default), the main agent retrieves KB
-    # content lazily via the on-demand ``search_knowledge_base`` tool and the
-    # expensive per-turn pre-injection (planner LLM + embed + hybrid search,
-    # ~2.3s) is skipped; explicit @-mentions are still surfaced cheaply. Set
-    # True to restore the original eager ``<priority_documents>`` pre-injection.
-    enable_kb_priority_preinjection: bool = False
 
     # Snapshot / revert
     enable_action_log: bool = True
@@ -118,9 +110,6 @@ class AgentFeatureFlags:
                 enable_llm_tool_selector=False,
                 enable_skills=False,
                 enable_specialized_subagents=False,
-                enable_kb_planner_runnable=False,
-                # Full rollback restores the original eager KB pre-injection.
-                enable_kb_priority_preinjection=True,
                 enable_action_log=False,
                 enable_revert_route=False,
                 enable_plugin_loader=False,
@@ -156,12 +145,6 @@ class AgentFeatureFlags:
             enable_specialized_subagents=_env_bool(
                 "SURFSENSE_ENABLE_SPECIALIZED_SUBAGENTS", True
             ),
-            enable_kb_planner_runnable=_env_bool(
-                "SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE", True
-            ),
-            enable_kb_priority_preinjection=_env_bool(
-                "SURFSENSE_ENABLE_KB_PRIORITY_PREINJECTION", False
-            ),
             # Snapshot / revert
             enable_action_log=_env_bool("SURFSENSE_ENABLE_ACTION_LOG", True),
             enable_revert_route=_env_bool("SURFSENSE_ENABLE_REVERT_ROUTE", True),
@@ -198,7 +181,6 @@ class AgentFeatureFlags:
                 self.enable_llm_tool_selector,
                 self.enable_skills,
                 self.enable_specialized_subagents,
-                self.enable_kb_planner_runnable,
                 self.enable_action_log,
                 self.enable_revert_route,
                 self.enable_plugin_loader,
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/system_prompt/cloud.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/system_prompt/cloud.py
index 3366ac601..1520668ad 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/system_prompt/cloud.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/system_prompt/cloud.py
@@ -44,12 +44,6 @@ to page through a large document. Cite a passage by writing its `[n]` after the
 statement it supports — the same `[n]` that passage had in
 `search_knowledge_base` results.
 
-## Priority List
-
-You receive a `<priority_documents>` system message each turn listing the
-top-K paths most relevant to the user's query (by hybrid search). Read those
-first.
-
 ## Workspace Tree
 
 You receive a `<workspace_tree>` system message each turn with the current
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/system_prompt/desktop.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/system_prompt/desktop.py
index 712b51c26..d4cae99f0 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/system_prompt/desktop.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/filesystem/system_prompt/desktop.py
@@ -37,13 +37,4 @@ directory (`cwd`).
 - Cross-mount moves are not supported.
 - Desktop deletes hit disk immediately and cannot be undone via the
   agent's revert flow — confirm before calling `rm`/`rmdir`.
-
-## Priority List
-
-You may receive a `<priority_documents>` system message listing the top-K
-documents from the user's SurfSense knowledge base — these are cloud-ingested
-via connectors (Notion, Slack, etc.), not local files. Treat it as a hint:
-consult it when the task spans both local and cloud sources (e.g. drafting a
-local note from a Notion summary); skip when the task is purely about local
-files.
 """
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/kb_context_projection.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/kb_context_projection.py
index 4667441ab..f15c918be 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/kb_context_projection.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/kb_context_projection.py
@@ -1,4 +1,4 @@
-"""Project ``workspace_tree_text`` + ``kb_priority`` from state into SystemMessages."""
+"""Project ``workspace_tree_text`` from state into a SystemMessage."""
 
 from __future__ import annotations
 
@@ -14,18 +14,15 @@ from app.agents.chat.multi_agent_chat.shared.state.filesystem_state import (
 )
 from app.utils.perf import get_perf_logger
 
-from .knowledge_search import _render_priority_message
-
 _perf_log = get_perf_logger()
 
 
 class KbContextProjectionMiddleware(AgentMiddleware):  # type: ignore[type-arg]
-    """Emit ``<workspace_tree>`` + ``<priority_documents>`` from shared state.
+    """Emit the ``<workspace_tree>`` from shared state.
 
     Read-only consumer: no DB, no LLM, no state writes. The orchestrator's
-    renderer middlewares populate the source fields; this projection lets any
-    agent (orchestrator or subagent) put the same content in front of its
-    own LLM call.
+    ``KnowledgeTreeMiddleware`` populates ``workspace_tree_text``; this
+    projection lets a subagent put the same tree in front of its own LLM call.
     """
 
     tools = ()
@@ -39,28 +36,19 @@ class KbContextProjectionMiddleware(AgentMiddleware):  # type: ignore[type-arg]
         del runtime
         start = time.perf_counter()
         tree_text = state.get("workspace_tree_text")
-        priority = state.get("kb_priority")
-        if not tree_text and not priority:
+        if not tree_text:
             _perf_log.info(
-                "[kb_context_projection] tree=0 priority=0 elapsed=%.3fs",
+                "[kb_context_projection] tree=0 elapsed=%.3fs",
                 time.perf_counter() - start,
             )
             return None
 
         messages = list(state.get("messages") or [])
         insert_at = max(len(messages) - 1, 0)
-        tree_chars = 0
-        if tree_text:
-            tree_chars = len(tree_text)
-            messages.insert(insert_at, SystemMessage(content=tree_text))
-        priority_count = 0
-        if priority:
-            priority_count = len(priority) if hasattr(priority, "__len__") else 1
-            messages.insert(insert_at, _render_priority_message(priority))
+        messages.insert(insert_at, SystemMessage(content=tree_text))
         _perf_log.info(
-            "[kb_context_projection] tree_chars=%d priority_items=%d elapsed=%.3fs",
-            tree_chars,
-            priority_count,
+            "[kb_context_projection] tree_chars=%d elapsed=%.3fs",
+            len(tree_text),
             time.perf_counter() - start,
         )
         return {"messages": messages}
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/knowledge_search.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/knowledge_search.py
deleted file mode 100644
index efb85a785..000000000
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/middleware/knowledge_search.py
+++ /dev/null
@@ -1,1075 +0,0 @@
-"""Hybrid-search priority middleware for the SurfSense new chat agent.
-
-This middleware runs ``before_agent`` on every turn and writes:
-
-* ``state["kb_priority"]`` — the top-K most relevant documents for the
-  current user message, used to render a ``<priority_documents>`` system
-  message immediately before the user turn.
-
-The previous "scoped filesystem" behaviour (synthetic ``ls`` + state
-``files`` seeding) is intentionally removed: documents are now lazy-loaded
-from Postgres on demand, with the full workspace tree rendered separately
-by :class:`KnowledgeTreeMiddleware`.
-
-In anonymous mode the middleware skips hybrid search entirely and emits a
-single-entry priority list pointing at the Redis-loaded document
-(``state["kb_anon_doc"]``).
-"""
-
-from __future__ import annotations
-
-import asyncio
-import json
-import logging
-import re
-import time
-from collections.abc import Sequence
-from datetime import UTC, datetime
-from typing import Any
-
-from langchain.agents import create_agent
-from langchain.agents.middleware import AgentMiddleware, AgentState
-from langchain_core.language_models import BaseChatModel
-from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage
-from langchain_core.runnables import Runnable
-from langgraph.runtime import Runtime
-from litellm import token_counter
-from pydantic import BaseModel, Field, ValidationError
-from sqlalchemy import select
-
-from app.agents.chat.multi_agent_chat.shared.date_filters import (
-    parse_date_or_datetime,
-    resolve_date_range,
-)
-from app.agents.chat.multi_agent_chat.shared.feature_flags import get_flags
-from app.agents.chat.multi_agent_chat.shared.filesystem_selection import FilesystemMode
-from app.agents.chat.multi_agent_chat.shared.state.filesystem_state import (
-    SurfSenseFilesystemState,
-)
-from app.agents.chat.runtime.path_resolver import (
-    PathIndex,
-    build_path_index,
-    doc_to_virtual_path,
-)
-from app.db import (
-    NATIVE_TO_LEGACY_DOCTYPE,
-    Chunk,
-    Document,
-    Folder,
-    shielded_async_session,
-)
-from app.retriever.chunks_hybrid_search import ChucksHybridSearchRetriever
-from app.utils.document_converters import embed_texts
-from app.utils.perf import get_perf_logger
-
-logger = logging.getLogger(__name__)
-_perf_log = get_perf_logger()
-
-
-class KBSearchPlan(BaseModel):
-    """Structured internal plan for KB retrieval."""
-
-    optimized_query: str = Field(
-        min_length=1,
-        description="Optimized retrieval query preserving the user's intent.",
-    )
-    start_date: str | None = Field(
-        default=None,
-        description="Optional ISO start date or datetime for KB search filtering.",
-    )
-    end_date: str | None = Field(
-        default=None,
-        description="Optional ISO end date or datetime for KB search filtering.",
-    )
-    is_recency_query: bool = Field(
-        default=False,
-        description=(
-            "True when the user's intent is primarily about recency or temporal "
-            "ordering (e.g. 'latest', 'newest', 'most recent', 'last uploaded') "
-            "rather than topical relevance."
-        ),
-    )
-
-
-def _extract_text_from_message(message: BaseMessage) -> str:
-    content = getattr(message, "content", "")
-    if isinstance(content, str):
-        return content
-    if isinstance(content, list):
-        parts: list[str] = []
-        for item in content:
-            if isinstance(item, str):
-                parts.append(item)
-            elif isinstance(item, dict) and item.get("type") == "text":
-                parts.append(str(item.get("text", "")))
-        return "\n".join(p for p in parts if p)
-    return str(content)
-
-
-def _render_recent_conversation(
-    messages: Sequence[BaseMessage],
-    *,
-    llm: BaseChatModel | None = None,
-    user_text: str = "",
-    max_messages: int = 6,
-) -> str:
-    """Render recent dialogue for internal planning under a token budget.
-
-    Filters to ``HumanMessage`` and ``AIMessage`` (without tool_calls) so that
-    injected ``SystemMessage`` artefacts (priority list, workspace tree,
-    file-write contract) don't pollute the planner prompt.
-    """
-    rendered: list[tuple[str, str]] = []
-    for message in messages:
-        role: str | None = None
-        if isinstance(message, HumanMessage):
-            role = "user"
-        elif isinstance(message, AIMessage):
-            if getattr(message, "tool_calls", None):
-                continue
-            role = "assistant"
-        else:
-            continue
-
-        text = _extract_text_from_message(message).strip()
-        if not text:
-            continue
-        text = re.sub(r"\s+", " ", text)
-        rendered.append((role, text))
-
-    if not rendered:
-        return ""
-
-    if rendered and rendered[-1][0] == "user" and rendered[-1][1] == user_text.strip():
-        rendered = rendered[:-1]
-
-    if not rendered:
-        return ""
-
-    def _legacy_render() -> str:
-        legacy_lines: list[str] = []
-        for role, text in rendered[-max_messages:]:
-            clipped = text[:400].rstrip() + "..." if len(text) > 400 else text
-            legacy_lines.append(f"{role}: {clipped}")
-        return "\n".join(legacy_lines)
-
-    def _count_prompt_tokens(conversation_text: str) -> int | None:
-        prompt = _build_kb_planner_prompt(
-            recent_conversation=conversation_text or "(none)",
-            user_text=user_text,
-        )
-        message_payload = [{"role": "user", "content": prompt}]
-
-        count_fn = getattr(llm, "_count_tokens", None) if llm is not None else None
-        if callable(count_fn):
-            try:
-                return count_fn(message_payload)
-            except Exception:
-                pass
-
-        profile = getattr(llm, "profile", None) if llm is not None else None
-        model_names: list[str] = []
-        if isinstance(profile, dict):
-            tcms = profile.get("token_count_models")
-            if isinstance(tcms, list):
-                model_names.extend(
-                    name for name in tcms if isinstance(name, str) and name
-                )
-            tcm = profile.get("token_count_model")
-            if isinstance(tcm, str) and tcm and tcm not in model_names:
-                model_names.append(tcm)
-        model_name = model_names[0] if model_names else getattr(llm, "model", None)
-        if not isinstance(model_name, str) or not model_name:
-            return None
-        try:
-            return token_counter(messages=message_payload, model=model_name)
-        except Exception:
-            return None
-
-    get_max_input_tokens = getattr(llm, "_get_max_input_tokens", None) if llm else None
-    if callable(get_max_input_tokens):
-        try:
-            max_input_tokens = int(get_max_input_tokens())
-        except Exception:
-            max_input_tokens = None
-    else:
-        profile = getattr(llm, "profile", None) if llm is not None else None
-        max_input_tokens = (
-            profile.get("max_input_tokens")
-            if isinstance(profile, dict)
-            and isinstance(profile.get("max_input_tokens"), int)
-            else None
-        )
-
-    if not isinstance(max_input_tokens, int) or max_input_tokens <= 0:
-        return _legacy_render()
-
-    output_reserve = min(max(int(max_input_tokens * 0.02), 256), 1024)
-    budget = max_input_tokens - output_reserve
-    if budget <= 0:
-        return _legacy_render()
-
-    selected_lines: list[str] = []
-    for role, text in reversed(rendered):
-        candidate_line = f"{role}: {text}"
-        candidate_lines = [candidate_line, *selected_lines]
-        candidate_conversation = "\n".join(candidate_lines)
-        token_count = _count_prompt_tokens(candidate_conversation)
-        if token_count is None:
-            return _legacy_render()
-        if token_count <= budget:
-            selected_lines = candidate_lines
-            continue
-
-        lo, hi = 1, len(text)
-        best_line: str | None = None
-        while lo <= hi:
-            mid = (lo + hi) // 2
-            clipped_text = text[:mid].rstrip() + "..."
-            clipped_line = f"{role}: {clipped_text}"
-            clipped_conversation = "\n".join([clipped_line, *selected_lines])
-            clipped_tokens = _count_prompt_tokens(clipped_conversation)
-            if clipped_tokens is None:
-                break
-            if clipped_tokens <= budget:
-                best_line = clipped_line
-                lo = mid + 1
-            else:
-                hi = mid - 1
-
-        if best_line is not None:
-            selected_lines = [best_line, *selected_lines]
-        break
-
-    if not selected_lines:
-        return _legacy_render()
-
-    return "\n".join(selected_lines)
-
-
-def _build_kb_planner_prompt(
-    *,
-    recent_conversation: str,
-    user_text: str,
-) -> str:
-    today = datetime.now(UTC).date().isoformat()
-    return (
-        "You optimize internal knowledge-base search inputs for document retrieval.\n"
-        "Return JSON only with this exact shape:\n"
-        '{"optimized_query":"string","start_date":"ISO string or null","end_date":"ISO string or null","is_recency_query":bool}\n\n'
-        "Rules:\n"
-        "- Preserve the user's intent.\n"
-        "- Rewrite the query to improve retrieval using concrete entities, acronyms, projects, tools, people, and document-specific terms when helpful.\n"
-        "- Keep the query concise and retrieval-focused.\n"
-        "- Only use date filters when the latest user request or recent dialogue clearly implies a time range.\n"
-        "- If you use date filters, prefer returning both bounds.\n"
-        "- If no date filter is useful, return null for both dates.\n"
-        '- Set "is_recency_query" to true ONLY when the user\'s primary intent is about '
-        "recency or temporal ordering rather than topical relevance. Examples: "
-        '"latest file", "newest upload", "most recent document", "what did I save last", '
-        '"show me files from today", "last thing I added". '
-        "When true, results will be sorted by date instead of relevance.\n"
-        "- Do not include markdown, prose, or explanations.\n\n"
-        f"Today's UTC date: {today}\n\n"
-        f"Recent conversation:\n{recent_conversation or '(none)'}\n\n"
-        f"Latest user message:\n{user_text}"
-    )
-
-
-def _extract_json_payload(text: str) -> str:
-    stripped = text.strip()
-    fenced = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", stripped, re.DOTALL)
-    if fenced:
-        return fenced.group(1)
-    start = stripped.find("{")
-    end = stripped.rfind("}")
-    if start != -1 and end != -1 and end > start:
-        return stripped[start : end + 1]
-    return stripped
-
-
-def _parse_kb_search_plan_response(response_text: str) -> KBSearchPlan:
-    payload = json.loads(_extract_json_payload(response_text))
-    return KBSearchPlan.model_validate(payload)
-
-
-def _normalize_optional_date_range(
-    start_date: str | None,
-    end_date: str | None,
-) -> tuple[datetime | None, datetime | None]:
-    parsed_start = parse_date_or_datetime(start_date) if start_date else None
-    parsed_end = parse_date_or_datetime(end_date) if end_date else None
-
-    if parsed_start is None and parsed_end is None:
-        return None, None
-
-    return resolve_date_range(parsed_start, parsed_end)
-
-
-def _resolve_search_types(
-    available_connectors: list[str] | None,
-    available_document_types: list[str] | None,
-) -> list[str] | None:
-    types: set[str] = set()
-    if available_document_types:
-        types.update(available_document_types)
-    if available_connectors:
-        types.update(available_connectors)
-    if not types:
-        return None
-
-    expanded: set[str] = set(types)
-    for t in types:
-        legacy = NATIVE_TO_LEGACY_DOCTYPE.get(t)
-        if legacy:
-            expanded.add(legacy)
-    return list(expanded) if expanded else None
-
-
-_RECENCY_MAX_CHUNKS_PER_DOC = 5
-
-
-async def browse_recent_documents(
-    *,
-    search_space_id: int,
-    document_type: list[str] | None = None,
-    top_k: int = 10,
-    start_date: datetime | None = None,
-    end_date: datetime | None = None,
-) -> list[dict[str, Any]]:
-    """Return documents ordered by recency (newest first), no relevance ranking."""
-    from sqlalchemy import func
-
-    from app.db import DocumentType
-
-    _t0 = time.perf_counter()
-    async with shielded_async_session() as session:
-        base_conditions = [
-            Document.search_space_id == search_space_id,
-            func.coalesce(Document.status["state"].astext, "ready") != "deleting",
-        ]
-
-        if document_type is not None:
-            import contextlib
-
-            doc_type_enums = []
-            for dt in document_type:
-                if isinstance(dt, str):
-                    with contextlib.suppress(KeyError):
-                        doc_type_enums.append(DocumentType[dt])
-                else:
-                    doc_type_enums.append(dt)
-            if doc_type_enums:
-                if len(doc_type_enums) == 1:
-                    base_conditions.append(Document.document_type == doc_type_enums[0])
-                else:
-                    base_conditions.append(Document.document_type.in_(doc_type_enums))
-
-        if start_date is not None:
-            base_conditions.append(Document.updated_at >= start_date)
-        if end_date is not None:
-            base_conditions.append(Document.updated_at <= end_date)
-
-        doc_query = (
-            select(Document)
-            .where(*base_conditions)
-            .order_by(Document.updated_at.desc())
-            .limit(top_k)
-        )
-        result = await session.execute(doc_query)
-        documents = result.scalars().unique().all()
-
-        if not documents:
-            return []
-
-        doc_ids = [d.id for d in documents]
-        numbered = (
-            select(
-                Chunk.id.label("chunk_id"),
-                Chunk.document_id,
-                Chunk.content,
-                func.row_number()
-                .over(
-                    partition_by=Chunk.document_id,
-                    order_by=(Chunk.position, Chunk.id),
-                )
-                .label("rn"),
-            )
-            .where(Chunk.document_id.in_(doc_ids))
-            .subquery("numbered")
-        )
-
-        chunk_query = (
-            select(numbered.c.chunk_id, numbered.c.document_id, numbered.c.content)
-            .where(numbered.c.rn <= _RECENCY_MAX_CHUNKS_PER_DOC)
-            .order_by(numbered.c.document_id, numbered.c.rn)
-        )
-        chunk_result = await session.execute(chunk_query)
-        fetched_chunks = chunk_result.all()
-
-    doc_chunks: dict[int, list[dict[str, Any]]] = {d.id: [] for d in documents}
-    for row in fetched_chunks:
-        if row.document_id in doc_chunks:
-            doc_chunks[row.document_id].append(
-                {"chunk_id": row.chunk_id, "content": row.content}
-            )
-
-    results: list[dict[str, Any]] = []
-    for doc in documents:
-        chunks_list = doc_chunks.get(doc.id, [])
-        metadata = doc.document_metadata or {}
-        results.append(
-            {
-                "document_id": doc.id,
-                "content": "\n\n".join(
-                    c["content"] for c in chunks_list if c.get("content")
-                ),
-                "score": 0.0,
-                "chunks": chunks_list,
-                "matched_chunk_ids": [],
-                "document": {
-                    "id": doc.id,
-                    "title": doc.title,
-                    "document_type": (
-                        doc.document_type.value
-                        if getattr(doc, "document_type", None)
-                        else None
-                    ),
-                    "metadata": metadata,
-                    "folder_id": getattr(doc, "folder_id", None),
-                },
-                "source": (
-                    doc.document_type.value
-                    if getattr(doc, "document_type", None)
-                    else None
-                ),
-            }
-        )
-    _perf_log.info(
-        "[kb_priority.recent] db=%.3fs docs=%d space=%d",
-        time.perf_counter() - _t0,
-        len(results),
-        search_space_id,
-    )
-    return results
-
-
-async def search_knowledge_base(
-    *,
-    query: str,
-    search_space_id: int,
-    available_connectors: list[str] | None = None,
-    available_document_types: list[str] | None = None,
-    top_k: int = 10,
-    start_date: datetime | None = None,
-    end_date: datetime | None = None,
-) -> list[dict[str, Any]]:
-    """Run a single unified hybrid search against the knowledge base."""
-    if not query:
-        return []
-
-    # ``embed_texts`` serializes behind a global embedding lock and, for API
-    # models, makes a network round-trip — so this can stall while another
-    # turn is embedding. Timed separately from the DB search to tell the two
-    # apart when debugging slow time-to-first-token.
-    _t_embed = time.perf_counter()
-    [embedding] = await asyncio.to_thread(embed_texts, [query])
-    _embed_elapsed = time.perf_counter() - _t_embed
-
-    doc_types = _resolve_search_types(available_connectors, available_document_types)
-    retriever_top_k = min(top_k * 3, 30)
-
-    _t_search = time.perf_counter()
-    async with shielded_async_session() as session:
-        retriever = ChucksHybridSearchRetriever(session)
-        results = await retriever.hybrid_search(
-            query_text=query,
-            top_k=retriever_top_k,
-            search_space_id=search_space_id,
-            document_type=doc_types,
-            start_date=start_date,
-            end_date=end_date,
-            query_embedding=embedding.tolist(),
-        )
-    _search_elapsed = time.perf_counter() - _t_search
-
-    _perf_log.info(
-        "[kb_priority.search] embed=%.3fs hybrid_search=%.3fs results=%d space=%d query=%r",
-        _embed_elapsed,
-        _search_elapsed,
-        len(results),
-        search_space_id,
-        query[:80],
-    )
-    return results[:top_k]
-
-
-async def fetch_mentioned_documents(
-    *,
-    document_ids: list[int],
-    search_space_id: int,
-) -> list[dict[str, Any]]:
-    """Fetch explicitly mentioned documents."""
-    if not document_ids:
-        return []
-
-    _t0 = time.perf_counter()
-    async with shielded_async_session() as session:
-        doc_result = await session.execute(
-            select(Document).where(
-                Document.id.in_(document_ids),
-                Document.search_space_id == search_space_id,
-            )
-        )
-        docs = {doc.id: doc for doc in doc_result.scalars().all()}
-
-        if not docs:
-            return []
-
-        chunk_result = await session.execute(
-            select(Chunk.id, Chunk.content, Chunk.document_id)
-            .where(Chunk.document_id.in_(list(docs.keys())))
-            .order_by(Chunk.document_id, Chunk.position, Chunk.id)
-        )
-        chunks_by_doc: dict[int, list[dict[str, Any]]] = {doc_id: [] for doc_id in docs}
-        for row in chunk_result.all():
-            if row.document_id in chunks_by_doc:
-                chunks_by_doc[row.document_id].append(
-                    {"chunk_id": row.id, "content": row.content}
-                )
-
-    results: list[dict[str, Any]] = []
-    for doc_id in document_ids:
-        doc = docs.get(doc_id)
-        if doc is None:
-            continue
-        metadata = doc.document_metadata or {}
-        results.append(
-            {
-                "document_id": doc.id,
-                "content": "",
-                "score": 1.0,
-                "chunks": chunks_by_doc.get(doc.id, []),
-                "matched_chunk_ids": [],
-                "document": {
-                    "id": doc.id,
-                    "title": doc.title,
-                    "document_type": (
-                        doc.document_type.value
-                        if getattr(doc, "document_type", None)
-                        else None
-                    ),
-                    "metadata": metadata,
-                    "folder_id": getattr(doc, "folder_id", None),
-                },
-                "source": (
-                    doc.document_type.value
-                    if getattr(doc, "document_type", None)
-                    else None
-                ),
-                "_user_mentioned": True,
-            }
-        )
-    _perf_log.info(
-        "[kb_priority.mentioned] db=%.3fs requested=%d resolved=%d",
-        time.perf_counter() - _t0,
-        len(document_ids),
-        len(results),
-    )
-    return results
-
-
-def _render_priority_message(priority: list[dict[str, Any]]) -> SystemMessage:
-    """Render the priority list as a single ``<priority_documents>`` system message."""
-    if not priority:
-        body = "(no priority documents for this turn)"
-    else:
-        lines: list[str] = []
-        for entry in priority:
-            score = entry.get("score")
-            mentioned = entry.get("mentioned")
-            score_str = f"{score:.3f}" if isinstance(score, int | float) else "n/a"
-            mark = " [USER-MENTIONED]" if mentioned else ""
-            lines.append(f"- {entry.get('path', '')} (score={score_str}){mark}")
-        body = "\n".join(lines)
-    return SystemMessage(
-        content=(
-            "<priority_documents>\n"
-            "These documents are most relevant to the latest user message; "
-            "read them first. Matched sections are flagged inside each "
-            "document's <chunk_index>.\n"
-            f"{body}\n"
-            "</priority_documents>"
-        )
-    )
-
-
-class KnowledgePriorityMiddleware(AgentMiddleware):  # type: ignore[type-arg]
-    """Compute hybrid-search priority hints for the current turn."""
-
-    tools = ()
-    state_schema = SurfSenseFilesystemState
-
-    def __init__(
-        self,
-        *,
-        llm: BaseChatModel | None = None,
-        planner_llm: BaseChatModel | None = None,
-        search_space_id: int,
-        filesystem_mode: FilesystemMode = FilesystemMode.CLOUD,
-        available_connectors: list[str] | None = None,
-        available_document_types: list[str] | None = None,
-        top_k: int = 10,
-        mentioned_document_ids: list[int] | None = None,
-        inject_system_message: bool = True,  # For backwards compatibility
-        mentions_only: bool = False,
-    ) -> None:
-        self.llm = llm
-        # Cheap model for structured internal tasks (query rewrite, date
-        # extraction, recency classification) when one is configured; falls back
-        # to the chat LLM otherwise.
-        self.planner_llm = planner_llm or llm
-        self.search_space_id = search_space_id
-        self.filesystem_mode = filesystem_mode
-        self.available_connectors = available_connectors
-        self.available_document_types = available_document_types
-        self.top_k = top_k
-        self.mentioned_document_ids = mentioned_document_ids or []
-        self.inject_system_message = inject_system_message
-        # Lazy mode: skip the planner LLM + embedding + hybrid search and only
-        # surface explicit @-mentions. The agent retrieves topical KB content on
-        # demand via the ``search_knowledge_base`` tool instead.
-        self.mentions_only = mentions_only
-        # Compiled lazily and memoized to avoid the per-turn create_agent cost.
-        self._planner: Runnable | None = None
-        self._planner_compile_failed = False
-
-    def _build_kb_planner_runnable(self) -> Runnable | None:
-        """Lazily compile and memoize the kb-planner Runnable.
-
-        Returns ``None`` (and the caller falls back to ``planner_llm.ainvoke``)
-        when the flag is off, the LLM is missing, or ``create_agent`` raises.
-        Built without tools but with RetryAfterMiddleware so a transient
-        rate-limit on the planner call doesn't fail the whole turn.
-        """
-        if self._planner is not None or self._planner_compile_failed:
-            return self._planner
-        if self.planner_llm is None:
-            return None
-        flags = get_flags()
-        if not flags.enable_kb_planner_runnable or flags.disable_new_agent_stack:
-            return None
-
-        from app.agents.chat.shared.middleware.retry_after import RetryAfterMiddleware
-
-        try:
-            self._planner = create_agent(
-                self.planner_llm,
-                tools=[],
-                middleware=[RetryAfterMiddleware(max_retries=2)],
-            )
-        except Exception as exc:  # pragma: no cover - defensive
-            logger.warning(
-                "kb-planner Runnable compile failed; falling back to planner_llm.ainvoke: %s",
-                exc,
-            )
-            self._planner_compile_failed = True
-            self._planner = None
-        return self._planner
-
-    async def _plan_search_inputs(
-        self,
-        *,
-        messages: Sequence[BaseMessage],
-        user_text: str,
-    ) -> tuple[str, datetime | None, datetime | None, bool]:
-        if self.planner_llm is None:
-            return user_text, None, None, False
-
-        recent_conversation = _render_recent_conversation(
-            messages,
-            llm=self.planner_llm,
-            user_text=user_text,
-        )
-        prompt = _build_kb_planner_prompt(
-            recent_conversation=recent_conversation,
-            user_text=user_text,
-        )
-        loop = asyncio.get_running_loop()
-        t0 = loop.time()
-
-        # Both paths tag surfsense:internal so the planner's intermediate
-        # events stay suppressed from the UI.
-        planner = self._build_kb_planner_runnable()
-        try:
-            if planner is not None:
-                planner_state = await planner.ainvoke(
-                    {"messages": [HumanMessage(content=prompt)]},
-                    config={"tags": ["surfsense:internal"]},
-                )
-                response_messages = (
-                    planner_state.get("messages", [])
-                    if isinstance(planner_state, dict)
-                    else []
-                )
-                response = (
-                    response_messages[-1]
-                    if response_messages
-                    else AIMessage(content="")
-                )
-            else:
-                response = await self.planner_llm.ainvoke(
-                    [HumanMessage(content=prompt)],
-                    config={"tags": ["surfsense:internal"]},
-                )
-            plan = _parse_kb_search_plan_response(_extract_text_from_message(response))
-            optimized_query = (
-                re.sub(r"\s+", " ", plan.optimized_query).strip() or user_text
-            )
-            start_date, end_date = _normalize_optional_date_range(
-                plan.start_date,
-                plan.end_date,
-            )
-            is_recency = plan.is_recency_query
-            _perf_log.info(
-                "[kb_priority] planner in %.3fs query=%r optimized=%r "
-                "start=%s end=%s recency=%s",
-                loop.time() - t0,
-                user_text[:80],
-                optimized_query[:120],
-                start_date.isoformat() if start_date else None,
-                end_date.isoformat() if end_date else None,
-                is_recency,
-            )
-            return optimized_query, start_date, end_date, is_recency
-        except (json.JSONDecodeError, ValidationError, ValueError) as exc:
-            logger.warning(
-                "KB planner returned invalid output, using raw query: %s", exc
-            )
-        except Exception as exc:  # pragma: no cover - defensive fallback
-            logger.warning("KB planner failed, using raw query: %s", exc)
-
-        return user_text, None, None, False
-
-    def before_agent(  # type: ignore[override]
-        self,
-        state: AgentState,
-        runtime: Runtime[Any],
-    ) -> dict[str, Any] | None:
-        try:
-            loop = asyncio.get_running_loop()
-            if loop.is_running():
-                return None
-        except RuntimeError:
-            pass
-        return asyncio.run(self.abefore_agent(state, runtime))
-
-    async def abefore_agent(  # type: ignore[override]
-        self,
-        state: AgentState,
-        runtime: Runtime[Any],
-    ) -> dict[str, Any] | None:
-        if self.filesystem_mode != FilesystemMode.CLOUD:
-            return None
-
-        messages = state.get("messages") or []
-        if not messages:
-            return None
-
-        last_human: HumanMessage | None = None
-        for msg in reversed(messages):
-            if isinstance(msg, HumanMessage):
-                last_human = msg
-                break
-        if last_human is None:
-            return None
-        user_text = _extract_text_from_message(last_human).strip()
-        if not user_text:
-            return None
-
-        anon_doc = state.get("kb_anon_doc")
-        if anon_doc:
-            return self._anon_priority(state, anon_doc)
-
-        return await self._authenticated_priority(state, messages, user_text, runtime)
-
-    def _anon_priority(
-        self,
-        state: AgentState,
-        anon_doc: dict[str, Any],
-    ) -> dict[str, Any]:
-        path = str(anon_doc.get("path") or "")
-        title = str(anon_doc.get("title") or "uploaded_document")
-        priority = [
-            {
-                "path": path,
-                "score": 1.0,
-                "document_id": None,
-                "title": title,
-                "mentioned": True,
-            }
-        ]
-        update: dict[str, Any] = {
-            "kb_priority": priority,
-        }
-        if self.inject_system_message:
-            new_messages = list(state.get("messages") or [])
-            insert_at = max(len(new_messages) - 1, 0)
-            new_messages.insert(insert_at, _render_priority_message(priority))
-            update["messages"] = new_messages
-        return update
-
-    async def _authenticated_priority(
-        self,
-        state: AgentState,
-        messages: Sequence[BaseMessage],
-        user_text: str,
-        runtime: Runtime[Any] | None = None,
-    ) -> dict[str, Any]:
-        t0 = asyncio.get_event_loop().time()
-
-        # Prefer per-turn mentions from runtime.context (lets a cached graph
-        # serve different turns); fall back to the constructor closure, draining
-        # it after one read so stale mentions can't replay.
-        #
-        # CRITICAL: test ``ctx_mentions is not None``, not truthiness — an empty
-        # list means "this turn has no mentions", not "use the closure".
-        mention_ids: list[int] = []
-        ctx = getattr(runtime, "context", None) if runtime is not None else None
-        ctx_mentions = getattr(ctx, "mentioned_document_ids", None) if ctx else None
-        if ctx_mentions is not None:
-            mention_ids = list(ctx_mentions)
-            if self.mentioned_document_ids:
-                self.mentioned_document_ids = []
-        elif self.mentioned_document_ids:
-            mention_ids = list(self.mentioned_document_ids)
-            self.mentioned_document_ids = []
-
-        # Folder mentions aren't embedded, so they skip hybrid search and are
-        # surfaced only as [USER-MENTIONED] entries. Cloud mode only.
-        folder_mention_ids: list[int] = []
-        if (
-            ctx is not None
-            and getattr(self, "filesystem_mode", FilesystemMode.CLOUD)
-            == FilesystemMode.CLOUD
-        ):
-            ctx_folders = getattr(ctx, "mentioned_folder_ids", None)
-            if ctx_folders:
-                folder_mention_ids = list(ctx_folders)
-
-        # Lazy mode: skip the planner LLM + embedding + hybrid search entirely.
-        # With no explicit mentions there is nothing cheap to surface, so we bail
-        # out early and let the agent decide to call ``search_knowledge_base``.
-        if self.mentions_only:
-            if not mention_ids and not folder_mention_ids:
-                return None
-            planned_query = user_text
-            start_date = end_date = None
-            is_recency = False
-            search_results: list[dict[str, Any]] = []
-            _search_phase_elapsed = 0.0
-        else:
-            (
-                planned_query,
-                start_date,
-                end_date,
-                is_recency,
-            ) = await self._plan_search_inputs(
-                messages=messages,
-                user_text=user_text,
-            )
-
-            _t_search_phase = time.perf_counter()
-            if is_recency:
-                doc_types = _resolve_search_types(
-                    self.available_connectors, self.available_document_types
-                )
-                search_results = await browse_recent_documents(
-                    search_space_id=self.search_space_id,
-                    document_type=doc_types,
-                    top_k=self.top_k,
-                    start_date=start_date,
-                    end_date=end_date,
-                )
-            else:
-                search_results = await search_knowledge_base(
-                    query=planned_query,
-                    search_space_id=self.search_space_id,
-                    available_connectors=self.available_connectors,
-                    available_document_types=self.available_document_types,
-                    top_k=self.top_k,
-                    start_date=start_date,
-                    end_date=end_date,
-                )
-            _search_phase_elapsed = time.perf_counter() - _t_search_phase
-
-        mentioned_results: list[dict[str, Any]] = []
-        if mention_ids:
-            mentioned_results = await fetch_mentioned_documents(
-                document_ids=mention_ids,
-                search_space_id=self.search_space_id,
-            )
-
-        seen_doc_ids: set[int] = set()
-        merged: list[dict[str, Any]] = []
-        for doc in mentioned_results:
-            doc_id = (doc.get("document") or {}).get("id")
-            if isinstance(doc_id, int):
-                seen_doc_ids.add(doc_id)
-            merged.append(doc)
-        for doc in search_results:
-            doc_id = (doc.get("document") or {}).get("id")
-            if isinstance(doc_id, int) and doc_id in seen_doc_ids:
-                continue
-            merged.append(doc)
-
-        _t_materialize = time.perf_counter()
-        priority = await self._materialize_priority(merged)
-
-        if folder_mention_ids:
-            folder_entries = await self._materialize_folder_priority(folder_mention_ids)
-            priority = folder_entries + priority
-        _materialize_elapsed = time.perf_counter() - _t_materialize
-
-        # ``recency=...`` reflects which retrieval path ran (recency browse vs
-        # hybrid search). The planner phase is logged separately by
-        # ``_plan_search_inputs``; here ``search_phase`` and ``materialize``
-        # break down the remaining DB-bound work so a slow turn can be
-        # attributed to planner / search / materialize at a glance.
-        _perf_log.info(
-            "[kb_priority] completed in %.3fs (search_phase=%.3fs materialize=%.3fs "
-            "recency=%s) query=%r priority=%d mentioned=%d folders=%d",
-            asyncio.get_event_loop().time() - t0,
-            _search_phase_elapsed,
-            _materialize_elapsed,
-            is_recency,
-            user_text[:80],
-            len(priority),
-            len(mentioned_results),
-            len(folder_mention_ids),
-        )
-
-        update: dict[str, Any] = {
-            "kb_priority": priority,
-        }
-        if self.inject_system_message:
-            new_messages = list(messages)
-            insert_at = max(len(new_messages) - 1, 0)
-            new_messages.insert(insert_at, _render_priority_message(priority))
-            update["messages"] = new_messages
-        return update
-
-    async def _materialize_folder_priority(
-        self, folder_ids: list[int]
-    ) -> list[dict[str, Any]]:
-        """Resolve mentioned folder ids to canonical-path priority entries.
-
-        Flagged ``mentioned=True`` with ``score=None`` (folders aren't ranked;
-        the agent decides which children to read).
-        """
-        if not folder_ids:
-            return []
-        async with shielded_async_session() as session:
-            index: PathIndex = await build_path_index(session, self.search_space_id)
-            folder_rows = await session.execute(
-                select(Folder.id, Folder.name).where(
-                    Folder.search_space_id == self.search_space_id,
-                    Folder.id.in_(folder_ids),
-                )
-            )
-            folder_titles: dict[int, str] = {
-                row.id: row.name for row in folder_rows.all()
-            }
-
-        entries: list[dict[str, Any]] = []
-        seen: set[int] = set()
-        for folder_id in folder_ids:
-            if folder_id in seen:
-                continue
-            seen.add(folder_id)
-            base = index.folder_paths.get(folder_id)
-            if base is None:
-                logger.debug(
-                    "kb_priority: dropping folder id=%s (missing from path index)",
-                    folder_id,
-                )
-                continue
-            path = base if base.endswith("/") else f"{base}/"
-            entries.append(
-                {
-                    "path": path,
-                    "score": None,
-                    "document_id": None,
-                    "folder_id": folder_id,
-                    "title": folder_titles.get(folder_id, ""),
-                    "mentioned": True,
-                }
-            )
-        return entries
-
-    async def _materialize_priority(
-        self, merged: list[dict[str, Any]]
-    ) -> list[dict[str, Any]]:
-        """Resolve canonical paths for the priority list."""
-        priority: list[dict[str, Any]] = []
-
-        if not merged:
-            return priority
-
-        _t0 = time.perf_counter()
-        async with shielded_async_session() as session:
-            index: PathIndex = await build_path_index(session, self.search_space_id)
-            doc_ids = [
-                (doc.get("document") or {}).get("id")
-                for doc in merged
-                if isinstance(doc, dict)
-            ]
-            doc_ids = [doc_id for doc_id in doc_ids if isinstance(doc_id, int)]
-            folder_by_doc_id: dict[int, int | None] = {}
-            if doc_ids:
-                folder_rows = await session.execute(
-                    select(Document.id, Document.folder_id).where(
-                        Document.search_space_id == self.search_space_id,
-                        Document.id.in_(doc_ids),
-                    )
-                )
-                folder_by_doc_id = {row.id: row.folder_id for row in folder_rows.all()}
-
-        for doc in merged:
-            doc_meta = doc.get("document") or {}
-            doc_id = doc_meta.get("id")
-            title = doc_meta.get("title") or "untitled"
-            folder_id = (
-                folder_by_doc_id.get(doc_id)
-                if isinstance(doc_id, int)
-                else doc_meta.get("folder_id")
-            )
-            path = doc_to_virtual_path(
-                doc_id=doc_id if isinstance(doc_id, int) else None,
-                title=str(title),
-                folder_id=folder_id if isinstance(folder_id, int) else None,
-                index=index,
-            )
-            priority.append(
-                {
-                    "path": path,
-                    "score": float(doc.get("score") or 0.0),
-                    "document_id": doc_id if isinstance(doc_id, int) else None,
-                    "title": str(title),
-                    "mentioned": bool(doc.get("_user_mentioned")),
-                }
-            )
-        _perf_log.info(
-            "[kb_priority.materialize] db=%.3fs docs=%d",
-            time.perf_counter() - _t0,
-            len(merged),
-        )
-        return priority
-
-
-__all__ = [
-    "KnowledgePriorityMiddleware",
-    "browse_recent_documents",
-    "fetch_mentioned_documents",
-    "search_knowledge_base",
-]
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/filesystem_state.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/filesystem_state.py
index f0708ccaf..b00670615 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/filesystem_state.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/filesystem_state.py
@@ -13,7 +13,6 @@ extra fields needed to implement Postgres-backed virtual filesystem semantics:
 * ``dirty_paths`` — paths whose state file content differs from DB.
 * ``dirty_path_tool_calls`` — sidecar map ``path -> latest tool_call_id`` for
   dirty paths; used to bind the per-path snapshot to an action_id.
-* ``kb_priority`` — top-K priority hints rendered into a system message.
 * ``kb_anon_doc`` — Redis-loaded anonymous document (if any).
 * ``citation_registry`` — per-conversation ``[n]`` -> source map for citations.
 * ``tree_version`` — bumped by persistence; invalidates the tree render cache.
@@ -69,14 +68,6 @@ class PendingDelete(TypedDict, total=False):
     tool_call_id: str
 
 
-class KbPriorityEntry(TypedDict, total=False):
-    path: str
-    score: float
-    document_id: int | None
-    title: str
-    mentioned: bool
-
-
 class KbAnonDoc(TypedDict, total=False):
     """In-memory anonymous-session document loaded from Redis."""
 
@@ -161,9 +152,6 @@ class SurfSenseFilesystemState(FilesystemState):
     to the latest action_id (the one the user is most likely to revert).
     """
 
-    kb_priority: NotRequired[Annotated[list[KbPriorityEntry], _replace_reducer]]
-    """Top-K priority hints rendered as a system message before the user turn."""
-
     kb_anon_doc: NotRequired[Annotated[KbAnonDoc | None, _replace_reducer]]
     """Anonymous-session document loaded from Redis (read-only, no DB row)."""
 
@@ -212,7 +200,6 @@ class SurfSenseFilesystemState(FilesystemState):
 
 __all__ = [
     "KbAnonDoc",
-    "KbPriorityEntry",
     "PendingDelete",
     "PendingMove",
     "SurfSenseFilesystemState",
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/reducers.py b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/reducers.py
index 8a9590723..3a9cc67b1 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/reducers.py
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/shared/state/reducers.py
@@ -2,7 +2,7 @@
 
 These reducers back the extra state fields used by the cloud-mode filesystem
 agent (`cwd`, `staged_dirs`, `pending_moves`, `dirty_paths`, `doc_id_by_path`,
-`kb_priority`, `kb_anon_doc`, `tree_version`).
+`kb_anon_doc`, `tree_version`).
 
 Tools mutate these fields ONLY via `Command(update={...})` returns; the
 reducers are responsible for merging successive updates atomically and for
@@ -258,7 +258,6 @@ def _initial_filesystem_state() -> dict[str, Any]:
         "doc_id_by_path": {},
         "dirty_paths": [],
         "dirty_path_tool_calls": {},
-        "kb_priority": [],
         "kb_anon_doc": None,
         "tree_version": 0,
     }
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md
index c77bd5bb4..04be2f321 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md
@@ -6,10 +6,9 @@ You are the SurfSense knowledge base specialist for the user's `/documents/` wor
 
 - If the supervisor already provided a precise path (e.g. `/documents/notes/2026-05-11.md`), use it directly — skip the lookup steps below.
 - Otherwise, most requests reference documents by description (`"my meeting notes from last week"`, `"the design doc"`). Resolve them yourself:
-  1. Consult `<priority_documents>` — it's a hint about top-K likely matches, not a directive. Skip when the ranked entries don't fit the task.
-  2. Walk `<workspace_tree>` for descriptive folder/filename matches.
-  3. Use the `glob` tool for filename patterns the tree didn't surface, and the `grep` tool when the description points at *content* rather than a name.
-  4. Only return `status=blocked` with `missing_fields=["path"]` when the description is genuinely ambiguous after a thorough lookup.
+  1. Walk `<workspace_tree>` for descriptive folder/filename matches.
+  2. Use the `glob` tool for filename patterns the tree didn't surface, and the `grep` tool when the description points at *content* rather than a name.
+  3. Only return `status=blocked` with `missing_fields=["path"]` when the description is genuinely ambiguous after a thorough lookup.
 
 For writes (where you choose the path yourself):
 
@@ -89,7 +88,7 @@ A KB document reads back like this — only the bracketed `[n]` is a citation la
 **Example 2 — edit by inference:**
 
 - *Supervisor task:* `"Add a bullet about the new feature flag to my Q2 roadmap"`
-- *You:* search for the roadmap doc — check `<priority_documents>` and `<workspace_tree>` first; if neither surfaces it, widen with the `glob` tool (try filename patterns the user's language suggests) or the `grep` tool (search by content). Suppose `<priority_documents>` hits `/documents/planning/q2-roadmap.md` → `read_file("/documents/planning/q2-roadmap.md")` → `edit_file("/documents/planning/q2-roadmap.md", old, new)` → success.
+- *You:* search for the roadmap doc — check `<workspace_tree>` first; if it doesn't surface the doc, widen with the `glob` tool (try filename patterns the user's language suggests) or the `grep` tool (search by content). Suppose the tree hits `/documents/planning/q2-roadmap.md` → `read_file("/documents/planning/q2-roadmap.md")` → `edit_file("/documents/planning/q2-roadmap.md", old, new)` → success.
 - *Output:* `status=success`, evidence includes path and the inserted snippet.
 
 **Example 3 — blocked, multiple candidates:**
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md
index d10a08282..e0f368bb2 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md
@@ -9,8 +9,7 @@ You are the SurfSense workspace specialist for the user's local folders.
   1. If you do not know which mounts exist, call `ls('/')` first.
   2. Walk likely folders with the `ls` and `list_tree` tools.
   3. Use the `glob` tool for filename patterns; use the `grep` tool when the description points at *content* rather than a name.
-  4. `<priority_documents>` lists top-K cloud-ingested docs, not local files — consult it only when the task spans both worlds (e.g. drafting a local note from a Notion source). Skip otherwise.
-  5. Only return `status=blocked` with `missing_fields=["path"]` when the description is genuinely ambiguous after a thorough lookup.
+  4. Only return `status=blocked` with `missing_fields=["path"]` when the description is genuinely ambiguous after a thorough lookup.
 
 For writes (where you choose the path yourself):
 
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md
index ae6ba3cfb..10dd0c763 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md
@@ -6,9 +6,8 @@ You answer workspace questions for another agent. The end user does **not** see
 
 The caller's question often references documents by description (`"my meeting notes from last week"`, `"the design doc"`). Resolve them yourself:
 
-1. Consult `<priority_documents>` — a hint about top-K likely matches, not a directive. Skip when the ranked entries don't fit.
-2. Walk `<workspace_tree>` for descriptive folder/filename matches.
-3. Use `glob` for filename patterns the tree didn't surface, and `grep` when the description points at *content* rather than a name.
+1. Walk `<workspace_tree>` for descriptive folder/filename matches.
+2. Use `glob` for filename patterns the tree didn't surface, and `grep` when the description points at *content* rather than a name.
 
 If a precise path was already given, use it directly — skip the lookup.
 
diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_desktop.md b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_desktop.md
index 8704754a2..6e11aea4f 100644
--- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_desktop.md
+++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_desktop.md
@@ -9,7 +9,6 @@ The caller's question often references files by description (`"my meeting notes
 1. If you do not know which mounts exist, call `ls('/')` first.
 2. Walk likely folders with the `ls` and `list_tree` tools.
 3. Use `glob` for filename patterns; use `grep` when the description points at *content* rather than a name.
-4. `<priority_documents>` lists top-K cloud-ingested docs, not local files — consult it only when the task spans both worlds (e.g. drafting a local note from a Notion source). Skip otherwise.
 
 If a precise path was already given, use it directly — skip the lookup.
 
diff --git a/surfsense_backend/app/agents/chat/runtime/mention_resolver.py b/surfsense_backend/app/agents/chat/runtime/mention_resolver.py
index a47ed8f36..4f2f47b24 100644
--- a/surfsense_backend/app/agents/chat/runtime/mention_resolver.py
+++ b/surfsense_backend/app/agents/chat/runtime/mention_resolver.py
@@ -74,8 +74,9 @@ class ResolvedMentionSet:
     ``@Project``).
 
     ``mentioned_document_ids`` is an ordered, deduped list consumed by
-    the priority middleware downstream — see
-    ``KnowledgePriorityMiddleware._compute_priority_paths``.
+    the on-demand ``search_knowledge_base`` tool downstream (via
+    ``referenced_document_ids``) to pin @-mentioned docs into the
+    retrieval scope.
     """
 
     mentions: list[ResolvedMention] = field(default_factory=list)
@@ -113,8 +114,8 @@ async def resolve_mentions(
 
     * Legacy clients that haven't migrated to the unified chip list
       still send the id arrays — we treat the union as authoritative.
-    * The id arrays are the canonical input to
-      ``KnowledgePriorityMiddleware`` (via ``SurfSenseContextSchema``);
+    * The id arrays are the canonical input to the retrieval scope
+      (via ``SurfSenseContextSchema`` → ``referenced_document_ids``);
       returning the deduped, validated lists lets the route forward
       them unchanged.
 
diff --git a/surfsense_backend/app/agents/chat/runtime/path_resolver.py b/surfsense_backend/app/agents/chat/runtime/path_resolver.py
index 861f48ee7..84282b63b 100644
--- a/surfsense_backend/app/agents/chat/runtime/path_resolver.py
+++ b/surfsense_backend/app/agents/chat/runtime/path_resolver.py
@@ -4,7 +4,6 @@ This module is the single source of truth for mapping ``Document`` rows to
 virtual paths under ``/documents/`` and back. It is used by:
 
 * :class:`KnowledgeTreeMiddleware` (rendering the workspace tree)
-* :class:`KnowledgePriorityMiddleware` (computing priority paths)
 * :class:`KBPostgresBackend` (``als_info`` / ``aread`` / move operations)
 * :class:`KnowledgeBasePersistenceMiddleware` (resolving moves and creates)
 
diff --git a/surfsense_backend/app/agents/chat/shared/context.py b/surfsense_backend/app/agents/chat/shared/context.py
index 50b761f5b..b543eb6b6 100644
--- a/surfsense_backend/app/agents/chat/shared/context.py
+++ b/surfsense_backend/app/agents/chat/shared/context.py
@@ -11,9 +11,9 @@ MUST live on this context object instead of being captured into a
 middleware ``__init__`` closure. Middlewares read fields back via
 ``runtime.context.<field>``; tools read them via ``runtime.context``.
 
-This object is read inside both ``KnowledgePriorityMiddleware`` (for
-``mentioned_document_ids``) and any future middleware that needs
-per-request state without invalidating the compiled-agent cache.
+This object is read by the ``search_knowledge_base`` tool (for
+``mentioned_document_ids``) and any middleware that needs per-request
+state without invalidating the compiled-agent cache.
 """
 
 from __future__ import annotations
@@ -43,13 +43,12 @@ class SurfSenseContextSchema:
     Phase 1.5 fields:
         search_space_id: Search space the request is scoped to.
         mentioned_document_ids: KB documents the user @-mentioned this turn.
-            Read by ``KnowledgePriorityMiddleware`` to seed its priority
-            list. Stays out of the compiled-agent cache key — that's the
-            whole point of putting it here.
+            Read by the ``search_knowledge_base`` tool to pin these docs
+            into the retrieval scope. Stays out of the compiled-agent cache
+            key — that's the whole point of putting it here.
         mentioned_folder_ids: KB folders the user @-mentioned this turn
-            (cloud filesystem mode). Surfaced as ``[USER-MENTIONED]``
-            entries in ``<priority_documents>`` so the agent prioritises
-            walking those folders with ``ls`` / ``find_documents``.
+            (cloud filesystem mode). Pinned into the ``search_knowledge_base``
+            retrieval scope so matches from those folders are prioritised.
         file_operation_contract: One-shot file operation contract for the
             upcoming turn (reserved; not currently populated).
         turn_id / request_id: Correlation IDs surfaced by the streaming
diff --git a/surfsense_backend/app/agents/chat/shared/middleware/compaction.py b/surfsense_backend/app/agents/chat/shared/middleware/compaction.py
index f91af6a70..907d2f27b 100644
--- a/surfsense_backend/app/agents/chat/shared/middleware/compaction.py
+++ b/surfsense_backend/app/agents/chat/shared/middleware/compaction.py
@@ -4,7 +4,7 @@ Extends ``SummarizationMiddleware`` with three SurfSense behaviors:
 
 1. A structured summary template (:data:`SURFSENSE_SUMMARY_PROMPT`) instead of
    the base freeform prompt.
-2. Protected SystemMessages (injected hints like ``<priority_documents>``) are
+2. Protected SystemMessages (injected hints like ``<workspace_tree>``) are
    kept verbatim instead of being summarized away.
 3. ``content=None`` is sanitized before ``get_buffer_string`` (some providers
    stream tool-only AIMessages with ``None`` content, which would crash it).
@@ -77,7 +77,6 @@ Respond ONLY with the structured summary. Do not include any text before or afte
 # compaction step happens *before* re-injection in some paths, so we
 # must preserve them verbatim across the cutoff.
 PROTECTED_SYSTEM_PREFIXES: tuple[str, ...] = (
-    "<priority_documents>",  # KnowledgePriorityMiddleware
     "<workspace_tree>",  # KnowledgeTreeMiddleware
     "<file_operation_contract>",  # reserved file-operation contract prefix
     "<user_memory>",  # MemoryInjectionMiddleware
diff --git a/surfsense_backend/app/automations/actions/builtin/agent_task/invoke.py b/surfsense_backend/app/automations/actions/builtin/agent_task/invoke.py
index b2f441961..e1ba32ce9 100644
--- a/surfsense_backend/app/automations/actions/builtin/agent_task/invoke.py
+++ b/surfsense_backend/app/automations/actions/builtin/agent_task/invoke.py
@@ -78,7 +78,7 @@ async def _resolve_mention_context(
     Automation always runs in cloud filesystem mode, so we mirror the chat
     ``new_chat`` flow: substitute ``@title`` tokens with canonical
     ``/documents/...`` paths, prepend a ``<mentioned_connectors>`` block, and
-    build a ``SurfSenseContextSchema`` that ``KnowledgePriorityMiddleware``
+    build a ``SurfSenseContextSchema`` that the ``search_knowledge_base`` tool
     reads via ``runtime.context``. Returns ``(query, None)`` unchanged when
     there are no mentions.
     """
@@ -210,7 +210,7 @@ async def run_agent_task(
             runtime_context.turn_id = turn_id
 
         # The compiled graph declares ``context_schema=SurfSenseContextSchema``;
-        # mentions only reach ``KnowledgePriorityMiddleware`` via ``context=``.
+        # mentions only reach the ``search_knowledge_base`` tool via ``context=``.
         invoke_kwargs: dict[str, Any] = {"config": config}
         if runtime_context is not None:
             invoke_kwargs["context"] = runtime_context
diff --git a/surfsense_backend/app/routes/agent_flags_route.py b/surfsense_backend/app/routes/agent_flags_route.py
index 222909c59..c57a6b5ef 100644
--- a/surfsense_backend/app/routes/agent_flags_route.py
+++ b/surfsense_backend/app/routes/agent_flags_route.py
@@ -53,7 +53,6 @@ class AgentFeatureFlagsRead(BaseModel):
 
     enable_skills: bool
     enable_specialized_subagents: bool
-    enable_kb_planner_runnable: bool
 
     enable_action_log: bool
     enable_revert_route: bool
diff --git a/surfsense_backend/app/schemas/new_chat.py b/surfsense_backend/app/schemas/new_chat.py
index d45303e97..e486b3dda 100644
--- a/surfsense_backend/app/schemas/new_chat.py
+++ b/surfsense_backend/app/schemas/new_chat.py
@@ -246,10 +246,10 @@ class NewChatRequest(BaseModel):
         description=(
             "Optional knowledge-base folder IDs the user mentioned with "
             "@. Resolved to virtual paths (``/documents/.../``) by "
-            "``mention_resolver`` and surfaced to the agent via "
-            "(a) backtick-wrapped substitution in ``user_query`` and "
-            "(b) a ``[USER-MENTIONED]`` entry in ``<priority_documents>``. "
-            "The agent's ``ls`` tool can then walk the folder itself."
+            "``mention_resolver``, surfaced to the agent via backtick-wrapped "
+            "substitution in ``user_query`` and pinned into the "
+            "``search_knowledge_base`` retrieval scope. The agent's ``ls`` "
+            "tool can then walk the folder itself."
         ),
     )
     mentioned_documents: list[MentionedDocumentInfo] | None = Field(
diff --git a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/runtime_context.py b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/runtime_context.py
index 195a16b1e..5ef2b8ad1 100644
--- a/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/runtime_context.py
+++ b/surfsense_backend/app/tasks/chat/streaming/flows/new_chat/runtime_context.py
@@ -22,7 +22,8 @@ def build_new_chat_runtime_context(
     request_id: str | None,
     turn_id: str,
 ) -> SurfSenseContextSchema:
-    """``mentioned_document_ids`` is consumed by ``KnowledgePriorityMiddleware``.
+    """``mentioned_document_ids`` is consumed by the ``search_knowledge_base``
+    tool (via ``referenced_document_ids``) to pin mentioned docs into scope.
 
     ``accepted_folder_ids`` (post-resolve) wins over the raw
     ``mentioned_folder_ids`` from the request: the resolver drops chips that
diff --git a/surfsense_backend/tests/integration/retriever/test_knowledge_search_date_filters.py b/surfsense_backend/tests/integration/retriever/test_knowledge_search_date_filters.py
deleted file mode 100644
index ce076b147..000000000
--- a/surfsense_backend/tests/integration/retriever/test_knowledge_search_date_filters.py
+++ /dev/null
@@ -1,61 +0,0 @@
-"""Integration smoke tests for KB search query/date scoping."""
-
-from __future__ import annotations
-
-from contextlib import asynccontextmanager
-from datetime import UTC, datetime, timedelta
-
-import numpy as np
-import pytest
-
-from app.agents.chat.multi_agent_chat.shared.middleware import knowledge_search as ks
-from app.agents.chat.multi_agent_chat.shared.middleware.knowledge_search import (
-    search_knowledge_base,
-)
-
-from .conftest import DUMMY_EMBEDDING
-
-pytestmark = pytest.mark.integration
-
-
-async def test_search_knowledge_base_applies_date_filters(
-    db_session,
-    seed_date_filtered_docs,
-    monkeypatch,
-):
-    """Date filters should remove older matching documents from scoped KB results."""
-
-    @asynccontextmanager
-    async def fake_shielded_async_session():
-        yield db_session
-
-    monkeypatch.setattr(ks, "shielded_async_session", fake_shielded_async_session)
-    monkeypatch.setattr(
-        ks, "embed_texts", lambda texts: [np.array(DUMMY_EMBEDDING) for _ in texts]
-    )
-
-    space_id = seed_date_filtered_docs["search_space"].id
-    recent_cutoff = datetime.now(UTC) - timedelta(days=30)
-
-    unfiltered_results = await search_knowledge_base(
-        query="ocv meeting decisions",
-        search_space_id=space_id,
-        available_document_types=["FILE"],
-        top_k=10,
-    )
-    filtered_results = await search_knowledge_base(
-        query="ocv meeting decisions",
-        search_space_id=space_id,
-        available_document_types=["FILE"],
-        top_k=10,
-        start_date=recent_cutoff,
-        end_date=datetime.now(UTC),
-    )
-
-    unfiltered_ids = {result["document"]["id"] for result in unfiltered_results}
-    filtered_ids = {result["document"]["id"] for result in filtered_results}
-
-    assert seed_date_filtered_docs["recent_doc"].id in unfiltered_ids
-    assert seed_date_filtered_docs["old_doc"].id in unfiltered_ids
-    assert seed_date_filtered_docs["recent_doc"].id in filtered_ids
-    assert seed_date_filtered_docs["old_doc"].id not in filtered_ids
diff --git a/surfsense_backend/tests/unit/agents/new_chat/test_compaction.py b/surfsense_backend/tests/unit/agents/new_chat/test_compaction.py
index 2ac462959..9db13ea8a 100644
--- a/surfsense_backend/tests/unit/agents/new_chat/test_compaction.py
+++ b/surfsense_backend/tests/unit/agents/new_chat/test_compaction.py
@@ -38,7 +38,7 @@ class TestIsProtectedSystemMessage:
         )
 
     def test_tolerates_leading_whitespace(self) -> None:
-        msg = SystemMessage(content="   \n<priority_documents>\n...")
+        msg = SystemMessage(content="   \n<workspace_tree>\n...")
         assert _is_protected_system_message(msg) is True
 
 
@@ -89,7 +89,7 @@ class TestPartitionMessages:
 
     def test_protected_system_message_preserved_even_in_summarize_half(self) -> None:
         partitioner = self._build_partitioner()
-        protected = SystemMessage(content="<priority_documents>\n...")
+        protected = SystemMessage(content="<workspace_tree>\n...")
         msgs = [
             HumanMessage(content="old human"),
             AIMessage(content="old ai"),
diff --git a/surfsense_backend/tests/unit/agents/new_chat/test_feature_flags.py b/surfsense_backend/tests/unit/agents/new_chat/test_feature_flags.py
index e715a80c6..627dcb99c 100644
--- a/surfsense_backend/tests/unit/agents/new_chat/test_feature_flags.py
+++ b/surfsense_backend/tests/unit/agents/new_chat/test_feature_flags.py
@@ -28,7 +28,6 @@ def _clear_all(monkeypatch: pytest.MonkeyPatch) -> None:
         "SURFSENSE_ENABLE_LLM_TOOL_SELECTOR",
         "SURFSENSE_ENABLE_SKILLS",
         "SURFSENSE_ENABLE_SPECIALIZED_SUBAGENTS",
-        "SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE",
         "SURFSENSE_ENABLE_ACTION_LOG",
         "SURFSENSE_ENABLE_REVERT_ROUTE",
         "SURFSENSE_ENABLE_PLUGIN_LOADER",
@@ -57,7 +56,6 @@ def test_defaults_match_shipped_agent_stack(monkeypatch: pytest.MonkeyPatch) ->
     assert flags.enable_llm_tool_selector is False
     assert flags.enable_skills is True
     assert flags.enable_specialized_subagents is True
-    assert flags.enable_kb_planner_runnable is True
     assert flags.enable_action_log is True
     assert flags.enable_revert_route is True
     assert flags.enable_plugin_loader is False
@@ -122,7 +120,6 @@ def test_each_flag_can_be_set_independently(monkeypatch: pytest.MonkeyPatch) ->
         "enable_llm_tool_selector": "SURFSENSE_ENABLE_LLM_TOOL_SELECTOR",
         "enable_skills": "SURFSENSE_ENABLE_SKILLS",
         "enable_specialized_subagents": "SURFSENSE_ENABLE_SPECIALIZED_SUBAGENTS",
-        "enable_kb_planner_runnable": "SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE",
         "enable_action_log": "SURFSENSE_ENABLE_ACTION_LOG",
         "enable_revert_route": "SURFSENSE_ENABLE_REVERT_ROUTE",
         "enable_plugin_loader": "SURFSENSE_ENABLE_PLUGIN_LOADER",
diff --git a/surfsense_backend/tests/unit/agents/new_chat/test_mention_resolver.py b/surfsense_backend/tests/unit/agents/new_chat/test_mention_resolver.py
index 4130c9d4e..6aebee093 100644
--- a/surfsense_backend/tests/unit/agents/new_chat/test_mention_resolver.py
+++ b/surfsense_backend/tests/unit/agents/new_chat/test_mention_resolver.py
@@ -90,8 +90,8 @@ class TestSubstituteInText:
 
 class TestResolveMentions:
     """``resolve_mentions`` resolves chip ids → virtual paths and emits
-    a ``ResolvedMentionSet`` whose id partitions feed
-    ``KnowledgePriorityMiddleware``."""
+    a ``ResolvedMentionSet`` whose id partitions feed the
+    ``search_knowledge_base`` retrieval scope."""
 
     @pytest.mark.asyncio
     async def test_returns_empty_when_no_mentions(self):
diff --git a/surfsense_backend/tests/unit/agents/new_chat/test_state_reducers.py b/surfsense_backend/tests/unit/agents/new_chat/test_state_reducers.py
index 7398fce6a..f5d322781 100644
--- a/surfsense_backend/tests/unit/agents/new_chat/test_state_reducers.py
+++ b/surfsense_backend/tests/unit/agents/new_chat/test_state_reducers.py
@@ -161,7 +161,6 @@ class TestInitialFilesystemState:
         assert state["doc_id_by_path"] == {}
         assert state["dirty_paths"] == []
         assert state["dirty_path_tool_calls"] == {}
-        assert state["kb_priority"] == []
         assert state["kb_anon_doc"] is None
         assert state["tree_version"] == 0
 
diff --git a/surfsense_backend/tests/unit/middleware/test_knowledge_search.py b/surfsense_backend/tests/unit/middleware/test_knowledge_search.py
deleted file mode 100644
index b128c35e7..000000000
--- a/surfsense_backend/tests/unit/middleware/test_knowledge_search.py
+++ /dev/null
@@ -1,604 +0,0 @@
-"""Unit tests for knowledge_search middleware helpers."""
-
-import json
-
-import pytest
-from langchain_core.messages import AIMessage, HumanMessage
-
-from app.agents.chat.multi_agent_chat.shared.middleware import knowledge_search as ks
-from app.agents.chat.multi_agent_chat.shared.middleware.knowledge_search import (
-    KBSearchPlan,
-    KnowledgePriorityMiddleware,
-    _normalize_optional_date_range,
-    _parse_kb_search_plan_response,
-    _render_recent_conversation,
-    _resolve_search_types,
-)
-
-pytestmark = pytest.mark.unit
-
-
-# ── _resolve_search_types ──────────────────────────────────────────────
-
-
-class TestResolveSearchTypes:
-    def test_returns_none_when_no_inputs(self):
-        assert _resolve_search_types(None, None) is None
-
-    def test_returns_none_when_both_empty(self):
-        assert _resolve_search_types([], []) is None
-
-    def test_includes_legacy_type_for_google_gmail(self):
-        result = _resolve_search_types(["GOOGLE_GMAIL_CONNECTOR"], None)
-        assert "GOOGLE_GMAIL_CONNECTOR" in result
-        assert "COMPOSIO_GMAIL_CONNECTOR" in result
-
-    def test_includes_legacy_type_for_google_drive(self):
-        result = _resolve_search_types(None, ["GOOGLE_DRIVE_FILE"])
-        assert "GOOGLE_DRIVE_FILE" in result
-        assert "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" in result
-
-    def test_includes_legacy_type_for_google_calendar(self):
-        result = _resolve_search_types(["GOOGLE_CALENDAR_CONNECTOR"], None)
-        assert "GOOGLE_CALENDAR_CONNECTOR" in result
-        assert "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR" in result
-
-    def test_no_legacy_expansion_for_unrelated_types(self):
-        result = _resolve_search_types(["FILE", "NOTE"], None)
-        assert set(result) == {"FILE", "NOTE"}
-
-    def test_combines_connectors_and_document_types(self):
-        result = _resolve_search_types(["FILE"], ["NOTE", "CRAWLED_URL"])
-        assert {"FILE", "NOTE", "CRAWLED_URL"}.issubset(set(result))
-
-    def test_deduplicates(self):
-        result = _resolve_search_types(["FILE", "FILE"], ["FILE"])
-        assert result.count("FILE") == 1
-
-
-# ── planner parsing / date normalization ───────────────────────────────
-
-
-class TestPlannerHelpers:
-    def test_parse_kb_search_plan_response_accepts_plain_json(self):
-        plan = _parse_kb_search_plan_response(
-            json.dumps(
-                {
-                    "optimized_query": "ocv meeting decisions summary",
-                    "start_date": "2026-03-01",
-                    "end_date": "2026-03-31",
-                }
-            )
-        )
-        assert plan.optimized_query == "ocv meeting decisions summary"
-        assert plan.start_date == "2026-03-01"
-        assert plan.end_date == "2026-03-31"
-
-    def test_parse_kb_search_plan_response_accepts_fenced_json(self):
-        plan = _parse_kb_search_plan_response(
-            """```json
-            {"optimized_query":"deel founders guide","start_date":null,"end_date":null}
-            ```"""
-        )
-        assert plan.optimized_query == "deel founders guide"
-        assert plan.start_date is None
-        assert plan.end_date is None
-
-    def test_normalize_optional_date_range_returns_none_when_absent(self):
-        start_date, end_date = _normalize_optional_date_range(None, None)
-        assert start_date is None
-        assert end_date is None
-
-    def test_normalize_optional_date_range_resolves_single_bound(self):
-        start_date, end_date = _normalize_optional_date_range("2026-03-01", None)
-        assert start_date is not None
-        assert end_date is not None
-        assert start_date.date().isoformat() == "2026-03-01"
-        assert end_date >= start_date
-
-
-class FakeLLM:
-    def __init__(self, response_text: str):
-        self.response_text = response_text
-        self.calls: list[dict] = []
-
-    async def ainvoke(self, messages, config=None):
-        self.calls.append({"messages": messages, "config": config})
-        return AIMessage(content=self.response_text)
-
-
-class FakeBudgetLLM:
-    def __init__(self, *, max_input_tokens: int):
-        self._max_input_tokens_value = max_input_tokens
-
-    def _get_max_input_tokens(self) -> int:
-        return self._max_input_tokens_value
-
-    def _count_tokens(self, messages) -> int:
-        # Deterministic, simple proxy for tests: count characters as tokens.
-        return sum(len(msg.get("content", "")) for msg in messages)
-
-
-class TestKnowledgePriorityMiddlewarePlanner:
-    @pytest.fixture(autouse=True)
-    def _disable_planner_runnable(self, monkeypatch):
-        # ``FakeLLM`` is a duck-typed mock; ``create_agent`` (used when the
-        # planner Runnable path is enabled) calls ``.bind()`` on the LLM,
-        # which the mock does not implement. Pin the flag off so the
-        # planner falls through to the legacy ``self.llm.ainvoke`` path
-        # these tests assert against (``llm.calls[0]["config"]``).
-        monkeypatch.setenv("SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE", "false")
-
-    def test_render_recent_conversation_prefers_latest_messages_under_budget(self):
-        messages = [
-            HumanMessage(content="old user context " * 40),
-            AIMessage(content="old assistant answer " * 35),
-            HumanMessage(content="recent user context " * 20),
-            AIMessage(content="recent assistant answer " * 18),
-            HumanMessage(content="latest question"),
-        ]
-
-        rendered = _render_recent_conversation(
-            messages,
-            llm=FakeBudgetLLM(max_input_tokens=900),
-            user_text="latest question",
-        )
-
-        assert "recent user context" in rendered
-        assert "recent assistant answer" in rendered
-        assert "latest question" not in rendered
-        assert rendered.index("recent user context") < rendered.index(
-            "recent assistant answer"
-        )
-
-    def test_render_recent_conversation_falls_back_to_legacy_without_budgeting(self):
-        messages = [
-            HumanMessage(content="message one"),
-            AIMessage(content="message two"),
-            HumanMessage(content="latest question"),
-        ]
-
-        rendered = _render_recent_conversation(
-            messages,
-            llm=None,
-            user_text="latest question",
-        )
-
-        assert "user: message one" in rendered
-        assert "assistant: message two" in rendered
-        assert "latest question" not in rendered
-
-    async def test_middleware_uses_optimized_query_and_dates(self, monkeypatch):
-        captured: dict = {}
-
-        async def fake_search_knowledge_base(**kwargs):
-            captured.update(kwargs)
-            return []
-
-        monkeypatch.setattr(
-            ks,
-            "search_knowledge_base",
-            fake_search_knowledge_base,
-        )
-
-        llm = FakeLLM(
-            json.dumps(
-                {
-                    "optimized_query": "ocv meeting decisions action items",
-                    "start_date": "2026-03-01",
-                    "end_date": "2026-03-31",
-                }
-            )
-        )
-        middleware = KnowledgePriorityMiddleware(llm=llm, search_space_id=37)
-
-        result = await middleware.abefore_agent(
-            {
-                "messages": [
-                    HumanMessage(content="what happened in our OCV meeting last month?")
-                ]
-            },
-            runtime=None,
-        )
-
-        assert result is not None
-        assert captured["query"] == "ocv meeting decisions action items"
-        assert captured["start_date"] is not None
-        assert captured["end_date"] is not None
-        assert captured["start_date"].date().isoformat() == "2026-03-01"
-        assert captured["end_date"].date().isoformat() == "2026-03-31"
-        assert llm.calls[0]["config"] == {"tags": ["surfsense:internal"]}
-
-    async def test_middleware_falls_back_when_planner_returns_invalid_json(
-        self,
-        monkeypatch,
-    ):
-        captured: dict = {}
-
-        async def fake_search_knowledge_base(**kwargs):
-            captured.update(kwargs)
-            return []
-
-        monkeypatch.setattr(
-            ks,
-            "search_knowledge_base",
-            fake_search_knowledge_base,
-        )
-
-        middleware = KnowledgePriorityMiddleware(
-            llm=FakeLLM("not json"),
-            search_space_id=37,
-        )
-
-        await middleware.abefore_agent(
-            {"messages": [HumanMessage(content="summarize founders guide by deel")]},
-            runtime=None,
-        )
-
-        assert captured["query"] == "summarize founders guide by deel"
-        assert captured["start_date"] is None
-        assert captured["end_date"] is None
-
-    async def test_middleware_passes_none_dates_when_planner_returns_nulls(
-        self,
-        monkeypatch,
-    ):
-        captured: dict = {}
-
-        async def fake_search_knowledge_base(**kwargs):
-            captured.update(kwargs)
-            return []
-
-        monkeypatch.setattr(
-            ks,
-            "search_knowledge_base",
-            fake_search_knowledge_base,
-        )
-
-        middleware = KnowledgePriorityMiddleware(
-            llm=FakeLLM(
-                json.dumps(
-                    {
-                        "optimized_query": "deel founders guide summary",
-                        "start_date": None,
-                        "end_date": None,
-                    }
-                )
-            ),
-            search_space_id=37,
-        )
-
-        await middleware.abefore_agent(
-            {"messages": [HumanMessage(content="summarize founders guide by deel")]},
-            runtime=None,
-        )
-
-        assert captured["query"] == "deel founders guide summary"
-        assert captured["start_date"] is None
-        assert captured["end_date"] is None
-
-    async def test_middleware_routes_to_recency_browse_when_flagged(
-        self,
-        monkeypatch,
-    ):
-        """When the planner sets is_recency_query=true, browse_recent_documents
-        is called instead of search_knowledge_base."""
-        browse_captured: dict = {}
-        search_called = False
-
-        async def fake_browse_recent_documents(**kwargs):
-            browse_captured.update(kwargs)
-            return []
-
-        async def fake_search_knowledge_base(**kwargs):
-            nonlocal search_called
-            search_called = True
-            return []
-
-        monkeypatch.setattr(
-            ks,
-            "browse_recent_documents",
-            fake_browse_recent_documents,
-        )
-        monkeypatch.setattr(
-            ks,
-            "search_knowledge_base",
-            fake_search_knowledge_base,
-        )
-
-        llm = FakeLLM(
-            json.dumps(
-                {
-                    "optimized_query": "latest uploaded file",
-                    "start_date": None,
-                    "end_date": None,
-                    "is_recency_query": True,
-                }
-            )
-        )
-        middleware = KnowledgePriorityMiddleware(llm=llm, search_space_id=42)
-
-        result = await middleware.abefore_agent(
-            {"messages": [HumanMessage(content="what's my latest file?")]},
-            runtime=None,
-        )
-
-        assert result is not None
-        assert browse_captured["search_space_id"] == 42
-        assert not search_called
-
-    async def test_middleware_uses_hybrid_search_when_not_recency(
-        self,
-        monkeypatch,
-    ):
-        """When is_recency_query is false (default), hybrid search is used."""
-        search_captured: dict = {}
-        browse_called = False
-
-        async def fake_browse_recent_documents(**kwargs):
-            nonlocal browse_called
-            browse_called = True
-            return []
-
-        async def fake_search_knowledge_base(**kwargs):
-            search_captured.update(kwargs)
-            return []
-
-        monkeypatch.setattr(
-            ks,
-            "browse_recent_documents",
-            fake_browse_recent_documents,
-        )
-        monkeypatch.setattr(
-            ks,
-            "search_knowledge_base",
-            fake_search_knowledge_base,
-        )
-
-        llm = FakeLLM(
-            json.dumps(
-                {
-                    "optimized_query": "quarterly revenue report analysis",
-                    "start_date": None,
-                    "end_date": None,
-                    "is_recency_query": False,
-                }
-            )
-        )
-        middleware = KnowledgePriorityMiddleware(llm=llm, search_space_id=42)
-
-        await middleware.abefore_agent(
-            {"messages": [HumanMessage(content="find the quarterly revenue report")]},
-            runtime=None,
-        )
-
-        assert search_captured["query"] == "quarterly revenue report analysis"
-        assert not browse_called
-
-
-# ── KBSearchPlan schema ────────────────────────────────────────────────
-
-
-class TestKBSearchPlanSchema:
-    def test_is_recency_query_defaults_to_false(self):
-        plan = KBSearchPlan(optimized_query="test query")
-        assert plan.is_recency_query is False
-
-    def test_is_recency_query_parses_true(self):
-        plan = _parse_kb_search_plan_response(
-            json.dumps(
-                {
-                    "optimized_query": "latest uploaded file",
-                    "start_date": None,
-                    "end_date": None,
-                    "is_recency_query": True,
-                }
-            )
-        )
-        assert plan.is_recency_query is True
-        assert plan.optimized_query == "latest uploaded file"
-
-    def test_missing_is_recency_query_defaults_to_false(self):
-        plan = _parse_kb_search_plan_response(
-            json.dumps(
-                {
-                    "optimized_query": "meeting notes",
-                    "start_date": None,
-                    "end_date": None,
-                }
-            )
-        )
-        assert plan.is_recency_query is False
-
-
-# ── mentioned_document_ids cross-turn drain ────────────────────────────
-
-
-class TestKnowledgePriorityMentionDrain:
-    """Regression tests for the cross-turn ``mentioned_document_ids`` drain.
-
-    The compiled-agent cache reuses a single :class:`KnowledgePriorityMiddleware`
-    instance across turns of the same thread. ``mentioned_document_ids``
-    can therefore enter the middleware via two paths:
-
-    1. The constructor closure (``__init__(mentioned_document_ids=...)``) —
-       seeded by the cache-miss build on turn 1.
-    2. ``runtime.context.mentioned_document_ids`` — supplied freshly per
-       turn by the streaming task.
-
-    Without the drain fix, an empty ``runtime.context.mentioned_document_ids``
-    on turn 2 would fall through to the closure (because ``[]`` is falsy in
-    Python) and replay turn 1's mentions. This class pins down the
-    correct behaviour: the runtime path is authoritative even when empty,
-    and the closure is drained the first time the runtime path fires so
-    no later turn can ever resurrect stale state.
-    """
-
-    @staticmethod
-    def _make_runtime(mention_ids: list[int]):
-        """Minimal runtime stub exposing only ``runtime.context.mentioned_document_ids``."""
-        from types import SimpleNamespace
-
-        return SimpleNamespace(
-            context=SimpleNamespace(mentioned_document_ids=mention_ids),
-        )
-
-    @staticmethod
-    def _planner_llm() -> "FakeLLM":
-        # Planner returns a stable, non-recency plan so we always land in
-        # the hybrid-search branch (where ``fetch_mentioned_documents`` is
-        # invoked alongside the main search).
-        return FakeLLM(
-            json.dumps(
-                {
-                    "optimized_query": "follow up question",
-                    "start_date": None,
-                    "end_date": None,
-                    "is_recency_query": False,
-                }
-            )
-        )
-
-    async def test_runtime_context_overrides_closure_and_drains_it(self, monkeypatch):
-        """Turn 1 with mentions in BOTH closure and runtime context: the
-        runtime path wins AND the closure is drained so a future turn
-        cannot replay it.
-        """
-        fetched_ids: list[list[int]] = []
-
-        async def fake_fetch_mentioned_documents(*, document_ids, search_space_id):
-            fetched_ids.append(list(document_ids))
-            return []
-
-        async def fake_search_knowledge_base(**_kwargs):
-            return []
-
-        monkeypatch.setattr(
-            ks,
-            "fetch_mentioned_documents",
-            fake_fetch_mentioned_documents,
-        )
-        monkeypatch.setattr(
-            ks,
-            "search_knowledge_base",
-            fake_search_knowledge_base,
-        )
-
-        middleware = KnowledgePriorityMiddleware(
-            llm=self._planner_llm(),
-            search_space_id=42,
-            mentioned_document_ids=[1, 2, 3],
-        )
-
-        await middleware.abefore_agent(
-            {"messages": [HumanMessage(content="what is in those docs?")]},
-            runtime=self._make_runtime([1, 2, 3]),
-        )
-
-        assert fetched_ids == [[1, 2, 3]], (
-            "runtime.context mentions must be the source of truth on turn 1"
-        )
-        assert middleware.mentioned_document_ids == [], (
-            "closure must be drained the first time the runtime path fires "
-            "so no later turn can replay stale mentions"
-        )
-
-    async def test_empty_runtime_context_does_not_replay_closure_mentions(
-        self, monkeypatch
-    ):
-        """Regression: turn 2 with NO mentions must not surface turn 1's
-        mentions from the constructor closure.
-
-        Before the fix, ``if ctx_mentions:`` treated an empty list as
-        absent and fell through to ``elif self.mentioned_document_ids:``,
-        replaying turn 1's mentions. This test pins down the corrected
-        behaviour.
-        """
-        fetched_ids: list[list[int]] = []
-
-        async def fake_fetch_mentioned_documents(*, document_ids, search_space_id):
-            fetched_ids.append(list(document_ids))
-            return []
-
-        async def fake_search_knowledge_base(**_kwargs):
-            return []
-
-        monkeypatch.setattr(
-            ks,
-            "fetch_mentioned_documents",
-            fake_fetch_mentioned_documents,
-        )
-        monkeypatch.setattr(
-            ks,
-            "search_knowledge_base",
-            fake_search_knowledge_base,
-        )
-
-        # Simulate a cached middleware instance whose closure was seeded
-        # by a previous turn's cache-miss build (mentions=[1,2,3]).
-        middleware = KnowledgePriorityMiddleware(
-            llm=self._planner_llm(),
-            search_space_id=42,
-            mentioned_document_ids=[1, 2, 3],
-        )
-
-        # Turn 2: streaming task supplies an EMPTY mention list (no
-        # mentions on this follow-up turn).
-        await middleware.abefore_agent(
-            {"messages": [HumanMessage(content="what about the next steps?")]},
-            runtime=self._make_runtime([]),
-        )
-
-        assert fetched_ids == [], (
-            "fetch_mentioned_documents must NOT be called when the runtime "
-            "context says there are no mentions for this turn"
-        )
-
-    async def test_legacy_path_fires_only_when_runtime_context_absent(
-        self, monkeypatch
-    ):
-        """Backward-compat: if a caller doesn't supply runtime.context (old
-        non-streaming code path), the closure-injected mentions are still
-        honoured exactly once and then drained.
-        """
-        fetched_ids: list[list[int]] = []
-
-        async def fake_fetch_mentioned_documents(*, document_ids, search_space_id):
-            fetched_ids.append(list(document_ids))
-            return []
-
-        async def fake_search_knowledge_base(**_kwargs):
-            return []
-
-        monkeypatch.setattr(
-            ks,
-            "fetch_mentioned_documents",
-            fake_fetch_mentioned_documents,
-        )
-        monkeypatch.setattr(
-            ks,
-            "search_knowledge_base",
-            fake_search_knowledge_base,
-        )
-
-        middleware = KnowledgePriorityMiddleware(
-            llm=self._planner_llm(),
-            search_space_id=42,
-            mentioned_document_ids=[7, 8],
-        )
-
-        # First call: no runtime → legacy path uses the closure.
-        await middleware.abefore_agent(
-            {"messages": [HumanMessage(content="initial question")]},
-            runtime=None,
-        )
-        # Second call: still no runtime — closure already drained, so no replay.
-        await middleware.abefore_agent(
-            {"messages": [HumanMessage(content="follow up")]},
-            runtime=None,
-        )
-
-        assert fetched_ids == [[7, 8]], (
-            "legacy path must honour the closure exactly once and then drain it"
-        )
-        assert middleware.mentioned_document_ids == []
diff --git a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/AgentStatusContent.tsx b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/AgentStatusContent.tsx
index fd7be1a23..bc31dffed 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/AgentStatusContent.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/AgentStatusContent.tsx
@@ -125,12 +125,6 @@ const FLAG_GROUPS: FlagGroup[] = [
 				description: "Spin up explore / report_writer / connector_negotiator subagents.",
 				envVar: "SURFSENSE_ENABLE_SPECIALIZED_SUBAGENTS",
 			},
-			{
-				key: "enable_kb_planner_runnable",
-				label: "KB planner runnable",
-				description: "Compile a private planner sub-agent for KB search.",
-				envVar: "SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE",
-			},
 		],
 	},
 	{
diff --git a/surfsense_web/lib/apis/agent-flags-api.service.ts b/surfsense_web/lib/apis/agent-flags-api.service.ts
index 534810c0e..5895d9924 100644
--- a/surfsense_web/lib/apis/agent-flags-api.service.ts
+++ b/surfsense_web/lib/apis/agent-flags-api.service.ts
@@ -19,7 +19,6 @@ const AgentFeatureFlagsSchema = z.object({
 
 	enable_skills: z.boolean(),
 	enable_specialized_subagents: z.boolean(),
-	enable_kb_planner_runnable: z.boolean(),
 
 	enable_action_log: z.boolean(),
 	enable_revert_route: z.boolean(),