From 9e886f8274d84f98a7edb14962cdde1230078d1a Mon Sep 17 00:00:00 2001 From: Varun Shukla Date: Tue, 19 May 2026 01:16:02 +0530 Subject: [PATCH 01/63] refactor(env): replace inline process.env reads with BACKEND_URL in connector forms and hooks --- .../components/obsidian-connect-form.tsx | 2 +- .../components/circleback-config.tsx | 4 ++-- .../connector-configs/views/connector-edit-view.tsx | 4 ++-- .../connector-popup/hooks/use-connector-dialog.ts | 4 ++-- .../views/connector-accounts-list-view.tsx | 4 ++-- surfsense_web/hooks/use-search-source-connectors.ts | 12 ++++++------ 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/obsidian-connect-form.tsx b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/obsidian-connect-form.tsx index ecbb09fae..bbc150753 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/obsidian-connect-form.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/obsidian-connect-form.tsx @@ -9,11 +9,11 @@ import { useApiKey } from "@/hooks/use-api-key"; import { copyToClipboard as copyToClipboardUtil } from "@/lib/utils"; import { getConnectorBenefits } from "../connector-benefits"; import type { ConnectFormProps } from "../index"; +import { BACKEND_URL } from "@/lib/env-config"; const PLUGIN_RELEASES_URL = "https://github.com/MODSetter/SurfSense/releases?q=obsidian&expanded=true"; -const BACKEND_URL = process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL ?? "https://surfsense.com"; /** * Obsidian connect form for the plugin-only architecture. 
diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/circleback-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/circleback-config.tsx index 268ab0f98..fb879d4df 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/circleback-config.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/circleback-config.tsx @@ -10,7 +10,7 @@ import { Input } from "@/components/ui/input"; import { Label } from "@/components/ui/label"; import { authenticatedFetch } from "@/lib/auth-utils"; import type { ConnectorConfigProps } from "../index"; - +import { BACKEND_URL } from "@/lib/env-config"; export interface CirclebackConfigProps extends ConnectorConfigProps { onNameChange?: (name: string) => void; } @@ -42,7 +42,7 @@ export const CirclebackConfig: FC = ({ connector, onNameC const doFetch = async () => { if (!connector.search_space_id) return; - const baseUrl = process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL; + const baseUrl = BACKEND_URL; if (!baseUrl) { console.error("NEXT_PUBLIC_FASTAPI_BACKEND_URL is not configured"); setIsLoading(false); diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx index c104f140a..2d8e1b797 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx @@ -20,7 +20,7 @@ import { getReauthEndpoint, LIVE_CONNECTOR_TYPES } from "../../constants/connect import { getConnectorDisplayName } from "../../tabs/all-connectors-tab"; import { MCPServiceConfig } from "../components/mcp-service-config"; import { getConnectorConfigComponent } from "../index"; - +import { BACKEND_URL 
} from "@/lib/env-config"; const VISION_LLM_CONNECTOR_TYPES = new Set([ EnumConnectorName.GOOGLE_DRIVE_CONNECTOR, EnumConnectorName.COMPOSIO_GOOGLE_DRIVE_CONNECTOR, @@ -93,7 +93,7 @@ export const ConnectorEditView: FC = ({ if (!spaceId || !reauthEndpoint) return; setReauthing(true); try { - const backendUrl = process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL || "http://localhost:8000"; + const backendUrl = BACKEND_URL; const url = new URL(`${backendUrl}${reauthEndpoint}`); url.searchParams.set("connector_id", String(connector.id)); url.searchParams.set("space_id", String(spaceId)); diff --git a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts index b49bfda96..d1d675ad1 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts +++ b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts @@ -43,7 +43,7 @@ import { parseOAuthAuthResponse, validateIndexingConfigState, } from "../constants/connector-popup.schemas"; - +import { BACKEND_URL } from "@/lib/env-config"; const OAUTH_RESULT_COOKIE = "connector_oauth_result"; function readOAuthResultCookie(): string | null { @@ -364,7 +364,7 @@ export const useConnectorDialog = () => { try { // Check if authEndpoint already has query parameters const separator = connector.authEndpoint.includes("?") ? 
"&" : "?"; - const url = `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}${connector.authEndpoint}${separator}space_id=${searchSpaceId}`; + const url = `${BACKEND_URL}${connector.authEndpoint}${separator}space_id=${searchSpaceId}`; const response = await authenticatedFetch(url, { method: "GET" }); diff --git a/surfsense_web/components/assistant-ui/connector-popup/views/connector-accounts-list-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/views/connector-accounts-list-view.tsx index f6291b64d..78ad76ca6 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/views/connector-accounts-list-view.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/views/connector-accounts-list-view.tsx @@ -16,7 +16,7 @@ import { cn } from "@/lib/utils"; import { getReauthEndpoint, LIVE_CONNECTOR_TYPES } from "../constants/connector-constants"; import { useConnectorStatus } from "../hooks/use-connector-status"; import { getConnectorDisplayName } from "../tabs/all-connectors-tab"; - +import { BACKEND_URL } from "@/lib/env-config"; interface ConnectorAccountsListViewProps { connectorType: string; connectorTitle: string; @@ -55,7 +55,7 @@ export const ConnectorAccountsListView: FC = ({ if (!searchSpaceId || !endpoint) return; setReauthingId(connector.id); try { - const backendUrl = process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL || "http://localhost:8000"; + const backendUrl = BACKEND_URL; const url = new URL(`${backendUrl}${endpoint}`); url.searchParams.set("connector_id", String(connector.id)); url.searchParams.set("space_id", String(searchSpaceId)); diff --git a/surfsense_web/hooks/use-search-source-connectors.ts b/surfsense_web/hooks/use-search-source-connectors.ts index e53629235..bc1ec49b5 100644 --- a/surfsense_web/hooks/use-search-source-connectors.ts +++ b/surfsense_web/hooks/use-search-source-connectors.ts @@ -1,6 +1,6 @@ import { useCallback, useEffect, useState } from "react"; import { authenticatedFetch } from "@/lib/auth-utils"; - +import { 
BACKEND_URL } from "@/lib/env-config"; export interface SearchSourceConnector { id: number; name: string; @@ -108,7 +108,7 @@ export const useSearchSourceConnectors = (lazy: boolean = false, searchSpaceId?: // Build URL with optional search_space_id query parameter const url = new URL( - `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/search-source-connectors` + `${BACKEND_URL}/api/v1/search-source-connectors` ); if (spaceId !== undefined) { url.searchParams.append("search_space_id", spaceId.toString()); @@ -170,7 +170,7 @@ export const useSearchSourceConnectors = (lazy: boolean = false, searchSpaceId?: try { // Add search_space_id as a query parameter const url = new URL( - `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/search-source-connectors` + `${BACKEND_URL}/api/v1/search-source-connectors` ); url.searchParams.append("search_space_id", spaceId.toString()); @@ -208,7 +208,7 @@ export const useSearchSourceConnectors = (lazy: boolean = false, searchSpaceId?: ) => { try { const response = await authenticatedFetch( - `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/search-source-connectors/${connectorId}`, + `${BACKEND_URL}/api/v1/search-source-connectors/${connectorId}`, { method: "PUT", headers: { "Content-Type": "application/json" }, @@ -239,7 +239,7 @@ export const useSearchSourceConnectors = (lazy: boolean = false, searchSpaceId?: const deleteConnector = async (connectorId: number) => { try { const response = await authenticatedFetch( - `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/search-source-connectors/${connectorId}`, + `${BACKEND_URL}/api/v1/search-source-connectors/${connectorId}`, { method: "DELETE", headers: { "Content-Type": "application/json" }, @@ -284,7 +284,7 @@ export const useSearchSourceConnectors = (lazy: boolean = false, searchSpaceId?: const response = await authenticatedFetch( `${ - process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL + BACKEND_URL 
}/api/v1/search-source-connectors/${connectorId}/index?${params.toString()}`, { method: "POST", From 9bfba34e8e76554314dc857cce1e2757a7910993 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 19 May 2026 21:29:56 +0200 Subject: [PATCH 02/63] perf(mcp): add per-call, discovery, and oauth-refresh timing logs --- .../app/agents/new_chat/tools/mcp_tool.py | 76 ++++++++++++++++++- 1 file changed, 72 insertions(+), 4 deletions(-) diff --git a/surfsense_backend/app/agents/new_chat/tools/mcp_tool.py b/surfsense_backend/app/agents/new_chat/tools/mcp_tool.py index 64368a878..b3c26f331 100644 --- a/surfsense_backend/app/agents/new_chat/tools/mcp_tool.py +++ b/surfsense_backend/app/agents/new_chat/tools/mcp_tool.py @@ -38,6 +38,9 @@ from app.agents.new_chat.tools.hitl import request_approval from app.agents.new_chat.tools.mcp_client import MCPClient from app.db import SearchSourceConnector from app.services.mcp_oauth.registry import MCP_SERVICES, get_service_by_connector_type +from app.utils.perf import get_perf_logger + +_perf_log = get_perf_logger() logger = logging.getLogger(__name__) @@ -293,15 +296,21 @@ async def _create_mcp_tool_from_definition_http( timeout: float = 60.0, ) -> str: """Execute a single MCP HTTP call with the given headers.""" + call_start = time.perf_counter() async with ( streamablehttp_client(url, headers=call_headers) as (read, write, _), ClientSession(read, write) as session, ): + init_start = time.perf_counter() await session.initialize() + init_elapsed = time.perf_counter() - init_start + + tool_start = time.perf_counter() response = await asyncio.wait_for( session.call_tool(original_tool_name, arguments=call_kwargs), timeout=timeout, ) + tool_elapsed = time.perf_counter() - tool_start result = [] for content in response.content: @@ -312,7 +321,18 @@ async def _create_mcp_tool_from_definition_http( else: result.append(str(content)) - return "\n".join(result) if result else "" + payload = "\n".join(result) if result else "" + + _perf_log.info( + 
"[mcp_http_call] connector=%s tool=%s init=%.3fs call=%.3fs total=%.3fs out_chars=%d", + connector_id, + original_tool_name, + init_elapsed, + tool_elapsed, + time.perf_counter() - call_start, + len(payload), + ) + return payload async def mcp_http_tool_call(**kwargs) -> str: """Execute the MCP tool call via HTTP transport.""" @@ -792,14 +812,25 @@ async def _maybe_refresh_mcp_oauth_token( except (ValueError, TypeError): return server_config + refresh_start = time.perf_counter() try: new_access = await _refresh_connector_token(session, connector) if not new_access: + _perf_log.info( + "[mcp_oauth_refresh] connector=%s elapsed=%.3fs outcome=no_token", + connector.id, + time.perf_counter() - refresh_start, + ) return server_config logger.info( "Proactively refreshed MCP OAuth token for connector %s", connector.id ) + _perf_log.info( + "[mcp_oauth_refresh] connector=%s elapsed=%.3fs outcome=refreshed", + connector.id, + time.perf_counter() - refresh_start, + ) refreshed_config = dict(server_config) refreshed_config["headers"] = { @@ -809,6 +840,11 @@ async def _maybe_refresh_mcp_oauth_token( return refreshed_config except Exception: + _perf_log.info( + "[mcp_oauth_refresh] connector=%s elapsed=%.3fs outcome=failed", + connector.id, + time.perf_counter() - refresh_start, + ) logger.warning( "Failed to refresh MCP OAuth token for connector %s", connector.id, @@ -1074,9 +1110,11 @@ async def load_mcp_tools( ) async def _discover_one(task: dict[str, Any]) -> list[StructuredTool]: + discover_start = time.perf_counter() + transport = task["transport"] try: - if task["transport"] in ("streamable-http", "http", "sse"): - return await asyncio.wait_for( + if transport in ("streamable-http", "http", "sse"): + result = await asyncio.wait_for( _load_http_mcp_tools( task["connector_id"], task["connector_name"], @@ -1091,7 +1129,7 @@ async def load_mcp_tools( timeout=_MCP_DISCOVERY_TIMEOUT_SECONDS, ) else: - return await asyncio.wait_for( + result = await asyncio.wait_for( 
_load_stdio_mcp_tools( task["connector_id"], task["connector_name"], @@ -1101,7 +1139,23 @@ async def load_mcp_tools( ), timeout=_MCP_DISCOVERY_TIMEOUT_SECONDS, ) + _perf_log.info( + "[mcp_discover] connector=%s name=%r transport=%s tools=%d elapsed=%.3fs", + task["connector_id"], + task["connector_name"], + transport, + len(result), + time.perf_counter() - discover_start, + ) + return result except TimeoutError: + _perf_log.info( + "[mcp_discover] connector=%s name=%r transport=%s elapsed=%.3fs outcome=timeout", + task["connector_id"], + task["connector_name"], + transport, + time.perf_counter() - discover_start, + ) logger.error( "MCP connector %d timed out after %ds during discovery", task["connector_id"], @@ -1109,6 +1163,13 @@ async def load_mcp_tools( ) return [] except Exception as e: + _perf_log.info( + "[mcp_discover] connector=%s name=%r transport=%s elapsed=%.3fs outcome=error", + task["connector_id"], + task["connector_name"], + transport, + time.perf_counter() - discover_start, + ) logger.exception( "Failed to load tools from MCP connector %d: %s", task["connector_id"], @@ -1116,7 +1177,14 @@ async def load_mcp_tools( ) return [] + gather_start = time.perf_counter() results = await asyncio.gather(*[_discover_one(t) for t in discovery_tasks]) + _perf_log.info( + "[mcp_discover] gather_wall=%.3fs connectors=%d total_tools=%d", + time.perf_counter() - gather_start, + len(discovery_tasks), + sum(len(r) for r in results), + ) tools: list[StructuredTool] = [tool for sublist in results for tool in sublist] _mcp_tools_cache[cache_key] = (now, tools) From 9e81f2a35bfeb6d2893b1707423cd3186d8b0bb6 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 19 May 2026 21:30:01 +0200 Subject: [PATCH 03/63] perf(subagent): add subagent compile timing log --- .../middleware.py | 51 ++++++++++++++++--- 1 file changed, 43 insertions(+), 8 deletions(-) diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/middleware.py 
b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/middleware.py index 8f51ffed7..0119752c1 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/middleware.py +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/middleware.py @@ -2,6 +2,7 @@ from __future__ import annotations +import time from typing import Any, cast from deepagents.backends.protocol import BackendFactory, BackendProtocol @@ -15,8 +16,12 @@ from langchain.agents import create_agent from langchain.chat_models import init_chat_model from langgraph.types import Checkpointer +from app.utils.perf import get_perf_logger + from .task_tool import build_task_tool_with_parent_config +_perf_log = get_perf_logger() + class SurfSenseCheckpointedSubAgentMiddleware(SubAgentMiddleware): """``SubAgentMiddleware`` variant that compiles each subagent against the parent checkpointer.""" @@ -54,8 +59,11 @@ class SurfSenseCheckpointedSubAgentMiddleware(SubAgentMiddleware): def _surf_compile_subagent_graphs(self) -> list[dict[str, Any]]: """Mirror of ``SubAgentMiddleware._get_subagents`` that threads the parent checkpointer.""" specs: list[dict[str, Any]] = [] + loop_start = time.perf_counter() + timings: list[tuple[str, float, str]] = [] # (name, elapsed, source) for spec in self._subagents: + spec_start = time.perf_counter() if "runnable" in spec: compiled = cast(CompiledSubAgent, spec) specs.append( @@ -65,6 +73,9 @@ class SurfSenseCheckpointedSubAgentMiddleware(SubAgentMiddleware): "runnable": compiled["runnable"], } ) + timings.append( + (compiled["name"], time.perf_counter() - spec_start, "precompiled") + ) continue if "model" not in spec: @@ -79,20 +90,44 @@ class SurfSenseCheckpointedSubAgentMiddleware(SubAgentMiddleware): model = init_chat_model(model) middleware: list[Any] = list(spec.get("middleware", [])) + tools_count = len(spec.get("tools") or 
[]) + mw_count = len(middleware) + compile_start = time.perf_counter() + runnable = create_agent( + model, + system_prompt=spec["system_prompt"], + tools=spec["tools"], + middleware=middleware, + name=spec["name"], + checkpointer=self._surf_checkpointer, + ) + compile_elapsed = time.perf_counter() - compile_start specs.append( { "name": spec["name"], "description": spec["description"], - "runnable": create_agent( - model, - system_prompt=spec["system_prompt"], - tools=spec["tools"], - middleware=middleware, - name=spec["name"], - checkpointer=self._surf_checkpointer, - ), + "runnable": runnable, } ) + timings.append( + ( + spec["name"], + compile_elapsed, + f"compiled tools={tools_count} mw={mw_count}", + ) + ) + + total_elapsed = time.perf_counter() - loop_start + per_subagent = ", ".join( + f"{name}={elapsed * 1000:.0f}ms[{source}]" + for name, elapsed, source in timings + ) + _perf_log.info( + "[subagent_compile] total=%.3fs count=%d details=[%s]", + total_elapsed, + len(timings), + per_subagent, + ) return specs From 33bfce4406b906191649792d61e3763520ff28e4 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 19 May 2026 21:30:05 +0200 Subject: [PATCH 04/63] perf(subagent): add atask EXIT breakdown timing log --- .../task_tool.py | 103 +++++++++++++----- 1 file changed, 78 insertions(+), 25 deletions(-) diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/task_tool.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/task_tool.py index f9b316e23..f6a9ff146 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/task_tool.py +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/task_tool.py @@ -9,6 +9,7 @@ re-raises any new pending interrupt back to the parent. 
from __future__ import annotations import logging +import time from typing import Annotated, Any, NoReturn from deepagents.middleware.subagents import TASK_TOOL_DESCRIPTION @@ -19,6 +20,8 @@ from langchain_core.tools import StructuredTool from langgraph.errors import GraphInterrupt from langgraph.types import Command, Interrupt +from app.utils.perf import get_perf_logger + from .config import ( consume_surfsense_resume, drain_parent_null_resume, @@ -35,6 +38,7 @@ from .resume import ( ) logger = logging.getLogger(__name__) +_perf_log = get_perf_logger() def _reraise_stamped_subagent_interrupt( @@ -209,6 +213,7 @@ def build_task_tool_with_parent_config( ], runtime: ToolRuntime, ) -> str | Command: + atask_start = time.perf_counter() logger.info( "[hitl_route] atask ENTRY: subagent_type=%r tool_call_id=%s", subagent_type, @@ -230,8 +235,10 @@ def build_task_tool_with_parent_config( # Resume bridge — see ``task`` above. pending_id: str | None = None pending_value: Any = None + aget_state_elapsed = 0.0 aget_state = getattr(subagent, "aget_state", None) if callable(aget_state): + aget_state_start = time.perf_counter() try: snapshot = await aget_state(sub_config) pending_id, pending_value = get_first_pending_subagent_interrupt( @@ -248,32 +255,78 @@ def build_task_tool_with_parent_config( "Subagent aget_state failed; falling back to fresh ainvoke", exc_info=True, ) + finally: + aget_state_elapsed = time.perf_counter() - aget_state_start - if pending_value is not None: - resume_value = consume_surfsense_resume(runtime) - if resume_value is None: - raise RuntimeError( - f"Subagent {subagent_type!r} has a pending interrupt but no " - "surfsense_resume_value on config; resume bridge is broken." - ) - expected = hitlrequest_action_count(pending_value) - resume_value = fan_out_decisions_to_match(resume_value, expected) - # Prevent the parent's resume payload from leaking into subagent - # interrupts via langgraph's parent_scratchpad fallback. 
- drain_parent_null_resume(runtime) - try: - result = await subagent.ainvoke( - build_resume_command(resume_value, pending_id), - config=sub_config, - ) - except GraphInterrupt as gi: - _reraise_stamped_subagent_interrupt(gi, runtime.tool_call_id) - else: - try: - result = await subagent.ainvoke(subagent_state, config=sub_config) - except GraphInterrupt as gi: - _reraise_stamped_subagent_interrupt(gi, runtime.tool_call_id) - return _return_command_with_state_update(result, runtime.tool_call_id) + invoke_path = "resume" if pending_value is not None else "fresh" + ainvoke_start = time.perf_counter() + ainvoke_outcome = "ok" + try: + if pending_value is not None: + resume_value = consume_surfsense_resume(runtime) + if resume_value is None: + raise RuntimeError( + f"Subagent {subagent_type!r} has a pending interrupt but no " + "surfsense_resume_value on config; resume bridge is broken." + ) + expected = hitlrequest_action_count(pending_value) + resume_value = fan_out_decisions_to_match(resume_value, expected) + # Prevent the parent's resume payload from leaking into subagent + # interrupts via langgraph's parent_scratchpad fallback. 
+ drain_parent_null_resume(runtime) + try: + result = await subagent.ainvoke( + build_resume_command(resume_value, pending_id), + config=sub_config, + ) + except GraphInterrupt as gi: + ainvoke_outcome = "interrupted" + _perf_log.info( + "[hitl_route] atask EXIT subagent_type=%r path=%s outcome=%s " + "aget_state=%.3fs ainvoke=%.3fs total=%.3fs", + subagent_type, + invoke_path, + ainvoke_outcome, + aget_state_elapsed, + time.perf_counter() - ainvoke_start, + time.perf_counter() - atask_start, + ) + _reraise_stamped_subagent_interrupt(gi, runtime.tool_call_id) + else: + try: + result = await subagent.ainvoke(subagent_state, config=sub_config) + except GraphInterrupt as gi: + ainvoke_outcome = "interrupted" + _perf_log.info( + "[hitl_route] atask EXIT subagent_type=%r path=%s outcome=%s " + "aget_state=%.3fs ainvoke=%.3fs total=%.3fs", + subagent_type, + invoke_path, + ainvoke_outcome, + aget_state_elapsed, + time.perf_counter() - ainvoke_start, + time.perf_counter() - atask_start, + ) + _reraise_stamped_subagent_interrupt(gi, runtime.tool_call_id) + ainvoke_elapsed = time.perf_counter() - ainvoke_start + except GraphInterrupt: + raise + + merge_start = time.perf_counter() + cmd = _return_command_with_state_update(result, runtime.tool_call_id) + merge_elapsed = time.perf_counter() - merge_start + _perf_log.info( + "[hitl_route] atask EXIT subagent_type=%r path=%s outcome=%s " + "aget_state=%.3fs ainvoke=%.3fs merge=%.3fs total=%.3fs", + subagent_type, + invoke_path, + ainvoke_outcome, + aget_state_elapsed, + ainvoke_elapsed, + merge_elapsed, + time.perf_counter() - atask_start, + ) + return cmd return StructuredTool.from_function( name="task", From bd153d3cdb9d0a4cf6086754189c01627a2d61f3 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 19 May 2026 21:30:09 +0200 Subject: [PATCH 05/63] perf(multi-agent): add kb_context_projection timing log --- .../shared/kb_context_projection.py | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git 
a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/kb_context_projection.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/kb_context_projection.py index 01c77bb84..e8a4c9899 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/kb_context_projection.py +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/kb_context_projection.py @@ -2,6 +2,7 @@ from __future__ import annotations +import time from typing import Any from langchain.agents.middleware import AgentMiddleware, AgentState @@ -10,6 +11,9 @@ from langgraph.runtime import Runtime from app.agents.new_chat.filesystem_state import SurfSenseFilesystemState from app.agents.new_chat.middleware.knowledge_search import _render_priority_message +from app.utils.perf import get_perf_logger + +_perf_log = get_perf_logger() class KbContextProjectionMiddleware(AgentMiddleware): # type: ignore[type-arg] @@ -30,17 +34,34 @@ class KbContextProjectionMiddleware(AgentMiddleware): # type: ignore[type-arg] runtime: Runtime[Any], ) -> dict[str, Any] | None: del runtime + start = time.perf_counter() tree_text = state.get("workspace_tree_text") priority = state.get("kb_priority") if not tree_text and not priority: + _perf_log.info( + "[kb_context_projection] tree=0 priority=0 elapsed=%.3fs", + time.perf_counter() - start, + ) return None messages = list(state.get("messages") or []) insert_at = max(len(messages) - 1, 0) + tree_chars = 0 if tree_text: + tree_chars = len(tree_text) messages.insert(insert_at, SystemMessage(content=tree_text)) + priority_count = 0 if priority: + priority_count = ( + len(priority) if hasattr(priority, "__len__") else 1 + ) messages.insert(insert_at, _render_priority_message(priority)) + _perf_log.info( + "[kb_context_projection] tree_chars=%d priority_items=%d elapsed=%.3fs", + tree_chars, + priority_count, + time.perf_counter() - start, + ) return {"messages": messages} From 1df40fbe3136c2563558b76c10a705292f50193a Mon Sep 17 
00:00:00 2001 From: CREDO23 Date: Tue, 19 May 2026 21:30:14 +0200 Subject: [PATCH 06/63] perf(new-chat): add knowledge_tree middleware timing log --- .../new_chat/middleware/knowledge_tree.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/surfsense_backend/app/agents/new_chat/middleware/knowledge_tree.py b/surfsense_backend/app/agents/new_chat/middleware/knowledge_tree.py index 4ed1e53f4..6bd6430d1 100644 --- a/surfsense_backend/app/agents/new_chat/middleware/knowledge_tree.py +++ b/surfsense_backend/app/agents/new_chat/middleware/knowledge_tree.py @@ -24,6 +24,7 @@ from __future__ import annotations import asyncio import logging +import time from typing import Any from langchain.agents.middleware import AgentMiddleware, AgentState @@ -41,6 +42,9 @@ from app.agents.new_chat.path_resolver import ( doc_to_virtual_path, ) from app.db import Document, shielded_async_session +from app.utils.perf import get_perf_logger + +_perf_log = get_perf_logger() try: from litellm import token_counter @@ -124,6 +128,7 @@ class KnowledgeTreeMiddleware(AgentMiddleware): # type: ignore[type-arg] if self.filesystem_mode != FilesystemMode.CLOUD: return None + start = time.perf_counter() update: dict[str, Any] = {} if not state.get("cwd"): update["cwd"] = DOCUMENTS_ROOT @@ -131,7 +136,11 @@ class KnowledgeTreeMiddleware(AgentMiddleware): # type: ignore[type-arg] anon_doc = state.get("kb_anon_doc") if anon_doc: tree_msg = self._render_anon_tree(anon_doc) + cache_outcome = "anon" else: + version = int(state.get("tree_version") or 0) + cache_key = (self.search_space_id, version, False) + cache_outcome = "hit" if cache_key in self._cache else "miss" tree_msg = await self._render_kb_tree(state) update["workspace_tree_text"] = tree_msg @@ -141,6 +150,14 @@ class KnowledgeTreeMiddleware(AgentMiddleware): # type: ignore[type-arg] insert_at = max(len(messages) - 1, 0) messages.insert(insert_at, SystemMessage(content=tree_msg)) update["messages"] = messages + + _perf_log.info( 
+ "[knowledge_tree] cache=%s chars=%d elapsed=%.3fs space=%d", + cache_outcome, + len(tree_msg), + time.perf_counter() - start, + self.search_space_id, + ) return update def before_agent( # type: ignore[override] From b3b66e4c487bc899a0e58efca8b72b127861a976 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 19 May 2026 21:30:19 +0200 Subject: [PATCH 07/63] perf(new-chat): add memory_injection middleware timing log --- .../new_chat/middleware/memory_injection.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/surfsense_backend/app/agents/new_chat/middleware/memory_injection.py b/surfsense_backend/app/agents/new_chat/middleware/memory_injection.py index 6179adccd..659d1bea2 100644 --- a/surfsense_backend/app/agents/new_chat/middleware/memory_injection.py +++ b/surfsense_backend/app/agents/new_chat/middleware/memory_injection.py @@ -8,6 +8,7 @@ Injects memory markdown into the system prompt on every turn: from __future__ import annotations import logging +import time from typing import Any from uuid import UUID @@ -19,8 +20,10 @@ from sqlalchemy.ext.asyncio import AsyncSession from app.agents.new_chat.tools.update_memory import MEMORY_HARD_LIMIT, MEMORY_SOFT_LIMIT from app.db import ChatVisibility, SearchSpace, User, shielded_async_session +from app.utils.perf import get_perf_logger logger = logging.getLogger(__name__) +_perf_log = get_perf_logger() class MemoryInjectionMiddleware(AgentMiddleware): # type: ignore[type-arg] @@ -53,9 +56,13 @@ class MemoryInjectionMiddleware(AgentMiddleware): # type: ignore[type-arg] if not isinstance(last_message, HumanMessage): return None + start = time.perf_counter() + db_elapsed = 0.0 memory_blocks: list[str] = [] + scope = "team" if self.visibility == ChatVisibility.SEARCH_SPACE else "user" async with shielded_async_session() as session: + db_start = time.perf_counter() if self.visibility == ChatVisibility.SEARCH_SPACE: team_memory = await self._load_team_memory(session) if team_memory: @@ -96,7 +103,15 @@ 
class MemoryInjectionMiddleware(AgentMiddleware): # type: ignore[type-arg] f"" ) + db_elapsed = time.perf_counter() - db_start + if not memory_blocks: + _perf_log.info( + "[memory_injection] scope=%s injected=0 db=%.3fs total=%.3fs", + scope, + db_elapsed, + time.perf_counter() - start, + ) return None memory_text = "\n\n".join(memory_blocks) @@ -106,6 +121,13 @@ class MemoryInjectionMiddleware(AgentMiddleware): # type: ignore[type-arg] insert_idx = 1 if len(new_messages) > 1 else 0 new_messages.insert(insert_idx, memory_msg) + _perf_log.info( + "[memory_injection] scope=%s injected=1 chars=%d db=%.3fs total=%.3fs", + scope, + len(memory_text), + db_elapsed, + time.perf_counter() - start, + ) return {"messages": new_messages} async def _load_user_memory( From 581bbfb5c1301d84afa1f6c402ac724657894cd1 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 19 May 2026 21:30:25 +0200 Subject: [PATCH 08/63] perf(tokens): add per-call latency to capture log --- .../app/services/token_tracking_service.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/surfsense_backend/app/services/token_tracking_service.py b/surfsense_backend/app/services/token_tracking_service.py index 9406d9be4..58d06ba31 100644 --- a/surfsense_backend/app/services/token_tracking_service.py +++ b/surfsense_backend/app/services/token_tracking_service.py @@ -357,9 +357,19 @@ class TokenTrackingCallback(CustomLogger): cost_micros=cost_micros, call_kind=call_kind, ) + + # Per-LLM-call wall-clock latency (LiteLLM passes datetime objects). 
+ call_latency_s: float | None = None + try: + if start_time is not None and end_time is not None: + delta = end_time - start_time + call_latency_s = getattr(delta, "total_seconds", lambda: float(delta))() + except Exception: + call_latency_s = None + logger.info( "[TokenTracking] Captured: model=%s kind=%s prompt=%d completion=%d total=%d " - "cost=$%.6f (%d micros) (accumulator now has %d calls)", + "cost=$%.6f (%d micros) (accumulator now has %d calls)%s", model, call_kind, prompt_tokens, @@ -368,6 +378,7 @@ class TokenTrackingCallback(CustomLogger): cost_usd, cost_micros, len(acc.calls), + f" latency={call_latency_s:.3f}s" if call_latency_s is not None else "", ) From 3a5e16e868b152fbfe895290f99f9b8078c6a2ae Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 19 May 2026 21:30:28 +0200 Subject: [PATCH 09/63] perf(calendar): stop echoing raw events into evidence.items --- .../subagents/connectors/calendar/system_prompt.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/calendar/system_prompt.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/calendar/system_prompt.md index 5b27c18ba..a663f5b37 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/calendar/system_prompt.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/calendar/system_prompt.md @@ -29,7 +29,7 @@ You are a Google Calendar specialist for the user's connected calendar. | `error` | `error` | Relay the tool's `message` verbatim as `next_step`. | | tool raises / unknown | `error` | `"Calendar tool failed unexpectedly. Ask the user to retry shortly."` | -Surface the tool's `event_id`, `title` / `summary`, `start_at`, `end_at`, and `html_link` inside `evidence` when the tool returned them. For `search_calendar_events`, place the raw `events` array inside `evidence.items`. Never invent a field the tool did not return. 
+Surface the tool's `event_id`, `title` / `summary`, `start_at`, `end_at`, and `html_link` inside `evidence` when the tool returned them. For `search_calendar_events`, set `evidence.items` to `{ "total": N }` and list the matched events in `action_summary` (title, date, start time; one line per event; up to 10 entries, then `"...and N more"`). Never invent a field the tool did not return. ## Examples @@ -115,7 +115,7 @@ Rules: - `status=success` → `next_step=null`, `missing_fields=null`. - `status=partial|blocked|error` → `next_step` must be non-null. - `status=blocked` due to missing required inputs → `missing_fields` must be non-null. -- For `search_calendar_events` results, populate `evidence.items` with `{ "events": [...], "total": N }`. +- For `search_calendar_events` results, set `evidence.items` to `{ "total": N }` and list the matched events in `action_summary` (title, date, start time; up to 10 entries, then `"...and N more"`). - For ambiguous matches across `update_calendar_event` / `delete_calendar_event`, populate `evidence.matched_candidates` with up to 5 options (`id` + `label`, where `label` should include the event title and start time for human readability). Infer before you call; map every tool outcome faithfully. From 148139401722f05efe960ac676d974ed92a6d803 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 19 May 2026 21:30:34 +0200 Subject: [PATCH 10/63] chore(scripts): add MCP session lifetime probe --- .../scripts/probe_mcp_session_lifetime.py | 563 ++++++++++++++++++ 1 file changed, 563 insertions(+) create mode 100644 surfsense_backend/scripts/probe_mcp_session_lifetime.py diff --git a/surfsense_backend/scripts/probe_mcp_session_lifetime.py b/surfsense_backend/scripts/probe_mcp_session_lifetime.py new file mode 100644 index 000000000..66be5bc14 --- /dev/null +++ b/surfsense_backend/scripts/probe_mcp_session_lifetime.py @@ -0,0 +1,563 @@ +"""Probe MCP server session lifetime / staleness behavior — read-only. 
+ +Goal +---- +Empirically answer two questions for our actual third-party MCP servers +(Atlassian, Linear, Slack, ClickUp, Airtable, ...): + +1. How expensive is the initial ``initialize`` handshake (``init=`` cost)? +2. How long can a ``ClientSession`` sit idle and still survive a + subsequent ``list_tools()`` call? + +This script informs the design choice between + +* per-call sessions (current, ~1s init tax per call), +* per-turn session reuse (LangChain-style, holds a session for the + duration of a chat turn), +* a long-lived session pool (IBM-style, sessions reused across turns). + +The probe is read-only: it only ever calls ``session.list_tools()``, +which is the safest MCP method. No tool calls against user data are +performed. + +Usage +----- +Run from the repo root or from ``surfsense_backend/``:: + + uv run python -m scripts.probe_mcp_session_lifetime + uv run python -m scripts.probe_mcp_session_lifetime --quick + uv run python -m scripts.probe_mcp_session_lifetime --connectors 7,19,20 + uv run python -m scripts.probe_mcp_session_lifetime --intervals 5,30,60,300 + +Output +------ +* Live progress to stderr (``[connector=7 t=+30s] OK 0.142s``). +* Final per-connector table to stdout. +* Raw results JSON to ``./mcp_session_probe_YYYYMMDD_HHMMSS.json``. + +The default test spans 1800s (~30 min; longest idle gap 900s). ``--quick`` +stops at 60s for fast iteration. All connectors probe concurrently so +total wall-clock time equals the longest interval, not the sum. 
+""" + +from __future__ import annotations + +import argparse +import asyncio +import json +import logging +import os +import sys +import time +from dataclasses import asdict, dataclass, field +from datetime import datetime +from typing import Any + +_HERE = os.path.dirname(os.path.abspath(__file__)) +_BACKEND_ROOT = os.path.dirname(_HERE) +if _BACKEND_ROOT not in sys.path: + sys.path.insert(0, _BACKEND_ROOT) + +import httpx # noqa: E402 +from mcp import ClientSession # noqa: E402 +from mcp.client.streamable_http import streamable_http_client # noqa: E402 +from sqlalchemy import cast, select # noqa: E402 +from sqlalchemy.dialects.postgresql import JSONB # noqa: E402 + +from app.agents.new_chat.tools.mcp_tool import ( # noqa: E402 + _inject_oauth_headers, + _maybe_refresh_mcp_oauth_token, +) +from app.db import SearchSourceConnector, async_session_maker # noqa: E402 + +logging.basicConfig( + level=logging.WARNING, + format="%(asctime)s | %(levelname)-8s | %(name)s | %(message)s", + stream=sys.stderr, +) +logging.getLogger("httpx").setLevel(logging.ERROR) +logging.getLogger("mcp").setLevel(logging.ERROR) +logger = logging.getLogger("mcp_probe") +logger.setLevel(logging.INFO) + + +DEFAULT_INTERVALS_SECONDS = [5, 30, 60, 300, 900, 1800] +QUICK_INTERVALS_SECONDS = [5, 30, 60] +PER_CALL_TIMEOUT_SECONDS = 60.0 + + +@dataclass +class CheckpointResult: + """One ``list_tools()`` call against a long-lived session.""" + + idle_seconds_target: int + elapsed_since_open_seconds: float + elapsed_since_last_call_seconds: float + success: bool + latency_seconds: float | None + tools_returned: int | None + error_type: str | None + error_message: str | None + + +@dataclass +class ConnectorProbeResult: + """Per-connector aggregated probe outcome.""" + + connector_id: int + connector_name: str + connector_type: str + url: str + init_latency_seconds: float | None + first_call_latency_seconds: float | None + checkpoints: list[CheckpointResult] = field(default_factory=list) + fatal_error: 
str | None = None + + +# --------------------------------------------------------------------------- +# Connector loading + auth +# --------------------------------------------------------------------------- + + +async def _fetch_connectors( + connector_ids: list[int] | None, +) -> list[SearchSourceConnector]: + """Pull every MCP-shaped connector (or only the requested IDs).""" + async with async_session_maker() as session: + stmt = select(SearchSourceConnector).filter( + cast(SearchSourceConnector.config, JSONB).has_key("server_config"), + ) + if connector_ids: + stmt = stmt.filter(SearchSourceConnector.id.in_(connector_ids)) + result = await session.execute(stmt) + connectors = list(result.scalars()) + + if connector_ids: + found_ids = {c.id for c in connectors} + missing = [cid for cid in connector_ids if cid not in found_ids] + if missing: + logger.warning("Requested connector IDs not found: %s", missing) + return connectors + + +async def _resolve_authed_server_config( + connector: SearchSourceConnector, +) -> dict[str, Any] | None: + """Refresh OAuth (if needed) and return a server_config with auth headers. + + Returns ``None`` if the connector cannot be probed (missing url, + decrypt failure, no refresh token, etc.). 
+ """ + cfg = connector.config or {} + server_config = cfg.get("server_config", {}) + if not isinstance(server_config, dict): + return None + + if cfg.get("mcp_oauth"): + async with async_session_maker() as session: + attached = await session.get(SearchSourceConnector, connector.id) + if attached is None: + return None + refreshed = await _maybe_refresh_mcp_oauth_token( + session, + attached, + attached.config or {}, + server_config, + ) + attached_cfg = attached.config or {} + server_config = _inject_oauth_headers(attached_cfg, refreshed) + if server_config is None: + return None + return server_config + + +# --------------------------------------------------------------------------- +# The actual probe +# --------------------------------------------------------------------------- + + +def _classify_error(exc: BaseException) -> tuple[str, str]: + """Return ``(short_label, human_message)`` for a failed call.""" + name = type(exc).__name__ + msg = str(exc) or repr(exc) + if isinstance(exc, asyncio.TimeoutError): + return "timeout", f"call exceeded {PER_CALL_TIMEOUT_SECONDS}s" + if "404" in msg or "Not Found" in msg or "session" in msg.lower(): + return "session_expired", msg + if "401" in msg or "Unauthorized" in msg: + return "auth_401", msg + if "ClosedResourceError" in name or "Closed" in name: + return "stream_closed", msg + if "Connection" in name or "ConnectError" in name: + return "connection_error", msg + return name, msg + + +async def _probe_one_connector( + connector: SearchSourceConnector, + intervals: list[int], +) -> ConnectorProbeResult: + """Open a single long-lived session, call ``list_tools`` at each interval.""" + connector_type = ( + connector.connector_type.value + if hasattr(connector.connector_type, "value") + else str(connector.connector_type) + ) + server_config = await _resolve_authed_server_config(connector) + if server_config is None: + return ConnectorProbeResult( + connector_id=connector.id, + connector_name=connector.name, + 
connector_type=connector_type, + url="(unresolved)", + init_latency_seconds=None, + first_call_latency_seconds=None, + fatal_error="failed_to_resolve_server_config", + ) + + url = server_config.get("url") + headers = server_config.get("headers", {}) + if not url: + return ConnectorProbeResult( + connector_id=connector.id, + connector_name=connector.name, + connector_type=connector_type, + url="(missing)", + init_latency_seconds=None, + first_call_latency_seconds=None, + fatal_error="missing_url", + ) + + transport = server_config.get("transport", "streamable-http") + if transport not in ("streamable-http", "http", "sse"): + return ConnectorProbeResult( + connector_id=connector.id, + connector_name=connector.name, + connector_type=connector_type, + url=url, + init_latency_seconds=None, + first_call_latency_seconds=None, + fatal_error=f"unsupported_transport:{transport}", + ) + + result = ConnectorProbeResult( + connector_id=connector.id, + connector_name=connector.name, + connector_type=connector_type, + url=url, + init_latency_seconds=None, + first_call_latency_seconds=None, + ) + + open_started = time.perf_counter() + last_call_at: float | None = None + + # Manually drive the context-manager protocol so the session lives + # across our sleep intervals. ``streamable_http_client`` spawns a + # background task for the SSE receive loop; ``ClientSession`` spawns + # another for request multiplexing. We must close them in reverse order. 
+ http_client = httpx.AsyncClient(headers=headers, timeout=PER_CALL_TIMEOUT_SECONDS) + transport_cm = None + session_cm = None + session = None + try: + transport_cm = streamable_http_client(url, http_client=http_client) + read, write, _ = await transport_cm.__aenter__() + session_cm = ClientSession(read, write) + session = await session_cm.__aenter__() + + init_start = time.perf_counter() + await asyncio.wait_for(session.initialize(), timeout=PER_CALL_TIMEOUT_SECONDS) + result.init_latency_seconds = time.perf_counter() - init_start + logger.info( + "[connector=%s name=%r] init=%.3fs", + connector.id, + connector.name, + result.init_latency_seconds, + ) + + first_call_start = time.perf_counter() + first_response = await asyncio.wait_for( + session.list_tools(), timeout=PER_CALL_TIMEOUT_SECONDS + ) + result.first_call_latency_seconds = time.perf_counter() - first_call_start + last_call_at = time.perf_counter() + logger.info( + "[connector=%s name=%r] first_call=%.3fs tools=%d", + connector.id, + connector.name, + result.first_call_latency_seconds, + len(first_response.tools), + ) + + for interval in intervals: + target_elapsed = open_started + ( + result.init_latency_seconds + result.first_call_latency_seconds + interval + ) + sleep_for = max(0.0, target_elapsed - time.perf_counter()) + await asyncio.sleep(sleep_for) + + call_start = time.perf_counter() + elapsed_since_open = call_start - open_started + elapsed_since_last = call_start - (last_call_at or call_start) + try: + response = await asyncio.wait_for( + session.list_tools(), timeout=PER_CALL_TIMEOUT_SECONDS + ) + latency = time.perf_counter() - call_start + last_call_at = time.perf_counter() + checkpoint = CheckpointResult( + idle_seconds_target=interval, + elapsed_since_open_seconds=round(elapsed_since_open, 3), + elapsed_since_last_call_seconds=round(elapsed_since_last, 3), + success=True, + latency_seconds=round(latency, 3), + tools_returned=len(response.tools), + error_type=None, + error_message=None, + ) 
+ logger.info( + "[connector=%s t=+%ds] OK %.3fs (tools=%d)", + connector.id, + interval, + latency, + len(response.tools), + ) + result.checkpoints.append(checkpoint) + except Exception as exc: # noqa: BLE001 + label, msg = _classify_error(exc) + latency_at_failure = time.perf_counter() - call_start + checkpoint = CheckpointResult( + idle_seconds_target=interval, + elapsed_since_open_seconds=round(elapsed_since_open, 3), + elapsed_since_last_call_seconds=round(elapsed_since_last, 3), + success=False, + latency_seconds=round(latency_at_failure, 3), + tools_returned=None, + error_type=label, + error_message=msg[:300], + ) + logger.warning( + "[connector=%s t=+%ds] FAILED %s after %.3fs: %s", + connector.id, + interval, + label, + latency_at_failure, + msg[:200], + ) + result.checkpoints.append(checkpoint) + # Session is presumed dead — further checkpoints would all + # fail the same way and just waste wall time. + break + + except Exception as exc: # noqa: BLE001 + label, msg = _classify_error(exc) + result.fatal_error = f"{label}: {msg[:200]}" + logger.exception( + "[connector=%s] fatal during open/init: %s", + connector.id, + exc, + ) + finally: + if session_cm is not None: + try: + await session_cm.__aexit__(None, None, None) + except Exception: + pass + if transport_cm is not None: + try: + await transport_cm.__aexit__(None, None, None) + except Exception: + pass + try: + await http_client.aclose() + except Exception: + pass + + return result + + +# --------------------------------------------------------------------------- +# Reporting +# --------------------------------------------------------------------------- + + +def _render_table(results: list[ConnectorProbeResult]) -> str: + """Pretty-print a per-connector summary suitable for the terminal.""" + lines: list[str] = [] + lines.append("=" * 100) + lines.append("MCP Session Lifetime Probe Results") + lines.append("=" * 100) + + for result in results: + lines.append("") + lines.append( + f"Connector 
{result.connector_id} | {result.connector_type} | " + f"{result.connector_name!r}" + ) + lines.append(f" url: {result.url}") + if result.fatal_error: + lines.append(f" FATAL: {result.fatal_error}") + continue + lines.append( + f" init handshake: " + f"{result.init_latency_seconds:.3f}s" + if result.init_latency_seconds is not None + else " init handshake: (failed)" + ) + lines.append( + f" first list_tools (cold): " + f"{result.first_call_latency_seconds:.3f}s" + if result.first_call_latency_seconds is not None + else " first list_tools: (failed)" + ) + if not result.checkpoints: + lines.append(" (no idle checkpoints recorded)") + continue + lines.append( + f" {'idle_s':>8} | {'since_last':>10} | {'outcome':>16} | " + f"{'latency':>9} | {'tools':>5}" + ) + for cp in result.checkpoints: + outcome = "OK" if cp.success else (cp.error_type or "FAIL") + latency = f"{cp.latency_seconds:.3f}s" if cp.latency_seconds is not None else "-" + tools = str(cp.tools_returned) if cp.tools_returned is not None else "-" + lines.append( + f" {cp.idle_seconds_target:>8} | " + f"{cp.elapsed_since_last_call_seconds:>10.1f} | " + f"{outcome:>16} | " + f"{latency:>9} | " + f"{tools:>5}" + ) + + lines.append("") + lines.append("=" * 100) + lines.append("Summary") + lines.append("=" * 100) + survived: dict[int, list[int]] = {} + for result in results: + for cp in result.checkpoints: + if cp.success: + survived.setdefault(cp.idle_seconds_target, []).append( + result.connector_id + ) + if survived: + for interval in sorted(survived): + ids = sorted(survived[interval]) + lines.append( + f" Idle {interval:>5}s: {len(ids)}/{len(results)} connectors " + f"survived ({ids})" + ) + else: + lines.append(" (no successful checkpoints)") + return "\n".join(lines) + + +# --------------------------------------------------------------------------- +# Entry point +# --------------------------------------------------------------------------- + + +def _parse_int_list(value: str) -> list[int]: + return [int(x) 
for x in value.split(",") if x.strip()] + + +def _parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Probe MCP server session lifetime (read-only)", + ) + parser.add_argument( + "--connectors", + type=_parse_int_list, + default=None, + help="Comma-separated connector IDs to probe. Default: all MCP connectors.", + ) + parser.add_argument( + "--intervals", + type=_parse_int_list, + default=None, + help="Comma-separated idle intervals in seconds. " + f"Default: {DEFAULT_INTERVALS_SECONDS}", + ) + parser.add_argument( + "--quick", + action="store_true", + help=f"Short run (intervals={QUICK_INTERVALS_SECONDS}) for fast iteration.", + ) + parser.add_argument( + "--output", + type=str, + default=None, + help="Optional path for the raw JSON results.", + ) + return parser.parse_args() + + +async def _async_main() -> int: + args = _parse_args() + if args.intervals is not None: + intervals = args.intervals + elif args.quick: + intervals = QUICK_INTERVALS_SECONDS + else: + intervals = DEFAULT_INTERVALS_SECONDS + + longest = max(intervals) if intervals else 0 + logger.info( + "Probing intervals=%s (longest=%ds, ~%dmin total wall time)", + intervals, + longest, + (longest + 30) // 60, + ) + + connectors = await _fetch_connectors(args.connectors) + if not connectors: + logger.error("No MCP connectors found to probe.") + return 2 + logger.info( + "Probing %d connector(s): %s", + len(connectors), + [f"{c.id}:{c.name}" for c in connectors], + ) + + started_at = time.time() + results = await asyncio.gather( + *[_probe_one_connector(c, intervals) for c in connectors], + return_exceptions=False, + ) + elapsed = time.time() - started_at + logger.info("All probes complete in %.1fs", elapsed) + + table = _render_table(results) + print(table) + + output_path = ( + args.output + or f"mcp_session_probe_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json" + ) + with open(output_path, "w", encoding="utf-8") as fp: + json.dump( + { + "started_at": 
datetime.fromtimestamp(started_at).isoformat(), + "elapsed_seconds": round(elapsed, 1), + "intervals_tested": intervals, + "results": [asdict(r) for r in results], + }, + fp, + indent=2, + ) + logger.info("Raw results saved to %s", output_path) + return 0 + + +def main() -> None: + try: + exit_code = asyncio.run(_async_main()) + except KeyboardInterrupt: + logger.warning("Interrupted by user") + exit_code = 130 + sys.exit(exit_code) + + +if __name__ == "__main__": + main() From ceedd02353c74f063188fc103500fb848909a810 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 20 May 2026 02:01:36 +0530 Subject: [PATCH 11/63] refactor: extract shared memory service --- .../builtins/memory/tools/update_memory.py | 351 ++------------- .../app/agents/new_chat/memory_extraction.py | 196 +-------- .../agents/new_chat/tools/update_memory.py | 414 ++---------------- .../app/services/memory/__init__.py | 29 ++ .../app/services/memory/prompts.py | 110 +++++ .../app/services/memory/rewrite.py | 35 ++ .../app/services/memory/schemas.py | 23 + .../app/services/memory/service.py | 300 +++++++++++++ .../app/services/memory/validation.py | 158 +++++++ .../unit/services/test_memory_service.py | 204 +++++++++ 10 files changed, 946 insertions(+), 874 deletions(-) create mode 100644 surfsense_backend/app/services/memory/__init__.py create mode 100644 surfsense_backend/app/services/memory/prompts.py create mode 100644 surfsense_backend/app/services/memory/rewrite.py create mode 100644 surfsense_backend/app/services/memory/schemas.py create mode 100644 surfsense_backend/app/services/memory/service.py create mode 100644 surfsense_backend/app/services/memory/validation.py create mode 100644 surfsense_backend/tests/unit/services/test_memory_service.py diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/memory/tools/update_memory.py 
b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/memory/tools/update_memory.py index 23375a081..67bcc3e06 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/memory/tools/update_memory.py +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/memory/tools/update_memory.py @@ -1,280 +1,23 @@ -"""Overwrite one markdown memory document per user or team, with size and shrink guards.""" +"""Memory update tools backed by the canonical memory service.""" from __future__ import annotations import logging -import re -from typing import Any, Literal +from typing import Any from uuid import UUID -from langchain_core.messages import HumanMessage from langchain_core.tools import tool -from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession -from app.db import SearchSpace, User +from app.services.memory import ( + MEMORY_HARD_LIMIT, + MEMORY_SOFT_LIMIT, + MemoryScope, + save_memory, +) logger = logging.getLogger(__name__) -MEMORY_SOFT_LIMIT = 18_000 -MEMORY_HARD_LIMIT = 25_000 - -_SECTION_HEADING_RE = re.compile(r"^##\s+(.+)$", re.MULTILINE) -_HEADING_NORMALIZE_RE = re.compile(r"\s+") - -_MARKER_RE = re.compile(r"\[(fact|pref|instr)\]") -_BULLET_FORMAT_RE = re.compile(r"^- \(\d{4}-\d{2}-\d{2}\) \[(fact|pref|instr)\] .+$") -_PERSONAL_ONLY_MARKERS = {"pref", "instr"} - - -# --------------------------------------------------------------------------- -# Diff validation -# --------------------------------------------------------------------------- - - -def _extract_headings(memory: str) -> set[str]: - """Return all ``## …`` heading texts (without the ``## `` prefix).""" - return set(_SECTION_HEADING_RE.findall(memory)) - - -def _normalize_heading(heading: str) -> str: - """Normalize heading text for robust scope checks.""" - return _HEADING_NORMALIZE_RE.sub(" ", heading.strip().lower()) - - -def _validate_memory_scope( - content: str, scope: Literal["user", "team"] -) -> dict[str, Any] | None: - 
"""Reject personal-only markers ([pref], [instr]) in team memory.""" - if scope != "team": - return None - - markers = set(_MARKER_RE.findall(content)) - leaked = sorted(markers & _PERSONAL_ONLY_MARKERS) - if leaked: - tags = ", ".join(f"[{m}]" for m in leaked) - return { - "status": "error", - "message": ( - f"Team memory cannot include personal markers: {tags}. " - "Use [fact] only in team memory." - ), - } - return None - - -def _validate_bullet_format(content: str) -> list[str]: - """Return warnings for bullet lines that don't match the required format. - - Expected: ``- (YYYY-MM-DD) [fact|pref|instr] text`` - """ - warnings: list[str] = [] - for line in content.splitlines(): - stripped = line.strip() - if not stripped.startswith("- "): - continue - if not _BULLET_FORMAT_RE.match(stripped): - short = stripped[:80] + ("..." if len(stripped) > 80 else "") - warnings.append(f"Malformed bullet: {short}") - return warnings - - -def _validate_diff(old_memory: str | None, new_memory: str) -> list[str]: - """Return a list of warning strings about suspicious changes.""" - if not old_memory: - return [] - - warnings: list[str] = [] - old_headings = _extract_headings(old_memory) - new_headings = _extract_headings(new_memory) - dropped = old_headings - new_headings - if dropped: - names = ", ".join(sorted(dropped)) - warnings.append( - f"Sections removed: {names}. " - "If unintentional, the user can restore from the settings page." - ) - - old_len = len(old_memory) - new_len = len(new_memory) - if old_len > 0 and new_len < old_len * 0.4: - warnings.append( - f"Memory shrank significantly ({old_len:,} -> {new_len:,} chars). " - "Possible data loss." 
- ) - return warnings - - -# --------------------------------------------------------------------------- -# Size validation & soft warning -# --------------------------------------------------------------------------- - - -def _validate_memory_size(content: str) -> dict[str, Any] | None: - """Return an error/warning dict if *content* is too large, else None.""" - length = len(content) - if length > MEMORY_HARD_LIMIT: - return { - "status": "error", - "message": ( - f"Memory exceeds {MEMORY_HARD_LIMIT:,} character limit " - f"({length:,} chars). Consolidate by merging related items, " - "removing outdated entries, and shortening descriptions. " - "Then call update_memory again." - ), - } - return None - - -def _soft_warning(content: str) -> str | None: - """Return a warning string if content exceeds the soft limit.""" - length = len(content) - if length > MEMORY_SOFT_LIMIT: - return ( - f"Memory is at {length:,}/{MEMORY_HARD_LIMIT:,} characters. " - "Consolidate by merging related items and removing less important " - "entries on your next update." - ) - return None - - -# --------------------------------------------------------------------------- -# Forced rewrite when memory exceeds the hard limit -# --------------------------------------------------------------------------- - -_FORCED_REWRITE_PROMPT = """\ -You are a memory curator. The following memory document exceeds the character \ -limit and must be shortened. - -RULES: -1. Rewrite the document to be under {target} characters. -2. Preserve existing ## headings. Every entry must remain under a heading. You may merge - or rename headings to consolidate, but keep names personal and descriptive. -3. Priority for keeping content: [instr] > [pref] > [fact]. -4. Merge duplicate entries, remove outdated entries, shorten verbose descriptions. -5. Every bullet MUST have format: - (YYYY-MM-DD) [fact|pref|instr] text -6. Preserve the user's first name in entries — do not replace it with "the user". -7. 
Output ONLY the consolidated markdown — no explanations, no wrapping. - - -{content} -""" - - -async def _forced_rewrite(content: str, llm: Any) -> str | None: - """Use a focused LLM call to compress *content* under the hard limit. - - Returns the rewritten string, or ``None`` if the call fails. - """ - try: - prompt = _FORCED_REWRITE_PROMPT.format( - target=MEMORY_HARD_LIMIT, content=content - ) - response = await llm.ainvoke( - [HumanMessage(content=prompt)], - config={"tags": ["surfsense:internal"]}, - ) - text = ( - response.content - if isinstance(response.content, str) - else str(response.content) - ) - return text.strip() - except Exception: - logger.exception("Forced rewrite LLM call failed") - return None - - -# --------------------------------------------------------------------------- -# Shared save-and-respond logic -# --------------------------------------------------------------------------- - - -async def _save_memory( - *, - updated_memory: str, - old_memory: str | None, - llm: Any | None, - apply_fn, - commit_fn, - rollback_fn, - label: str, - scope: Literal["user", "team"], -) -> dict[str, Any]: - """Validate, optionally force-rewrite if over the hard limit, save, and - return a response dict. - - Parameters - ---------- - updated_memory : str - The new document the agent submitted. - old_memory : str | None - The previously persisted document (for diff checks). - llm : Any | None - LLM instance for forced rewrite (may be ``None``). - apply_fn : callable(str) -> None - Callback that sets the new memory on the ORM object. - commit_fn : coroutine - ``session.commit``. - rollback_fn : coroutine - ``session.rollback``. - label : str - Human label for log messages (e.g. "user memory", "team memory"). 
- """ - content = updated_memory - - # --- forced rewrite if over the hard limit --- - if len(content) > MEMORY_HARD_LIMIT and llm is not None: - rewritten = await _forced_rewrite(content, llm) - if rewritten is not None and len(rewritten) < len(content): - content = rewritten - - # --- hard-limit gate (reject if still too large after rewrite) --- - size_err = _validate_memory_size(content) - if size_err: - return size_err - - scope_err = _validate_memory_scope(content, scope) - if scope_err: - return scope_err - - # --- persist --- - try: - apply_fn(content) - await commit_fn() - except Exception as e: - logger.exception("Failed to update %s: %s", label, e) - await rollback_fn() - return {"status": "error", "message": f"Failed to update {label}: {e}"} - - # --- build response --- - resp: dict[str, Any] = { - "status": "saved", - "message": f"{label.capitalize()} updated.", - } - - if content is not updated_memory: - resp["notice"] = "Memory was automatically rewritten to fit within limits." - - diff_warnings = _validate_diff(old_memory, content) - if diff_warnings: - resp["diff_warnings"] = diff_warnings - - format_warnings = _validate_bullet_format(content) - if format_warnings: - resp["format_warnings"] = format_warnings - - warning = _soft_warning(content) - if warning: - resp["warning"] = warning - - return resp - - -# --------------------------------------------------------------------------- -# Tool factories -# --------------------------------------------------------------------------- - def create_update_memory_tool( user_id: str | UUID, @@ -287,40 +30,22 @@ def create_update_memory_tool( async def update_memory(updated_memory: str) -> dict[str, Any]: """Update the user's personal memory document. - Your current memory is shown in in the system prompt. - When the user shares important long-term information (preferences, - facts, instructions, context), rewrite the memory document to include - the new information. 
Merge new facts with existing ones, update - contradictions, remove outdated entries, and keep it concise. - - Args: - updated_memory: The FULL updated markdown document (not a diff). + The current memory is shown in . Pass the FULL updated + markdown document, not a diff. """ try: - result = await db_session.execute(select(User).where(User.id == uid)) - user = result.scalars().first() - if not user: - return {"status": "error", "message": "User not found."} - - old_memory = user.memory_md - - return await _save_memory( - updated_memory=updated_memory, - old_memory=old_memory, + result = await save_memory( + scope=MemoryScope.USER, + target_id=uid, + content=updated_memory, + session=db_session, llm=llm, - apply_fn=lambda content: setattr(user, "memory_md", content), - commit_fn=db_session.commit, - rollback_fn=db_session.rollback, - label="memory", - scope="user", ) + return result.to_dict() except Exception as e: logger.exception("Failed to update user memory: %s", e) await db_session.rollback() - return { - "status": "error", - "message": f"Failed to update memory: {e}", - } + return {"status": "error", "message": f"Failed to update memory: {e}"} return update_memory @@ -334,36 +59,18 @@ def create_update_team_memory_tool( async def update_memory(updated_memory: str) -> dict[str, Any]: """Update the team's shared memory document for this search space. - Your current team memory is shown in in the system - prompt. When the team shares important long-term information - (decisions, conventions, key facts, priorities), rewrite the memory - document to include the new information. Merge new facts with - existing ones, update contradictions, remove outdated entries, and - keep it concise. - - Args: - updated_memory: The FULL updated markdown document (not a diff). + The current team memory is shown in . Pass the FULL updated + markdown document, not a diff. 
""" try: - result = await db_session.execute( - select(SearchSpace).where(SearchSpace.id == search_space_id) - ) - space = result.scalars().first() - if not space: - return {"status": "error", "message": "Search space not found."} - - old_memory = space.shared_memory_md - - return await _save_memory( - updated_memory=updated_memory, - old_memory=old_memory, + result = await save_memory( + scope=MemoryScope.TEAM, + target_id=search_space_id, + content=updated_memory, + session=db_session, llm=llm, - apply_fn=lambda content: setattr(space, "shared_memory_md", content), - commit_fn=db_session.commit, - rollback_fn=db_session.rollback, - label="team memory", - scope="team", ) + return result.to_dict() except Exception as e: logger.exception("Failed to update team memory: %s", e) await db_session.rollback() @@ -373,3 +80,11 @@ def create_update_team_memory_tool( } return update_memory + + +__all__ = [ + "MEMORY_HARD_LIMIT", + "MEMORY_SOFT_LIMIT", + "create_update_memory_tool", + "create_update_team_memory_tool", +] diff --git a/surfsense_backend/app/agents/new_chat/memory_extraction.py b/surfsense_backend/app/agents/new_chat/memory_extraction.py index e31774a7c..d44b58f7b 100644 --- a/surfsense_backend/app/agents/new_chat/memory_extraction.py +++ b/surfsense_backend/app/agents/new_chat/memory_extraction.py @@ -1,9 +1,4 @@ -"""Background memory extraction for the SurfSense agent. - -After each agent response, if the agent did not call ``update_memory`` during -the turn, this module can run a lightweight LLM call to decide whether the -latest message contains long-term information worth persisting. 
-""" +"""Background memory extraction for the SurfSense agent.""" from __future__ import annotations @@ -11,102 +6,11 @@ import logging from typing import Any from uuid import UUID -from langchain_core.messages import HumanMessage -from sqlalchemy import select - -from app.agents.new_chat.tools.update_memory import _save_memory -from app.db import SearchSpace, User, shielded_async_session -from app.utils.content_utils import extract_text_content +from app.db import User, shielded_async_session +from app.services.memory import MemoryScope, extract_and_save logger = logging.getLogger(__name__) -_MEMORY_EXTRACT_PROMPT = """\ -You are a memory extraction assistant. Analyze the user's message and decide \ -if it contains any long-term information worth persisting to memory. - -Worth remembering: preferences, background/identity, goals, projects, \ -instructions, tools/languages they use, decisions, expertise, workplace — \ -durable facts that will matter in future conversations. - -NOT worth remembering: greetings, one-off factual questions, session \ -logistics, ephemeral requests, follow-up clarifications with no new personal \ -info, things that only matter for the current task. - -If the message contains memorizable information, output the FULL updated \ -memory document with the new facts merged into the existing content. Follow \ -these rules: -- Every entry MUST be under a ## heading. Preserve existing headings; create new ones - freely. Keep heading names short (2-3 words) and natural. Do NOT include the user's - name in headings. -- Keep entries as single bullet points. Be descriptive but concise — include relevant - details and context rather than just a few words. -- Every bullet MUST use format: - (YYYY-MM-DD) [fact|pref|instr] text - [fact] = durable facts, [pref] = preferences, [instr] = standing instructions. -- Use the user's first name (from ) in entry text, not "the user". -- If a new fact contradicts an existing entry, update the existing entry. 
-- Do not duplicate information that is already present. - -If nothing is worth remembering, output exactly: NO_UPDATE - -{user_name} - - -{current_memory} - - - -{user_message} -""" - -_TEAM_MEMORY_EXTRACT_PROMPT = """\ -You are a team-memory extraction assistant. Analyze the latest message and \ -decide if it contains durable TEAM-level information worth persisting. - -Decision policy: -- Prioritize recall for durable team context, while avoiding personal-only facts. -- Do NOT require explicit consensus language. A direct team-level statement can - be stored if it is stable and broadly useful for future team chats. -- If evidence is weak or clearly tentative, output NO_UPDATE. - -Worth remembering (team-level only): -- Decisions and defaults that guide future team work -- Team conventions/standards (naming, review policy, coding norms) -- Stable org/project facts (locations, ownership, constraints) -- Long-lived architecture/process facts -- Ongoing priorities that are likely relevant beyond this turn - -NOT worth remembering: -- Personal preferences or biography of one person -- Questions, brainstorming, tentative ideas, or speculation -- One-off requests, status updates, TODOs, logistics for this session -- Information scoped only to a single ephemeral task - -If the message contains memorizable team information, output the FULL updated \ -team memory document with new facts merged into existing content. Follow rules: -- Every entry MUST be under a ## heading. Preserve existing headings; create new ones - freely. Keep heading names short (2-3 words) and natural. -- Keep entries as single bullet points. Be descriptive but concise — include relevant - details and context rather than just a few words. -- Every bullet MUST use format: - (YYYY-MM-DD) [fact] text - Team memory uses ONLY the [fact] marker. Never use [pref] or [instr]. -- If a new fact contradicts an existing entry, update the existing entry. -- Do not duplicate existing information. 
-- Preserve neutral team phrasing; avoid person-specific memory unless role-anchored. - -If nothing is worth remembering, output exactly: NO_UPDATE - - -{current_memory} - - - -{author} - - - -{user_message} -""" - async def extract_and_save_memory( *, @@ -114,57 +18,31 @@ async def extract_and_save_memory( user_id: str | None, llm: Any, ) -> None: - """Background task: extract memorizable info and persist it. + """Fire-and-forget personal memory extraction. - Designed to be fire-and-forget — catches all exceptions internally. + The service uses structured output, so free-form ``NO_UPDATE`` text can no + longer be accidentally persisted as memory. """ if not user_id: return try: uid = UUID(user_id) if isinstance(user_id, str) else user_id - async with shielded_async_session() as session: - result = await session.execute(select(User).where(User.id == uid)) - user = result.scalars().first() - if not user: - return - - old_memory = user.memory_md - first_name = ( - user.display_name.strip().split()[0] - if user.display_name and user.display_name.strip() - else "The user" - ) - prompt = _MEMORY_EXTRACT_PROMPT.format( - current_memory=old_memory or "(empty)", + user = await session.get(User, uid) + actor_display_name = user.display_name if user else None + result = await extract_and_save( + scope=MemoryScope.USER, + target_id=uid, user_message=user_message, - user_name=first_name, - ) - response = await llm.ainvoke( - [HumanMessage(content=prompt)], - config={"tags": ["surfsense:internal", "memory-extraction"]}, - ) - text = extract_text_content(response.content).strip() - - if text == "NO_UPDATE" or not text: - logger.debug("Memory extraction: no update needed (user %s)", uid) - return - - save_result = await _save_memory( - updated_memory=text, - old_memory=old_memory, + actor_display_name=actor_display_name, + session=session, llm=llm, - apply_fn=lambda content: setattr(user, "memory_md", content), - commit_fn=session.commit, - rollback_fn=session.rollback, - 
label="memory", - scope="user", ) logger.info( "Background memory extraction for user %s: %s", uid, - save_result.get("status"), + result.status, ) except Exception: logger.exception("Background user memory extraction failed") @@ -177,56 +55,24 @@ async def extract_and_save_team_memory( llm: Any, author_display_name: str | None = None, ) -> None: - """Background task: extract team-level memory and persist it. - - Runs only for shared threads. Designed to be fire-and-forget and catches - exceptions internally. - """ + """Fire-and-forget team-level memory extraction.""" if not search_space_id: return try: async with shielded_async_session() as session: - result = await session.execute( - select(SearchSpace).where(SearchSpace.id == search_space_id) - ) - space = result.scalars().first() - if not space: - return - - old_memory = space.shared_memory_md - prompt = _TEAM_MEMORY_EXTRACT_PROMPT.format( - current_memory=old_memory or "(empty)", - author=author_display_name or "Unknown team member", + result = await extract_and_save( + scope=MemoryScope.TEAM, + target_id=search_space_id, user_message=user_message, - ) - response = await llm.ainvoke( - [HumanMessage(content=prompt)], - config={"tags": ["surfsense:internal", "team-memory-extraction"]}, - ) - text = extract_text_content(response.content).strip() - - if text == "NO_UPDATE" or not text: - logger.debug( - "Team memory extraction: no update needed (space %s)", - search_space_id, - ) - return - - save_result = await _save_memory( - updated_memory=text, - old_memory=old_memory, + actor_display_name=author_display_name, + session=session, llm=llm, - apply_fn=lambda content: setattr(space, "shared_memory_md", content), - commit_fn=session.commit, - rollback_fn=session.rollback, - label="team memory", - scope="team", ) logger.info( "Background team memory extraction for space %s: %s", search_space_id, - save_result.get("status"), + result.status, ) except Exception: logger.exception("Background team memory extraction 
failed") diff --git a/surfsense_backend/app/agents/new_chat/tools/update_memory.py b/surfsense_backend/app/agents/new_chat/tools/update_memory.py index 062668aac..78a65201b 100644 --- a/surfsense_backend/app/agents/new_chat/tools/update_memory.py +++ b/surfsense_backend/app/agents/new_chat/tools/update_memory.py @@ -1,369 +1,53 @@ -"""Markdown-document memory tool for the SurfSense agent. - -Replaces the old row-per-fact save_memory / recall_memory tools with a single -update_memory tool that overwrites a freeform markdown TEXT column. The LLM -always sees the current memory in / tags injected -by MemoryInjectionMiddleware, so it passes the FULL updated document each time. - -Overflow handling: - - Soft limit (18K chars): a warning is returned telling the agent to - consolidate on the next update. - - Hard limit (25K chars): a forced LLM-driven rewrite compresses the document. - If it still exceeds the limit after rewriting, the save is rejected. - - Diff validation: warns when entire ``##`` sections are dropped or when the - document shrinks by more than 60%. 
-""" +"""Memory update tools backed by the canonical memory service.""" from __future__ import annotations import logging -import re -from typing import Any, Literal +from typing import Any from uuid import UUID -from langchain_core.messages import HumanMessage from langchain_core.tools import tool -from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession -from app.db import SearchSpace, User, async_session_maker -from app.utils.content_utils import extract_text_content +from app.db import async_session_maker +from app.services.memory import MemoryScope, save_memory logger = logging.getLogger(__name__) -MEMORY_SOFT_LIMIT = 18_000 -MEMORY_HARD_LIMIT = 25_000 - -_SECTION_HEADING_RE = re.compile(r"^##\s+(.+)$", re.MULTILINE) -_HEADING_NORMALIZE_RE = re.compile(r"\s+") - -_MARKER_RE = re.compile(r"\[(fact|pref|instr)\]") -_BULLET_FORMAT_RE = re.compile(r"^- \(\d{4}-\d{2}-\d{2}\) \[(fact|pref|instr)\] .+$") -_PERSONAL_ONLY_MARKERS = {"pref", "instr"} - - -# --------------------------------------------------------------------------- -# Diff validation -# --------------------------------------------------------------------------- - - -def _extract_headings(memory: str) -> set[str]: - """Return all ``## …`` heading texts (without the ``## `` prefix).""" - return set(_SECTION_HEADING_RE.findall(memory)) - - -def _normalize_heading(heading: str) -> str: - """Normalize heading text for robust scope checks.""" - return _HEADING_NORMALIZE_RE.sub(" ", heading.strip().lower()) - - -def _validate_memory_scope( - content: str, scope: Literal["user", "team"] -) -> dict[str, Any] | None: - """Reject personal-only markers ([pref], [instr]) in team memory.""" - if scope != "team": - return None - - markers = set(_MARKER_RE.findall(content)) - leaked = sorted(markers & _PERSONAL_ONLY_MARKERS) - if leaked: - tags = ", ".join(f"[{m}]" for m in leaked) - return { - "status": "error", - "message": ( - f"Team memory cannot include personal markers: {tags}. 
" - "Use [fact] only in team memory." - ), - } - return None - - -def _validate_bullet_format(content: str) -> list[str]: - """Return warnings for bullet lines that don't match the required format. - - Expected: ``- (YYYY-MM-DD) [fact|pref|instr] text`` - """ - warnings: list[str] = [] - for line in content.splitlines(): - stripped = line.strip() - if not stripped.startswith("- "): - continue - if not _BULLET_FORMAT_RE.match(stripped): - short = stripped[:80] + ("..." if len(stripped) > 80 else "") - warnings.append(f"Malformed bullet: {short}") - return warnings - - -def _validate_diff(old_memory: str | None, new_memory: str) -> list[str]: - """Return a list of warning strings about suspicious changes.""" - if not old_memory: - return [] - - warnings: list[str] = [] - old_headings = _extract_headings(old_memory) - new_headings = _extract_headings(new_memory) - dropped = old_headings - new_headings - if dropped: - names = ", ".join(sorted(dropped)) - warnings.append( - f"Sections removed: {names}. " - "If unintentional, the user can restore from the settings page." - ) - - old_len = len(old_memory) - new_len = len(new_memory) - if old_len > 0 and new_len < old_len * 0.4: - warnings.append( - f"Memory shrank significantly ({old_len:,} -> {new_len:,} chars). " - "Possible data loss." - ) - return warnings - - -# --------------------------------------------------------------------------- -# Size validation & soft warning -# --------------------------------------------------------------------------- - - -def _validate_memory_size(content: str) -> dict[str, Any] | None: - """Return an error/warning dict if *content* is too large, else None.""" - length = len(content) - if length > MEMORY_HARD_LIMIT: - return { - "status": "error", - "message": ( - f"Memory exceeds {MEMORY_HARD_LIMIT:,} character limit " - f"({length:,} chars). Consolidate by merging related items, " - "removing outdated entries, and shortening descriptions. " - "Then call update_memory again." 
- ), - } - return None - - -def _soft_warning(content: str) -> str | None: - """Return a warning string if content exceeds the soft limit.""" - length = len(content) - if length > MEMORY_SOFT_LIMIT: - return ( - f"Memory is at {length:,}/{MEMORY_HARD_LIMIT:,} characters. " - "Consolidate by merging related items and removing less important " - "entries on your next update." - ) - return None - - -# --------------------------------------------------------------------------- -# Forced rewrite when memory exceeds the hard limit -# --------------------------------------------------------------------------- - -_FORCED_REWRITE_PROMPT = """\ -You are a memory curator. The following memory document exceeds the character \ -limit and must be shortened. - -RULES: -1. Rewrite the document to be under {target} characters. -2. Preserve existing ## headings. Every entry must remain under a heading. You may merge - or rename headings to consolidate, but keep names personal and descriptive. -3. Priority for keeping content: [instr] > [pref] > [fact]. -4. Merge duplicate entries, remove outdated entries, shorten verbose descriptions. -5. Every bullet MUST have format: - (YYYY-MM-DD) [fact|pref|instr] text -6. Preserve the user's first name in entries — do not replace it with "the user". -7. Output ONLY the consolidated markdown — no explanations, no wrapping. - - -{content} -""" - - -async def _forced_rewrite(content: str, llm: Any) -> str | None: - """Use a focused LLM call to compress *content* under the hard limit. - - Returns the rewritten string, or ``None`` if the call fails. 
- """ - try: - prompt = _FORCED_REWRITE_PROMPT.format( - target=MEMORY_HARD_LIMIT, content=content - ) - response = await llm.ainvoke( - [HumanMessage(content=prompt)], - config={"tags": ["surfsense:internal"]}, - ) - text = extract_text_content(response.content).strip() - if not text: - logger.warning("Forced rewrite returned empty text; aborting rewrite") - return None - return text - except Exception: - logger.exception("Forced rewrite LLM call failed") - return None - - -# --------------------------------------------------------------------------- -# Shared save-and-respond logic -# --------------------------------------------------------------------------- - - -async def _save_memory( - *, - updated_memory: str, - old_memory: str | None, - llm: Any | None, - apply_fn, - commit_fn, - rollback_fn, - label: str, - scope: Literal["user", "team"], -) -> dict[str, Any]: - """Validate, optionally force-rewrite if over the hard limit, save, and - return a response dict. - - Parameters - ---------- - updated_memory : str - The new document the agent submitted. - old_memory : str | None - The previously persisted document (for diff checks). - llm : Any | None - LLM instance for forced rewrite (may be ``None``). - apply_fn : callable(str) -> None - Callback that sets the new memory on the ORM object. - commit_fn : coroutine - ``session.commit``. - rollback_fn : coroutine - ``session.rollback``. - label : str - Human label for log messages (e.g. "user memory", "team memory"). 
- """ - if not isinstance(updated_memory, str): - logger.warning( - "Refusing non-string memory payload (type=%s)", - type(updated_memory).__name__, - ) - return { - "status": "error", - "message": "Internal error: memory payload must be a string.", - } - - content = updated_memory - - # --- forced rewrite if over the hard limit --- - if len(content) > MEMORY_HARD_LIMIT and llm is not None: - rewritten = await _forced_rewrite(content, llm) - if rewritten is not None and len(rewritten) < len(content): - content = rewritten - - # --- hard-limit gate (reject if still too large after rewrite) --- - size_err = _validate_memory_size(content) - if size_err: - return size_err - - scope_err = _validate_memory_scope(content, scope) - if scope_err: - return scope_err - - # --- persist --- - try: - apply_fn(content) - await commit_fn() - except Exception as e: - logger.exception("Failed to update %s: %s", label, e) - await rollback_fn() - return {"status": "error", "message": f"Failed to update {label}: {e}"} - - # --- build response --- - resp: dict[str, Any] = { - "status": "saved", - "message": f"{label.capitalize()} updated.", - } - - if content is not updated_memory: - resp["notice"] = "Memory was automatically rewritten to fit within limits." - - diff_warnings = _validate_diff(old_memory, content) - if diff_warnings: - resp["diff_warnings"] = diff_warnings - - format_warnings = _validate_bullet_format(content) - if format_warnings: - resp["format_warnings"] = format_warnings - - warning = _soft_warning(content) - if warning: - resp["warning"] = warning - - return resp - - -# --------------------------------------------------------------------------- -# Tool factories -# --------------------------------------------------------------------------- - def create_update_memory_tool( user_id: str | UUID, db_session: AsyncSession, llm: Any | None = None, ): - """Factory function to create the user-memory update tool. + """Factory for the user-memory update tool. 
- The tool acquires its own short-lived ``AsyncSession`` per call via - :data:`async_session_maker` so the closure is safe to share across - HTTP requests by the compiled-agent cache. Capturing a per-request - session here would surface stale/closed sessions on cache hits. - The session's bound ``commit``/``rollback`` methods are captured at - call time, after ``async with`` has bound ``db_session`` locally. - - Args: - user_id: ID of the user whose memory document is being updated. - db_session: Reserved for registry compatibility. Per-call sessions - are opened via :data:`async_session_maker` inside the tool body. - llm: Optional LLM for the forced-rewrite path. - - Returns: - Configured update_memory tool for the user-memory scope. + Uses a fresh short-lived session per call so compiled-agent caches never + retain a stale request-scoped session. """ - del db_session # per-call session — see docstring + del db_session uid = UUID(user_id) if isinstance(user_id, str) else user_id @tool async def update_memory(updated_memory: str) -> dict[str, Any]: """Update the user's personal memory document. - Your current memory is shown in in the system prompt. - When the user shares important long-term information (preferences, - facts, instructions, context), rewrite the memory document to include - the new information. Merge new facts with existing ones, update - contradictions, remove outdated entries, and keep it concise. - - Args: - updated_memory: The FULL updated markdown document (not a diff). + The current memory is shown in . Pass the FULL updated + markdown document, not a diff. 
""" try: async with async_session_maker() as db_session: - result = await db_session.execute(select(User).where(User.id == uid)) - user = result.scalars().first() - if not user: - return {"status": "error", "message": "User not found."} - - old_memory = user.memory_md - - return await _save_memory( - updated_memory=updated_memory, - old_memory=old_memory, + result = await save_memory( + scope=MemoryScope.USER, + target_id=uid, + content=updated_memory, + session=db_session, llm=llm, - apply_fn=lambda content: setattr(user, "memory_md", content), - commit_fn=db_session.commit, - rollback_fn=db_session.rollback, - label="memory", - scope="user", ) + return result.to_dict() except Exception as e: logger.exception("Failed to update user memory: %s", e) - return { - "status": "error", - "message": f"Failed to update memory: {e}", - } + return {"status": "error", "message": f"Failed to update memory: {e}"} return update_memory @@ -373,64 +57,26 @@ def create_update_team_memory_tool( db_session: AsyncSession, llm: Any | None = None, ): - """Factory function to create the team-memory update tool. - - The tool acquires its own short-lived ``AsyncSession`` per call via - :data:`async_session_maker` so the closure is safe to share across - HTTP requests by the compiled-agent cache. Capturing a per-request - session here would surface stale/closed sessions on cache hits. - The session's bound ``commit``/``rollback`` methods are captured at - call time, after ``async with`` has bound ``db_session`` locally. - - Args: - search_space_id: ID of the search space whose team memory is being - updated. - db_session: Reserved for registry compatibility. Per-call sessions - are opened via :data:`async_session_maker` inside the tool body. - llm: Optional LLM for the forced-rewrite path. - - Returns: - Configured update_memory tool for the team-memory scope. 
- """ - del db_session # per-call session — see docstring + """Factory for the team-memory update tool.""" + del db_session @tool async def update_memory(updated_memory: str) -> dict[str, Any]: """Update the team's shared memory document for this search space. - Your current team memory is shown in in the system - prompt. When the team shares important long-term information - (decisions, conventions, key facts, priorities), rewrite the memory - document to include the new information. Merge new facts with - existing ones, update contradictions, remove outdated entries, and - keep it concise. - - Args: - updated_memory: The FULL updated markdown document (not a diff). + The current team memory is shown in . Pass the FULL updated + markdown document, not a diff. """ try: async with async_session_maker() as db_session: - result = await db_session.execute( - select(SearchSpace).where(SearchSpace.id == search_space_id) - ) - space = result.scalars().first() - if not space: - return {"status": "error", "message": "Search space not found."} - - old_memory = space.shared_memory_md - - return await _save_memory( - updated_memory=updated_memory, - old_memory=old_memory, + result = await save_memory( + scope=MemoryScope.TEAM, + target_id=search_space_id, + content=updated_memory, + session=db_session, llm=llm, - apply_fn=lambda content: setattr( - space, "shared_memory_md", content - ), - commit_fn=db_session.commit, - rollback_fn=db_session.rollback, - label="team memory", - scope="team", ) + return result.to_dict() except Exception as e: logger.exception("Failed to update team memory: %s", e) return { @@ -439,3 +85,9 @@ def create_update_team_memory_tool( } return update_memory + + +__all__ = [ + "create_update_memory_tool", + "create_update_team_memory_tool", +] diff --git a/surfsense_backend/app/services/memory/__init__.py b/surfsense_backend/app/services/memory/__init__.py new file mode 100644 index 000000000..d72f45e1f --- /dev/null +++ 
b/surfsense_backend/app/services/memory/__init__.py @@ -0,0 +1,29 @@ +"""First-class memory service for user and team markdown memory.""" + +from .service import ( + MemoryScope, + SaveResult, + extract_and_save, + read_memory, + reset_memory, + save_memory, +) +from .validation import ( + MEMORY_HARD_LIMIT, + MEMORY_SOFT_LIMIT, + validate_bullet_format, + validate_memory_scope, +) + +__all__ = [ + "MEMORY_HARD_LIMIT", + "MEMORY_SOFT_LIMIT", + "MemoryScope", + "SaveResult", + "extract_and_save", + "read_memory", + "reset_memory", + "save_memory", + "validate_bullet_format", + "validate_memory_scope", +] diff --git a/surfsense_backend/app/services/memory/prompts.py b/surfsense_backend/app/services/memory/prompts.py new file mode 100644 index 000000000..fbf27fd08 --- /dev/null +++ b/surfsense_backend/app/services/memory/prompts.py @@ -0,0 +1,110 @@ +"""Prompts used by the memory service.""" + +FORCED_REWRITE_PROMPT = """\ +You are a memory curator. The following memory document exceeds the character \ +limit and must be shortened. + +RULES: +1. Rewrite the document to be under {target} characters. +2. Output Markdown only. Use clear `##` headings and concise bullet points. +3. New-format bullets should look like: `- YYYY-MM-DD: memory text`. +4. If the input contains legacy markers like `(YYYY-MM-DD) [fact]`, preserve the + information but remove the inline marker in the output. +5. Preserve durable instructions and preferences before generic facts when + compressing personal memory. +6. Preserve existing headings when useful; merge duplicate headings and bullets. +7. Output ONLY the consolidated markdown — no explanations, no wrapping. + + +{content} +""" + +USER_MEMORY_EXTRACT_PROMPT = """\ +You are a memory extraction assistant. Analyze the user's message and decide \ +if it contains any long-term information worth persisting to personal memory. 
+ +Worth remembering: preferences, background/identity, goals, projects, \ +instructions, tools/languages they use, decisions, expertise, workplace — \ +durable facts that will matter in future conversations. + +NOT worth remembering: greetings, one-off factual questions, session \ +logistics, ephemeral requests, follow-up clarifications with no new personal \ +info, things that only matter for the current task. + +If there is nothing durable to remember, choose `action = no_update`. + +If the message contains memorizable information, choose `action = save` and \ +return the FULL updated memory document with the new information merged into \ +existing content. + +FORMAT RULES FOR `updated_memory`: +- Markdown only. +- Every entry should be under a `##` heading. +- Recommended headings: `## Facts`, `## Preferences`, `## Instructions`. +- New bullets should use: `- YYYY-MM-DD: memory text`. +- If current memory uses legacy `(YYYY-MM-DD) [fact|pref|instr]` markers, + preserve the information but write the updated document in the new + heading-based format. +- Use the user's first name from `` when helpful, not "the user". +- Do not duplicate existing information. + +{user_name} + + +{current_memory} + + + +{user_message} +""" + +TEAM_MEMORY_EXTRACT_PROMPT = """\ +You are a team-memory extraction assistant. Analyze the latest message and \ +decide if it contains durable TEAM-level information worth persisting. + +Decision policy: +- Prioritize recall for durable team context, while avoiding personal-only facts. +- Do NOT require explicit consensus language. A direct team-level statement can + be stored if it is stable and broadly useful for future team chats. +- If evidence is weak or clearly tentative, choose `action = no_update`. 
+ +Worth remembering (team-level only): +- Decisions and defaults that guide future team work +- Team conventions/standards (naming, review policy, coding norms) +- Stable org/project facts (locations, ownership, constraints) +- Long-lived architecture/process facts +- Ongoing priorities that are likely relevant beyond this turn + +NOT worth remembering: +- Personal preferences or biography of one person +- Questions, brainstorming, tentative ideas, or speculation +- One-off requests, status updates, TODOs, logistics for this session +- Information scoped only to a single ephemeral task + +If the message contains memorizable team information, choose `action = save` \ +and return the FULL updated team memory document with new facts merged into \ +existing content. + +FORMAT RULES FOR `updated_memory`: +- Markdown only. +- Every entry should be under a `##` heading. +- Recommended headings: `## Product Decisions`, `## Engineering Conventions`, + `## Project Facts`, `## Open Questions`. +- New bullets should use: `- YYYY-MM-DD: memory text`. +- If current memory uses legacy `(YYYY-MM-DD) [fact]` markers, preserve the + information but write the updated document in the new heading-based format. +- Do not create personal headings such as `## Preferences`, `## Instructions`, + or `## Personal Notes`. +- Preserve neutral team phrasing; avoid person-specific memory unless role-anchored. 
+ + +{current_memory} + + + +{author} + + + +{user_message} +""" diff --git a/surfsense_backend/app/services/memory/rewrite.py b/surfsense_backend/app/services/memory/rewrite.py new file mode 100644 index 000000000..270904ce7 --- /dev/null +++ b/surfsense_backend/app/services/memory/rewrite.py @@ -0,0 +1,35 @@ +"""LLM-backed memory rewrite helpers.""" + +from __future__ import annotations + +import logging +from typing import Any + +from langchain_core.messages import HumanMessage + +from app.services.memory.prompts import FORCED_REWRITE_PROMPT +from app.services.memory.validation import MEMORY_HARD_LIMIT +from app.utils.content_utils import extract_text_content + +logger = logging.getLogger(__name__) + + +async def forced_rewrite(content: str, llm: Any) -> str | None: + """Use a focused LLM call to compress memory under the hard limit.""" + try: + prompt = FORCED_REWRITE_PROMPT.format( + target=MEMORY_HARD_LIMIT, + content=content, + ) + response = await llm.ainvoke( + [HumanMessage(content=prompt)], + config={"tags": ["surfsense:internal", "memory-rewrite"]}, + ) + text = extract_text_content(response.content).strip() + if not text: + logger.warning("Forced memory rewrite returned empty text") + return None + return text + except Exception: + logger.exception("Forced memory rewrite LLM call failed") + return None diff --git a/surfsense_backend/app/services/memory/schemas.py b/surfsense_backend/app/services/memory/schemas.py new file mode 100644 index 000000000..9b40ee5b1 --- /dev/null +++ b/surfsense_backend/app/services/memory/schemas.py @@ -0,0 +1,23 @@ +"""Structured output schemas for memory extraction.""" + +from __future__ import annotations + +from typing import Literal + +from pydantic import BaseModel, Field + + +class MemoryExtractionDecision(BaseModel): + """Structured extraction result; avoids string sentinel parsing.""" + + action: Literal["no_update", "save"] = Field( + description="Choose no_update when nothing durable should be saved; choose save 
otherwise." + ) + reason: str | None = Field( + default=None, + description="Short reason for no_update, or brief summary of the memory update.", + ) + updated_memory: str | None = Field( + default=None, + description="The full updated markdown memory document when action is save.", + ) diff --git a/surfsense_backend/app/services/memory/service.py b/surfsense_backend/app/services/memory/service.py new file mode 100644 index 000000000..85459c28c --- /dev/null +++ b/surfsense_backend/app/services/memory/service.py @@ -0,0 +1,300 @@ +"""Canonical read/write/reset/extract service for markdown memory.""" + +from __future__ import annotations + +import logging +from dataclasses import dataclass, field +from enum import StrEnum +from typing import Any, Literal +from uuid import UUID + +from langchain_core.messages import HumanMessage +from pydantic import BaseModel +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from app.db import SearchSpace, User +from app.services.memory.prompts import ( + TEAM_MEMORY_EXTRACT_PROMPT, + USER_MEMORY_EXTRACT_PROMPT, +) +from app.services.memory.rewrite import forced_rewrite +from app.services.memory.schemas import MemoryExtractionDecision +from app.services.memory.validation import ( + MEMORY_HARD_LIMIT, + soft_limit_warning, + strip_preamble_to_first_heading, + validate_bullet_format, + validate_diff, + validate_heading_sanity, + validate_memory_scope, + validate_memory_size, +) + +logger = logging.getLogger(__name__) + + +class MemoryScope(StrEnum): + USER = "user" + TEAM = "team" + + +@dataclass(frozen=True) +class SaveResult: + status: Literal["saved", "error", "no_op"] + message: str + memory_md: str = "" + warnings: list[str] = field(default_factory=list) + diff_warnings: list[str] = field(default_factory=list) + format_warnings: list[str] = field(default_factory=list) + notice: str | None = None + + def to_dict(self) -> dict[str, Any]: + data: dict[str, Any] = { + "status": self.status, + 
"message": self.message, + "memory_md": self.memory_md, + } + if self.notice: + data["notice"] = self.notice + if self.warnings: + data["warnings"] = self.warnings + if len(self.warnings) == 1: + data["warning"] = self.warnings[0] + if self.diff_warnings: + data["diff_warnings"] = self.diff_warnings + if self.format_warnings: + data["format_warnings"] = self.format_warnings + return data + + +class MemoryRead(BaseModel): + memory_md: str + + +def _normalize_scope(scope: MemoryScope | str) -> MemoryScope: + return scope if isinstance(scope, MemoryScope) else MemoryScope(scope) + + +def _normalize_user_id(target_id: str | UUID) -> UUID: + return UUID(target_id) if isinstance(target_id, str) else target_id + + +async def _load_target( + *, + scope: MemoryScope | str, + target_id: str | int | UUID, + session: AsyncSession, +) -> User | SearchSpace | None: + normalized = _normalize_scope(scope) + if normalized is MemoryScope.USER: + result = await session.execute( + select(User).where(User.id == _normalize_user_id(target_id)) # type: ignore[arg-type] + ) + return result.scalars().first() + result = await session.execute(select(SearchSpace).where(SearchSpace.id == int(target_id))) + return result.scalars().first() + + +def _get_memory(target: User | SearchSpace, scope: MemoryScope) -> str: + if scope is MemoryScope.USER: + return getattr(target, "memory_md", None) or "" + return getattr(target, "shared_memory_md", None) or "" + + +def _set_memory(target: User | SearchSpace, scope: MemoryScope, content: str) -> None: + if scope is MemoryScope.USER: + target.memory_md = content + else: + target.shared_memory_md = content + + +async def read_memory( + *, + scope: MemoryScope | str, + target_id: str | int | UUID, + session: AsyncSession, +) -> str: + normalized = _normalize_scope(scope) + target = await _load_target(scope=normalized, target_id=target_id, session=session) + if target is None: + return "" + return _get_memory(target, normalized) + + +async def save_memory( + 
*, + scope: MemoryScope | str, + target_id: str | int | UUID, + content: str, + session: AsyncSession, + llm: Any | None = None, +) -> SaveResult: + normalized = _normalize_scope(scope) + if not isinstance(content, str): + return SaveResult( + status="error", + message="Internal error: memory payload must be a string.", + ) + + target = await _load_target(scope=normalized, target_id=target_id, session=session) + if target is None: + return SaveResult( + status="error", + message="User not found." if normalized is MemoryScope.USER else "Search space not found.", + ) + + old_memory = _get_memory(target, normalized) + next_content = strip_preamble_to_first_heading(content.strip()) + notice: str | None = None + warnings: list[str] = [] + + if len(next_content) > MEMORY_HARD_LIMIT and llm is not None: + rewritten = await forced_rewrite(next_content, llm) + if rewritten is not None and len(rewritten) < len(next_content): + next_content = strip_preamble_to_first_heading(rewritten) + notice = "Memory was automatically rewritten to fit within limits." 
+ + for validation in ( + validate_memory_size(next_content), + validate_heading_sanity(next_content), + ): + if validation: + return SaveResult( + status="error", + message=validation["message"], + memory_md=old_memory, + ) + + scope_error, scope_warnings = validate_memory_scope( + next_content, + normalized.value, + old_memory=old_memory, + ) + warnings.extend(scope_warnings) + if scope_error: + return SaveResult( + status="error", + message=scope_error["message"], + memory_md=old_memory, + warnings=warnings, + ) + + try: + _set_memory(target, normalized, next_content) + session.add(target) + await session.commit() + except Exception as e: + logger.exception("Failed to update %s memory: %s", normalized.value, e) + await session.rollback() + return SaveResult( + status="error", + message=f"Failed to update {normalized.value} memory: {e}", + memory_md=old_memory, + ) + + diff_warnings = validate_diff(old_memory, next_content) + format_warnings = validate_bullet_format(next_content) + warning = soft_limit_warning(next_content) + if warning: + warnings.append(warning) + + return SaveResult( + status="saved", + message=( + "Memory updated." + if normalized is MemoryScope.USER + else "Team memory updated." 
+ ), + memory_md=next_content, + warnings=warnings, + diff_warnings=diff_warnings, + format_warnings=format_warnings, + notice=notice, + ) + + +async def reset_memory( + *, + scope: MemoryScope | str, + target_id: str | int | UUID, + session: AsyncSession, +) -> SaveResult: + return await save_memory( + scope=scope, + target_id=target_id, + content="", + session=session, + llm=None, + ) + + +async def extract_and_save( + *, + scope: MemoryScope | str, + target_id: str | int | UUID, + user_message: str, + actor_display_name: str | None, + session: AsyncSession, + llm: Any, +) -> SaveResult: + normalized = _normalize_scope(scope) + current_memory = await read_memory( + scope=normalized, + target_id=target_id, + session=session, + ) + + if normalized is MemoryScope.USER: + first_name = ( + actor_display_name.strip().split()[0] + if actor_display_name and actor_display_name.strip() + else "The user" + ) + prompt = USER_MEMORY_EXTRACT_PROMPT.format( + current_memory=current_memory or "(empty)", + user_message=user_message, + user_name=first_name, + ) + else: + prompt = TEAM_MEMORY_EXTRACT_PROMPT.format( + current_memory=current_memory or "(empty)", + author=actor_display_name or "Unknown team member", + user_message=user_message, + ) + + try: + structured = llm.with_structured_output(MemoryExtractionDecision) + decision = await structured.ainvoke( + [HumanMessage(content=prompt)], + config={"tags": ["surfsense:internal", "memory-extraction"]}, + ) + except Exception: + logger.exception("Structured memory extraction failed") + return SaveResult( + status="error", + message="Structured memory extraction failed.", + memory_md=current_memory, + ) + + if decision.action == "no_update": + return SaveResult( + status="no_op", + message=decision.reason or "No durable memory to persist.", + memory_md=current_memory, + ) + + if not decision.updated_memory: + return SaveResult( + status="error", + message="Structured memory extraction chose save without updated_memory.", + 
memory_md=current_memory, + ) + + return await save_memory( + scope=normalized, + target_id=target_id, + content=decision.updated_memory, + session=session, + llm=llm, + ) diff --git a/surfsense_backend/app/services/memory/validation.py b/surfsense_backend/app/services/memory/validation.py new file mode 100644 index 000000000..0e856943b --- /dev/null +++ b/surfsense_backend/app/services/memory/validation.py @@ -0,0 +1,158 @@ +"""Validation helpers for markdown-backed memory.""" + +from __future__ import annotations + +import re +from typing import Literal + +MEMORY_SOFT_LIMIT = 18_000 +MEMORY_HARD_LIMIT = 25_000 + +_SECTION_HEADING_RE = re.compile(r"^##\s+(.+)$", re.MULTILINE) +_HEADING_LINE_RE = re.compile(r"^##\s+\S+", re.MULTILINE) +_HEADING_NORMALIZE_RE = re.compile(r"[^a-z0-9]+") +_LEGACY_BULLET_RE = re.compile(r"^-\s+\(\d{4}-\d{2}-\d{2}\)\s+\[(fact|pref|instr)\]\s+.+$") +_NEW_BULLET_RE = re.compile(r"^-\s+\d{4}-\d{2}-\d{2}:\s+.+$") + +_FORBIDDEN_TEAM_HEADINGS = { + "preferences", + "instructions", + "personal notes", + "personal instructions", +} + + +def has_markdown_heading(content: str) -> bool: + return bool(_HEADING_LINE_RE.search(content)) + + +def strip_preamble_to_first_heading(content: str) -> str: + """Drop model preamble before the first ``##`` heading, if one exists.""" + match = _HEADING_LINE_RE.search(content) + if not match: + return content.strip() + return content[match.start() :].strip() + + +def extract_headings(memory: str | None) -> set[str]: + if not memory: + return set() + return {_normalize_heading(h) for h in _SECTION_HEADING_RE.findall(memory)} + + +def _normalize_heading(heading: str) -> str: + return _HEADING_NORMALIZE_RE.sub(" ", heading.strip().lower()).strip() + + +def validate_memory_size(content: str) -> dict[str, str] | None: + length = len(content) + if length > MEMORY_HARD_LIMIT: + return { + "status": "error", + "message": ( + f"Memory exceeds {MEMORY_HARD_LIMIT:,} character limit " + f"({length:,} chars). 
Consolidate by merging related items, " + "removing outdated entries, and shortening descriptions." + ), + } + return None + + +def validate_heading_sanity(content: str) -> dict[str, str] | None: + """Block long prose blobs without headings unless they are legacy bullets.""" + stripped = content.strip() + if not stripped: + return None + if has_markdown_heading(stripped): + return None + if len(stripped) <= 40: + return None + if any(_LEGACY_BULLET_RE.match(line.strip()) for line in stripped.splitlines()): + return None + return { + "status": "error", + "message": "Memory must be markdown with at least one ## heading.", + } + + +def validate_memory_scope( + content: str, + scope: Literal["user", "team"], + *, + old_memory: str | None = None, +) -> tuple[dict[str, str] | None, list[str]]: + """Reject new personal headings in team memory, grandfather existing ones.""" + if scope != "team": + return None, [] + + old_forbidden = extract_headings(old_memory) & _FORBIDDEN_TEAM_HEADINGS + new_forbidden = extract_headings(content) & _FORBIDDEN_TEAM_HEADINGS + introduced = sorted(new_forbidden - old_forbidden) + grandfathered = sorted(new_forbidden & old_forbidden) + + warnings: list[str] = [] + if grandfathered: + warnings.append( + "Team memory contains legacy personal headings: " + + ", ".join(grandfathered) + + ". Please consolidate them into team-safe headings." + ) + if introduced: + return ( + { + "status": "error", + "message": ( + "Team memory cannot introduce personal headings: " + + ", ".join(introduced) + + ". Use team-safe headings instead." + ), + }, + warnings, + ) + return None, warnings + + +def validate_bullet_format(content: str) -> list[str]: + warnings: list[str] = [] + for line in content.splitlines(): + stripped = line.strip() + if not stripped.startswith("- "): + continue + if _NEW_BULLET_RE.match(stripped) or _LEGACY_BULLET_RE.match(stripped): + continue + short = stripped[:80] + ("..." 
if len(stripped) > 80 else "") + warnings.append(f"Non-standard memory bullet: {short}") + return warnings + + +def validate_diff(old_memory: str | None, new_memory: str) -> list[str]: + if not old_memory: + return [] + + warnings: list[str] = [] + old_headings = extract_headings(old_memory) + new_headings = extract_headings(new_memory) + dropped = old_headings - new_headings + if dropped: + names = ", ".join(sorted(dropped)) + warnings.append( + f"Sections removed: {names}. If unintentional, restore from the settings page." + ) + + old_len = len(old_memory) + new_len = len(new_memory) + if old_len > 0 and new_len < old_len * 0.4: + warnings.append( + f"Memory shrank significantly ({old_len:,} -> {new_len:,} chars). Possible data loss." + ) + return warnings + + +def soft_limit_warning(content: str) -> str | None: + length = len(content) + if length > MEMORY_SOFT_LIMIT: + return ( + f"Memory is at {length:,}/{MEMORY_HARD_LIMIT:,} characters. " + "Consolidate by merging related items and removing less important entries." 
+ ) + return None diff --git a/surfsense_backend/tests/unit/services/test_memory_service.py b/surfsense_backend/tests/unit/services/test_memory_service.py new file mode 100644 index 000000000..c16e34062 --- /dev/null +++ b/surfsense_backend/tests/unit/services/test_memory_service.py @@ -0,0 +1,204 @@ +"""Unit tests for the first-class memory service.""" + +from types import SimpleNamespace + +import pytest + +from app.services.memory import ( + MemoryScope, + extract_and_save, + reset_memory, + save_memory, +) +from app.services.memory.schemas import MemoryExtractionDecision + +pytestmark = pytest.mark.unit + + +class _FakeSession: + def __init__(self) -> None: + self.commit_calls = 0 + self.rollback_calls = 0 + self.added = [] + + def add(self, obj) -> None: + self.added.append(obj) + + async def commit(self) -> None: + self.commit_calls += 1 + + async def rollback(self) -> None: + self.rollback_calls += 1 + + +class _StructuredLLM: + def __init__(self, decision: MemoryExtractionDecision) -> None: + self.decision = decision + + def with_structured_output(self, _schema): + return self + + async def ainvoke(self, *_args, **_kwargs): + return self.decision + + +@pytest.mark.asyncio +async def test_save_memory_saves_heading_based_memory(monkeypatch) -> None: + target = SimpleNamespace(memory_md="") + session = _FakeSession() + + async def fake_load_target(**_kwargs): + return target + + monkeypatch.setattr("app.services.memory.service._load_target", fake_load_target) + + result = await save_memory( + scope=MemoryScope.USER, + target_id="00000000-0000-0000-0000-000000000000", + content="## Facts\n- 2026-05-19: Anish works on SurfSense\n", + session=session, + ) + + assert result.status == "saved" + assert target.memory_md.startswith("## Facts") + assert session.commit_calls == 1 + + +@pytest.mark.asyncio +async def test_save_memory_accepts_legacy_marker_payload(monkeypatch) -> None: + target = SimpleNamespace(memory_md="") + session = _FakeSession() + + async def 
fake_load_target(**_kwargs): + return target + + monkeypatch.setattr("app.services.memory.service._load_target", fake_load_target) + + result = await save_memory( + scope=MemoryScope.USER, + target_id="00000000-0000-0000-0000-000000000000", + content="- (2026-05-19) [fact] Legacy marker memory\n", + session=session, + ) + + assert result.status == "saved" + assert "[fact]" in target.memory_md + + +@pytest.mark.asyncio +async def test_save_memory_rejects_long_no_heading_payload(monkeypatch) -> None: + target = SimpleNamespace(memory_md="## Facts\n- 2026-05-19: Existing\n") + session = _FakeSession() + + async def fake_load_target(**_kwargs): + return target + + monkeypatch.setattr("app.services.memory.service._load_target", fake_load_target) + + result = await save_memory( + scope=MemoryScope.USER, + target_id="00000000-0000-0000-0000-000000000000", + content="reasoning text before NO_UPDATE should not become saved memory", + session=session, + ) + + assert result.status == "error" + assert session.commit_calls == 0 + assert target.memory_md.startswith("## Facts") + + +@pytest.mark.asyncio +async def test_save_memory_grandfathers_existing_team_personal_heading(monkeypatch) -> None: + content = "## Preferences\n- 2026-05-19: Existing legacy heading\n" + target = SimpleNamespace(shared_memory_md=content) + session = _FakeSession() + + async def fake_load_target(**_kwargs): + return target + + monkeypatch.setattr("app.services.memory.service._load_target", fake_load_target) + + result = await save_memory( + scope=MemoryScope.TEAM, + target_id=1, + content=content, + session=session, + ) + + assert result.status == "saved" + assert result.warnings + assert session.commit_calls == 1 + + +@pytest.mark.asyncio +async def test_reset_memory_clears_memory(monkeypatch) -> None: + target = SimpleNamespace(memory_md="## Facts\n- 2026-05-19: Existing\n") + session = _FakeSession() + + async def fake_load_target(**_kwargs): + return target + + 
monkeypatch.setattr("app.services.memory.service._load_target", fake_load_target) + + result = await reset_memory( + scope=MemoryScope.USER, + target_id="00000000-0000-0000-0000-000000000000", + session=session, + ) + + assert result.status == "saved" + assert target.memory_md == "" + + +@pytest.mark.asyncio +async def test_extract_and_save_no_update_does_not_commit(monkeypatch) -> None: + target = SimpleNamespace(memory_md="## Facts\n- 2026-05-19: Existing\n") + session = _FakeSession() + + async def fake_load_target(**_kwargs): + return target + + monkeypatch.setattr("app.services.memory.service._load_target", fake_load_target) + + result = await extract_and_save( + scope=MemoryScope.USER, + target_id="00000000-0000-0000-0000-000000000000", + user_message="hello", + actor_display_name="Anish", + session=session, + llm=_StructuredLLM( + MemoryExtractionDecision(action="no_update", reason="Greeting only") + ), + ) + + assert result.status == "no_op" + assert session.commit_calls == 0 + + +@pytest.mark.asyncio +async def test_extract_and_save_persists_structured_update(monkeypatch) -> None: + target = SimpleNamespace(memory_md="") + session = _FakeSession() + + async def fake_load_target(**_kwargs): + return target + + monkeypatch.setattr("app.services.memory.service._load_target", fake_load_target) + + result = await extract_and_save( + scope=MemoryScope.USER, + target_id="00000000-0000-0000-0000-000000000000", + user_message="I work on SurfSense", + actor_display_name="Anish", + session=session, + llm=_StructuredLLM( + MemoryExtractionDecision( + action="save", + updated_memory="## Facts\n- 2026-05-19: Anish works on SurfSense\n", + ) + ), + ) + + assert result.status == "saved" + assert "SurfSense" in target.memory_md + assert session.commit_calls == 1 From 5247dc709708556a5be7a1a61bef9444b43248e6 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 20 May 2026 02:02:10 +0530 Subject: [PATCH 12/63] feat: refine 
private and team memory protocols --- .../prompts/memory_protocol/private.md | 6 + .../prompts/memory_protocol/team.md | 8 + .../update_memory/private/description.md | 10 +- .../tools/update_memory/private/example.md | 10 +- .../tools/update_memory/team/description.md | 16 +- .../tools/update_memory/team/example.md | 4 +- .../builtins/memory/system_prompt.md | 7 + .../new_chat/middleware/memory_injection.py | 2 +- .../prompts/base/memory_protocol_private.md | 6 + .../prompts/base/memory_protocol_team.md | 8 + .../prompts/examples/update_memory_private.md | 14 +- .../prompts/examples/update_memory_team.md | 4 +- .../prompts/tools/update_memory_private.md | 51 ++-- .../prompts/tools/update_memory_team.md | 48 ++-- .../new_chat/test_memory_response_content.py | 41 +-- .../tools/test_update_memory_scope.py | 261 +++++++----------- 16 files changed, 232 insertions(+), 264 deletions(-) diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/memory_protocol/private.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/memory_protocol/private.md index 4dd511014..bcb80f0f4 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/memory_protocol/private.md +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/memory_protocol/private.md @@ -6,4 +6,10 @@ standing instructions? If yes, call `update_memory` **alongside** your normal response — don't defer it to a later turn. Skip ephemeral chat noise (one-off Q/A, greetings, session logistics). Stay within the budget shown in ``. + +Memory is heading-based markdown. New entries should be under `##` headings +such as `## Facts`, `## Preferences`, or `## Instructions`, with bullets like +`- YYYY-MM-DD: text`. If existing memory contains legacy +`(YYYY-MM-DD) [fact|pref|instr]` markers, preserve the information but write +new saves in the heading-based format. 
diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/memory_protocol/team.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/memory_protocol/team.md index decd23c4d..14d9a6793 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/memory_protocol/team.md +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/memory_protocol/team.md @@ -6,4 +6,12 @@ key facts? If yes, call `update_memory` **alongside** your normal response — don't defer it to a later turn. Skip ephemeral chat noise (one-off Q/A, greetings, session logistics). Stay within the budget shown in ``. + +Team memory is heading-based markdown. New entries should be under `##` +headings such as `## Product Decisions`, `## Engineering Conventions`, +`## Project Facts`, or `## Open Questions`, with bullets like +`- YYYY-MM-DD: text`. If existing memory contains legacy `(YYYY-MM-DD) [fact]` +markers, preserve the information but write new saves in the heading-based +format. Do not create personal headings such as `## Preferences` or +`## Instructions`. diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/update_memory/private/description.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/update_memory/private/description.md index e7fa842b1..01169ff60 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/update_memory/private/description.md +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/update_memory/private/description.md @@ -9,7 +9,9 @@ - Skip ephemeral chat noise (one-off Q/A, greetings, session logistics). - Args: `updated_memory` — FULL replacement markdown (merge and curate, don't only append). 
- - Formatting: bullets `- (YYYY-MM-DD) [marker] text` with markers `[fact]`, - `[pref]`, `[instr]` (priority when trimming: `instr > pref > fact`). - Group bullets under short `##` headings; stay under the limit shown in - ``. + - Formatting: heading-based markdown with entries under `##` headings. + Recommended headings are `## Facts`, `## Preferences`, `## Instructions`, + though clearer natural headings are allowed. New bullets should look like + `- YYYY-MM-DD: text`; stay under the limit shown in ``. + - If existing memory uses legacy `(YYYY-MM-DD) [fact|pref|instr]` markers, + preserve the information but write the updated document in the new format. diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/update_memory/private/example.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/update_memory/private/example.md index 2505bdf87..9afadb02c 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/update_memory/private/example.md +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/update_memory/private/example.md @@ -1,28 +1,28 @@ Alex, is empty. user: "I'm a space enthusiast, explain astrophage to me" -→ update_memory(updated_memory="## Interests & background\n- (2025-03-15) [fact] Alex is a space enthusiast\n") +→ update_memory(updated_memory="## Facts\n- 2025-03-15: Alex is a space enthusiast\n") (Casual durable fact; use first name, neutral heading.) 
user: "Remember that I prefer concise answers over detailed explanations" -→ update_memory(updated_memory="## Interests & background\n- (2025-03-15) [fact] Alex is a space enthusiast\n\n## Response style\n- (2025-03-15) [pref] Alex prefers concise answers over detailed explanations\n") +→ update_memory(updated_memory="## Facts\n- 2025-03-15: Alex is a space enthusiast\n\n## Preferences\n- 2025-03-15: Alex prefers concise answers over detailed explanations\n") (Durable preference; merge with existing memory.) user: "I actually moved to Tokyo last month" -→ update_memory(updated_memory="...\n\n## Personal context\n- (2025-03-15) [fact] Alex lives in Tokyo (previously London)\n...") +→ update_memory(updated_memory="...\n\n## Facts\n- 2025-03-15: Alex lives in Tokyo (previously London)\n...") (Updated fact; date reflects when recorded.) user: "I'm a freelance photographer working on a nature documentary" -→ update_memory(updated_memory="...\n\n## Current focus\n- (2025-03-15) [fact] Alex is a freelance photographer\n- (2025-03-15) [fact] Alex is working on a nature documentary\n") +→ update_memory(updated_memory="...\n\n## Current Focus\n- 2025-03-15: Alex is a freelance photographer\n- 2025-03-15: Alex is working on a nature documentary\n") user: "Always respond in bullet points" -→ update_memory(updated_memory="...\n\n## Response style\n- (2025-03-15) [instr] Always respond to Alex in bullet points\n") +→ update_memory(updated_memory="...\n\n## Instructions\n- 2025-03-15: Always respond to Alex in bullet points\n") diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/update_memory/team/description.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/update_memory/team/description.md index 13341a910..8459f9e7a 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/update_memory/team/description.md +++ 
b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/update_memory/team/description.md @@ -9,8 +9,14 @@ - Skip ephemeral chat noise (one-off Q/A, greetings, session logistics). - Args: `updated_memory` — FULL replacement markdown (merge and curate, don't only append). - - Formatting: bullets `- (YYYY-MM-DD) [fact] text`. Team memory uses ONLY - the `[fact]` marker (never `[pref]` or `[instr]`). Group bullets under - short `##` headings (2-3 words each); stay under the limit shown in - ``. When trimming, prioritise: decisions/conventions > key - facts > current priorities. + - Formatting: heading-based markdown with entries under `##` headings. + Recommended headings are `## Product Decisions`, + `## Engineering Conventions`, `## Project Facts`, and `## Open Questions`. + New bullets should look like `- YYYY-MM-DD: text`; stay under the limit + shown in ``. + - If existing memory uses legacy `(YYYY-MM-DD) [fact]` markers, preserve the + information but write the updated document in the new format. + - Do not create personal headings such as `## Preferences`, + `## Instructions`, `## Personal Notes`, or `## Personal Instructions`. + When trimming, prioritise: decisions/conventions > key facts > current + priorities. 
diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/update_memory/team/example.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/update_memory/team/example.md index 8bd8fcfe4..5d06d9a0c 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/update_memory/team/example.md +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/update_memory/team/example.md @@ -1,9 +1,9 @@ user: "Let's remember that we decided to do weekly standup meetings on Mondays" -→ update_memory(updated_memory="...\n\n## Team rituals\n- (2025-03-15) [fact] Weekly standup meetings on Mondays\n...") +→ update_memory(updated_memory="...\n\n## Product Decisions\n- 2025-03-15: Weekly standup meetings happen on Mondays\n...") user: "Our office is in downtown Seattle, 5th floor" -→ update_memory(updated_memory="...\n\n## Workspace\n- (2025-03-15) [fact] Office location: downtown Seattle, 5th floor\n...") +→ update_memory(updated_memory="...\n\n## Project Facts\n- 2025-03-15: Office location is downtown Seattle, 5th floor\n...") diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/memory/system_prompt.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/memory/system_prompt.md index 32becf233..13f7b68a5 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/memory/system_prompt.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/memory/system_prompt.md @@ -18,6 +18,10 @@ Persist durable preferences/facts/instructions with `update_memory` while avoidi - Do not store transient chatter. - Do not store secrets unless explicitly instructed. - If memory intent is unclear, return `status=blocked` with the missing intent signal. +- Persisted memory is heading-based markdown. New saved bullets should look like + `- YYYY-MM-DD: text` under `##` headings. 
If existing memory has legacy + `(YYYY-MM-DD) [fact|pref|instr]` markers, preserve the information but write + the updated document in the heading-based format. @@ -53,4 +57,7 @@ Rules: - `status=success` -> `next_step=null`, `missing_fields=null`. - `status=partial|blocked|error` -> `next_step` must be non-null. - `status=blocked` due to missing required inputs -> `missing_fields` must be non-null. +- `evidence.memory_category` is a semantic classification for supervisor logs + only. It is not the persisted storage format and must not force inline + `[fact|preference|instruction]` markers into saved memory. diff --git a/surfsense_backend/app/agents/new_chat/middleware/memory_injection.py b/surfsense_backend/app/agents/new_chat/middleware/memory_injection.py index 6179adccd..1d447aa28 100644 --- a/surfsense_backend/app/agents/new_chat/middleware/memory_injection.py +++ b/surfsense_backend/app/agents/new_chat/middleware/memory_injection.py @@ -17,8 +17,8 @@ from langgraph.runtime import Runtime from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession -from app.agents.new_chat.tools.update_memory import MEMORY_HARD_LIMIT, MEMORY_SOFT_LIMIT from app.db import ChatVisibility, SearchSpace, User, shielded_async_session +from app.services.memory import MEMORY_HARD_LIMIT, MEMORY_SOFT_LIMIT logger = logging.getLogger(__name__) diff --git a/surfsense_backend/app/agents/new_chat/prompts/base/memory_protocol_private.md b/surfsense_backend/app/agents/new_chat/prompts/base/memory_protocol_private.md index 8f7da14f8..22fed418a 100644 --- a/surfsense_backend/app/agents/new_chat/prompts/base/memory_protocol_private.md +++ b/surfsense_backend/app/agents/new_chat/prompts/base/memory_protocol_private.md @@ -3,4 +3,10 @@ IMPORTANT — After understanding each user message, ALWAYS check: does this mes reveal durable facts about the user (role, interests, preferences, projects, background, or standing instructions)? 
If yes, you MUST call update_memory alongside your normal response — do not defer this to a later turn. + +Memory is stored as a heading-based markdown document. New entries should be +under `##` headings such as `## Facts`, `## Preferences`, or `## Instructions` +with bullets like `- YYYY-MM-DD: text`. If existing memory contains legacy +`(YYYY-MM-DD) [fact|pref|instr]` markers, preserve the information but write +new saves in the heading-based format. diff --git a/surfsense_backend/app/agents/new_chat/prompts/base/memory_protocol_team.md b/surfsense_backend/app/agents/new_chat/prompts/base/memory_protocol_team.md index 61d89cc5d..38ec798c0 100644 --- a/surfsense_backend/app/agents/new_chat/prompts/base/memory_protocol_team.md +++ b/surfsense_backend/app/agents/new_chat/prompts/base/memory_protocol_team.md @@ -3,4 +3,12 @@ IMPORTANT — After understanding each user message, ALWAYS check: does this mes reveal durable facts about the team (decisions, conventions, architecture, processes, or key facts)? If yes, you MUST call update_memory alongside your normal response — do not defer this to a later turn. + +Team memory is stored as a heading-based markdown document. New entries should +be under `##` headings such as `## Product Decisions`, +`## Engineering Conventions`, `## Project Facts`, or `## Open Questions` with +bullets like `- YYYY-MM-DD: text`. If existing memory contains legacy +`(YYYY-MM-DD) [fact]` markers, preserve the information but write new saves in +the heading-based format. Do not create personal headings such as +`## Preferences` or `## Instructions`. 
diff --git a/surfsense_backend/app/agents/new_chat/prompts/examples/update_memory_private.md b/surfsense_backend/app/agents/new_chat/prompts/examples/update_memory_private.md index f83fe40b4..496bdcae3 100644 --- a/surfsense_backend/app/agents/new_chat/prompts/examples/update_memory_private.md +++ b/surfsense_backend/app/agents/new_chat/prompts/examples/update_memory_private.md @@ -1,16 +1,16 @@ - Alex, is empty. User: "I'm a space enthusiast, explain astrophage to me" - - The user casually shared a durable fact. Use their first name in the entry, short neutral heading: - update_memory(updated_memory="## Interests & background\n- (2025-03-15) [fact] Alex is a space enthusiast\n") + - The user casually shared a durable fact: + update_memory(updated_memory="## Facts\n- 2025-03-15: Alex is a space enthusiast\n") - User: "Remember that I prefer concise answers over detailed explanations" - - Durable preference. Merge with existing memory, add a new heading: - update_memory(updated_memory="## Interests & background\n- (2025-03-15) [fact] Alex is a space enthusiast\n\n## Response style\n- (2025-03-15) [pref] Alex prefers concise answers over detailed explanations\n") + - Durable preference. 
Merge with existing memory: + update_memory(updated_memory="## Facts\n- 2025-03-15: Alex is a space enthusiast\n\n## Preferences\n- 2025-03-15: Alex prefers concise answers over detailed explanations\n") - User: "I actually moved to Tokyo last month" - Updated fact, date prefix reflects when recorded: - update_memory(updated_memory="## Interests & background\n...\n\n## Personal context\n- (2025-03-15) [fact] Alex lives in Tokyo (previously London)\n...") + update_memory(updated_memory="## Facts\n- 2025-03-15: Alex lives in Tokyo (previously London)\n...") - User: "I'm a freelance photographer working on a nature documentary" - Durable background info under a fitting heading: - update_memory(updated_memory="...\n\n## Current focus\n- (2025-03-15) [fact] Alex is a freelance photographer\n- (2025-03-15) [fact] Alex is working on a nature documentary\n") + update_memory(updated_memory="...\n\n## Current Focus\n- 2025-03-15: Alex is a freelance photographer\n- 2025-03-15: Alex is working on a nature documentary\n") - User: "Always respond in bullet points" - Standing instruction: - update_memory(updated_memory="...\n\n## Response style\n- (2025-03-15) [instr] Always respond to Alex in bullet points\n") + update_memory(updated_memory="...\n\n## Instructions\n- 2025-03-15: Always respond to Alex in bullet points\n") diff --git a/surfsense_backend/app/agents/new_chat/prompts/examples/update_memory_team.md b/surfsense_backend/app/agents/new_chat/prompts/examples/update_memory_team.md index 1c74fdf6e..16b90babf 100644 --- a/surfsense_backend/app/agents/new_chat/prompts/examples/update_memory_team.md +++ b/surfsense_backend/app/agents/new_chat/prompts/examples/update_memory_team.md @@ -1,7 +1,7 @@ - User: "Let's remember that we decided to do weekly standup meetings on Mondays" - Durable team decision: - update_memory(updated_memory="- (2025-03-15) [fact] Weekly standup meetings on Mondays\n...") + update_memory(updated_memory="## Product Decisions\n- 2025-03-15: Weekly 
standup meetings happen on Mondays\n...") - User: "Our office is in downtown Seattle, 5th floor" - Durable team fact: - update_memory(updated_memory="- (2025-03-15) [fact] Office location: downtown Seattle, 5th floor\n...") + update_memory(updated_memory="## Project Facts\n- 2025-03-15: Office location is downtown Seattle, 5th floor\n...") diff --git a/surfsense_backend/app/agents/new_chat/prompts/tools/update_memory_private.md b/surfsense_backend/app/agents/new_chat/prompts/tools/update_memory_private.md index 184013804..65de785e9 100644 --- a/surfsense_backend/app/agents/new_chat/prompts/tools/update_memory_private.md +++ b/surfsense_backend/app/agents/new_chat/prompts/tools/update_memory_private.md @@ -1,31 +1,26 @@ - update_memory: Update your personal memory document about the user. - - Your current memory is already in <user_memory> in your context. The `chars` and - `limit` attributes show your current usage and the maximum allowed size. - - This is your curated long-term memory — the distilled essence of what you know about - the user, not raw conversation logs. - - Call update_memory when: - * The user explicitly asks to remember or forget something - * The user shares durable facts or preferences that will matter in future conversations - - The user's first name is provided in <user_name>. Use it in memory entries - instead of "the user" (e.g. "{name} works at..." not "The user works at..."). - Do not store the name itself as a separate memory entry. - - Do not store short-lived or ephemeral info: one-off questions, greetings, - session logistics, or things that only matter for the current task. + - Your current memory is already in <user_memory> in your context. The `chars` + and `limit` attributes show current usage and the maximum allowed size. + - This is curated long-term memory, not raw conversation logs. + - Call update_memory when the user explicitly asks to remember/forget + something or shares durable facts, preferences, or standing instructions.
+ - The user's first name is provided in <user_name>. Use it in entries instead + of "the user" when helpful. Do not store the name alone as a memory entry. + - Do not store short-lived info: one-off questions, greetings, session + logistics, or things that only matter for the current task. - Args: - - updated_memory: The FULL updated markdown document (not a diff). - Merge new facts with existing ones, update contradictions, remove outdated entries. - Treat every update as a curation pass — consolidate, don't just append. - - Every bullet MUST use this format: - (YYYY-MM-DD) [marker] text - Markers: - [fact] — durable facts (role, background, projects, tools, expertise) - [pref] — preferences (response style, languages, formats, tools) - [instr] — standing instructions (always/never do, response rules) - - Keep it concise and well under the character limit shown in <user_memory>. - - Every entry MUST be under a `##` heading. Keep heading names short (2-3 words) and - natural. Do NOT include the user's name in headings. Organize by context — e.g. - who they are, what they're focused on, how they prefer things. Create, split, or - merge headings freely as the memory grows. - - Each entry MUST be a single bullet point. Be descriptive but concise — include relevant - details and context rather than just a few words. - - During consolidation, prioritize keeping: [instr] > [pref] > [fact]. + - updated_memory: The FULL updated markdown document, not a diff. Merge new + facts with existing ones, update contradictions, remove outdated entries, + and consolidate instead of only appending. + - Use heading-based Markdown: + * Every entry must be under a `##` heading. + * Recommended headings: `## Facts`, `## Preferences`, `## Instructions`. + Specific natural headings are allowed when clearer. + * New bullets should use `- YYYY-MM-DD: text`. + * Each entry should be one concise but descriptive bullet.
+ - If existing memory uses legacy `(YYYY-MM-DD) [fact|pref|instr]` markers, + preserve the information but write the updated document in the new + heading-based format. + - During consolidation, prioritize durable instructions and preferences before + generic facts. diff --git a/surfsense_backend/app/agents/new_chat/prompts/tools/update_memory_team.md b/surfsense_backend/app/agents/new_chat/prompts/tools/update_memory_team.md index 7eaca8818..79d4ead3a 100644 --- a/surfsense_backend/app/agents/new_chat/prompts/tools/update_memory_team.md +++ b/surfsense_backend/app/agents/new_chat/prompts/tools/update_memory_team.md @@ -1,26 +1,28 @@ - update_memory: Update the team's shared memory document for this search space. - - Your current team memory is already in <team_memory> in your context. The `chars` - and `limit` attributes show current usage and the maximum allowed size. - - This is the team's curated long-term memory — decisions, conventions, key facts. - - NEVER store personal memory in team memory (e.g. personal bio, individual - preferences, or user-only standing instructions). - - Call update_memory when: - * A team member explicitly asks to remember or forget something - * The conversation surfaces durable team decisions, conventions, or facts - that will matter in future conversations - - Do not store short-lived or ephemeral info: one-off questions, greetings, - session logistics, or things that only matter for the current task. + - Your current team memory is already in <team_memory> in your context. The + `chars` and `limit` attributes show current usage and the maximum allowed size. + - This is curated long-term team memory: decisions, conventions, architecture, + processes, and key shared facts. + - NEVER store personal memory in team memory: individual bios, personal + preferences, or user-only standing instructions. + - Call update_memory when a team member asks to remember/forget something, or + when the conversation surfaces durable team context that matters later.
+ - Do not store short-lived info: one-off questions, greetings, session + logistics, or things that only matter for the current task. - Args: - - updated_memory: The FULL updated markdown document (not a diff). - Merge new facts with existing ones, update contradictions, remove outdated entries. - Treat every update as a curation pass — consolidate, don't just append. - - Every bullet MUST use this format: - (YYYY-MM-DD) [fact] text - Team memory uses ONLY the [fact] marker. Never use [pref] or [instr] in team memory. - - Keep it concise and well under the character limit shown in <team_memory>. - - Every entry MUST be under a `##` heading. Keep heading names short (2-3 words) and - natural. Organize by context — e.g. what the team decided, current architecture, - active processes. Create, split, or merge headings freely as the memory grows. - - Each entry MUST be a single bullet point. Be descriptive but concise — include relevant - details and context rather than just a few words. - - During consolidation, prioritize keeping: decisions/conventions > key facts > current priorities. + - updated_memory: The FULL updated markdown document, not a diff. Merge new + facts with existing ones, update contradictions, remove outdated entries, + and consolidate instead of only appending. + - Use heading-based Markdown: + * Every entry must be under a `##` heading. + * Recommended headings: `## Product Decisions`, `## Engineering Conventions`, + `## Project Facts`, `## Open Questions`. + * New bullets should use `- YYYY-MM-DD: text`. + * Each entry should be one concise but descriptive bullet. + - If existing memory uses legacy `(YYYY-MM-DD) [fact]` markers, preserve the + information but write the updated document in the new heading-based format. + - Do not create personal headings such as `## Preferences`, `## Instructions`, + `## Personal Notes`, or `## Personal Instructions`. + - During consolidation, prioritize decisions/conventions, then key facts, then + current priorities.
diff --git a/surfsense_backend/tests/unit/agents/new_chat/test_memory_response_content.py b/surfsense_backend/tests/unit/agents/new_chat/test_memory_response_content.py index 1f338ee3e..c2f52659c 100644 --- a/surfsense_backend/tests/unit/agents/new_chat/test_memory_response_content.py +++ b/surfsense_backend/tests/unit/agents/new_chat/test_memory_response_content.py @@ -2,28 +2,12 @@ import pytest -from app.agents.new_chat.tools.update_memory import _save_memory +from app.services.memory import MemoryScope, save_memory from app.utils.content_utils import extract_text_content pytestmark = pytest.mark.unit -class _Recorder: - def __init__(self) -> None: - self.applied_content: str | None = None - self.commit_calls = 0 - self.rollback_calls = 0 - - def apply(self, content: str) -> None: - self.applied_content = content - - async def commit(self) -> None: - self.commit_calls += 1 - - async def rollback(self) -> None: - self.rollback_calls += 1 - - def test_extract_text_content_keeps_no_update_bare_string_from_content_blocks() -> None: content = [ {"type": "thinking", "thinking": "No"}, @@ -69,21 +53,12 @@ def test_extract_text_content_preserves_plain_string_responses() -> None: @pytest.mark.asyncio async def test_save_memory_rejects_non_string_payload_before_commit() -> None: - recorder = _Recorder() - - result = await _save_memory( - updated_memory=["NO_UPDATE"], # type: ignore[arg-type] - old_memory=None, - llm=None, - apply_fn=recorder.apply, - commit_fn=recorder.commit, - rollback_fn=recorder.rollback, - label="memory", - scope="user", + result = await save_memory( + scope=MemoryScope.USER, + target_id="00000000-0000-0000-0000-000000000000", + content=["NO_UPDATE"], # type: ignore[arg-type] + session=None, # type: ignore[arg-type] ) - assert result["status"] == "error" - assert "must be a string" in result["message"] - assert recorder.applied_content is None - assert recorder.commit_calls == 0 - assert recorder.rollback_calls == 0 + assert result.status == "error" 
+ assert "must be a string" in result.message diff --git a/surfsense_backend/tests/unit/agents/new_chat/tools/test_update_memory_scope.py b/surfsense_backend/tests/unit/agents/new_chat/tools/test_update_memory_scope.py index f7fbacf50..60310d907 100644 --- a/surfsense_backend/tests/unit/agents/new_chat/tools/test_update_memory_scope.py +++ b/surfsense_backend/tests/unit/agents/new_chat/tools/test_update_memory_scope.py @@ -1,24 +1,24 @@ -"""Unit tests for memory scope validation and bullet format validation.""" +"""Unit tests for heading-based memory validation.""" import pytest -from app.agents.new_chat.tools.update_memory import ( - _save_memory, - _validate_bullet_format, - _validate_memory_scope, +from app.services.memory import MemoryScope, save_memory +from app.services.memory.validation import ( + validate_bullet_format, + validate_memory_scope, ) pytestmark = pytest.mark.unit -class _Recorder: +class _FakeSession: def __init__(self) -> None: - self.applied_content: str | None = None + self.added = [] self.commit_calls = 0 self.rollback_calls = 0 - def apply(self, content: str) -> None: - self.applied_content = content + def add(self, obj) -> None: + self.added.append(obj) async def commit(self) -> None: self.commit_calls += 1 @@ -27,172 +27,125 @@ class _Recorder: self.rollback_calls += 1 -# --------------------------------------------------------------------------- -# _validate_memory_scope — marker-based -# --------------------------------------------------------------------------- - - -def test_validate_memory_scope_rejects_pref_marker_in_team_scope() -> None: - content = "- (2026-04-10) [pref] Prefers dark mode\n" - result = _validate_memory_scope(content, "team") +def test_validate_memory_scope_rejects_new_personal_heading_in_team() -> None: + content = "## Preferences\n- 2026-04-10: Prefers dark mode\n" + result, _warnings = validate_memory_scope(content, "team") assert result is not None assert result["status"] == "error" - assert "[pref]" in 
result["message"] + assert "preferences" in result["message"] -def test_validate_memory_scope_rejects_instr_marker_in_team_scope() -> None: - content = "- (2026-04-10) [instr] Always respond in Spanish\n" - result = _validate_memory_scope(content, "team") - assert result is not None - assert result["status"] == "error" - assert "[instr]" in result["message"] +def test_validate_memory_scope_allows_old_marker_payload_in_team_scope() -> None: + content = "- (2026-04-10) [pref] Legacy personal marker remains readable\n" + result, _warnings = validate_memory_scope(content, "team") + assert result is None -def test_validate_memory_scope_rejects_both_personal_markers_in_team() -> None: +def test_validate_memory_scope_allows_team_headings() -> None: + content = "## Engineering Conventions\n- 2026-04-10: Uses PostgreSQL\n" + result, _warnings = validate_memory_scope(content, "team") + assert result is None + + +def test_validate_bullet_format_accepts_new_and_legacy_bullets() -> None: content = ( - "- (2026-04-10) [pref] Prefers dark mode\n" - "- (2026-04-10) [instr] Always respond in Spanish\n" + "## Facts\n" + "- 2026-04-10: Senior Python developer\n" + "- (2026-04-10) [fact] Legacy fact is preserved\n" ) - result = _validate_memory_scope(content, "team") - assert result is not None - assert result["status"] == "error" - assert "[instr]" in result["message"] - assert "[pref]" in result["message"] - - -def test_validate_memory_scope_allows_fact_in_team_scope() -> None: - content = "- (2026-04-10) [fact] Office is in downtown Seattle\n" - result = _validate_memory_scope(content, "team") - assert result is None - - -def test_validate_memory_scope_allows_all_markers_in_user_scope() -> None: - content = ( - "- (2026-04-10) [fact] Python developer\n" - "- (2026-04-10) [pref] Prefers concise answers\n" - "- (2026-04-10) [instr] Always use bullet points\n" - ) - result = _validate_memory_scope(content, "user") - assert result is None - - -def 
test_validate_memory_scope_allows_any_heading_in_team() -> None: - content = "## Architecture\n- (2026-04-10) [fact] Uses PostgreSQL for persistence\n" - result = _validate_memory_scope(content, "team") - assert result is None - - -def test_validate_memory_scope_allows_any_heading_in_user() -> None: - content = "## My Projects\n- (2026-04-10) [fact] Working on SurfSense\n" - result = _validate_memory_scope(content, "user") - assert result is None - - -# --------------------------------------------------------------------------- -# _validate_bullet_format -# --------------------------------------------------------------------------- - - -def test_validate_bullet_format_passes_valid_bullets() -> None: - content = ( - "## Work\n" - "- (2026-04-10) [fact] Senior Python developer\n" - "- (2026-04-10) [pref] Prefers dark mode\n" - "- (2026-04-10) [instr] Always respond in bullet points\n" - ) - warnings = _validate_bullet_format(content) + warnings = validate_bullet_format(content) assert warnings == [] -def test_validate_bullet_format_warns_on_missing_marker() -> None: - content = "- (2026-04-10) Senior Python developer\n" - warnings = _validate_bullet_format(content) +def test_validate_bullet_format_warns_on_nonstandard_bullet() -> None: + content = "## Facts\n- Senior Python developer\n" + warnings = validate_bullet_format(content) assert len(warnings) == 1 - assert "Malformed bullet" in warnings[0] - - -def test_validate_bullet_format_warns_on_missing_date() -> None: - content = "- [fact] Senior Python developer\n" - warnings = _validate_bullet_format(content) - assert len(warnings) == 1 - assert "Malformed bullet" in warnings[0] - - -def test_validate_bullet_format_warns_on_unknown_marker() -> None: - content = "- (2026-04-10) [context] Working on project X\n" - warnings = _validate_bullet_format(content) - assert len(warnings) == 1 - assert "Malformed bullet" in warnings[0] - - -def test_validate_bullet_format_ignores_non_bullet_lines() -> None: - content = "## 
Some Heading\nSome paragraph text\n" - warnings = _validate_bullet_format(content) - assert warnings == [] - - -def test_validate_bullet_format_warns_on_old_format_without_marker() -> None: - content = "## About the user\n- (2026-04-10) Likes cats\n" - warnings = _validate_bullet_format(content) - assert len(warnings) == 1 - - -# --------------------------------------------------------------------------- -# _save_memory — end-to-end with marker scope check -# --------------------------------------------------------------------------- + assert "Non-standard memory bullet" in warnings[0] @pytest.mark.asyncio -async def test_save_memory_blocks_pref_in_team_before_commit() -> None: - recorder = _Recorder() - result = await _save_memory( - updated_memory="- (2026-04-10) [pref] Prefers dark mode\n", - old_memory=None, - llm=None, - apply_fn=recorder.apply, - commit_fn=recorder.commit, - rollback_fn=recorder.rollback, - label="team memory", - scope="team", +async def test_save_memory_blocks_new_personal_heading_in_team_before_commit( + monkeypatch, +) -> None: + target = type("Target", (), {"shared_memory_md": ""})() + session = _FakeSession() + + async def fake_load_target(**_kwargs): + return target + + monkeypatch.setattr("app.services.memory.service._load_target", fake_load_target) + + result = await save_memory( + scope=MemoryScope.TEAM, + target_id=1, + content="## Preferences\n- 2026-04-10: Prefers dark mode\n", + session=session, ) - assert result["status"] == "error" - assert recorder.commit_calls == 0 - assert recorder.applied_content is None + assert result.status == "error" + assert session.commit_calls == 0 + assert target.shared_memory_md == "" @pytest.mark.asyncio -async def test_save_memory_allows_fact_in_team_and_commits() -> None: - recorder = _Recorder() - content = "- (2026-04-10) [fact] Weekly standup on Mondays\n" - result = await _save_memory( - updated_memory=content, - old_memory=None, - llm=None, - apply_fn=recorder.apply, - 
commit_fn=recorder.commit, - rollback_fn=recorder.rollback, - label="team memory", - scope="team", +async def test_save_memory_allows_grandfathered_personal_heading_in_team(monkeypatch) -> None: + content = "## Preferences\n- 2026-04-10: Prefers dark mode\n" + target = type("Target", (), {"shared_memory_md": content})() + session = _FakeSession() + + async def fake_load_target(**_kwargs): + return target + + monkeypatch.setattr("app.services.memory.service._load_target", fake_load_target) + + result = await save_memory( + scope=MemoryScope.TEAM, + target_id=1, + content=content, + session=session, ) - assert result["status"] == "saved" - assert recorder.commit_calls == 1 - assert recorder.applied_content == content + assert result.status == "saved" + assert session.commit_calls == 1 + assert target.shared_memory_md == content.strip() + assert result.warnings @pytest.mark.asyncio -async def test_save_memory_includes_format_warnings() -> None: - recorder = _Recorder() - content = "- (2026-04-10) Missing marker text\n" - result = await _save_memory( - updated_memory=content, - old_memory=None, - llm=None, - apply_fn=recorder.apply, - commit_fn=recorder.commit, - rollback_fn=recorder.rollback, - label="memory", - scope="user", +async def test_save_memory_strips_preamble_before_heading(monkeypatch) -> None: + target = type("Target", (), {"memory_md": ""})() + session = _FakeSession() + + async def fake_load_target(**_kwargs): + return target + + monkeypatch.setattr("app.services.memory.service._load_target", fake_load_target) + + result = await save_memory( + scope=MemoryScope.USER, + target_id="00000000-0000-0000-0000-000000000000", + content="Sure, here is the update:\n\n## Facts\n- 2026-04-10: Likes cats\n", + session=session, ) - assert result["status"] == "saved" - assert "format_warnings" in result - assert len(result["format_warnings"]) == 1 + assert result.status == "saved" + assert target.memory_md == "## Facts\n- 2026-04-10: Likes cats" + + 
+@pytest.mark.asyncio +async def test_save_memory_rejects_long_no_heading_payload(monkeypatch) -> None: + target = type("Target", (), {"memory_md": ""})() + session = _FakeSession() + + async def fake_load_target(**_kwargs): + return target + + monkeypatch.setattr("app.services.memory.service._load_target", fake_load_target) + + result = await save_memory( + scope=MemoryScope.USER, + target_id="00000000-0000-0000-0000-000000000000", + content="NO_UPDATE because there is nothing durable to remember.", + session=session, + ) + assert result.status == "error" + assert "## heading" in result.message + assert session.commit_calls == 0 From 3178309e1ae21238bd09e2e0d301740b45f6308f Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 20 May 2026 02:02:27 +0530 Subject: [PATCH 13/63] feat: add team memory routes --- surfsense_backend/app/routes/__init__.py | 2 + surfsense_backend/app/routes/memory_routes.py | 141 ++++-------------- .../app/routes/search_spaces_routes.py | 111 -------------- .../app/routes/team_memory_routes.py | 78 ++++++++++ surfsense_backend/app/schemas/search_space.py | 1 - 5 files changed, 111 insertions(+), 222 deletions(-) create mode 100644 surfsense_backend/app/routes/team_memory_routes.py diff --git a/surfsense_backend/app/routes/__init__.py b/surfsense_backend/app/routes/__init__.py index 5b6a74376..ec4d1650f 100644 --- a/surfsense_backend/app/routes/__init__.py +++ b/surfsense_backend/app/routes/__init__.py @@ -54,6 +54,7 @@ from .search_spaces_routes import router as search_spaces_router from .slack_add_connector_route import router as slack_add_connector_router from .stripe_routes import router as stripe_router from .surfsense_docs_routes import router as surfsense_docs_router +from .team_memory_routes import router as team_memory_router from .teams_add_connector_route import router as teams_add_connector_router from .video_presentations_routes import router as video_presentations_router 
from .vision_llm_routes import router as vision_llm_router @@ -117,3 +118,4 @@ router.include_router(stripe_router) # Stripe checkout for additional page pack router.include_router(youtube_router) # YouTube playlist resolution router.include_router(prompts_router) router.include_router(memory_router) # User personal memory (memory.md style) +router.include_router(team_memory_router) # Search-space team memory diff --git a/surfsense_backend/app/routes/memory_routes.py b/surfsense_backend/app/routes/memory_routes.py index e57ca4055..7b674a584 100644 --- a/surfsense_backend/app/routes/memory_routes.py +++ b/surfsense_backend/app/routes/memory_routes.py @@ -1,24 +1,19 @@ -"""Routes for user memory management (personal memory.md).""" +"""Routes for user memory management.""" from __future__ import annotations -import logging - from fastapi import APIRouter, Depends, HTTPException -from langchain_core.messages import HumanMessage from pydantic import BaseModel from sqlalchemy.ext.asyncio import AsyncSession -from app.agents.new_chat.llm_config import ( - create_chat_litellm_from_agent_config, - load_agent_llm_config_for_search_space, -) -from app.agents.new_chat.tools.update_memory import MEMORY_HARD_LIMIT, _save_memory from app.db import User, get_async_session +from app.services.memory import ( + MemoryScope, + read_memory, + reset_memory, + save_memory, +) from app.users import current_active_user -from app.utils.content_utils import extract_text_content - -logger = logging.getLogger(__name__) router = APIRouter() @@ -31,45 +26,17 @@ class MemoryUpdate(BaseModel): memory_md: str -class MemoryEditRequest(BaseModel): - query: str - search_space_id: int - - -_MEMORY_EDIT_PROMPT = """\ -You are a memory editor. The user wants to modify their memory document. \ -Apply the user's instruction to the existing memory document and output the \ -FULL updated document. - -RULES: -1. 
If the instruction asks to add something, add it with format: \ -- (YYYY-MM-DD) [fact|pref|instr] text, under an existing or new ## heading. \ -Heading names should be personal and descriptive, not generic categories. -2. If the instruction asks to remove something, remove the matching entry. -3. If the instruction asks to change something, update the matching entry. -4. Preserve existing ## headings and all other entries. -5. Every bullet must include a marker: [fact], [pref], or [instr]. -6. Use the user's first name (from ) in entries instead of "the user". -7. Output ONLY the updated markdown — no explanations, no wrapping. - -{user_name} - - -{current_memory} - - - -{instruction} -""" - - @router.get("/users/me/memory", response_model=MemoryRead) async def get_user_memory( user: User = Depends(current_active_user), session: AsyncSession = Depends(get_async_session), ): - await session.refresh(user, ["memory_md"]) - return MemoryRead(memory_md=user.memory_md or "") + memory_md = await read_memory( + scope=MemoryScope.USER, + target_id=user.id, + session=session, + ) + return MemoryRead(memory_md=memory_md) @router.put("/users/me/memory", response_model=MemoryRead) @@ -78,73 +45,27 @@ async def update_user_memory( user: User = Depends(current_active_user), session: AsyncSession = Depends(get_async_session), ): - if len(body.memory_md) > MEMORY_HARD_LIMIT: - raise HTTPException( - status_code=400, - detail=f"Memory exceeds {MEMORY_HARD_LIMIT:,} character limit ({len(body.memory_md):,} chars).", - ) - user.memory_md = body.memory_md - session.add(user) - await session.commit() - await session.refresh(user, ["memory_md"]) - return MemoryRead(memory_md=user.memory_md or "") + result = await save_memory( + scope=MemoryScope.USER, + target_id=user.id, + content=body.memory_md, + session=session, + ) + if result.status == "error": + raise HTTPException(status_code=400, detail=result.message) + return MemoryRead(memory_md=result.memory_md) 
-@router.post("/users/me/memory/edit", response_model=MemoryRead) -async def edit_user_memory( - body: MemoryEditRequest, +@router.post("/users/me/memory/reset", response_model=MemoryRead) +async def reset_user_memory( user: User = Depends(current_active_user), session: AsyncSession = Depends(get_async_session), ): - """Apply a natural language edit to the user's personal memory via LLM.""" - agent_config = await load_agent_llm_config_for_search_space( - session, body.search_space_id + result = await reset_memory( + scope=MemoryScope.USER, + target_id=user.id, + session=session, ) - if not agent_config: - raise HTTPException(status_code=500, detail="No LLM configuration available.") - llm = create_chat_litellm_from_agent_config(agent_config) - if not llm: - raise HTTPException(status_code=500, detail="Failed to create LLM instance.") - - await session.refresh(user, ["memory_md", "display_name"]) - current_memory = user.memory_md or "" - first_name = ( - user.display_name.strip().split()[0] - if user.display_name and user.display_name.strip() - else "The user" - ) - - prompt = _MEMORY_EDIT_PROMPT.format( - current_memory=current_memory or "(empty)", - instruction=body.query, - user_name=first_name, - ) - try: - response = await llm.ainvoke( - [HumanMessage(content=prompt)], - config={"tags": ["surfsense:internal", "memory-edit"]}, - ) - updated = extract_text_content(response.content).strip() - except Exception as e: - logger.exception("Memory edit LLM call failed: %s", e) - raise HTTPException(status_code=500, detail="Memory edit failed.") from e - - if not updated: - raise HTTPException(status_code=400, detail="LLM returned empty result.") - - result = await _save_memory( - updated_memory=updated, - old_memory=current_memory, - llm=llm, - apply_fn=lambda content: setattr(user, "memory_md", content), - commit_fn=session.commit, - rollback_fn=session.rollback, - label="memory", - scope="user", - ) - - if result.get("status") == "error": - raise 
HTTPException(status_code=400, detail=result["message"]) - - await session.refresh(user, ["memory_md"]) - return MemoryRead(memory_md=user.memory_md or "") + if result.status == "error": + raise HTTPException(status_code=400, detail=result.message) + return MemoryRead(memory_md=result.memory_md) diff --git a/surfsense_backend/app/routes/search_spaces_routes.py b/surfsense_backend/app/routes/search_spaces_routes.py index 0f0e43035..db230b0f5 100644 --- a/surfsense_backend/app/routes/search_spaces_routes.py +++ b/surfsense_backend/app/routes/search_spaces_routes.py @@ -1,17 +1,10 @@ import logging from fastapi import APIRouter, Depends, HTTPException -from langchain_core.messages import HumanMessage -from pydantic import BaseModel as PydanticBaseModel from sqlalchemy import func, update from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select -from app.agents.new_chat.llm_config import ( - create_chat_litellm_from_agent_config, - load_agent_llm_config_for_search_space, -) -from app.agents.new_chat.tools.update_memory import MEMORY_HARD_LIMIT, _save_memory from app.config import config from app.db import ( ImageGenerationConfig, @@ -35,7 +28,6 @@ from app.schemas import ( SearchSpaceWithStats, ) from app.users import current_active_user -from app.utils.content_utils import extract_text_content from app.utils.rbac import check_permission, check_search_space_access logger = logging.getLogger(__name__) @@ -43,34 +35,6 @@ logger = logging.getLogger(__name__) router = APIRouter() -class _TeamMemoryEditRequest(PydanticBaseModel): - query: str - - -_TEAM_MEMORY_EDIT_PROMPT = """\ -You are a memory editor for a team workspace. The user wants to modify the \ -team's shared memory document. Apply the user's instruction to the existing \ -memory document and output the FULL updated document. - -RULES: -1. If the instruction asks to add something, add it with format: \ -- (YYYY-MM-DD) [fact] text, under an existing or new ## heading. 
\ -Heading names should be descriptive, not generic categories. -2. If the instruction asks to remove something, remove the matching entry. -3. If the instruction asks to change something, update the matching entry. -4. Preserve existing ## headings and all other entries. -5. NEVER use [pref] or [instr] markers. Team memory uses [fact] only. -6. Output ONLY the updated markdown — no explanations, no wrapping. - - -{current_memory} - - - -{instruction} -""" - - async def create_default_roles_and_membership( session: AsyncSession, search_space_id: int, @@ -294,15 +258,6 @@ async def update_search_space( update_data = search_space_update.model_dump(exclude_unset=True) - if ( - "shared_memory_md" in update_data - and len(update_data["shared_memory_md"] or "") > MEMORY_HARD_LIMIT - ): - raise HTTPException( - status_code=400, - detail=f"Team memory exceeds {MEMORY_HARD_LIMIT:,} character limit.", - ) - for key, value in update_data.items(): setattr(db_search_space, key, value) await session.commit() @@ -317,72 +272,6 @@ async def update_search_space( ) from e -@router.post( - "/searchspaces/{search_space_id}/memory/edit", - response_model=SearchSpaceRead, -) -async def edit_team_memory( - search_space_id: int, - body: _TeamMemoryEditRequest, - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - """Apply a natural language edit to the team memory via LLM.""" - await check_search_space_access(session, user, search_space_id) - - agent_config = await load_agent_llm_config_for_search_space( - session, search_space_id - ) - if not agent_config: - raise HTTPException(status_code=500, detail="No LLM configuration available.") - llm = create_chat_litellm_from_agent_config(agent_config) - if not llm: - raise HTTPException(status_code=500, detail="Failed to create LLM instance.") - - result = await session.execute( - select(SearchSpace).filter(SearchSpace.id == search_space_id) - ) - db_search_space = result.scalars().first() - if 
not db_search_space: - raise HTTPException(status_code=404, detail="Search space not found") - - current_memory = db_search_space.shared_memory_md or "" - - prompt = _TEAM_MEMORY_EDIT_PROMPT.format( - current_memory=current_memory or "(empty)", - instruction=body.query, - ) - try: - response = await llm.ainvoke( - [HumanMessage(content=prompt)], - config={"tags": ["surfsense:internal", "memory-edit"]}, - ) - updated = extract_text_content(response.content).strip() - except Exception as e: - logger.exception("Team memory edit LLM call failed: %s", e) - raise HTTPException(status_code=500, detail="Team memory edit failed.") from e - - if not updated: - raise HTTPException(status_code=400, detail="LLM returned empty result.") - - save_result = await _save_memory( - updated_memory=updated, - old_memory=current_memory, - llm=llm, - apply_fn=lambda content: setattr(db_search_space, "shared_memory_md", content), - commit_fn=session.commit, - rollback_fn=session.rollback, - label="team memory", - scope="team", - ) - - if save_result.get("status") == "error": - raise HTTPException(status_code=400, detail=save_result["message"]) - - await session.refresh(db_search_space) - return db_search_space - - @router.post("/searchspaces/{search_space_id}/ai-sort") async def trigger_ai_sort( search_space_id: int, diff --git a/surfsense_backend/app/routes/team_memory_routes.py b/surfsense_backend/app/routes/team_memory_routes.py new file mode 100644 index 000000000..3e552ce32 --- /dev/null +++ b/surfsense_backend/app/routes/team_memory_routes.py @@ -0,0 +1,78 @@ +"""Routes for search-space team memory.""" + +from __future__ import annotations + +from fastapi import APIRouter, Depends, HTTPException +from pydantic import BaseModel +from sqlalchemy.ext.asyncio import AsyncSession + +from app.db import User, get_async_session +from app.services.memory import ( + MemoryScope, + read_memory, + reset_memory, + save_memory, +) +from app.users import current_active_user +from app.utils.rbac 
import check_search_space_access + +router = APIRouter() + + +class TeamMemoryRead(BaseModel): + memory_md: str + + +class TeamMemoryUpdate(BaseModel): + memory_md: str + + +@router.get("/searchspaces/{search_space_id}/memory", response_model=TeamMemoryRead) +async def get_team_memory( + search_space_id: int, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + await check_search_space_access(session, user, search_space_id) + memory_md = await read_memory( + scope=MemoryScope.TEAM, + target_id=search_space_id, + session=session, + ) + return TeamMemoryRead(memory_md=memory_md) + + +@router.put("/searchspaces/{search_space_id}/memory", response_model=TeamMemoryRead) +async def update_team_memory( + search_space_id: int, + body: TeamMemoryUpdate, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + await check_search_space_access(session, user, search_space_id) + result = await save_memory( + scope=MemoryScope.TEAM, + target_id=search_space_id, + content=body.memory_md, + session=session, + ) + if result.status == "error": + raise HTTPException(status_code=400, detail=result.message) + return TeamMemoryRead(memory_md=result.memory_md) + + +@router.post("/searchspaces/{search_space_id}/memory/reset", response_model=TeamMemoryRead) +async def reset_team_memory( + search_space_id: int, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + await check_search_space_access(session, user, search_space_id) + result = await reset_memory( + scope=MemoryScope.TEAM, + target_id=search_space_id, + session=session, + ) + if result.status == "error": + raise HTTPException(status_code=400, detail=result.message) + return TeamMemoryRead(memory_md=result.memory_md) diff --git a/surfsense_backend/app/schemas/search_space.py b/surfsense_backend/app/schemas/search_space.py index 77e34ea4b..70ed0004e 100644 --- 
a/surfsense_backend/app/schemas/search_space.py +++ b/surfsense_backend/app/schemas/search_space.py @@ -21,7 +21,6 @@ class SearchSpaceUpdate(BaseModel): description: str | None = None citations_enabled: bool | None = None qna_custom_instructions: str | None = None - shared_memory_md: str | None = None ai_file_sort_enabled: bool | None = None From 89a8438864b5a52d9927b8af3e37df8b1116d900 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 20 May 2026 02:02:42 +0530 Subject: [PATCH 14/63] feat: wire memory settings to memory API --- .../components/MemoryContent.tsx | 151 +---------------- .../settings/team-memory-manager.tsx | 152 +----------------- .../contracts/types/search-space.types.ts | 1 - surfsense_web/hooks/use-memory.ts | 109 +++++++++++++ 4 files changed, 121 insertions(+), 292 deletions(-) create mode 100644 surfsense_web/hooks/use-memory.ts diff --git a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/MemoryContent.tsx b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/MemoryContent.tsx index 3542f0925..dc002244f 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/MemoryContent.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/MemoryContent.tsx @@ -1,10 +1,8 @@ "use client"; import { useAtomValue } from "jotai"; -import { ArrowUp, ChevronDown, ClipboardCopy, Download, Info, Pencil } from "lucide-react"; -import { useCallback, useEffect, useRef, useState } from "react"; +import { ChevronDown, ClipboardCopy, Download, Info } from "lucide-react"; import { toast } from "sonner"; -import { z } from "zod"; import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms"; import { PlateEditor } from "@/components/editor/plate-editor"; import { Alert, AlertDescription } from "@/components/ui/alert"; @@ -16,102 +14,23 @@ import { DropdownMenuTrigger, } from 
"@/components/ui/dropdown-menu"; import { Spinner } from "@/components/ui/spinner"; - -import { baseApiService } from "@/lib/apis/base-api.service"; - -const MEMORY_HARD_LIMIT = 25_000; - -const MemoryReadSchema = z.object({ - memory_md: z.string(), -}); +import { MEMORY_HARD_LIMIT, useUserMemory } from "@/hooks/use-memory"; export function MemoryContent() { const activeSearchSpaceId = useAtomValue(activeSearchSpaceIdAtom); - const [memory, setMemory] = useState(""); - const [loading, setLoading] = useState(true); - const [saving, setSaving] = useState(false); - const [editQuery, setEditQuery] = useState(""); - const [editing, setEditing] = useState(false); - const [showInput, setShowInput] = useState(false); - const textareaRef = useRef(null); - const inputContainerRef = useRef(null); - - const fetchMemory = useCallback(async () => { - try { - setLoading(true); - const data = await baseApiService.get("/api/v1/users/me/memory", MemoryReadSchema); - setMemory(data.memory_md); - } catch { - toast.error("Failed to load memory"); - } finally { - setLoading(false); - } - }, []); - - useEffect(() => { - fetchMemory(); - }, [fetchMemory]); - - useEffect(() => { - if (!showInput) return; - - const handlePointerDownOutside = (event: MouseEvent | TouchEvent) => { - const target = event.target; - if (!(target instanceof Node)) return; - if (inputContainerRef.current?.contains(target)) return; - - setShowInput(false); - }; - - document.addEventListener("mousedown", handlePointerDownOutside); - document.addEventListener("touchstart", handlePointerDownOutside, { passive: true }); - - return () => { - document.removeEventListener("mousedown", handlePointerDownOutside); - document.removeEventListener("touchstart", handlePointerDownOutside); - }; - }, [showInput]); + const { memory, displayMemory, loading, saving, reset } = useUserMemory( + Number(activeSearchSpaceId) + ); const handleClear = async () => { try { - setSaving(true); - const data = await 
baseApiService.put("/api/v1/users/me/memory", MemoryReadSchema, { - body: { memory_md: "" }, - }); - setMemory(data.memory_md); + await reset(); toast.success("Memory cleared"); } catch { toast.error("Failed to clear memory"); - } finally { - setSaving(false); } }; - const handleEdit = async () => { - const query = editQuery.trim(); - if (!query) return; - - try { - setEditing(true); - const data = await baseApiService.post("/api/v1/users/me/memory/edit", MemoryReadSchema, { - body: { query, search_space_id: Number(activeSearchSpaceId) }, - }); - setMemory(data.memory_md); - setEditQuery(""); - setShowInput(false); - toast.success("Memory updated"); - } catch { - toast.error("Failed to edit memory"); - } finally { - setEditing(false); - } - }; - - const openInput = () => { - setShowInput(true); - requestAnimationFrame(() => textareaRef.current?.focus()); - }; - const handleDownload = () => { if (!memory) return; try { @@ -139,14 +58,6 @@ export function MemoryContent() { } }; - const handleKeyDown = (e: React.KeyboardEvent) => { - if (e.key === "Enter" && !e.shiftKey) { - e.preventDefault(); - handleEdit(); - } - }; - - const displayMemory = memory.replace(/\(\d{4}-\d{2}-\d{2}\)\s*\[(fact|pref|instr)\]\s*/g, ""); const charCount = memory.length; const getCounterColor = () => { @@ -198,54 +109,6 @@ export function MemoryContent() { className="px-5 py-4 text-sm min-h-full" /> - - {showInput ? ( -
-
- setEditQuery(e.target.value)} - onKeyDown={handleKeyDown} - placeholder="Tell SurfSense what to remember or forget" - disabled={editing} - className="flex-1 bg-transparent text-sm outline-none placeholder:text-muted-foreground/70" - /> - -
-
- ) : ( - - )}
@@ -263,7 +126,7 @@ export function MemoryContent() { size="sm" className="text-xs sm:text-sm" onClick={handleClear} - disabled={saving || editing || !memory} + disabled={saving || !memory} > Reset Memory Reset diff --git a/surfsense_web/components/settings/team-memory-manager.tsx b/surfsense_web/components/settings/team-memory-manager.tsx index 9d3a40e46..6a2cbf52f 100644 --- a/surfsense_web/components/settings/team-memory-manager.tsx +++ b/surfsense_web/components/settings/team-memory-manager.tsx @@ -1,12 +1,7 @@ "use client"; -import { useQuery, useQueryClient } from "@tanstack/react-query"; -import { useAtomValue } from "jotai"; -import { ArrowUp, ChevronDown, ClipboardCopy, Download, Info, Pencil } from "lucide-react"; -import { useEffect, useRef, useState } from "react"; +import { ChevronDown, ClipboardCopy, Download, Info } from "lucide-react"; import { toast } from "sonner"; -import { z } from "zod"; -import { updateSearchSpaceMutationAtom } from "@/atoms/search-spaces/search-space-mutation.atoms"; import { PlateEditor } from "@/components/editor/plate-editor"; import { Alert, AlertDescription } from "@/components/ui/alert"; import { Button } from "@/components/ui/button"; @@ -17,105 +12,24 @@ import { DropdownMenuTrigger, } from "@/components/ui/dropdown-menu"; import { Spinner } from "@/components/ui/spinner"; -import { baseApiService } from "@/lib/apis/base-api.service"; -import { searchSpacesApiService } from "@/lib/apis/search-spaces-api.service"; -import { cacheKeys } from "@/lib/query-client/cache-keys"; - -const MEMORY_HARD_LIMIT = 25_000; - -const SearchSpaceSchema = z - .object({ - shared_memory_md: z.string().optional().default(""), - }) - .passthrough(); +import { MEMORY_HARD_LIMIT, useTeamMemory } from "@/hooks/use-memory"; interface TeamMemoryManagerProps { searchSpaceId: number; } export function TeamMemoryManager({ searchSpaceId }: TeamMemoryManagerProps) { - const queryClient = useQueryClient(); - const { data: searchSpace, isLoading: 
loading } = useQuery({ - queryKey: cacheKeys.searchSpaces.detail(searchSpaceId.toString()), - queryFn: () => searchSpacesApiService.getSearchSpace({ id: searchSpaceId }), - enabled: !!searchSpaceId, - }); - - const { mutateAsync: updateSearchSpace } = useAtomValue(updateSearchSpaceMutationAtom); - - const [saving, setSaving] = useState(false); - const [editQuery, setEditQuery] = useState(""); - const [editing, setEditing] = useState(false); - const [showInput, setShowInput] = useState(false); - const textareaRef = useRef(null); - const inputContainerRef = useRef(null); - - const memory = searchSpace?.shared_memory_md || ""; - - useEffect(() => { - if (!showInput) return; - - const handlePointerDownOutside = (event: MouseEvent | TouchEvent) => { - const target = event.target; - if (!(target instanceof Node)) return; - if (inputContainerRef.current?.contains(target)) return; - - setShowInput(false); - }; - - document.addEventListener("mousedown", handlePointerDownOutside); - document.addEventListener("touchstart", handlePointerDownOutside, { passive: true }); - - return () => { - document.removeEventListener("mousedown", handlePointerDownOutside); - document.removeEventListener("touchstart", handlePointerDownOutside); - }; - }, [showInput]); + const { memory, displayMemory, loading, saving, reset } = useTeamMemory(searchSpaceId); const handleClear = async () => { try { - setSaving(true); - await updateSearchSpace({ - id: searchSpaceId, - data: { shared_memory_md: "" }, - }); + await reset(); toast.success("Team memory cleared"); } catch { toast.error("Failed to clear team memory"); - } finally { - setSaving(false); } }; - const handleEdit = async () => { - const query = editQuery.trim(); - if (!query) return; - - try { - setEditing(true); - await baseApiService.post( - `/api/v1/searchspaces/${searchSpaceId}/memory/edit`, - SearchSpaceSchema, - { body: { query } } - ); - setEditQuery(""); - setShowInput(false); - await queryClient.invalidateQueries({ - queryKey: 
cacheKeys.searchSpaces.detail(searchSpaceId.toString()), - }); - toast.success("Team memory updated"); - } catch { - toast.error("Failed to edit team memory"); - } finally { - setEditing(false); - } - }; - - const openInput = () => { - setShowInput(true); - requestAnimationFrame(() => textareaRef.current?.focus()); - }; - const handleDownload = () => { if (!memory) return; try { @@ -143,14 +57,6 @@ export function TeamMemoryManager({ searchSpaceId }: TeamMemoryManagerProps) { } }; - const handleKeyDown = (e: React.KeyboardEvent) => { - if (e.key === "Enter" && !e.shiftKey) { - e.preventDefault(); - handleEdit(); - } - }; - - const displayMemory = memory.replace(/\(\d{4}-\d{2}-\d{2}\)\s*\[(fact|pref|instr)\]\s*/g, ""); const charCount = memory.length; const getCounterColor = () => { @@ -204,54 +110,6 @@ export function TeamMemoryManager({ searchSpaceId }: TeamMemoryManagerProps) { className="px-5 py-4 text-sm min-h-full" />
- - {showInput ? ( -
-
- setEditQuery(e.target.value)} - onKeyDown={handleKeyDown} - placeholder="Tell SurfSense what to remember or forget about your team" - disabled={editing} - className="flex-1 bg-transparent text-sm outline-none placeholder:text-muted-foreground/70" - /> - -
-
- ) : ( - - )}
@@ -269,7 +127,7 @@ export function TeamMemoryManager({ searchSpaceId }: TeamMemoryManagerProps) { size="sm" className="text-xs sm:text-sm" onClick={handleClear} - disabled={saving || editing || !memory} + disabled={saving || !memory} > Reset Memory Reset diff --git a/surfsense_web/contracts/types/search-space.types.ts b/surfsense_web/contracts/types/search-space.types.ts index 7449f82b1..08918e2af 100644 --- a/surfsense_web/contracts/types/search-space.types.ts +++ b/surfsense_web/contracts/types/search-space.types.ts @@ -56,7 +56,6 @@ export const updateSearchSpaceRequest = z.object({ description: true, citations_enabled: true, qna_custom_instructions: true, - shared_memory_md: true, ai_file_sort_enabled: true, }) .partial(), diff --git a/surfsense_web/hooks/use-memory.ts b/surfsense_web/hooks/use-memory.ts new file mode 100644 index 000000000..1f7a51790 --- /dev/null +++ b/surfsense_web/hooks/use-memory.ts @@ -0,0 +1,109 @@ +"use client"; + +import { useCallback, useEffect, useState } from "react"; +import { z } from "zod"; +import { baseApiService } from "@/lib/apis/base-api.service"; + +export const MEMORY_HARD_LIMIT = 25_000; + +const MemoryReadSchema = z.object({ + memory_md: z.string(), +}); + +type MemoryScope = "user" | "team"; + +interface UseMemoryOptions { + scope: MemoryScope; + searchSpaceId?: number | null; + autoLoad?: boolean; +} + +function getMemoryPath(scope: MemoryScope, searchSpaceId?: number | null) { + if (scope === "user") return "/api/v1/users/me/memory"; + if (!searchSpaceId) throw new Error("searchSpaceId is required for team memory"); + return `/api/v1/searchspaces/${searchSpaceId}/memory`; +} + +export function stripMemoryDisplayPrefixes(memory: string) { + return memory.replace( + /^\s*-\s+(?:\(\d{4}-\d{2}-\d{2}\)\s*\[(?:fact|pref|instr)\]\s*|\d{4}-\d{2}-\d{2}:\s*)/gim, + "- " + ); +} + +export function useMemory({ scope, searchSpaceId, autoLoad = true }: UseMemoryOptions) { + const [memory, setMemory] = useState(""); + const 
[loading, setLoading] = useState(autoLoad); + const [saving, setSaving] = useState(false); + + const load = useCallback(async () => { + setLoading(true); + try { + const data = await baseApiService.get(getMemoryPath(scope, searchSpaceId), MemoryReadSchema); + setMemory(data.memory_md); + return data.memory_md; + } finally { + setLoading(false); + } + }, [scope, searchSpaceId]); + + useEffect(() => { + if (!autoLoad) return; + load().catch(() => { + setLoading(false); + }); + }, [autoLoad, load]); + + const save = useCallback( + async (memoryMd: string) => { + setSaving(true); + try { + const data = await baseApiService.put( + getMemoryPath(scope, searchSpaceId), + MemoryReadSchema, + { + body: { memory_md: memoryMd }, + } + ); + setMemory(data.memory_md); + return data.memory_md; + } finally { + setSaving(false); + } + }, + [scope, searchSpaceId] + ); + + const reset = useCallback(async () => { + setSaving(true); + try { + const data = await baseApiService.post( + `${getMemoryPath(scope, searchSpaceId)}/reset`, + MemoryReadSchema + ); + setMemory(data.memory_md); + return data.memory_md; + } finally { + setSaving(false); + } + }, [scope, searchSpaceId]); + + return { + memory, + setMemory, + displayMemory: stripMemoryDisplayPrefixes(memory), + loading, + saving, + load, + save, + reset, + }; +} + +export function useUserMemory(searchSpaceId?: number | null) { + return useMemory({ scope: "user", searchSpaceId }); +} + +export function useTeamMemory(searchSpaceId?: number | null) { + return useMemory({ scope: "team", searchSpaceId }); +} From cb1cf26ef3436c233fb22b3d0b791f5241552c15 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 20 May 2026 02:02:59 +0530 Subject: [PATCH 15/63] feat: improve document editor panel behavior --- .../atoms/editor/editor-panel.atom.ts | 26 +++- .../components/documents/DocumentNode.tsx | 132 ++++++++++++++---- .../components/documents/FolderTreeView.tsx | 79 ++++++----- 
.../components/editor-panel/editor-panel.tsx | 108 ++++++++++++-- .../layout/ui/right-panel/RightPanel.tsx | 19 ++- .../layout/ui/sidebar/DocumentsSidebar.tsx | 91 +++++++++++- .../contracts/enums/connectorIcons.tsx | 4 + .../contracts/types/document.types.ts | 2 + 8 files changed, 380 insertions(+), 81 deletions(-) diff --git a/surfsense_web/atoms/editor/editor-panel.atom.ts b/surfsense_web/atoms/editor/editor-panel.atom.ts index 28563e7d3..c302c66ee 100644 --- a/surfsense_web/atoms/editor/editor-panel.atom.ts +++ b/surfsense_web/atoms/editor/editor-panel.atom.ts @@ -3,10 +3,11 @@ import { rightPanelCollapsedAtom, rightPanelTabAtom } from "@/atoms/layout/right interface EditorPanelState { isOpen: boolean; - kind: "document" | "local_file"; + kind: "document" | "local_file" | "memory"; documentId: number | null; localFilePath: string | null; searchSpaceId: number | null; + memoryScope: "user" | "team" | null; title: string | null; } @@ -16,6 +17,7 @@ const initialState: EditorPanelState = { documentId: null, localFilePath: null, searchSpaceId: null, + memoryScope: null, title: null, }; @@ -38,6 +40,12 @@ export const openEditorPanelAtom = atom( title?: string; searchSpaceId?: number; } + | { + kind: "memory"; + memoryScope: "user" | "team"; + title?: string; + searchSpaceId?: number; + } ) => { if (!get(editorPanelAtom).isOpen) { set(preEditorCollapsedAtom, get(rightPanelCollapsedAtom)); @@ -49,6 +57,21 @@ export const openEditorPanelAtom = atom( documentId: null, localFilePath: payload.localFilePath, searchSpaceId: payload.searchSpaceId ?? null, + memoryScope: null, + title: payload.title ?? null, + }); + set(rightPanelTabAtom, "editor"); + set(rightPanelCollapsedAtom, false); + return; + } + if (payload.kind === "memory") { + set(editorPanelAtom, { + isOpen: true, + kind: "memory", + documentId: null, + localFilePath: null, + searchSpaceId: payload.searchSpaceId ?? null, + memoryScope: payload.memoryScope, title: payload.title ?? 
null, }); set(rightPanelTabAtom, "editor"); @@ -61,6 +84,7 @@ export const openEditorPanelAtom = atom( documentId: payload.documentId, localFilePath: null, searchSpaceId: payload.searchSpaceId, + memoryScope: null, title: payload.title ?? null, }); set(rightPanelTabAtom, "editor"); diff --git a/surfsense_web/components/documents/DocumentNode.tsx b/surfsense_web/components/documents/DocumentNode.tsx index 0f3cd4a19..bef2c6ba2 100644 --- a/surfsense_web/components/documents/DocumentNode.tsx +++ b/surfsense_web/components/documents/DocumentNode.tsx @@ -9,6 +9,7 @@ import { MoreHorizontal, Move, Pencil, + RotateCcw, Trash2, } from "lucide-react"; import React, { useCallback, useRef, useState } from "react"; @@ -61,8 +62,13 @@ interface DocumentNodeProps { onEdit: (doc: DocumentNodeDoc) => void; onDelete: (doc: DocumentNodeDoc) => void; onMove: (doc: DocumentNodeDoc) => void; + onReset?: (doc: DocumentNodeDoc) => void; onExport?: (doc: DocumentNodeDoc, format: string) => void; onVersionHistory?: (doc: DocumentNodeDoc) => void; + canDelete?: boolean; + canMove?: boolean; + canMention?: boolean; + canEdit?: boolean; contextMenuOpen?: boolean; onContextMenuOpenChange?: (open: boolean) => void; } @@ -76,8 +82,13 @@ export const DocumentNode = React.memo(function DocumentNode({ onEdit, onDelete, onMove, + onReset, onExport, onVersionHistory, + canDelete = true, + canMove = true, + canMention = true, + canEdit = true, contextMenuOpen, onContextMenuOpenChange, }: DocumentNodeProps) { @@ -85,8 +96,13 @@ export const DocumentNode = React.memo(function DocumentNode({ const isFailed = statusState === "failed"; const isProcessing = statusState === "pending" || statusState === "processing"; const isUnavailable = isProcessing || isFailed; - const isSelectable = !isUnavailable; - const isEditable = EDITABLE_DOCUMENT_TYPES.has(doc.document_type) && !isUnavailable; + const isMemoryDocument = + doc.document_type === "USER_MEMORY" || doc.document_type === "TEAM_MEMORY"; + const 
isSelectable = canMention && !isUnavailable; + const isEditable = + canEdit && + (isMemoryDocument || EDITABLE_DOCUMENT_TYPES.has(doc.document_type)) && + !isUnavailable; const handleCheckChange = useCallback(() => { if (isSelectable) { @@ -94,13 +110,22 @@ export const DocumentNode = React.memo(function DocumentNode({ } }, [doc, isMentioned, isSelectable, onToggleChatMention]); + const handlePrimaryClick = useCallback(() => { + if (canMention) { + handleCheckChange(); + return; + } + onPreview(doc); + }, [canMention, doc, handleCheckChange, onPreview]); + const [{ isDragging }, drag] = useDrag( () => ({ type: DND_TYPES.DOCUMENT, item: { id: doc.id }, + canDrag: canMove, collect: (monitor) => ({ isDragging: monitor.isDragging() }), }), - [doc.id] + [canMove, doc.id] ); const [dropdownOpen, setDropdownOpen] = useState(false); @@ -130,9 +155,11 @@ export const DocumentNode = React.memo(function DocumentNode({ const attachRef = useCallback( (node: HTMLDivElement | null) => { (rowRef as React.MutableRefObject).current = node; - drag(node); + if (canMove) { + drag(node); + } }, - [drag] + [canMove, drag] ); return ( @@ -187,12 +214,39 @@ export const DocumentNode = React.memo(function DocumentNode({ ); } return ( - e.stopPropagation()} - className="h-3.5 w-3.5 shrink-0" - /> + <> + {isMemoryDocument ? ( + + ) : canMention ? ( + e.stopPropagation()} + className="h-3.5 w-3.5 shrink-0" + /> + ) : ( + + {getDocumentTypeIcon( + doc.document_type as DocumentTypeEnum, + "h-3.5 w-3.5 text-muted-foreground" + )} + + )} + ); })()} @@ -205,8 +259,8 @@ export const DocumentNode = React.memo(function DocumentNode({
@@ -708,7 +788,9 @@ function DesktopEditorPanel() { const hasTarget = panelState.kind === "document" ? !!panelState.documentId && !!panelState.searchSpaceId - : !!panelState.localFilePath; + : panelState.kind === "local_file" + ? !!panelState.localFilePath + : !!panelState.memoryScope; if (!panelState.isOpen || !hasTarget) return null; return ( @@ -717,6 +799,7 @@ function DesktopEditorPanel() { kind={panelState.kind} documentId={panelState.documentId ?? undefined} localFilePath={panelState.localFilePath ?? undefined} + memoryScope={panelState.memoryScope ?? undefined} searchSpaceId={panelState.searchSpaceId ?? undefined} title={panelState.title} onClose={closePanel} @@ -734,7 +817,7 @@ function MobileEditorDrawer() { const hasTarget = panelState.kind === "document" ? !!panelState.documentId && !!panelState.searchSpaceId - : !!panelState.localFilePath; + : !!panelState.memoryScope; if (!hasTarget) return null; return ( @@ -756,6 +839,7 @@ function MobileEditorDrawer() { kind={panelState.kind} documentId={panelState.documentId ?? undefined} localFilePath={panelState.localFilePath ?? undefined} + memoryScope={panelState.memoryScope ?? undefined} searchSpaceId={panelState.searchSpaceId ?? undefined} title={panelState.title} /> @@ -771,7 +855,9 @@ export function EditorPanel() { const hasTarget = panelState.kind === "document" ? !!panelState.documentId && !!panelState.searchSpaceId - : !!panelState.localFilePath; + : panelState.kind === "local_file" + ? !!panelState.localFilePath + : !!panelState.memoryScope; if (!panelState.isOpen || !hasTarget) return null; if (!isDesktop && panelState.kind === "local_file") return null; @@ -789,7 +875,9 @@ export function MobileEditorPanel() { const hasTarget = panelState.kind === "document" ? !!panelState.documentId && !!panelState.searchSpaceId - : !!panelState.localFilePath; + : panelState.kind === "local_file" + ? 
!!panelState.localFilePath + : !!panelState.memoryScope; if (isDesktop || !panelState.isOpen || !hasTarget || panelState.kind === "local_file") return null; diff --git a/surfsense_web/components/layout/ui/right-panel/RightPanel.tsx b/surfsense_web/components/layout/ui/right-panel/RightPanel.tsx index b379e58e3..5a7588979 100644 --- a/surfsense_web/components/layout/ui/right-panel/RightPanel.tsx +++ b/surfsense_web/components/layout/ui/right-panel/RightPanel.tsx @@ -103,7 +103,11 @@ export function RightPanelToggleButton({ const reportOpen = reportState.isOpen && !!reportState.reportId; const editorOpen = editorState.isOpen && - (editorState.kind === "document" ? !!editorState.documentId : !!editorState.localFilePath); + (editorState.kind === "document" + ? !!editorState.documentId + : editorState.kind === "memory" + ? !!editorState.memoryScope + : !!editorState.localFilePath); const hitlEditOpen = hitlEditState.isOpen && !!hitlEditState.onSave; const citationOpen = citationState.isOpen && citationState.chunkId != null; const hasContent = documentsOpen || reportOpen || editorOpen || hitlEditOpen || citationOpen; @@ -151,7 +155,11 @@ export function RightPanelExpandButton() { const reportOpen = reportState.isOpen && !!reportState.reportId; const editorOpen = editorState.isOpen && - (editorState.kind === "document" ? !!editorState.documentId : !!editorState.localFilePath); + (editorState.kind === "document" + ? !!editorState.documentId + : editorState.kind === "memory" + ? !!editorState.memoryScope + : !!editorState.localFilePath); const hitlEditOpen = hitlEditState.isOpen && !!hitlEditState.onSave; const citationOpen = citationState.isOpen && citationState.chunkId != null; const hasContent = documentsOpen || reportOpen || editorOpen || hitlEditOpen || citationOpen; @@ -193,7 +201,11 @@ export function RightPanel({ const reportOpen = reportState.isOpen && !!reportState.reportId; const editorOpen = editorState.isOpen && - (editorState.kind === "document" ? 
!!editorState.documentId : !!editorState.localFilePath); + (editorState.kind === "document" + ? !!editorState.documentId + : editorState.kind === "memory" + ? !!editorState.memoryScope + : !!editorState.localFilePath); const hitlEditOpen = hitlEditState.isOpen && !!hitlEditState.onSave; const citationOpen = citationState.isOpen && citationState.chunkId != null; @@ -292,6 +304,7 @@ export function RightPanel({ kind={editorState.kind} documentId={editorState.documentId ?? undefined} localFilePath={editorState.localFilePath ?? undefined} + memoryScope={editorState.memoryScope ?? undefined} searchSpaceId={editorState.searchSpaceId ?? undefined} title={editorState.title} onClose={closeEditor} diff --git a/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx b/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx index cdb757cb2..0c37d003c 100644 --- a/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx +++ b/surfsense_web/components/layout/ui/sidebar/DocumentsSidebar.tsx @@ -88,7 +88,31 @@ const DesktopLocalTabContent = dynamic( { ssr: false } ); -const NON_DELETABLE_DOCUMENT_TYPES: readonly string[] = ["SURFSENSE_DOCS"]; +const NON_DELETABLE_DOCUMENT_TYPES: readonly string[] = [ + "SURFSENSE_DOCS", + "USER_MEMORY", + "TEAM_MEMORY", +]; +const MEMORY_DOCUMENTS: DocumentNodeDoc[] = [ + { + id: -1001, + title: "MEMORY.md", + document_type: "USER_MEMORY", + folderId: null, + status: { state: "ready" }, + }, + { + id: -1002, + title: "TEAM_MEMORY.md", + document_type: "TEAM_MEMORY", + folderId: null, + status: { state: "ready" }, + }, +]; + +function isMemoryDocument(doc: { document_type: string }) { + return doc.document_type === "USER_MEMORY" || doc.document_type === "TEAM_MEMORY"; +} const LOCAL_FILESYSTEM_TRUST_KEY = "surfsense.local-filesystem-trust.v1"; const MAX_LOCAL_FILESYSTEM_ROOTS = 10; @@ -879,6 +903,7 @@ function AuthenticatedDocumentsSidebarBase({ const handleToggleChatMention = useCallback( (doc: { id: number; title: string; 
document_type: string }, isMentioned: boolean) => { + if (isMemoryDocument(doc)) return; const key = getMentionDocKey({ ...doc, kind: "doc" }); if (isMentioned) { setSidebarDocs((prev) => prev.filter((d) => getMentionDocKey(d) !== key)); @@ -927,11 +952,66 @@ function AuthenticatedDocumentsSidebarBase({ [treeFolders, setSidebarDocs] ); + const treeDocumentsWithMemory = useMemo( + () => [...MEMORY_DOCUMENTS, ...treeDocuments], + [treeDocuments] + ); + const searchFilteredDocuments = useMemo(() => { const query = debouncedSearch.trim().toLowerCase(); - if (!query) return treeDocuments; - return treeDocuments.filter((d) => d.title.toLowerCase().includes(query)); - }, [treeDocuments, debouncedSearch]); + if (!query) return treeDocumentsWithMemory; + return treeDocumentsWithMemory.filter((d) => d.title.toLowerCase().includes(query)); + }, [treeDocumentsWithMemory, debouncedSearch]); + + const openMemoryDocument = useCallback( + (doc: DocumentNodeDoc) => { + if (doc.document_type === "USER_MEMORY") { + openEditorPanel({ + kind: "memory", + memoryScope: "user", + searchSpaceId, + title: doc.title, + }); + return true; + } + if (doc.document_type === "TEAM_MEMORY") { + openEditorPanel({ + kind: "memory", + memoryScope: "team", + searchSpaceId, + title: doc.title, + }); + return true; + } + return false; + }, + [openEditorPanel, searchSpaceId] + ); + + const handleResetMemoryDocument = useCallback( + async (doc: DocumentNodeDoc) => { + if (!isMemoryDocument(doc)) return; + if (!window.confirm(`Reset ${doc.title.toLowerCase()}? This clears the memory document.`)) { + return; + } + const endpoint = + doc.document_type === "USER_MEMORY" + ? 
`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/users/me/memory/reset` + : `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/searchspaces/${searchSpaceId}/memory/reset`; + try { + const response = await authenticatedFetch(endpoint, { method: "POST" }); + if (!response.ok) { + const errorData = await response.json().catch(() => ({ detail: "Reset failed" })); + throw new Error(errorData.detail || "Reset failed"); + } + toast.success(`${doc.title} reset`); + openMemoryDocument(doc); + } catch (error) { + toast.error((error as Error)?.message || `Failed to reset ${doc.title.toLowerCase()}`); + } + }, + [openMemoryDocument, searchSpaceId] + ); const typeCounts = useMemo(() => { const counts: Partial> = {}; @@ -1169,6 +1249,7 @@ function AuthenticatedDocumentsSidebarBase({ onCreateFolder={handleCreateFolder} searchQuery={debouncedSearch.trim() || undefined} onPreviewDocument={(doc) => { + if (openMemoryDocument(doc)) return; openEditorPanel({ documentId: doc.id, searchSpaceId, @@ -1176,6 +1257,7 @@ function AuthenticatedDocumentsSidebarBase({ }); }} onEditDocument={(doc) => { + if (openMemoryDocument(doc)) return; openEditorPanel({ documentId: doc.id, searchSpaceId, @@ -1184,6 +1266,7 @@ function AuthenticatedDocumentsSidebarBase({ }} onDeleteDocument={(doc) => handleDeleteDocument(doc.id)} onMoveDocument={handleMoveDocument} + onResetDocument={handleResetMemoryDocument} onExportDocument={handleExportDocument} onVersionHistory={(doc) => setVersionDocId(doc.id)} activeTypes={activeTypes} diff --git a/surfsense_web/contracts/enums/connectorIcons.tsx b/surfsense_web/contracts/enums/connectorIcons.tsx index 1c6745db5..610bd508b 100644 --- a/surfsense_web/contracts/enums/connectorIcons.tsx +++ b/surfsense_web/contracts/enums/connectorIcons.tsx @@ -1,6 +1,7 @@ import { IconUsersGroup } from "@tabler/icons-react"; import { BookOpen, + Brain, File, FileText, Globe, @@ -120,6 +121,9 @@ export const getConnectorIcon = (connectorType: EnumConnectorName | string, clas 
return ; case "SURFSENSE_DOCS": return ; + case "USER_MEMORY": + case "TEAM_MEMORY": + return ; case "DEEP": return ; case "DEEPER": diff --git a/surfsense_web/contracts/types/document.types.ts b/surfsense_web/contracts/types/document.types.ts index 7b8784568..ccc15fa62 100644 --- a/surfsense_web/contracts/types/document.types.ts +++ b/surfsense_web/contracts/types/document.types.ts @@ -29,6 +29,8 @@ export const documentTypeEnum = z.enum([ "LOCAL_FOLDER_FILE", "SURFSENSE_DOCS", "NOTE", + "USER_MEMORY", + "TEAM_MEMORY", "COMPOSIO_GOOGLE_DRIVE_CONNECTOR", "COMPOSIO_GMAIL_CONNECTOR", "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR", From 1fbb8fb58d8b1d65f0745fd2cf8718e1f9913310 Mon Sep 17 00:00:00 2001 From: Varun Shukla Date: Tue, 19 May 2026 00:32:57 +0530 Subject: [PATCH 16/63] refactor(env): replace inline process.env reads with BACKEND_URL in lib/ --- surfsense_web/lib/apis/base-api.service.ts | 4 ++-- surfsense_web/lib/auth-utils.ts | 8 +++----- surfsense_web/lib/chat/thread-persistence.ts | 5 ++--- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/surfsense_web/lib/apis/base-api.service.ts b/surfsense_web/lib/apis/base-api.service.ts index 269fd916c..0819cbc7c 100644 --- a/surfsense_web/lib/apis/base-api.service.ts +++ b/surfsense_web/lib/apis/base-api.service.ts @@ -9,7 +9,7 @@ import { NetworkError, NotFoundError, } from "../error"; - +import { BACKEND_URL } from "@/lib/env-config"; enum ResponseType { JSON = "json", TEXT = "text", @@ -390,4 +390,4 @@ class BaseApiService { } } -export const baseApiService = new BaseApiService(process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL || ""); +export const baseApiService = new BaseApiService(BACKEND_URL); diff --git a/surfsense_web/lib/auth-utils.ts b/surfsense_web/lib/auth-utils.ts index 4848f4ab7..645a6d1ba 100644 --- a/surfsense_web/lib/auth-utils.ts +++ b/surfsense_web/lib/auth-utils.ts @@ -1,7 +1,7 @@ /** * Authentication utilities for handling token expiration and redirects */ - +import { BACKEND_URL } from 
"@/lib/env-config"; const REDIRECT_PATH_KEY = "surfsense_redirect_path"; const BEARER_TOKEN_KEY = "surfsense_bearer_token"; const REFRESH_TOKEN_KEY = "surfsense_refresh_token"; @@ -194,8 +194,7 @@ export async function logout(): Promise { // Call backend to revoke the refresh token if (refreshToken) { try { - const backendUrl = process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL || "http://localhost:8000"; - const response = await fetch(`${backendUrl}/auth/jwt/revoke`, { + const response = await fetch(`${BACKEND_URL}/auth/jwt/revoke`, { method: "POST", headers: { "Content-Type": "application/json", @@ -273,8 +272,7 @@ export async function refreshAccessToken(): Promise { isRefreshing = true; refreshPromise = (async () => { try { - const backendUrl = process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL || "http://localhost:8000"; - const response = await fetch(`${backendUrl}/auth/jwt/refresh`, { + const response = await fetch(`${BACKEND_URL}/auth/jwt/refresh`, { method: "POST", headers: { "Content-Type": "application/json", diff --git a/surfsense_web/lib/chat/thread-persistence.ts b/surfsense_web/lib/chat/thread-persistence.ts index 4cf9b8e68..abe6bc02c 100644 --- a/surfsense_web/lib/chat/thread-persistence.ts +++ b/surfsense_web/lib/chat/thread-persistence.ts @@ -4,7 +4,7 @@ */ import { baseApiService } from "@/lib/apis/base-api.service"; - +import { BACKEND_URL } from "@/lib/env-config"; // ============================================================================= // Types matching backend schemas // ============================================================================= @@ -228,6 +228,5 @@ export interface RegenerateParams { * Get the URL for the regenerate endpoint (for streaming fetch) */ export function getRegenerateUrl(threadId: number): string { - const backendUrl = process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL || "http://localhost:8000"; - return `${backendUrl}/api/v1/threads/${threadId}/regenerate`; + return `${BACKEND_URL}/api/v1/threads/${threadId}/regenerate`; } 
From 73043a07567a1516a518eff4f6f8dd33875d1a88 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 20 May 2026 03:17:05 +0530 Subject: [PATCH 17/63] feat: enhance memory API responses with limits and update UI components for memory limit handling --- surfsense_backend/app/routes/memory_routes.py | 12 ++--- .../app/routes/team_memory_routes.py | 18 +++---- .../app/services/memory/__init__.py | 5 ++ .../app/services/memory/schemas.py | 16 +++++- .../app/services/memory/service.py | 8 +-- .../components/MemoryContent.tsx | 18 +++---- .../components/editor-panel/editor-panel.tsx | 54 +++++++++++++++++-- .../settings/team-memory-manager.tsx | 18 +++---- surfsense_web/hooks/use-memory.ts | 34 +++++++++++- 9 files changed, 132 insertions(+), 51 deletions(-) diff --git a/surfsense_backend/app/routes/memory_routes.py b/surfsense_backend/app/routes/memory_routes.py index 7b674a584..8e73a277c 100644 --- a/surfsense_backend/app/routes/memory_routes.py +++ b/surfsense_backend/app/routes/memory_routes.py @@ -8,7 +8,9 @@ from sqlalchemy.ext.asyncio import AsyncSession from app.db import User, get_async_session from app.services.memory import ( + MemoryRead, MemoryScope, + memory_limits, read_memory, reset_memory, save_memory, @@ -18,10 +20,6 @@ from app.users import current_active_user router = APIRouter() -class MemoryRead(BaseModel): - memory_md: str - - class MemoryUpdate(BaseModel): memory_md: str @@ -36,7 +34,7 @@ async def get_user_memory( target_id=user.id, session=session, ) - return MemoryRead(memory_md=memory_md) + return MemoryRead(memory_md=memory_md, limits=memory_limits()) @router.put("/users/me/memory", response_model=MemoryRead) @@ -53,7 +51,7 @@ async def update_user_memory( ) if result.status == "error": raise HTTPException(status_code=400, detail=result.message) - return MemoryRead(memory_md=result.memory_md) + return MemoryRead(memory_md=result.memory_md, limits=memory_limits()) 
@router.post("/users/me/memory/reset", response_model=MemoryRead) @@ -68,4 +66,4 @@ async def reset_user_memory( ) if result.status == "error": raise HTTPException(status_code=400, detail=result.message) - return MemoryRead(memory_md=result.memory_md) + return MemoryRead(memory_md=result.memory_md, limits=memory_limits()) diff --git a/surfsense_backend/app/routes/team_memory_routes.py b/surfsense_backend/app/routes/team_memory_routes.py index 3e552ce32..b37a99b03 100644 --- a/surfsense_backend/app/routes/team_memory_routes.py +++ b/surfsense_backend/app/routes/team_memory_routes.py @@ -8,7 +8,9 @@ from sqlalchemy.ext.asyncio import AsyncSession from app.db import User, get_async_session from app.services.memory import ( + MemoryRead, MemoryScope, + memory_limits, read_memory, reset_memory, save_memory, @@ -19,15 +21,11 @@ from app.utils.rbac import check_search_space_access router = APIRouter() -class TeamMemoryRead(BaseModel): - memory_md: str - - class TeamMemoryUpdate(BaseModel): memory_md: str -@router.get("/searchspaces/{search_space_id}/memory", response_model=TeamMemoryRead) +@router.get("/searchspaces/{search_space_id}/memory", response_model=MemoryRead) async def get_team_memory( search_space_id: int, session: AsyncSession = Depends(get_async_session), @@ -39,10 +37,10 @@ async def get_team_memory( target_id=search_space_id, session=session, ) - return TeamMemoryRead(memory_md=memory_md) + return MemoryRead(memory_md=memory_md, limits=memory_limits()) -@router.put("/searchspaces/{search_space_id}/memory", response_model=TeamMemoryRead) +@router.put("/searchspaces/{search_space_id}/memory", response_model=MemoryRead) async def update_team_memory( search_space_id: int, body: TeamMemoryUpdate, @@ -58,10 +56,10 @@ async def update_team_memory( ) if result.status == "error": raise HTTPException(status_code=400, detail=result.message) - return TeamMemoryRead(memory_md=result.memory_md) + return MemoryRead(memory_md=result.memory_md, limits=memory_limits()) 
-@router.post("/searchspaces/{search_space_id}/memory/reset", response_model=TeamMemoryRead) +@router.post("/searchspaces/{search_space_id}/memory/reset", response_model=MemoryRead) async def reset_team_memory( search_space_id: int, session: AsyncSession = Depends(get_async_session), @@ -75,4 +73,4 @@ async def reset_team_memory( ) if result.status == "error": raise HTTPException(status_code=400, detail=result.message) - return TeamMemoryRead(memory_md=result.memory_md) + return MemoryRead(memory_md=result.memory_md, limits=memory_limits()) diff --git a/surfsense_backend/app/services/memory/__init__.py b/surfsense_backend/app/services/memory/__init__.py index d72f45e1f..27d0592fd 100644 --- a/surfsense_backend/app/services/memory/__init__.py +++ b/surfsense_backend/app/services/memory/__init__.py @@ -1,9 +1,11 @@ """First-class memory service for user and team markdown memory.""" +from .schemas import MemoryLimits, MemoryRead from .service import ( MemoryScope, SaveResult, extract_and_save, + memory_limits, read_memory, reset_memory, save_memory, @@ -18,9 +20,12 @@ from .validation import ( __all__ = [ "MEMORY_HARD_LIMIT", "MEMORY_SOFT_LIMIT", + "MemoryLimits", + "MemoryRead", "MemoryScope", "SaveResult", "extract_and_save", + "memory_limits", "read_memory", "reset_memory", "save_memory", diff --git a/surfsense_backend/app/services/memory/schemas.py b/surfsense_backend/app/services/memory/schemas.py index 9b40ee5b1..623e4aa93 100644 --- a/surfsense_backend/app/services/memory/schemas.py +++ b/surfsense_backend/app/services/memory/schemas.py @@ -1,4 +1,4 @@ -"""Structured output schemas for memory extraction.""" +"""Schemas for memory API responses and structured extraction.""" from __future__ import annotations @@ -7,6 +7,20 @@ from typing import Literal from pydantic import BaseModel, Field +class MemoryLimits(BaseModel): + """Canonical memory size limits exposed to clients.""" + + soft: int + hard: int + + +class MemoryRead(BaseModel): + """Memory document 
payload returned by user and team memory APIs.""" + + memory_md: str + limits: MemoryLimits + + class MemoryExtractionDecision(BaseModel): """Structured extraction result; avoids string sentinel parsing.""" diff --git a/surfsense_backend/app/services/memory/service.py b/surfsense_backend/app/services/memory/service.py index 85459c28c..8159977a7 100644 --- a/surfsense_backend/app/services/memory/service.py +++ b/surfsense_backend/app/services/memory/service.py @@ -9,7 +9,6 @@ from typing import Any, Literal from uuid import UUID from langchain_core.messages import HumanMessage -from pydantic import BaseModel from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession @@ -19,9 +18,10 @@ from app.services.memory.prompts import ( USER_MEMORY_EXTRACT_PROMPT, ) from app.services.memory.rewrite import forced_rewrite -from app.services.memory.schemas import MemoryExtractionDecision +from app.services.memory.schemas import MemoryExtractionDecision, MemoryLimits from app.services.memory.validation import ( MEMORY_HARD_LIMIT, + MEMORY_SOFT_LIMIT, soft_limit_warning, strip_preamble_to_first_heading, validate_bullet_format, @@ -68,8 +68,8 @@ class SaveResult: return data -class MemoryRead(BaseModel): - memory_md: str +def memory_limits() -> MemoryLimits: + return MemoryLimits(soft=MEMORY_SOFT_LIMIT, hard=MEMORY_HARD_LIMIT) def _normalize_scope(scope: MemoryScope | str) -> MemoryScope: diff --git a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/MemoryContent.tsx b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/MemoryContent.tsx index dc002244f..c7cb3d1d4 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/MemoryContent.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/MemoryContent.tsx @@ -14,11 +14,11 @@ import { DropdownMenuTrigger, } from "@/components/ui/dropdown-menu"; import { Spinner } from "@/components/ui/spinner"; -import { MEMORY_HARD_LIMIT, 
useUserMemory } from "@/hooks/use-memory"; +import { getMemoryLimitState, useUserMemory } from "@/hooks/use-memory"; export function MemoryContent() { const activeSearchSpaceId = useAtomValue(activeSearchSpaceIdAtom); - const { memory, displayMemory, loading, saving, reset } = useUserMemory( + const { memory, displayMemory, limits, loading, saving, reset } = useUserMemory( Number(activeSearchSpaceId) ); @@ -59,11 +59,11 @@ export function MemoryContent() { }; const charCount = memory.length; + const limitState = getMemoryLimitState(charCount, limits); const getCounterColor = () => { - if (charCount > MEMORY_HARD_LIMIT) return "text-red-500"; - if (charCount > 15_000) return "text-orange-500"; - if (charCount > 10_000) return "text-yellow-500"; + if (limitState.level === "error") return "text-red-500"; + if (limitState.level === "warning") return "text-orange-500"; return "text-muted-foreground"; }; @@ -112,13 +112,7 @@ export function MemoryContent() {
- - {charCount.toLocaleString()} / {MEMORY_HARD_LIMIT.toLocaleString()} - characters - chars - {charCount > 15_000 && charCount <= MEMORY_HARD_LIMIT && " - Approaching limit"} - {charCount > MEMORY_HARD_LIMIT && " - Exceeds limit"} - + {limitState.label}
- - - - - - - - Copy as Markdown - - - - Download as Markdown - - - -
-
- - ); -} diff --git a/surfsense_web/app/dashboard/[search_space_id]/user-settings/layout-shell.tsx b/surfsense_web/app/dashboard/[search_space_id]/user-settings/layout-shell.tsx index 820021622..037568db3 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/user-settings/layout-shell.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/user-settings/layout-shell.tsx @@ -1,7 +1,6 @@ "use client"; import { - Brain, CircleUser, Keyboard, KeyRound, @@ -26,7 +25,6 @@ export type UserSettingsTab = | "api-key" | "prompts" | "community-prompts" - | "memory" | "agent-permissions" | "agent-status" | "purchases" @@ -75,11 +73,6 @@ export function UserSettingsLayoutShell({ searchSpaceId, children }: UserSetting label: "Community Prompts", icon: , }, - { - value: "memory" as const, - label: "Memory", - icon: , - }, { value: "agent-permissions" as const, label: "Agent Permissions", diff --git a/surfsense_web/app/dashboard/[search_space_id]/user-settings/memory/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/user-settings/memory/page.tsx deleted file mode 100644 index b10c5bce5..000000000 --- a/surfsense_web/app/dashboard/[search_space_id]/user-settings/memory/page.tsx +++ /dev/null @@ -1,5 +0,0 @@ -import { MemoryContent } from "../components/MemoryContent"; - -export default function Page() { - return ; -} diff --git a/surfsense_web/components/settings/team-memory-manager.tsx b/surfsense_web/components/settings/team-memory-manager.tsx deleted file mode 100644 index 4a730d45f..000000000 --- a/surfsense_web/components/settings/team-memory-manager.tsx +++ /dev/null @@ -1,151 +0,0 @@ -"use client"; - -import { ChevronDown, ClipboardCopy, Download, Info } from "lucide-react"; -import { toast } from "sonner"; -import { PlateEditor } from "@/components/editor/plate-editor"; -import { Alert, AlertDescription } from "@/components/ui/alert"; -import { Button } from "@/components/ui/button"; -import { - DropdownMenu, - DropdownMenuContent, - DropdownMenuItem, - 
DropdownMenuTrigger, -} from "@/components/ui/dropdown-menu"; -import { Spinner } from "@/components/ui/spinner"; -import { getMemoryLimitState, useTeamMemory } from "@/hooks/use-memory"; - -interface TeamMemoryManagerProps { - searchSpaceId: number; -} - -export function TeamMemoryManager({ searchSpaceId }: TeamMemoryManagerProps) { - const { memory, displayMemory, limits, loading, saving, reset } = useTeamMemory(searchSpaceId); - - const handleClear = async () => { - try { - await reset(); - toast.success("Team memory cleared"); - } catch { - toast.error("Failed to clear team memory"); - } - }; - - const handleDownload = () => { - if (!memory) return; - try { - const blob = new Blob([memory], { type: "text/markdown;charset=utf-8" }); - const url = URL.createObjectURL(blob); - const a = document.createElement("a"); - a.href = url; - a.download = "team-memory.md"; - document.body.appendChild(a); - a.click(); - document.body.removeChild(a); - URL.revokeObjectURL(url); - } catch { - toast.error("Failed to download team memory"); - } - }; - - const handleCopyMarkdown = async () => { - if (!memory) return; - try { - await navigator.clipboard.writeText(memory); - toast.success("Copied to clipboard"); - } catch { - toast.error("Failed to copy team memory"); - } - }; - - const charCount = memory.length; - const limitState = getMemoryLimitState(charCount, limits); - - const getCounterColor = () => { - if (limitState.level === "error") return "text-red-500"; - if (limitState.level === "warning") return "text-orange-500"; - return "text-muted-foreground"; - }; - - if (loading) { - return ( -
- -
- ); - } - - if (!memory) { - return ( -
-

- What does SurfSense remember about your team? -

-

- Nothing yet. SurfSense picks up on team decisions and conventions as your team chats. -

-
- ); - } - - return ( -
- - - -

- SurfSense uses this shared memory to provide team-wide context across all conversations - in this search space. -

-
-
- -
-
- -
-
- -
- {limitState.label} -
- - - - - - - - - Copy as Markdown - - - - Download as Markdown - - - -
-
-
- ); -} From 78a3c71bb59fb6f876b62f2c88c68a046b9e0644 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 20 May 2026 12:50:15 +0530 Subject: [PATCH 23/63] feat: implement memory document fetching and saving functionality in the editor panel, and remove deprecated memory hook --- .../components/documents/DocumentNode.tsx | 8 +- .../components/editor-panel/editor-panel.tsx | 78 +++------- .../components/editor-panel/memory.ts | 116 ++++++++++++++ surfsense_web/hooks/use-memory.ts | 141 ------------------ 4 files changed, 140 insertions(+), 203 deletions(-) create mode 100644 surfsense_web/components/editor-panel/memory.ts delete mode 100644 surfsense_web/hooks/use-memory.ts diff --git a/surfsense_web/components/documents/DocumentNode.tsx b/surfsense_web/components/documents/DocumentNode.tsx index a5b02cbb3..86c9b899a 100644 --- a/surfsense_web/components/documents/DocumentNode.tsx +++ b/surfsense_web/components/documents/DocumentNode.tsx @@ -216,13 +216,9 @@ export const DocumentNode = React.memo(function DocumentNode({ return ( <> {isMemoryDocument ? ( - + ) : canMention ? ( ({ detail: "Failed to fetch memory" })); - throw new Error(errorData.detail || "Failed to fetch memory"); - } - const data = (await response.json()) as { - memory_md?: string; - limits?: MemoryLimits; - }; - setMemoryLimits(data.limits ?? null); - const content: EditorContent = { - document_id: memoryScope === "team" ? -1002 : -1001, - title: title || (memoryScope === "team" ? "Team Memory" : "Personal Memory"), - document_type: memoryScope === "team" ? "TEAM_MEMORY" : "USER_MEMORY", - source_markdown: data.memory_md ?? 
"", - }; + setMemoryLimits(limits); + const content: EditorContent = document; markdownRef.current = content.source_markdown; setDisplayTitle(content.title); setEditorDoc(content); @@ -370,34 +356,14 @@ export function EditorPanelContent({ return true; } if (isMemoryMode) { - if (memoryScope === "team" && !searchSpaceId) { - throw new Error("Missing search space context"); - } - const response = await authenticatedFetch( - `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}${ - memoryScope === "team" - ? `/api/v1/searchspaces/${searchSpaceId}/memory` - : "/api/v1/users/me/memory" - }`, - { - method: "PUT", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ memory_md: markdownRef.current }), - } - ); - if (!response.ok) { - const errorData = await response - .json() - .catch(() => ({ detail: "Failed to save memory" })); - throw new Error(errorData.detail || "Failed to save memory"); - } - const data = (await response.json()) as { - memory_md?: string; - limits?: MemoryLimits; - }; - const savedContent = data.memory_md ?? markdownRef.current; + if (!memoryScope) throw new Error("Missing memory context"); + const { markdown: savedContent, limits } = await saveMemoryMarkdown({ + scope: memoryScope, + searchSpaceId, + markdown: markdownRef.current, + }); markdownRef.current = savedContent; - setMemoryLimits(data.limits ?? memoryLimits); + setMemoryLimits(limits ?? memoryLimits); setEditorDoc((prev) => (prev ? 
{ ...prev, source_markdown: savedContent } : prev)); setEditedMarkdown(null); if (!options?.silent) { diff --git a/surfsense_web/components/editor-panel/memory.ts b/surfsense_web/components/editor-panel/memory.ts new file mode 100644 index 000000000..aa5b1f68d --- /dev/null +++ b/surfsense_web/components/editor-panel/memory.ts @@ -0,0 +1,116 @@ +"use client"; + +import { authenticatedFetch } from "@/lib/auth-utils"; + +export type MemoryScope = "user" | "team"; + +export interface MemoryLimits { + soft: number; + hard: number; +} + +export type MemoryLimitLevel = "ok" | "warning" | "error"; + +export interface MemoryEditorDocument { + document_id: number; + title: string; + document_type: "USER_MEMORY" | "TEAM_MEMORY"; + source_markdown: string; +} + +interface MemoryReadResponse { + memory_md?: string; + limits?: MemoryLimits; +} + +function getMemoryPath(scope: MemoryScope, searchSpaceId?: number | null) { + if (scope === "user") return "/api/v1/users/me/memory"; + if (!searchSpaceId) throw new Error("Missing search space context"); + return `/api/v1/searchspaces/${searchSpaceId}/memory`; +} + +function getBackendUrl(path: string) { + return `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}${path}`; +} + +export function getMemoryLimitState(length: number, limits?: MemoryLimits | null) { + if (!limits) { + return { + level: "ok" as MemoryLimitLevel, + label: `${length.toLocaleString()} chars`, + isOverLimit: false, + }; + } + + const isOverLimit = length > limits.hard; + const isNearLimit = length > limits.soft; + const level: MemoryLimitLevel = isOverLimit ? "error" : isNearLimit ? "warning" : "ok"; + const suffix = isOverLimit ? " - Exceeds limit" : isNearLimit ? 
" - Approaching limit" : ""; + + return { + level, + label: `${length.toLocaleString()}/${limits.hard.toLocaleString()} chars${suffix}`, + isOverLimit, + }; +} + +export async function fetchMemoryEditorDocument({ + scope, + searchSpaceId, + title, + signal, +}: { + scope: MemoryScope; + searchSpaceId?: number | null; + title?: string | null; + signal?: AbortSignal; +}) { + const response = await authenticatedFetch(getBackendUrl(getMemoryPath(scope, searchSpaceId)), { + method: "GET", + signal, + }); + if (!response.ok) { + const errorData = await response.json().catch(() => ({ detail: "Failed to fetch memory" })); + throw new Error(errorData.detail || "Failed to fetch memory"); + } + + const data = (await response.json()) as MemoryReadResponse; + const isTeamMemory = scope === "team"; + + return { + limits: data.limits ?? null, + document: { + document_id: isTeamMemory ? -1002 : -1001, + title: title || (isTeamMemory ? "Team Memory" : "Personal Memory"), + document_type: isTeamMemory ? "TEAM_MEMORY" : "USER_MEMORY", + source_markdown: data.memory_md ?? "", + } satisfies MemoryEditorDocument, + }; +} + +export async function saveMemoryMarkdown({ + scope, + searchSpaceId, + markdown, +}: { + scope: MemoryScope; + searchSpaceId?: number | null; + markdown: string; +}) { + const response = await authenticatedFetch(getBackendUrl(getMemoryPath(scope, searchSpaceId)), { + method: "PUT", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ memory_md: markdown }), + }); + if (!response.ok) { + const errorData = await response.json().catch(() => ({ detail: "Failed to save memory" })); + throw new Error(errorData.detail || "Failed to save memory"); + } + + const data = (await response.json()) as MemoryReadResponse; + + return { + markdown: data.memory_md ?? 
markdown, + limits: data.limits, + }; +} diff --git a/surfsense_web/hooks/use-memory.ts b/surfsense_web/hooks/use-memory.ts deleted file mode 100644 index 609aad537..000000000 --- a/surfsense_web/hooks/use-memory.ts +++ /dev/null @@ -1,141 +0,0 @@ -"use client"; - -import { useCallback, useEffect, useState } from "react"; -import { z } from "zod"; -import { baseApiService } from "@/lib/apis/base-api.service"; - -const MemoryLimitsSchema = z.object({ - soft: z.number(), - hard: z.number(), -}); - -const MemoryReadSchema = z.object({ - memory_md: z.string(), - limits: MemoryLimitsSchema, -}); - -type MemoryScope = "user" | "team"; -export type MemoryLimits = z.infer; -export type MemoryLimitLevel = "ok" | "warning" | "error"; - -interface UseMemoryOptions { - scope: MemoryScope; - searchSpaceId?: number | null; - autoLoad?: boolean; -} - -function getMemoryPath(scope: MemoryScope, searchSpaceId?: number | null) { - if (scope === "user") return "/api/v1/users/me/memory"; - if (!searchSpaceId) throw new Error("searchSpaceId is required for team memory"); - return `/api/v1/searchspaces/${searchSpaceId}/memory`; -} - -export function stripMemoryDisplayPrefixes(memory: string) { - return memory.replace( - /^\s*-\s+(?:\(\d{4}-\d{2}-\d{2}\)\s*\[(?:fact|pref|instr)\]\s*|\d{4}-\d{2}-\d{2}:\s*)/gim, - "- " - ); -} - -export function getMemoryLimitState(length: number, limits?: MemoryLimits | null) { - if (!limits) { - return { - level: "ok" as MemoryLimitLevel, - label: `${length.toLocaleString()} chars`, - isOverLimit: false, - }; - } - - const isOverLimit = length > limits.hard; - const isNearLimit = length > limits.soft; - const level: MemoryLimitLevel = isOverLimit ? "error" : isNearLimit ? "warning" : "ok"; - const suffix = isOverLimit ? " - Exceeds limit" : isNearLimit ? 
" - Approaching limit" : ""; - - return { - level, - label: `${length.toLocaleString()}/${limits.hard.toLocaleString()} chars${suffix}`, - isOverLimit, - }; -} - -export function useMemory({ scope, searchSpaceId, autoLoad = true }: UseMemoryOptions) { - const [memory, setMemory] = useState(""); - const [limits, setLimits] = useState(null); - const [loading, setLoading] = useState(autoLoad); - const [saving, setSaving] = useState(false); - - const load = useCallback(async () => { - setLoading(true); - try { - const data = await baseApiService.get(getMemoryPath(scope, searchSpaceId), MemoryReadSchema); - setMemory(data.memory_md); - setLimits(data.limits); - return data.memory_md; - } finally { - setLoading(false); - } - }, [scope, searchSpaceId]); - - useEffect(() => { - if (!autoLoad) return; - load().catch(() => { - setLoading(false); - }); - }, [autoLoad, load]); - - const save = useCallback( - async (memoryMd: string) => { - setSaving(true); - try { - const data = await baseApiService.put( - getMemoryPath(scope, searchSpaceId), - MemoryReadSchema, - { - body: { memory_md: memoryMd }, - } - ); - setMemory(data.memory_md); - setLimits(data.limits); - return data.memory_md; - } finally { - setSaving(false); - } - }, - [scope, searchSpaceId] - ); - - const reset = useCallback(async () => { - setSaving(true); - try { - const data = await baseApiService.post( - `${getMemoryPath(scope, searchSpaceId)}/reset`, - MemoryReadSchema - ); - setMemory(data.memory_md); - setLimits(data.limits); - return data.memory_md; - } finally { - setSaving(false); - } - }, [scope, searchSpaceId]); - - return { - memory, - setMemory, - limits, - displayMemory: stripMemoryDisplayPrefixes(memory), - loading, - saving, - load, - save, - reset, - }; -} - -export function useUserMemory(searchSpaceId?: number | null) { - return useMemory({ scope: "user", searchSpaceId }); -} - -export function useTeamMemory(searchSpaceId?: number | null) { - return useMemory({ scope: "team", searchSpaceId }); -} 
From fe07de3f9c027cad75493ce8e0670d347d497fc3 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 20 May 2026 12:55:10 +0530 Subject: [PATCH 24/63] chore: ran linting --- surfsense_backend/app/services/memory/service.py | 8 ++++++-- surfsense_backend/app/services/memory/validation.py | 4 +++- .../agents/new_chat/tools/test_update_memory_scope.py | 4 +++- .../tests/unit/services/test_memory_service.py | 4 +++- surfsense_web/components/documents/DocumentNode.tsx | 5 +---- surfsense_web/components/editor-panel/editor-panel.tsx | 2 +- .../components/layout/ui/sidebar/DocumentsSidebar.tsx | 5 ++++- 7 files changed, 21 insertions(+), 11 deletions(-) diff --git a/surfsense_backend/app/services/memory/service.py b/surfsense_backend/app/services/memory/service.py index 8159977a7..d4a7d0974 100644 --- a/surfsense_backend/app/services/memory/service.py +++ b/surfsense_backend/app/services/memory/service.py @@ -92,7 +92,9 @@ async def _load_target( select(User).where(User.id == _normalize_user_id(target_id)) # type: ignore[arg-type] ) return result.scalars().first() - result = await session.execute(select(SearchSpace).where(SearchSpace.id == int(target_id))) + result = await session.execute( + select(SearchSpace).where(SearchSpace.id == int(target_id)) + ) return result.scalars().first() @@ -141,7 +143,9 @@ async def save_memory( if target is None: return SaveResult( status="error", - message="User not found." if normalized is MemoryScope.USER else "Search space not found.", + message="User not found." 
+ if normalized is MemoryScope.USER + else "Search space not found.", ) old_memory = _get_memory(target, normalized) diff --git a/surfsense_backend/app/services/memory/validation.py b/surfsense_backend/app/services/memory/validation.py index 0e856943b..f9c5007d9 100644 --- a/surfsense_backend/app/services/memory/validation.py +++ b/surfsense_backend/app/services/memory/validation.py @@ -11,7 +11,9 @@ MEMORY_HARD_LIMIT = 25_000 _SECTION_HEADING_RE = re.compile(r"^##\s+(.+)$", re.MULTILINE) _HEADING_LINE_RE = re.compile(r"^##\s+\S+", re.MULTILINE) _HEADING_NORMALIZE_RE = re.compile(r"[^a-z0-9]+") -_LEGACY_BULLET_RE = re.compile(r"^-\s+\(\d{4}-\d{2}-\d{2}\)\s+\[(fact|pref|instr)\]\s+.+$") +_LEGACY_BULLET_RE = re.compile( + r"^-\s+\(\d{4}-\d{2}-\d{2}\)\s+\[(fact|pref|instr)\]\s+.+$" +) _NEW_BULLET_RE = re.compile(r"^-\s+\d{4}-\d{2}-\d{2}:\s+.+$") _FORBIDDEN_TEAM_HEADINGS = { diff --git a/surfsense_backend/tests/unit/agents/new_chat/tools/test_update_memory_scope.py b/surfsense_backend/tests/unit/agents/new_chat/tools/test_update_memory_scope.py index 60310d907..f1a0f97f0 100644 --- a/surfsense_backend/tests/unit/agents/new_chat/tools/test_update_memory_scope.py +++ b/surfsense_backend/tests/unit/agents/new_chat/tools/test_update_memory_scope.py @@ -88,7 +88,9 @@ async def test_save_memory_blocks_new_personal_heading_in_team_before_commit( @pytest.mark.asyncio -async def test_save_memory_allows_grandfathered_personal_heading_in_team(monkeypatch) -> None: +async def test_save_memory_allows_grandfathered_personal_heading_in_team( + monkeypatch, +) -> None: content = "## Preferences\n- 2026-04-10: Prefers dark mode\n" target = type("Target", (), {"shared_memory_md": content})() session = _FakeSession() diff --git a/surfsense_backend/tests/unit/services/test_memory_service.py b/surfsense_backend/tests/unit/services/test_memory_service.py index c16e34062..e7fef2cac 100644 --- a/surfsense_backend/tests/unit/services/test_memory_service.py +++ 
b/surfsense_backend/tests/unit/services/test_memory_service.py @@ -108,7 +108,9 @@ async def test_save_memory_rejects_long_no_heading_payload(monkeypatch) -> None: @pytest.mark.asyncio -async def test_save_memory_grandfathers_existing_team_personal_heading(monkeypatch) -> None: +async def test_save_memory_grandfathers_existing_team_personal_heading( + monkeypatch, +) -> None: content = "## Preferences\n- 2026-05-19: Existing legacy heading\n" target = SimpleNamespace(shared_memory_md=content) session = _FakeSession() diff --git a/surfsense_web/components/documents/DocumentNode.tsx b/surfsense_web/components/documents/DocumentNode.tsx index 86c9b899a..a13bd0079 100644 --- a/surfsense_web/components/documents/DocumentNode.tsx +++ b/surfsense_web/components/documents/DocumentNode.tsx @@ -216,10 +216,7 @@ export const DocumentNode = React.memo(function DocumentNode({ return ( <> {isMemoryDocument ? ( - + Date: Wed, 20 May 2026 09:40:00 +0200 Subject: [PATCH 25/63] perf(gmail subagent): stop echoing raw emails array into evidence.items --- .../subagents/connectors/gmail/system_prompt.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/gmail/system_prompt.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/gmail/system_prompt.md index d74e9bdc4..c04d69ad0 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/gmail/system_prompt.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/gmail/system_prompt.md @@ -33,7 +33,7 @@ You are a Gmail specialist for the user's connected Gmail mailbox. | `error` | `error` | Relay the tool's `message` verbatim as `next_step`. | | tool raises / unknown | `error` | `"Gmail tool failed unexpectedly. Ask the user to retry shortly."` | -Surface the tool's `message_id`, `thread_id`, `draft_id`, `subject`, and recipient fields inside `evidence` when the tool returned them. 
For `search_gmail`, place the raw `emails` array inside `evidence.items`. Never invent a field the tool did not return. +Surface the tool's `message_id`, `thread_id`, `draft_id`, `subject`, and recipient fields inside `evidence` when the tool returned them. For `search_gmail`, set `evidence.items` to `{ "total": N }` and list the matched emails in `action_summary` (sender, subject, date; one line per email; up to 10 entries, then `"...and N more"`). Never invent a field the tool did not return. ## Examples @@ -114,7 +114,7 @@ Rules: - `status=success` → `next_step=null`, `missing_fields=null`. - `status=partial|blocked|error` → `next_step` must be non-null. - `status=blocked` due to missing required inputs → `missing_fields` must be non-null. -- For `search_gmail` results, populate `evidence.items` with `{ "emails": [...], "total": N }`. +- For `search_gmail` results, set `evidence.items` to `{ "total": N }` and list the matched emails in `action_summary` (sender, subject, date; up to 10 entries, then `"...and N more"`). - For ambiguous matches across `update_gmail_draft` / `trash_gmail_email` / `read_gmail_email`, populate `evidence.matched_candidates` with up to 5 options (`id` + `label`). Infer before you call; verify before you send; map every tool outcome faithfully. 
From d3d396a473b90b6940e8c21dcf5c1967137f78bf Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 20 May 2026 09:40:18 +0200 Subject: [PATCH 26/63] perf(linear subagent): stop echoing raw issues list into evidence.items --- .../subagents/connectors/linear/system_prompt.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/linear/system_prompt.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/linear/system_prompt.md index f7dbeb9a9..1d96a4105 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/linear/system_prompt.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/linear/system_prompt.md @@ -32,7 +32,7 @@ Failure handling: Supervisor: "Find issues assigned to me with priority Urgent." 1. Discovery: list issues with filters `{assignee: "me", priority: 1}`. -2. Return `status=success` with the matched issues in `evidence.items`. +2. Return `status=success` with `evidence.items` set to `{ "total": N }` and the matched issues listed in `action_summary` (identifier, title, state, assignee; one line per issue; up to 10 entries, then `"...and N more"`). @@ -106,7 +106,7 @@ Rules: - `status=partial|blocked|error` → `next_step` must be non-null. - `status=blocked` due to missing required inputs → `missing_fields` must be non-null. - For blocked ambiguity, populate `evidence.matched_candidates` with up to 5 options (`id` + `label` — works for any kind of candidate: issue, user, project, state, etc.). -- For discovery-only queries (lists), populate `evidence.items` with the structured list. +- For discovery-only queries (lists), set `evidence.items` to `{ "total": N }` and list the matched items in `action_summary` (identifier, title, state, assignee; up to 10 entries, then `"...and N more"`). Discover before you mutate; never guess identifiers. 
From 6e5dd54bbfad4cdd72d4ab0ac1332bd1aa46545f Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 20 May 2026 09:40:33 +0200 Subject: [PATCH 27/63] perf(slack subagent): stop echoing raw messages list into evidence.items --- .../subagents/connectors/slack/system_prompt.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/slack/system_prompt.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/slack/system_prompt.md index c8edfc1db..3c24b19c9 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/slack/system_prompt.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/slack/system_prompt.md @@ -37,7 +37,7 @@ Failure handling: Supervisor: "Summarize the latest discussion in #marketing." 1. Search channels for "marketing" → one strong match. Capture the channel ID. 2. Read that channel's recent message history. -3. Return `status=success` with the message list in `evidence.items`. +3. Return `status=success` with `evidence.items` set to `{ "total": N }` and the messages listed in `action_summary` (sender, timestamp, text snippet; one line per message; up to 10 entries, then `"...and N more"`). @@ -92,7 +92,7 @@ Rules: - `status=partial|blocked|error` → `next_step` must be non-null. - `status=blocked` due to missing required inputs → `missing_fields` must be non-null. - For blocked ambiguity, populate `evidence.matched_candidates` with up to 5 options (`id` + `label` — works for any kind of candidate: channel, user, message, thread). -- For discovery-only queries (lists), populate `evidence.items` with the structured list. +- For discovery-only queries (lists), set `evidence.items` to `{ "total": N }` and list the matched items in `action_summary` (channel/user, key identifier, timestamp, short snippet; up to 10 entries, then `"...and N more"`). Discover before you post; never guess channel, user, or thread targets. 
From 6be1b22ef6d1c62c80b3696ec5281725047468aa Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 20 May 2026 09:40:48 +0200 Subject: [PATCH 28/63] perf(jira subagent): stop echoing raw issues list into evidence.items --- .../subagents/connectors/jira/system_prompt.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/jira/system_prompt.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/jira/system_prompt.md index 79c46f8a0..4dcc56454 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/jira/system_prompt.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/jira/system_prompt.md @@ -39,7 +39,7 @@ Failure handling: Supervisor: "Find issues assigned to me with status 'In Progress'." 1. JQL search with `assignee = currentUser() AND status = "In Progress"`. -2. Return `status=success` with the matched issues in `evidence.items`. +2. Return `status=success` with `evidence.items` set to `{ "total": N }` and the matched issues listed in `action_summary` (issue key, summary, status, assignee; one line per issue; up to 10 entries, then `"...and N more"`). @@ -116,7 +116,7 @@ Rules: - `status=partial|blocked|error` → `next_step` must be non-null. - `status=blocked` due to missing required inputs → `missing_fields` must be non-null. - For blocked ambiguity, populate `evidence.matched_candidates` with up to 5 options (`id` + `label` — works for any kind of candidate: site, project, issue, user, transition, etc.). -- For discovery-only queries (lists), populate `evidence.items` with the structured list. +- For discovery-only queries (lists), set `evidence.items` to `{ "total": N }` and list the matched items in `action_summary` (issue key, summary, status, assignee; up to 10 entries, then `"...and N more"`). Discover before you mutate; never guess identifiers, transitions, or required fields. 
From 1b2f13e25ca5e455f9e5019d0b5ad98e3c203d6a Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 20 May 2026 09:41:04 +0200 Subject: [PATCH 29/63] perf(clickup subagent): stop echoing raw tasks list into evidence.items --- .../subagents/connectors/clickup/system_prompt.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/clickup/system_prompt.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/clickup/system_prompt.md index eaea5827b..898197f14 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/clickup/system_prompt.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/clickup/system_prompt.md @@ -36,7 +36,7 @@ Failure handling: Supervisor: "Find tasks about the homepage redesign." 1. Workspace search for "homepage redesign" → matched tasks. -2. Return `status=success` with the matched tasks in `evidence.items`. +2. Return `status=success` with `evidence.items` set to `{ "total": N }` and the matched tasks listed in `action_summary` (task id, title, status, assignees; one line per task; up to 10 entries, then `"...and N more"`). @@ -98,7 +98,7 @@ Rules: - `status=partial|blocked|error` → `next_step` must be non-null. - `status=blocked` due to missing required inputs → `missing_fields` must be non-null. - For blocked ambiguity, populate `evidence.matched_candidates` with up to 5 options (`id` + `label` — works for any kind of candidate: task, list, member, status, custom-field choice, etc.). -- For discovery-only queries (lists), populate `evidence.items` with the structured list. +- For discovery-only queries (lists), set `evidence.items` to `{ "total": N }` and list the matched items in `action_summary` (task id, title, status, assignees; up to 10 entries, then `"...and N more"`). Discover before you mutate; never guess identifiers, list statuses, or assignees. 
From 56d8ff89e2d46e890fe9c11999479af0abda8c16 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 20 May 2026 09:41:18 +0200 Subject: [PATCH 30/63] perf(airtable subagent): stop echoing raw records list into evidence.items --- .../subagents/connectors/airtable/system_prompt.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/airtable/system_prompt.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/airtable/system_prompt.md index 1b7e84710..9434db7a1 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/airtable/system_prompt.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/airtable/system_prompt.md @@ -38,7 +38,7 @@ Supervisor: "List open tasks in the Project Tracker base." 2. List tables in that base → identify the Tasks table; capture its table ID. 3. Get table schema → identify the status field and the choice IDs that represent "open" states. 4. List records with a typed filter on the status field for those choice IDs. -5. Return `status=success` with the matched records in `evidence.items`. +5. Return `status=success` with `evidence.items` set to `{ "total": N }` and the matched records listed in `action_summary` (record id, primary-field value, and 1-2 most relevant fields; one line per record; up to 10 entries, then `"...and N more"`). @@ -97,7 +97,7 @@ Rules: - `status=partial|blocked|error` → `next_step` must be non-null. - `status=blocked` due to missing required inputs → `missing_fields` must be non-null. - For blocked ambiguity, populate `evidence.matched_candidates` with up to 5 options (`id` + `label` — works for any kind of candidate: base, table, field, choice, record, etc.). -- For discovery-only queries (lists), populate `evidence.items` with the structured list. 
+- For discovery-only queries (lists), set `evidence.items` to `{ "total": N }` and list the matched items in `action_summary` (record id, primary-field value, and 1-2 most relevant fields; up to 10 entries, then `"...and N more"`). Discover before you mutate; never guess identifiers, choice IDs, or required fields. From f4e66718be1d81541fb0e576da0e80ff90f3a46d Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 20 May 2026 09:41:36 +0200 Subject: [PATCH 31/63] perf(discord subagent): stop echoing raw channels/messages payload into evidence.items --- .../subagents/connectors/discord/system_prompt.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/discord/system_prompt.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/discord/system_prompt.md index a0ba6d87e..249f9ec8b 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/discord/system_prompt.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/discord/system_prompt.md @@ -24,7 +24,7 @@ You are a Discord specialist for the user's connected Discord server. | `error` | `error` | Relay the tool's `message` verbatim as `next_step`. | | tool raises / unknown | `error` | `"Discord tool failed unexpectedly. Ask the user to retry shortly."` | -Surface the tool's `message`, `channel_id`, `message_id`, and the listed channels/messages payload inside `evidence` when the tool returned them. Never invent a field the tool did not return. +Surface the tool's `message`, `channel_id`, and `message_id` inside `evidence` when the tool returned them. For `list_discord_channels` and `read_discord_messages`, set `evidence.items` to `{ "total": N }` and list the matched entries in `action_summary` (channel name or sender + timestamp + short text snippet; one line per entry; up to 10 entries, then `"...and N more"`). Never invent a field the tool did not return. 
## Examples From 20f7896a99d42892c9d02ab19e7a4613964e0dc0 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 20 May 2026 09:41:47 +0200 Subject: [PATCH 32/63] perf(luma subagent): stop echoing raw events list into evidence.items --- .../multi_agent_chat/subagents/connectors/luma/system_prompt.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/luma/system_prompt.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/luma/system_prompt.md index 953dbff58..0f42161b3 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/luma/system_prompt.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/luma/system_prompt.md @@ -26,7 +26,7 @@ You are a Luma specialist for the user's connected Luma account. | `error` | `error` | Relay the tool's `message` verbatim as `next_step` (this covers Luma Plus 403s and other API errors). | | tool raises / unknown | `error` | `"Luma tool failed unexpectedly. Ask the user to retry shortly."` | -Surface the tool's `message`, `event_id`, `name`, `start_at`, and `url` inside `evidence` when the tool returned them. Never invent a field the tool did not return. +Surface the tool's `message`, `event_id`, `name`, `start_at`, and `url` inside `evidence` when the tool returned them. For `list_luma_events`, set `evidence.items` to `{ "total": N }` and list the matched events in `action_summary` (event name, start date/time, location if present; one line per event; up to 10 entries, then `"...and N more"`). Never invent a field the tool did not return. 
## Examples From 6c173dc2a7df14077b34d58ee5a9a5c83b37d740 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 20 May 2026 09:42:03 +0200 Subject: [PATCH 33/63] perf(teams subagent): stop echoing raw teams/channels/messages payload into evidence.items --- .../subagents/connectors/teams/system_prompt.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/teams/system_prompt.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/teams/system_prompt.md index b79c59f90..c3a280f79 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/teams/system_prompt.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/teams/system_prompt.md @@ -26,7 +26,7 @@ You are a Microsoft Teams specialist for the user's connected Teams account. | `error` | `error` | Relay the tool's `message` verbatim as `next_step`. | | tool raises / unknown | `error` | `"Teams tool failed unexpectedly. Ask the user to retry shortly."` | -Surface the tool's `message`, `team_id`, `team_name`, `channel_id`, `channel_name`, and `message_id` inside `evidence` when the tool returned them. Never invent a field the tool did not return. +Surface the tool's `message`, `team_id`, `team_name`, `channel_id`, `channel_name`, and `message_id` inside `evidence` when the tool returned them. For `list_teams_channels` and `read_teams_messages`, set `evidence.items` to `{ "total": N }` and list the matched entries in `action_summary` (team › channel, or sender + timestamp + short text snippet; one line per entry; up to 10 entries, then `"...and N more"`). Never invent a field the tool did not return. 
## Examples From b554c600bba5f5e9ffc414c74c609ac5b9475205 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 20 May 2026 09:42:57 +0200 Subject: [PATCH 34/63] perf(research subagent): cap evidence.findings and evidence.sources to bound output --- .../subagents/builtins/research/system_prompt.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/system_prompt.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/system_prompt.md index cf558db62..f1a22ddf1 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/system_prompt.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/system_prompt.md @@ -50,4 +50,6 @@ Rules: - `status=success` -> `next_step=null`, `missing_fields=null`. - `status=partial|blocked|error` -> `next_step` must be non-null. - `status=blocked` due to missing required inputs -> `missing_fields` must be non-null. +- `evidence.findings`: max 10 entries, each a single sentence stating one distinct fact. Do not paste raw paragraphs, scraped pages, or quote blocks. +- `evidence.sources`: max 10 URLs, one per finding when applicable. List each URL once. 
From 5edf0520c4d44780c73e94e4cce83e461c6070a8 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 20 May 2026 09:43:32 +0200 Subject: [PATCH 35/63] perf(kb subagent, cloud): cap evidence.content_excerpt to 500 chars --- .../subagents/builtins/knowledge_base/system_prompt_cloud.md | 1 + 1 file changed, 1 insertion(+) diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md index 60cafb30c..514ec6639 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md @@ -118,5 +118,6 @@ Rules: - `status=success` → `next_step=null`, `missing_fields=null`. - `status=partial|blocked|error` → `next_step` must be non-null. - `status=blocked` due to missing required inputs → `missing_fields` must be non-null. +- `evidence.content_excerpt`: max ~500 characters. Surface a short excerpt or a one-sentence summary, not the full file body. The supervisor already sees the tool's raw output. Infer before you call; map every tool outcome faithfully. 
From 0cdda14922f6ac07deae581d6448a30074887f50 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 20 May 2026 09:43:36 +0200 Subject: [PATCH 36/63] perf(kb subagent, desktop): cap evidence.content_excerpt to 500 chars --- .../subagents/builtins/knowledge_base/system_prompt_desktop.md | 1 + 1 file changed, 1 insertion(+) diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md index 8f64f2eb6..bfa96ee5b 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md @@ -118,5 +118,6 @@ Rules: - `status=success` → `next_step=null`, `missing_fields=null`. - `status=partial|blocked|error` → `next_step` must be non-null. - `status=blocked` due to missing required inputs → `missing_fields` must be non-null. +- `evidence.content_excerpt`: max ~500 characters. Surface a short excerpt or a one-sentence summary, not the full file body. The supervisor already sees the tool's raw output. Infer before you call; map every tool outcome faithfully. 
From a0ff86e0e8e88c17ec83f26c50530137447d2809 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 20 May 2026 13:20:05 +0530 Subject: [PATCH 37/63] feat: add memory document model and parsing functionality for markdown handling --- .../app/services/memory/document.py | 200 ++++++++++++++++++ .../app/services/memory/service.py | 3 + .../app/services/memory/validation.py | 52 ++--- .../tools/test_update_memory_scope.py | 21 ++ .../unit/services/test_memory_service.py | 2 +- 5 files changed, 241 insertions(+), 37 deletions(-) create mode 100644 surfsense_backend/app/services/memory/document.py diff --git a/surfsense_backend/app/services/memory/document.py b/surfsense_backend/app/services/memory/document.py new file mode 100644 index 000000000..498195e25 --- /dev/null +++ b/surfsense_backend/app/services/memory/document.py @@ -0,0 +1,200 @@ +"""Memory-specific markdown document model and canonical renderer. + +This intentionally parses only SurfSense memory's small markdown contract: +``##`` sections with dated bullet items. Unknown lines are preserved so user +edits are not lost, while legacy marker bullets are normalized on render. 
+""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from datetime import date + +DEFAULT_LEGACY_SECTION = "Memory" +LEGACY_MARKERS = frozenset({"fact", "pref", "instr"}) + + +@dataclass(frozen=True) +class MemoryBullet: + entry_date: date + text: str + + +@dataclass(frozen=True) +class MemoryRawLine: + text: str + + +MemoryLine = MemoryBullet | MemoryRawLine + + +@dataclass(frozen=True) +class MemorySection: + heading: str + lines: list[MemoryLine] = field(default_factory=list) + explicit_heading: bool = True + + +@dataclass(frozen=True) +class MemoryDocument: + sections: list[MemorySection] = field(default_factory=list) + + @property + def has_explicit_heading(self) -> bool: + return any(section.explicit_heading for section in self.sections) + + +def is_section_heading(line: str) -> bool: + return line.startswith("## ") and bool(line[3:].strip()) + + +def heading_text(line: str) -> str: + return line[3:].strip() + + +def normalize_heading(heading: str) -> str: + chars: list[str] = [] + previous_was_space = True + for char in heading.strip().lower(): + if char.isalnum(): + chars.append(char) + previous_was_space = False + elif not previous_was_space: + chars.append(" ") + previous_was_space = True + return "".join(chars).strip() + + +def parse_bullet_line(line: str) -> MemoryBullet | None: + stripped = line.strip() + if not stripped.startswith("- "): + return None + + body = stripped[2:] + parsed = _parse_canonical_bullet(body) + if parsed is not None: + return parsed + return _parse_legacy_bullet(body) + + +def _parse_canonical_bullet(body: str) -> MemoryBullet | None: + if len(body) < 13 or body[10:12] != ": ": + return None + try: + entry_date = date.fromisoformat(body[:10]) + except ValueError: + return None + text = body[12:].strip() + if not text: + return None + return MemoryBullet(entry_date=entry_date, text=text) + + +def _parse_legacy_bullet(body: str) -> MemoryBullet | None: + if len(body) < 20 or not 
body.startswith("("): + return None + if len(body) < 14 or body[11:14] != ") [": + return None + try: + entry_date = date.fromisoformat(body[1:11]) + except ValueError: + return None + + marker_end = body.find("] ", 14) + if marker_end == -1: + return None + marker = body[14:marker_end] + if marker not in LEGACY_MARKERS: + return None + + text = body[marker_end + 2 :].strip() + if not text: + return None + return MemoryBullet(entry_date=entry_date, text=text) + + +def parse_memory_document(content: str | None) -> MemoryDocument: + if not content: + return MemoryDocument() + + sections: list[MemorySection] = [] + current_heading: str | None = None + current_explicit = True + current_lines: list[MemoryLine] = [] + + def flush_current() -> None: + nonlocal current_heading, current_explicit, current_lines + if current_heading is None: + return + sections.append( + MemorySection( + heading=current_heading, + lines=current_lines, + explicit_heading=current_explicit, + ) + ) + current_heading = None + current_explicit = True + current_lines = [] + + for raw_line in content.strip().splitlines(): + line = raw_line.rstrip() + if is_section_heading(line): + flush_current() + current_heading = heading_text(line) + current_explicit = True + current_lines = [] + continue + + bullet = parse_bullet_line(line) + if current_heading is None: + if bullet is None: + continue + current_heading = DEFAULT_LEGACY_SECTION + current_explicit = False + current_lines = [bullet] + continue + + current_lines.append(bullet if bullet is not None else MemoryRawLine(text=line)) + + flush_current() + return MemoryDocument(sections=sections) + + +def render_memory_document(document: MemoryDocument) -> str: + rendered_sections: list[str] = [] + for section in document.sections: + section_lines = [f"## {section.heading}"] + for line in section.lines: + if isinstance(line, MemoryBullet): + section_lines.append(f"- {line.entry_date.isoformat()}: {line.text}") + else: + section_lines.append(line.text) + 
rendered_sections.append("\n".join(section_lines).strip()) + return "\n\n".join(section for section in rendered_sections if section).strip() + + +def extract_headings(memory: str | None) -> set[str]: + document = parse_memory_document(memory) + return { + normalize_heading(section.heading) + for section in document.sections + if section.explicit_heading + } + + +def has_explicit_heading(content: str) -> bool: + return parse_memory_document(content).has_explicit_heading + + +def nonstandard_bullets(content: str) -> list[str]: + warnings: list[str] = [] + for line in content.splitlines(): + stripped = line.strip() + if not stripped.startswith("- "): + continue + if parse_bullet_line(stripped) is not None: + continue + short = stripped[:80] + ("..." if len(stripped) > 80 else "") + warnings.append(f"Non-standard memory bullet: {short}") + return warnings diff --git a/surfsense_backend/app/services/memory/service.py b/surfsense_backend/app/services/memory/service.py index d4a7d0974..dd4459e77 100644 --- a/surfsense_backend/app/services/memory/service.py +++ b/surfsense_backend/app/services/memory/service.py @@ -13,6 +13,7 @@ from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession from app.db import SearchSpace, User +from app.services.memory.document import parse_memory_document, render_memory_document from app.services.memory.prompts import ( TEAM_MEMORY_EXTRACT_PROMPT, USER_MEMORY_EXTRACT_PROMPT, @@ -184,6 +185,8 @@ async def save_memory( warnings=warnings, ) + next_content = render_memory_document(parse_memory_document(next_content)) + try: _set_memory(target, normalized, next_content) session.add(target) diff --git a/surfsense_backend/app/services/memory/validation.py b/surfsense_backend/app/services/memory/validation.py index f9c5007d9..6565f39c7 100644 --- a/surfsense_backend/app/services/memory/validation.py +++ b/surfsense_backend/app/services/memory/validation.py @@ -2,20 +2,18 @@ from __future__ import annotations -import re from typing 
import Literal +from app.services.memory.document import ( + extract_headings, + has_explicit_heading, + nonstandard_bullets, + parse_memory_document, +) + MEMORY_SOFT_LIMIT = 18_000 MEMORY_HARD_LIMIT = 25_000 -_SECTION_HEADING_RE = re.compile(r"^##\s+(.+)$", re.MULTILINE) -_HEADING_LINE_RE = re.compile(r"^##\s+\S+", re.MULTILINE) -_HEADING_NORMALIZE_RE = re.compile(r"[^a-z0-9]+") -_LEGACY_BULLET_RE = re.compile( - r"^-\s+\(\d{4}-\d{2}-\d{2}\)\s+\[(fact|pref|instr)\]\s+.+$" -) -_NEW_BULLET_RE = re.compile(r"^-\s+\d{4}-\d{2}-\d{2}:\s+.+$") - _FORBIDDEN_TEAM_HEADINGS = { "preferences", "instructions", @@ -25,25 +23,16 @@ _FORBIDDEN_TEAM_HEADINGS = { def has_markdown_heading(content: str) -> bool: - return bool(_HEADING_LINE_RE.search(content)) + return has_explicit_heading(content) def strip_preamble_to_first_heading(content: str) -> str: """Drop model preamble before the first ``##`` heading, if one exists.""" - match = _HEADING_LINE_RE.search(content) - if not match: - return content.strip() - return content[match.start() :].strip() - - -def extract_headings(memory: str | None) -> set[str]: - if not memory: - return set() - return {_normalize_heading(h) for h in _SECTION_HEADING_RE.findall(memory)} - - -def _normalize_heading(heading: str) -> str: - return _HEADING_NORMALIZE_RE.sub(" ", heading.strip().lower()).strip() + lines = content.splitlines() + for index, line in enumerate(lines): + if line.startswith("## ") and line[3:].strip(): + return "\n".join(lines[index:]).strip() + return content.strip() def validate_memory_size(content: str) -> dict[str, str] | None: @@ -69,7 +58,7 @@ def validate_heading_sanity(content: str) -> dict[str, str] | None: return None if len(stripped) <= 40: return None - if any(_LEGACY_BULLET_RE.match(line.strip()) for line in stripped.splitlines()): + if parse_memory_document(stripped).sections: return None return { "status": "error", @@ -115,16 +104,7 @@ def validate_memory_scope( def validate_bullet_format(content: str) -> list[str]: 
- warnings: list[str] = [] - for line in content.splitlines(): - stripped = line.strip() - if not stripped.startswith("- "): - continue - if _NEW_BULLET_RE.match(stripped) or _LEGACY_BULLET_RE.match(stripped): - continue - short = stripped[:80] + ("..." if len(stripped) > 80 else "") - warnings.append(f"Non-standard memory bullet: {short}") - return warnings + return nonstandard_bullets(content) def validate_diff(old_memory: str | None, new_memory: str) -> list[str]: @@ -138,7 +118,7 @@ def validate_diff(old_memory: str | None, new_memory: str) -> list[str]: if dropped: names = ", ".join(sorted(dropped)) warnings.append( - f"Sections removed: {names}. If unintentional, restore from the settings page." + f"Sections removed: {names}. If unintentional, restore them from the memory document." ) old_len = len(old_memory) diff --git a/surfsense_backend/tests/unit/agents/new_chat/tools/test_update_memory_scope.py b/surfsense_backend/tests/unit/agents/new_chat/tools/test_update_memory_scope.py index f1a0f97f0..c941d7d65 100644 --- a/surfsense_backend/tests/unit/agents/new_chat/tools/test_update_memory_scope.py +++ b/surfsense_backend/tests/unit/agents/new_chat/tools/test_update_memory_scope.py @@ -64,6 +64,27 @@ def test_validate_bullet_format_warns_on_nonstandard_bullet() -> None: assert "Non-standard memory bullet" in warnings[0] +@pytest.mark.asyncio +async def test_save_memory_normalizes_legacy_marker_bullets(monkeypatch) -> None: + target = type("Target", (), {"memory_md": ""})() + session = _FakeSession() + + async def fake_load_target(**_kwargs): + return target + + monkeypatch.setattr("app.services.memory.service._load_target", fake_load_target) + + result = await save_memory( + scope=MemoryScope.USER, + target_id="00000000-0000-0000-0000-000000000000", + content="- (2026-04-10) [fact] Legacy fact is preserved\n", + session=session, + ) + + assert result.status == "saved" + assert target.memory_md == "## Memory\n- 2026-04-10: Legacy fact is preserved" + + 
@pytest.mark.asyncio async def test_save_memory_blocks_new_personal_heading_in_team_before_commit( monkeypatch, diff --git a/surfsense_backend/tests/unit/services/test_memory_service.py b/surfsense_backend/tests/unit/services/test_memory_service.py index e7fef2cac..0a45bf3aa 100644 --- a/surfsense_backend/tests/unit/services/test_memory_service.py +++ b/surfsense_backend/tests/unit/services/test_memory_service.py @@ -82,7 +82,7 @@ async def test_save_memory_accepts_legacy_marker_payload(monkeypatch) -> None: ) assert result.status == "saved" - assert "[fact]" in target.memory_md + assert target.memory_md == "## Memory\n- 2026-05-19: Legacy marker memory" @pytest.mark.asyncio From 6090980c5e314c1dae6d4e8a354088b75a79e94d Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 20 May 2026 09:51:44 +0200 Subject: [PATCH 38/63] obs(tokens): log prompt-cache read/write counts and hit ratio per LLM call --- .../app/services/token_tracking_service.py | 32 ++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/surfsense_backend/app/services/token_tracking_service.py b/surfsense_backend/app/services/token_tracking_service.py index 58d06ba31..986e6bf05 100644 --- a/surfsense_backend/app/services/token_tracking_service.py +++ b/surfsense_backend/app/services/token_tracking_service.py @@ -325,6 +325,22 @@ class TokenTrackingCallback(CustomLogger): total_tokens = getattr(usage, "total_tokens", 0) or 0 call_kind = "chat" + # Prompt-cache accounting. Field shapes differ by provider: + # - OpenAI / Azure: ``usage.prompt_tokens_details.cached_tokens`` + # - Anthropic: ``usage.cache_read_input_tokens`` + ``usage.cache_creation_input_tokens`` + # LiteLLM normalizes both; we read both shapes and prefer whichever is set. 
+ cached_tokens = 0 + cache_creation_tokens = 0 + if not is_image: + prompt_details = getattr(usage, "prompt_tokens_details", None) + if prompt_details is not None: + cached_tokens = getattr(prompt_details, "cached_tokens", 0) or 0 + if cached_tokens == 0: + cached_tokens = getattr(usage, "cache_read_input_tokens", 0) or 0 + cache_creation_tokens = ( + getattr(usage, "cache_creation_input_tokens", 0) or 0 + ) + model = kwargs.get("model", "unknown") cost_usd = _extract_cost_usd( @@ -367,9 +383,13 @@ class TokenTrackingCallback(CustomLogger): except Exception: call_latency_s = None + cache_hit_ratio: float | None = None + if prompt_tokens > 0 and (cached_tokens > 0 or cache_creation_tokens > 0): + cache_hit_ratio = cached_tokens / prompt_tokens + logger.info( "[TokenTracking] Captured: model=%s kind=%s prompt=%d completion=%d total=%d " - "cost=$%.6f (%d micros) (accumulator now has %d calls)%s", + "cost=$%.6f (%d micros) (accumulator now has %d calls)%s%s", model, call_kind, prompt_tokens, @@ -379,6 +399,16 @@ class TokenTrackingCallback(CustomLogger): cost_micros, len(acc.calls), f" latency={call_latency_s:.3f}s" if call_latency_s is not None else "", + ( + f" cache_read={cached_tokens} cache_write={cache_creation_tokens}" + f" hit_ratio={cache_hit_ratio:.1%}" + if cache_hit_ratio is not None + else ( + f" cache_read={cached_tokens} cache_write={cache_creation_tokens}" + if (cached_tokens or cache_creation_tokens) + else "" + ) + ), ) From 32f6766cb6c5787fd56607fb020117e21681a1e3 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 20 May 2026 09:55:39 +0200 Subject: [PATCH 39/63] fix(tokens): use canonical prompt_tokens_details path for cache fields LiteLLM normalizes every provider's cache fields onto usage.prompt_tokens_details (cached_tokens + cache_creation_tokens). 
The earlier fallback to usage.cache_read_input_tokens / usage.cache_creation_input_tokens was wrong: Anthropic-shaped fields only live there via a trailing setattr loop, and the canonical field name on the wrapper is cache_creation_tokens (not _input_tokens). --- .../app/services/token_tracking_service.py | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/surfsense_backend/app/services/token_tracking_service.py b/surfsense_backend/app/services/token_tracking_service.py index 986e6bf05..3f07e6f9e 100644 --- a/surfsense_backend/app/services/token_tracking_service.py +++ b/surfsense_backend/app/services/token_tracking_service.py @@ -325,21 +325,23 @@ class TokenTrackingCallback(CustomLogger): total_tokens = getattr(usage, "total_tokens", 0) or 0 call_kind = "chat" - # Prompt-cache accounting. Field shapes differ by provider: - # - OpenAI / Azure: ``usage.prompt_tokens_details.cached_tokens`` - # - Anthropic: ``usage.cache_read_input_tokens`` + ``usage.cache_creation_input_tokens`` - # LiteLLM normalizes both; we read both shapes and prefer whichever is set. + # Prompt-cache accounting. LiteLLM normalizes every provider's cache + # fields onto ``usage.prompt_tokens_details``: + # - ``cached_tokens`` — cache reads (OpenAI/Azure native, DeepSeek + # mapped from ``prompt_cache_hit_tokens``, + # Anthropic mapped from ``cache_read_input_tokens``). + # - ``cache_creation_tokens`` — cache writes (Anthropic only; OpenAI/Azure + # do not expose a write count). + # See ``litellm.types.utils.Usage.__init__`` for the mapping. 
cached_tokens = 0 cache_creation_tokens = 0 if not is_image: prompt_details = getattr(usage, "prompt_tokens_details", None) if prompt_details is not None: cached_tokens = getattr(prompt_details, "cached_tokens", 0) or 0 - if cached_tokens == 0: - cached_tokens = getattr(usage, "cache_read_input_tokens", 0) or 0 - cache_creation_tokens = ( - getattr(usage, "cache_creation_input_tokens", 0) or 0 - ) + cache_creation_tokens = ( + getattr(prompt_details, "cache_creation_tokens", 0) or 0 + ) model = kwargs.get("model", "unknown") From 4fa85a9a947461581ced1ff0c0d65ba6e743ef19 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 20 May 2026 10:02:38 +0200 Subject: [PATCH 40/63] perf(kb-search): offload sync embed_texts to thread embed_texts holds a threading.Lock and runs a sync embedding call inside search_knowledge_base, an async coroutine on the KB priority middleware critical path. Blocking the event loop here stalls every other coroutine on the worker (SSE keepalives, concurrent chat requests, background tasks). Wrap in asyncio.to_thread so the embed runs on the default executor pool while the loop keeps serving. 
--- .../app/agents/new_chat/middleware/knowledge_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/surfsense_backend/app/agents/new_chat/middleware/knowledge_search.py b/surfsense_backend/app/agents/new_chat/middleware/knowledge_search.py index dc06f8763..98bbf3bd7 100644 --- a/surfsense_backend/app/agents/new_chat/middleware/knowledge_search.py +++ b/surfsense_backend/app/agents/new_chat/middleware/knowledge_search.py @@ -457,7 +457,7 @@ async def search_knowledge_base( if not query: return [] - [embedding] = embed_texts([query]) + [embedding] = await asyncio.to_thread(embed_texts, [query]) doc_types = _resolve_search_types(available_connectors, available_document_types) retriever_top_k = min(top_k * 3, 30) From 52d425f17054a9fd5ee8eccf74459f238f5664da Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 20 May 2026 10:03:14 +0200 Subject: [PATCH 41/63] perf(kb-persistence): offload sync embed_texts to thread _create_document and _update_document run on the chat critical path when the filesystem subagent writes via the user's chat turn. Both called embed_texts synchronously inside an async coroutine, blocking the event loop for the duration of the embed. --- .../app/agents/new_chat/middleware/kb_persistence.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/surfsense_backend/app/agents/new_chat/middleware/kb_persistence.py b/surfsense_backend/app/agents/new_chat/middleware/kb_persistence.py index d577441dd..cc30f4897 100644 --- a/surfsense_backend/app/agents/new_chat/middleware/kb_persistence.py +++ b/surfsense_backend/app/agents/new_chat/middleware/kb_persistence.py @@ -32,6 +32,7 @@ exact same routine when ``aafter_agent`` was skipped (e.g. client disconnect). 
from __future__ import annotations +import asyncio import logging from datetime import UTC, datetime from typing import Any @@ -249,11 +250,11 @@ async def _create_document( session.add(doc) await session.flush() - summary_embedding = embed_texts([content])[0] + summary_embedding = (await asyncio.to_thread(embed_texts, [content]))[0] doc.embedding = summary_embedding chunks = chunk_text(content) if chunks: - chunk_embeddings = embed_texts(chunks) + chunk_embeddings = await asyncio.to_thread(embed_texts, chunks) session.add_all( [ Chunk(document_id=doc.id, content=text, embedding=embedding) @@ -295,13 +296,13 @@ async def _update_document( search_space_id, ) - summary_embedding = embed_texts([content])[0] + summary_embedding = (await asyncio.to_thread(embed_texts, [content]))[0] document.embedding = summary_embedding await session.execute(delete(Chunk).where(Chunk.document_id == document.id)) chunks = chunk_text(content) if chunks: - chunk_embeddings = embed_texts(chunks) + chunk_embeddings = await asyncio.to_thread(embed_texts, chunks) session.add_all( [ Chunk(document_id=document.id, content=text, embedding=embedding) From a3d6fa6196f1871fc2e580f82c335305778362ca Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 20 May 2026 10:03:42 +0200 Subject: [PATCH 42/63] perf(document-converters): offload sync embed_text/embed_texts to thread generate_document_summary and create_document_chunks are async helpers called from the chat path and from many connector indexers. Both wrapped embed_text/embed_texts directly inside the coroutine, blocking the event loop for the full duration of the embedding call. 
--- surfsense_backend/app/utils/document_converters.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/surfsense_backend/app/utils/document_converters.py b/surfsense_backend/app/utils/document_converters.py index ed52c1b7b..9bc8103c5 100644 --- a/surfsense_backend/app/utils/document_converters.py +++ b/surfsense_backend/app/utils/document_converters.py @@ -1,3 +1,4 @@ +import asyncio import hashlib import logging import threading @@ -221,7 +222,9 @@ async def generate_document_summary( else: enhanced_summary_content = summary_content - summary_embedding = embed_text(enhanced_summary_content) + summary_embedding = await asyncio.to_thread( + embed_text, enhanced_summary_content + ) return enhanced_summary_content, summary_embedding @@ -237,7 +240,7 @@ async def create_document_chunks(content: str) -> list[Chunk]: List of Chunk objects with embeddings """ chunk_texts = [c.text for c in config.chunker_instance.chunk(content)] - chunk_embeddings = embed_texts(chunk_texts) + chunk_embeddings = await asyncio.to_thread(embed_texts, chunk_texts) return [ Chunk(content=text, embedding=emb) for text, emb in zip(chunk_texts, chunk_embeddings, strict=False) From a8de98895a93fe376c21ab3478f5f1871526a7c1 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 20 May 2026 10:04:26 +0200 Subject: [PATCH 43/63] perf(revert-service): offload sync embed_texts to thread _restore_in_place_document and _reinsert_document_from_revision are async helpers invoked by the synchronous-feeling POST /api/threads/.../revert route; both ran embed_texts inline, blocking the event loop while the HTTP client waited. 
--- surfsense_backend/app/services/revert_service.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/surfsense_backend/app/services/revert_service.py b/surfsense_backend/app/services/revert_service.py index d02a31345..60f6503aa 100644 --- a/surfsense_backend/app/services/revert_service.py +++ b/surfsense_backend/app/services/revert_service.py @@ -29,6 +29,7 @@ same trap waiting to happen). from __future__ import annotations +import asyncio import logging from dataclasses import dataclass from datetime import UTC, datetime @@ -234,7 +235,7 @@ async def _restore_in_place_document( if isinstance(c, dict) and isinstance(c.get("content"), str) ] if chunk_texts: - chunk_embeddings = embed_texts(chunk_texts) + chunk_embeddings = await asyncio.to_thread(embed_texts, chunk_texts) session.add_all( [ Chunk(document_id=doc.id, content=text, embedding=embedding) @@ -244,7 +245,9 @@ async def _restore_in_place_document( ] ) if isinstance(revision.content_before, str): - doc.embedding = embed_texts([revision.content_before])[0] + doc.embedding = ( + await asyncio.to_thread(embed_texts, [revision.content_before]) + )[0] doc.updated_at = datetime.now(UTC) return RevertOutcome(status="ok", message="Document restored from snapshot.") @@ -320,7 +323,7 @@ async def _reinsert_document_from_revision( session.add(new_doc) await session.flush() - new_doc.embedding = embed_texts([content])[0] + new_doc.embedding = (await asyncio.to_thread(embed_texts, [content]))[0] chunk_texts = [] chunks_before = revision.chunks_before if isinstance(chunks_before, list): @@ -330,7 +333,7 @@ async def _reinsert_document_from_revision( if isinstance(c, dict) and isinstance(c.get("content"), str) ] if chunk_texts: - chunk_embeddings = embed_texts(chunk_texts) + chunk_embeddings = await asyncio.to_thread(embed_texts, chunk_texts) session.add_all( [ Chunk(document_id=new_doc.id, content=text, embedding=embedding) From 1791241c0c8c451b7c82ec6e688b4264498da3fb Mon Sep 17 00:00:00 
2001 From: CREDO23 Date: Wed, 20 May 2026 10:09:38 +0200 Subject: [PATCH 44/63] perf(indexers): offload sync embed_text to thread across background workers Connector kb_sync_services (gmail, onedrive, google_calendar, jira), streaming indexers (discord, luma, teams) and the file-processor save path all called embed_text inside async coroutines, blocking the background worker's event loop for the duration of the embed. Wrap each call site in asyncio.to_thread so concurrent indexing tasks stop serialising on the embed. --- surfsense_backend/app/services/gmail/kb_sync_service.py | 5 ++++- .../app/services/google_calendar/kb_sync_service.py | 8 ++++++-- surfsense_backend/app/services/jira/kb_sync_service.py | 8 ++++++-- .../app/services/onedrive/kb_sync_service.py | 5 ++++- .../app/tasks/connector_indexers/discord_indexer.py | 4 +++- .../app/tasks/connector_indexers/luma_indexer.py | 5 ++++- .../app/tasks/connector_indexers/teams_indexer.py | 5 ++++- surfsense_backend/app/tasks/document_processors/_save.py | 5 +++-- 8 files changed, 34 insertions(+), 11 deletions(-) diff --git a/surfsense_backend/app/services/gmail/kb_sync_service.py b/surfsense_backend/app/services/gmail/kb_sync_service.py index 885ee4b94..6ff5f3c2b 100644 --- a/surfsense_backend/app/services/gmail/kb_sync_service.py +++ b/surfsense_backend/app/services/gmail/kb_sync_service.py @@ -1,3 +1,4 @@ +import asyncio import logging from datetime import datetime @@ -100,7 +101,9 @@ class GmailKBSyncService: else: logger.warning("No LLM configured -- using fallback summary") summary_content = f"Gmail Message: {subject}\n\n{indexable_content}" - summary_embedding = embed_text(summary_content) + summary_embedding = await asyncio.to_thread( + embed_text, summary_content + ) chunks = await create_document_chunks(indexable_content) now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S") diff --git a/surfsense_backend/app/services/google_calendar/kb_sync_service.py 
b/surfsense_backend/app/services/google_calendar/kb_sync_service.py index 602a55738..1f017ec4d 100644 --- a/surfsense_backend/app/services/google_calendar/kb_sync_service.py +++ b/surfsense_backend/app/services/google_calendar/kb_sync_service.py @@ -116,7 +116,9 @@ class GoogleCalendarKBSyncService: summary_content = ( f"Google Calendar Event: {event_summary}\n\n{indexable_content}" ) - summary_embedding = embed_text(summary_content) + summary_embedding = await asyncio.to_thread( + embed_text, summary_content + ) chunks = await create_document_chunks(indexable_content) now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S") @@ -295,7 +297,9 @@ class GoogleCalendarKBSyncService: summary_content = ( f"Google Calendar Event: {event_summary}\n\n{indexable_content}" ) - summary_embedding = embed_text(summary_content) + summary_embedding = await asyncio.to_thread( + embed_text, summary_content + ) chunks = await create_document_chunks(indexable_content) now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S") diff --git a/surfsense_backend/app/services/jira/kb_sync_service.py b/surfsense_backend/app/services/jira/kb_sync_service.py index 8e88bee81..5f6668377 100644 --- a/surfsense_backend/app/services/jira/kb_sync_service.py +++ b/surfsense_backend/app/services/jira/kb_sync_service.py @@ -98,7 +98,9 @@ class JiraKBSyncService: summary_content = ( f"Jira Issue {issue_identifier}: {issue_title}\n\n{issue_content}" ) - summary_embedding = embed_text(summary_content) + summary_embedding = await asyncio.to_thread( + embed_text, summary_content + ) chunks = await create_document_chunks(issue_content) now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S") @@ -212,7 +214,9 @@ class JiraKBSyncService: summary_content = ( f"Jira Issue {issue_identifier}: {issue_title}\n\n{issue_content}" ) - summary_embedding = embed_text(summary_content) + summary_embedding = await asyncio.to_thread( + embed_text, summary_content + ) chunks = await create_document_chunks(issue_content) diff --git 
a/surfsense_backend/app/services/onedrive/kb_sync_service.py b/surfsense_backend/app/services/onedrive/kb_sync_service.py index e9b2e38ea..e1da3b4a1 100644 --- a/surfsense_backend/app/services/onedrive/kb_sync_service.py +++ b/surfsense_backend/app/services/onedrive/kb_sync_service.py @@ -1,3 +1,4 @@ +import asyncio import logging from datetime import datetime @@ -95,7 +96,9 @@ class OneDriveKBSyncService: else: logger.warning("No LLM configured — using fallback summary") summary_content = f"OneDrive File: {file_name}\n\n{indexable_content}" - summary_embedding = embed_text(summary_content) + summary_embedding = await asyncio.to_thread( + embed_text, summary_content + ) chunks = await create_document_chunks(indexable_content) now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S") diff --git a/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py b/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py index 5e784cb4f..180f21412 100644 --- a/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py @@ -670,7 +670,9 @@ async def index_discord_messages( # Heavy processing (embeddings, chunks) chunks = await create_document_chunks(item["combined_document_string"]) - doc_embedding = embed_text(item["combined_document_string"]) + doc_embedding = await asyncio.to_thread( + embed_text, item["combined_document_string"] + ) # Update document to READY with actual content document.title = f"{item['guild_name']}#{item['channel_name']}" diff --git a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py index a698bfd46..555d60273 100644 --- a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py @@ -6,6 +6,7 @@ Implements 2-phase document status updates for real-time UI feedback: - Phase 2: Process each event: pending → 
processing → ready/failed """ +import asyncio import time from collections.abc import Awaitable, Callable from datetime import datetime, timedelta @@ -465,7 +466,9 @@ async def index_luma_events( summary_content = ( f"Luma Event: {item['event_name']}\n\n{item['event_markdown']}" ) - summary_embedding = embed_text(summary_content) + summary_embedding = await asyncio.to_thread( + embed_text, summary_content + ) chunks = await create_document_chunks(item["event_markdown"]) diff --git a/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py b/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py index 12cdf384e..25994895a 100644 --- a/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py @@ -9,6 +9,7 @@ Uses 2-phase document status updates for real-time UI feedback: - Phase 2: Process each document: pending → processing → ready/failed """ +import asyncio import time from collections.abc import Awaitable, Callable from datetime import UTC, datetime @@ -581,7 +582,9 @@ async def index_teams_messages( # Heavy processing (embeddings, chunks) chunks = await create_document_chunks(item["combined_document_string"]) - doc_embedding = embed_text(item["combined_document_string"]) + doc_embedding = await asyncio.to_thread( + embed_text, item["combined_document_string"] + ) # Update document to READY with actual content document.title = f"{item['team_name']} - {item['channel_name']}" diff --git a/surfsense_backend/app/tasks/document_processors/_save.py b/surfsense_backend/app/tasks/document_processors/_save.py index ae45f7a69..d633dd4f6 100644 --- a/surfsense_backend/app/tasks/document_processors/_save.py +++ b/surfsense_backend/app/tasks/document_processors/_save.py @@ -2,6 +2,7 @@ Unified document save/update logic for file processors. 
""" +import asyncio import logging from sqlalchemy.exc import SQLAlchemyError @@ -43,7 +44,7 @@ async def _generate_summary( """ if not enable_summary: summary = f"File: {file_name}\n\n{markdown_content[:4000]}" - return summary, embed_text(summary) + return summary, await asyncio.to_thread(embed_text, summary) if etl_service == "DOCLING": from app.services.docling_service import create_docling_service @@ -65,7 +66,7 @@ async def _generate_summary( parts.append(f"**{formatted_key}:** {value}") enhanced = "\n".join(parts) + "\n\n# DOCUMENT SUMMARY\n\n" + summary_text - return enhanced, embed_text(enhanced) + return enhanced, await asyncio.to_thread(embed_text, enhanced) # Standard summary (Unstructured / LlamaCloud / others) meta = { From 132e7b3c44488cba85831f0f6c49e284036cade8 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 20 May 2026 14:03:28 +0530 Subject: [PATCH 45/63] refactor: remove memory extraction functions and related components from the new chat agent --- .../app/agents/new_chat/memory_extraction.py | 78 ---------------- .../app/services/memory/__init__.py | 2 - .../app/services/memory/prompts.py | 90 ------------------- .../app/services/memory/schemas.py | 20 +---- .../app/services/memory/service.py | 78 +--------------- .../app/tasks/chat/stream_new_chat.py | 35 -------- .../streaming/graph_stream/event_stream.py | 1 - .../chat/streaming/graph_stream/result.py | 1 - .../tasks/chat/streaming/handlers/tool_end.py | 3 - .../app/tasks/chat/streaming/relay/state.py | 1 - .../unit/services/test_memory_service.py | 67 -------------- .../chat/streaming/test_stream_output.py | 1 - 12 files changed, 2 insertions(+), 375 deletions(-) delete mode 100644 surfsense_backend/app/agents/new_chat/memory_extraction.py diff --git a/surfsense_backend/app/agents/new_chat/memory_extraction.py b/surfsense_backend/app/agents/new_chat/memory_extraction.py deleted file mode 100644 index d44b58f7b..000000000 --- 
a/surfsense_backend/app/agents/new_chat/memory_extraction.py +++ /dev/null @@ -1,78 +0,0 @@ -"""Background memory extraction for the SurfSense agent.""" - -from __future__ import annotations - -import logging -from typing import Any -from uuid import UUID - -from app.db import User, shielded_async_session -from app.services.memory import MemoryScope, extract_and_save - -logger = logging.getLogger(__name__) - - -async def extract_and_save_memory( - *, - user_message: str, - user_id: str | None, - llm: Any, -) -> None: - """Fire-and-forget personal memory extraction. - - The service uses structured output, so free-form ``NO_UPDATE`` text can no - longer be accidentally persisted as memory. - """ - if not user_id: - return - - try: - uid = UUID(user_id) if isinstance(user_id, str) else user_id - async with shielded_async_session() as session: - user = await session.get(User, uid) - actor_display_name = user.display_name if user else None - result = await extract_and_save( - scope=MemoryScope.USER, - target_id=uid, - user_message=user_message, - actor_display_name=actor_display_name, - session=session, - llm=llm, - ) - logger.info( - "Background memory extraction for user %s: %s", - uid, - result.status, - ) - except Exception: - logger.exception("Background user memory extraction failed") - - -async def extract_and_save_team_memory( - *, - user_message: str, - search_space_id: int | None, - llm: Any, - author_display_name: str | None = None, -) -> None: - """Fire-and-forget team-level memory extraction.""" - if not search_space_id: - return - - try: - async with shielded_async_session() as session: - result = await extract_and_save( - scope=MemoryScope.TEAM, - target_id=search_space_id, - user_message=user_message, - actor_display_name=author_display_name, - session=session, - llm=llm, - ) - logger.info( - "Background team memory extraction for space %s: %s", - search_space_id, - result.status, - ) - except Exception: - logger.exception("Background team memory 
extraction failed") diff --git a/surfsense_backend/app/services/memory/__init__.py b/surfsense_backend/app/services/memory/__init__.py index 27d0592fd..eef6559c2 100644 --- a/surfsense_backend/app/services/memory/__init__.py +++ b/surfsense_backend/app/services/memory/__init__.py @@ -4,7 +4,6 @@ from .schemas import MemoryLimits, MemoryRead from .service import ( MemoryScope, SaveResult, - extract_and_save, memory_limits, read_memory, reset_memory, @@ -24,7 +23,6 @@ __all__ = [ "MemoryRead", "MemoryScope", "SaveResult", - "extract_and_save", "memory_limits", "read_memory", "reset_memory", diff --git a/surfsense_backend/app/services/memory/prompts.py b/surfsense_backend/app/services/memory/prompts.py index fbf27fd08..25c09e9c5 100644 --- a/surfsense_backend/app/services/memory/prompts.py +++ b/surfsense_backend/app/services/memory/prompts.py @@ -18,93 +18,3 @@ RULES: {content} """ - -USER_MEMORY_EXTRACT_PROMPT = """\ -You are a memory extraction assistant. Analyze the user's message and decide \ -if it contains any long-term information worth persisting to personal memory. - -Worth remembering: preferences, background/identity, goals, projects, \ -instructions, tools/languages they use, decisions, expertise, workplace — \ -durable facts that will matter in future conversations. - -NOT worth remembering: greetings, one-off factual questions, session \ -logistics, ephemeral requests, follow-up clarifications with no new personal \ -info, things that only matter for the current task. - -If there is nothing durable to remember, choose `action = no_update`. - -If the message contains memorizable information, choose `action = save` and \ -return the FULL updated memory document with the new information merged into \ -existing content. - -FORMAT RULES FOR `updated_memory`: -- Markdown only. -- Every entry should be under a `##` heading. -- Recommended headings: `## Facts`, `## Preferences`, `## Instructions`. -- New bullets should use: `- YYYY-MM-DD: memory text`. 
-- If current memory uses legacy `(YYYY-MM-DD) [fact|pref|instr]` markers, - preserve the information but write the updated document in the new - heading-based format. -- Use the user's first name from `` when helpful, not "the user". -- Do not duplicate existing information. - -{user_name} - - -{current_memory} - - - -{user_message} -""" - -TEAM_MEMORY_EXTRACT_PROMPT = """\ -You are a team-memory extraction assistant. Analyze the latest message and \ -decide if it contains durable TEAM-level information worth persisting. - -Decision policy: -- Prioritize recall for durable team context, while avoiding personal-only facts. -- Do NOT require explicit consensus language. A direct team-level statement can - be stored if it is stable and broadly useful for future team chats. -- If evidence is weak or clearly tentative, choose `action = no_update`. - -Worth remembering (team-level only): -- Decisions and defaults that guide future team work -- Team conventions/standards (naming, review policy, coding norms) -- Stable org/project facts (locations, ownership, constraints) -- Long-lived architecture/process facts -- Ongoing priorities that are likely relevant beyond this turn - -NOT worth remembering: -- Personal preferences or biography of one person -- Questions, brainstorming, tentative ideas, or speculation -- One-off requests, status updates, TODOs, logistics for this session -- Information scoped only to a single ephemeral task - -If the message contains memorizable team information, choose `action = save` \ -and return the FULL updated team memory document with new facts merged into \ -existing content. - -FORMAT RULES FOR `updated_memory`: -- Markdown only. -- Every entry should be under a `##` heading. -- Recommended headings: `## Product Decisions`, `## Engineering Conventions`, - `## Project Facts`, `## Open Questions`. -- New bullets should use: `- YYYY-MM-DD: memory text`. 
-- If current memory uses legacy `(YYYY-MM-DD) [fact]` markers, preserve the - information but write the updated document in the new heading-based format. -- Do not create personal headings such as `## Preferences`, `## Instructions`, - or `## Personal Notes`. -- Preserve neutral team phrasing; avoid person-specific memory unless role-anchored. - - -{current_memory} - - - -{author} - - - -{user_message} -""" diff --git a/surfsense_backend/app/services/memory/schemas.py b/surfsense_backend/app/services/memory/schemas.py index 623e4aa93..78c69d800 100644 --- a/surfsense_backend/app/services/memory/schemas.py +++ b/surfsense_backend/app/services/memory/schemas.py @@ -2,9 +2,7 @@ from __future__ import annotations -from typing import Literal - -from pydantic import BaseModel, Field +from pydantic import BaseModel class MemoryLimits(BaseModel): @@ -19,19 +17,3 @@ class MemoryRead(BaseModel): memory_md: str limits: MemoryLimits - - -class MemoryExtractionDecision(BaseModel): - """Structured extraction result; avoids string sentinel parsing.""" - - action: Literal["no_update", "save"] = Field( - description="Choose no_update when nothing durable should be saved; choose save otherwise." 
- ) - reason: str | None = Field( - default=None, - description="Short reason for no_update, or brief summary of the memory update.", - ) - updated_memory: str | None = Field( - default=None, - description="The full updated markdown memory document when action is save.", - ) diff --git a/surfsense_backend/app/services/memory/service.py b/surfsense_backend/app/services/memory/service.py index dd4459e77..c33b91679 100644 --- a/surfsense_backend/app/services/memory/service.py +++ b/surfsense_backend/app/services/memory/service.py @@ -8,18 +8,13 @@ from enum import StrEnum from typing import Any, Literal from uuid import UUID -from langchain_core.messages import HumanMessage from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession from app.db import SearchSpace, User from app.services.memory.document import parse_memory_document, render_memory_document -from app.services.memory.prompts import ( - TEAM_MEMORY_EXTRACT_PROMPT, - USER_MEMORY_EXTRACT_PROMPT, -) from app.services.memory.rewrite import forced_rewrite -from app.services.memory.schemas import MemoryExtractionDecision, MemoryLimits +from app.services.memory.schemas import MemoryLimits from app.services.memory.validation import ( MEMORY_HARD_LIMIT, MEMORY_SOFT_LIMIT, @@ -234,74 +229,3 @@ async def reset_memory( session=session, llm=None, ) - - -async def extract_and_save( - *, - scope: MemoryScope | str, - target_id: str | int | UUID, - user_message: str, - actor_display_name: str | None, - session: AsyncSession, - llm: Any, -) -> SaveResult: - normalized = _normalize_scope(scope) - current_memory = await read_memory( - scope=normalized, - target_id=target_id, - session=session, - ) - - if normalized is MemoryScope.USER: - first_name = ( - actor_display_name.strip().split()[0] - if actor_display_name and actor_display_name.strip() - else "The user" - ) - prompt = USER_MEMORY_EXTRACT_PROMPT.format( - current_memory=current_memory or "(empty)", - user_message=user_message, - 
user_name=first_name, - ) - else: - prompt = TEAM_MEMORY_EXTRACT_PROMPT.format( - current_memory=current_memory or "(empty)", - author=actor_display_name or "Unknown team member", - user_message=user_message, - ) - - try: - structured = llm.with_structured_output(MemoryExtractionDecision) - decision = await structured.ainvoke( - [HumanMessage(content=prompt)], - config={"tags": ["surfsense:internal", "memory-extraction"]}, - ) - except Exception: - logger.exception("Structured memory extraction failed") - return SaveResult( - status="error", - message="Structured memory extraction failed.", - memory_md=current_memory, - ) - - if decision.action == "no_update": - return SaveResult( - status="no_op", - message=decision.reason or "No durable memory to persist.", - memory_md=current_memory, - ) - - if not decision.updated_memory: - return SaveResult( - status="error", - message="Structured memory extraction chose save without updated_memory.", - memory_md=current_memory, - ) - - return await save_memory( - scope=normalized, - target_id=target_id, - content=decision.updated_memory, - session=session, - llm=llm, - ) diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index 9a69b6164..564fd81de 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -39,10 +39,6 @@ from app.agents.new_chat.llm_config import ( load_agent_config, load_global_llm_config_by_id, ) -from app.agents.new_chat.memory_extraction import ( - extract_and_save_memory, - extract_and_save_team_memory, -) from app.agents.new_chat.mention_resolver import resolve_mentions, substitute_in_text from app.agents.new_chat.middleware.busy_mutex import ( end_turn, @@ -283,7 +279,6 @@ class StreamResult: accumulated_text: str = "" is_interrupted: bool = False sandbox_files: list[str] = field(default_factory=list) - agent_called_update_memory: bool = False request_id: str | None = 
None turn_id: str = "" filesystem_mode: str = "cloud" @@ -2208,36 +2203,6 @@ async def stream_new_chat( }, ) - # Fire background memory extraction if the agent didn't handle it. - # Shared threads write to team memory; private threads write to user memory. - if not stream_result.agent_called_update_memory: - memory_seed = user_query.strip() or ( - f"[{len(user_image_data_urls or [])} image(s)]" - if user_image_data_urls - else "(message)" - ) - if visibility == ChatVisibility.SEARCH_SPACE: - task = asyncio.create_task( - extract_and_save_team_memory( - user_message=memory_seed, - search_space_id=search_space_id, - llm=llm, - author_display_name=current_user_display_name, - ) - ) - _background_tasks.add(task) - task.add_done_callback(_background_tasks.discard) - elif user_id: - task = asyncio.create_task( - extract_and_save_memory( - user_message=memory_seed, - user_id=user_id, - llm=llm, - ) - ) - _background_tasks.add(task) - task.add_done_callback(_background_tasks.discard) - # Finish the step and message yield streaming_service.format_data("turn-status", {"status": "idle"}) yield streaming_service.format_finish_step() diff --git a/surfsense_backend/app/tasks/chat/streaming/graph_stream/event_stream.py b/surfsense_backend/app/tasks/chat/streaming/graph_stream/event_stream.py index 9a309f9d7..50e7a1360 100644 --- a/surfsense_backend/app/tasks/chat/streaming/graph_stream/event_stream.py +++ b/surfsense_backend/app/tasks/chat/streaming/graph_stream/event_stream.py @@ -48,4 +48,3 @@ async def stream_output( yield frame result.accumulated_text = state.accumulated_text - result.agent_called_update_memory = state.called_update_memory diff --git a/surfsense_backend/app/tasks/chat/streaming/graph_stream/result.py b/surfsense_backend/app/tasks/chat/streaming/graph_stream/result.py index 391f14f24..1d3f1e88a 100644 --- a/surfsense_backend/app/tasks/chat/streaming/graph_stream/result.py +++ b/surfsense_backend/app/tasks/chat/streaming/graph_stream/result.py @@ -11,7 +11,6 @@ 
class StreamingResult: accumulated_text: str = "" is_interrupted: bool = False sandbox_files: list[str] = field(default_factory=list) - agent_called_update_memory: bool = False request_id: str | None = None turn_id: str = "" filesystem_mode: str = "cloud" diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py index 57ab617c5..ad4a17d08 100644 --- a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py @@ -36,9 +36,6 @@ def iter_tool_end_frames( raw_output = event.get("data", {}).get("output", "") staged_file_path = state.file_path_by_run.pop(run_id, None) if run_id else None - if tool_name == "update_memory": - state.called_update_memory = True - if hasattr(raw_output, "content"): content = raw_output.content if isinstance(content, str): diff --git a/surfsense_backend/app/tasks/chat/streaming/relay/state.py b/surfsense_backend/app/tasks/chat/streaming/relay/state.py index 27898403d..f99fc8edb 100644 --- a/surfsense_backend/app/tasks/chat/streaming/relay/state.py +++ b/surfsense_backend/app/tasks/chat/streaming/relay/state.py @@ -32,7 +32,6 @@ class AgentEventRelayState: last_active_step_items: list[str] = field(default_factory=list) just_finished_tool: bool = False active_tool_depth: int = 0 - called_update_memory: bool = False current_reasoning_id: str | None = None pending_tool_call_chunks: list[dict[str, Any]] = field(default_factory=list) lc_tool_call_id_by_run: dict[str, str] = field(default_factory=dict) diff --git a/surfsense_backend/tests/unit/services/test_memory_service.py b/surfsense_backend/tests/unit/services/test_memory_service.py index 0a45bf3aa..94918d25b 100644 --- a/surfsense_backend/tests/unit/services/test_memory_service.py +++ b/surfsense_backend/tests/unit/services/test_memory_service.py @@ -6,11 +6,9 @@ import pytest from app.services.memory import ( MemoryScope, - 
extract_and_save, reset_memory, save_memory, ) -from app.services.memory.schemas import MemoryExtractionDecision pytestmark = pytest.mark.unit @@ -31,17 +29,6 @@ class _FakeSession: self.rollback_calls += 1 -class _StructuredLLM: - def __init__(self, decision: MemoryExtractionDecision) -> None: - self.decision = decision - - def with_structured_output(self, _schema): - return self - - async def ainvoke(self, *_args, **_kwargs): - return self.decision - - @pytest.mark.asyncio async def test_save_memory_saves_heading_based_memory(monkeypatch) -> None: target = SimpleNamespace(memory_md="") @@ -150,57 +137,3 @@ async def test_reset_memory_clears_memory(monkeypatch) -> None: assert result.status == "saved" assert target.memory_md == "" - - -@pytest.mark.asyncio -async def test_extract_and_save_no_update_does_not_commit(monkeypatch) -> None: - target = SimpleNamespace(memory_md="## Facts\n- 2026-05-19: Existing\n") - session = _FakeSession() - - async def fake_load_target(**_kwargs): - return target - - monkeypatch.setattr("app.services.memory.service._load_target", fake_load_target) - - result = await extract_and_save( - scope=MemoryScope.USER, - target_id="00000000-0000-0000-0000-000000000000", - user_message="hello", - actor_display_name="Anish", - session=session, - llm=_StructuredLLM( - MemoryExtractionDecision(action="no_update", reason="Greeting only") - ), - ) - - assert result.status == "no_op" - assert session.commit_calls == 0 - - -@pytest.mark.asyncio -async def test_extract_and_save_persists_structured_update(monkeypatch) -> None: - target = SimpleNamespace(memory_md="") - session = _FakeSession() - - async def fake_load_target(**_kwargs): - return target - - monkeypatch.setattr("app.services.memory.service._load_target", fake_load_target) - - result = await extract_and_save( - scope=MemoryScope.USER, - target_id="00000000-0000-0000-0000-000000000000", - user_message="I work on SurfSense", - actor_display_name="Anish", - session=session, - 
llm=_StructuredLLM( - MemoryExtractionDecision( - action="save", - updated_memory="## Facts\n- 2026-05-19: Anish works on SurfSense\n", - ) - ), - ) - - assert result.status == "saved" - assert "SurfSense" in target.memory_md - assert session.commit_calls == 1 diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_stream_output.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_stream_output.py index c0123b76d..c53dad5fb 100644 --- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_stream_output.py +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_stream_output.py @@ -89,7 +89,6 @@ async def test_stream_output_emits_text_lifecycle_and_updates_result() -> None: "text_end:text-1", ] assert result.accumulated_text == "Hello world" - assert result.agent_called_update_memory is False async def test_stream_output_passes_runtime_context_to_agent() -> None: From c3db25302b50ff84bd0971697b0841a0071b118d Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 20 May 2026 11:03:08 +0200 Subject: [PATCH 46/63] perf(chat): kill auto-pin preflight + speculative build, rely on reactive 429 recovery The preflight pattern probed the LLM with a 1-token ping before each cold turn (when requested_llm_config_id==0, llm_config_id<0, and the 45s healthy TTL had expired) to detect 429s before fanning out into planner/classifier/title-gen. To absorb its ~1-5s RTT cost we built the agent speculatively in parallel; on 429 we discarded the build and repinned. Three problems with that design: 1. False security. Provider rate limits are token-bucket. A 1-token ping consumes ~5 tokens; the real request consumes 10-50K. The probe can return 200 while the real call still 429s. 2. Pure overhead in the common case. On warm-agent-cache turns the probe dominates wall time: ~2.5s of TTFT pure tax for ~99% of users who never see a 429. 3. 
The in-stream recovery loop (catch of _is_provider_rate_limited gated by not _first_event_logged) already does the right thing reactively: mark_runtime_cooldown -> resolve_or_get_pinned_llm_config_id with exclude_config_ids={previous} -> rebuild agent -> retry the stream. Preflight was never the only safety net; it was a redundant probe in front of one. Changes: - Delete _preflight_llm, _settle_speculative_agent_build, and the _PREFLIGHT_TIMEOUT_SEC / _PREFLIGHT_MAX_TOKENS constants. - Drop the parallel agent_build_task / preflight_task plumbing in both stream_new_chat and stream_resume_chat; build the agent inline with await _build_main_agent_for_thread(...). - Drop the unused is_recently_healthy / mark_healthy imports here (still exported from auto_model_pin_service since OpenRouter catalogue refresh and a few tests reference clear_healthy). - Remove the obsolete preflight + settle-speculative tests from test_stream_new_chat_contract.py. Net: -447 LOC. ~2.5s removed from TTFT on every cold preflight-eligible turn. 429 recovery path is unchanged - same repin/rebuild/retry, just not paid in advance on the healthy path. 
--- .../app/tasks/chat/stream_new_chat.py | 405 ++---------------- .../unit/test_stream_new_chat_contract.py | 122 ------ 2 files changed, 40 insertions(+), 487 deletions(-) diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index 3d639affb..da84e7350 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -64,8 +64,6 @@ from app.db import ( ) from app.prompts import TITLE_GENERATION_PROMPT from app.services.auto_model_pin_service import ( - is_recently_healthy, - mark_healthy, mark_runtime_cooldown, resolve_or_get_pinned_llm_config_id, ) @@ -502,54 +500,6 @@ def _is_provider_rate_limited(exc: BaseException) -> bool: ) -_PREFLIGHT_TIMEOUT_SEC: float = 2.5 -_PREFLIGHT_MAX_TOKENS: int = 1 - - -async def _preflight_llm(llm: Any) -> None: - """Issue a minimal completion to confirm the pinned model isn't 429'ing. - - Used before agent build / planner / classifier / title-gen so a known-bad - free OpenRouter deployment is detected and repinned before it cascades - into multiple wasted internal calls. The probe is intentionally cheap: - one token, low timeout, tagged ``surfsense:internal`` so token tracking - and SSE pipelines treat it as overhead rather than user output. - - Raises the original exception when the provider responds with a - rate-limit-shaped error so the caller can drive the cooldown/repin - branch via :func:`_is_provider_rate_limited`. Other transient failures - are swallowed — the caller continues to the normal stream path and the - in-stream recovery loop remains the safety net. - """ - from litellm import acompletion - - model = getattr(llm, "model", None) - if not model or model == "auto": - # Auto-mode router doesn't have a single deployment to ping; the - # router itself handles per-deployment rate-limit accounting. 
- return - - try: - await acompletion( - model=model, - messages=[{"role": "user", "content": "ping"}], - api_key=getattr(llm, "api_key", None), - api_base=getattr(llm, "api_base", None), - max_tokens=_PREFLIGHT_MAX_TOKENS, - timeout=_PREFLIGHT_TIMEOUT_SEC, - stream=False, - metadata={"tags": ["surfsense:internal", "auto-pin-preflight"]}, - ) - except Exception as exc: - if _is_provider_rate_limited(exc): - raise - logging.getLogger(__name__).debug( - "auto_pin_preflight non_rate_limit_error model=%s err=%s", - model, - exc, - ) - - async def _build_main_agent_for_thread( agent_factory: Any, *, @@ -567,9 +517,9 @@ async def _build_main_agent_for_thread( disabled_tools: list[str] | None = None, mentioned_document_ids: list[int] | None = None, ) -> Any: - """Single (re)build path so the agent factory cannot drift across - initial build, preflight repin, and mid-stream 429 recovery for one - ``thread_id``: a graph swap mid-turn would corrupt checkpointer state.""" + """Single (re)build path so the agent factory cannot drift across the + initial build and mid-stream 429 recovery for one ``thread_id``: a + graph swap mid-turn would corrupt checkpointer state.""" return await agent_factory( llm=llm, search_space_id=search_space_id, @@ -587,29 +537,6 @@ async def _build_main_agent_for_thread( ) -async def _settle_speculative_agent_build(task: asyncio.Task[Any]) -> None: - """Wait for a discarded speculative agent build to release shared state. - - Used by the parallel preflight + agent-build path. The speculative build - closes over the request-scoped ``AsyncSession`` (for the brief connector - discovery / tool-factory window before its CPU work moves into a worker - thread). 
If preflight reports a 429 we want to fall back to the original - repin → reload → rebuild path, but we MUST NOT touch ``session`` again - until any in-flight session work owned by the speculative build has - fully settled — :class:`sqlalchemy.ext.asyncio.AsyncSession` is not - concurrency-safe and the same hazard cost us a hard ``InvalidRequestError`` - earlier in this PR (see ``connector_service`` parallel-gather revert). - - We simply ``await`` the task and swallow any exception: in this path the - build's outcome is irrelevant — success populates the agent cache (a free - side effect), failure is discarded. The wasted CPU is acceptable since - 429 fallbacks are rare and the original sequential code also paid the - full build cost on the same path. - """ - with contextlib.suppress(BaseException): - await task - - def _classify_stream_exception( exc: Exception, *, @@ -1237,39 +1164,6 @@ async def stream_new_chat( yield streaming_service.format_done() return - # Auto-mode preflight ping. Runs ONLY for thread-pinned auto cfgs - # (negative ids selected via ``resolve_or_get_pinned_llm_config_id``) - # whose health hasn't already been confirmed within the TTL window. - # Detecting a 429 here lets us repin BEFORE the planner/classifier/ - # title-generation LLM calls fan out and each independently hit the - # same upstream rate limit. - # - # PERF: preflight is a network round-trip to the LLM provider (~1-5s) - # and is independent of the agent build (CPU-bound, ~5-7s). They used - # to run sequentially → ``preflight + build`` on cold cache = 11.5s. - # We now kick off preflight as a background task FIRST, then run the - # synchronous setup work and the agent build in parallel. In the - # success path (the common case) total wall time drops to roughly - # ``max(preflight, build)`` — the preflight finishes during the - # agent compile and we just consume its result. 
In the rare 429 - # path the speculative build is awaited to completion (so its - # session usage is fully released) via - # :func:`_settle_speculative_agent_build`, then discarded, and - # we fall back to the original repin-and-rebuild flow. - preflight_needed = ( - requested_llm_config_id == 0 - and llm_config_id < 0 - and not is_recently_healthy(llm_config_id) - ) - preflight_task: asyncio.Task[None] | None = None - _t_preflight = 0.0 - if preflight_needed: - _t_preflight = time.perf_counter() - preflight_task = asyncio.create_task( - _preflight_llm(llm), - name=f"auto_pin_preflight:{llm_config_id}", - ) - # Create connector service _t0 = time.perf_counter() connector_service = ConnectorService(session, search_space_id=search_space_id) @@ -1303,136 +1197,26 @@ async def stream_new_chat( if use_multi_agent else create_surfsense_deep_agent ) - # Speculative agent build — runs in parallel with the preflight - # task (if any). Built with the *current* ``llm`` / ``agent_config``; - # if preflight reports 429 we will discard this future and rebuild - # against the freshly pinned config below. - agent_build_task = asyncio.create_task( - _build_main_agent_for_thread( - agent_factory, - llm=llm, - search_space_id=search_space_id, - db_session=session, - connector_service=connector_service, - checkpointer=checkpointer, - user_id=user_id, - thread_id=chat_id, - agent_config=agent_config, - firecrawl_api_key=firecrawl_api_key, - thread_visibility=visibility, - filesystem_selection=filesystem_selection, - disabled_tools=disabled_tools, - mentioned_document_ids=mentioned_document_ids, - ), - name="agent_build:stream_new_chat", + # Build the agent inline. Provider 429s surface through the + # in-stream recovery loop below (``_is_provider_rate_limited``), + # which repins the thread to an eligible alternative config and + # rebuilds the agent before the user sees any output. 
+ agent = await _build_main_agent_for_thread( + agent_factory, + llm=llm, + search_space_id=search_space_id, + db_session=session, + connector_service=connector_service, + checkpointer=checkpointer, + user_id=user_id, + thread_id=chat_id, + agent_config=agent_config, + firecrawl_api_key=firecrawl_api_key, + thread_visibility=visibility, + filesystem_selection=filesystem_selection, + disabled_tools=disabled_tools, + mentioned_document_ids=mentioned_document_ids, ) - - agent: Any = None - if preflight_task is not None: - try: - await preflight_task - mark_healthy(llm_config_id) - _perf_log.info( - "[stream_new_chat] auto_pin_preflight ok config_id=%s took=%.3fs (parallel)", - llm_config_id, - time.perf_counter() - _t_preflight, - ) - except Exception as preflight_exc: - # Both branches below need the session: the non-429 path - # may unwind via cleanup that uses ``session``, and the - # 429 path explicitly calls ``resolve_or_get_pinned_llm_config_id`` - # against it. Wait for the speculative build to release its - # session usage before we proceed. - await _settle_speculative_agent_build(agent_build_task) - if not _is_provider_rate_limited(preflight_exc): - raise - # 429: speculative agent is discarded; run the original - # repin → reload → rebuild path against the freshly - # pinned config. 
- previous_config_id = llm_config_id - mark_runtime_cooldown( - previous_config_id, reason="preflight_rate_limited" - ) - try: - llm_config_id = ( - await resolve_or_get_pinned_llm_config_id( - session, - thread_id=chat_id, - search_space_id=search_space_id, - user_id=user_id, - selected_llm_config_id=0, - exclude_config_ids={previous_config_id}, - requires_image_input=_requires_image_input, - ) - ).resolved_llm_config_id - except ValueError as pin_error: - yield _emit_stream_error( - message=str(pin_error), - error_kind="server_error", - error_code="SERVER_ERROR", - ) - yield streaming_service.format_done() - return - - llm, agent_config, llm_load_error = await _load_llm_bundle( - llm_config_id - ) - if llm_load_error or not llm: - yield _emit_stream_error( - message=llm_load_error or "Failed to create LLM instance", - error_kind="server_error", - error_code="SERVER_ERROR", - ) - yield streaming_service.format_done() - return - # Trust the freshly-resolved cfg for the remainder of this - # turn rather than recursing into another preflight; the - # in-stream 429 recovery loop is still in place as the - # safety net if even this fallback hits an upstream cap. - mark_healthy(llm_config_id) - _log_chat_stream_error( - flow=flow, - error_kind="rate_limited", - error_code="RATE_LIMITED", - severity="info", - is_expected=True, - request_id=request_id, - thread_id=chat_id, - search_space_id=search_space_id, - user_id=user_id, - message=( - "Auto-pinned model failed preflight; switched to another " - "eligible model and continuing." - ), - extra={ - "auto_runtime_recover": True, - "preflight": True, - "previous_config_id": previous_config_id, - "fallback_config_id": llm_config_id, - }, - ) - # Rebuild against the new llm/agent_config. Sequential - # here because we no longer have anything to overlap with. 
- agent = await agent_factory( - llm=llm, - search_space_id=search_space_id, - db_session=session, - connector_service=connector_service, - checkpointer=checkpointer, - user_id=user_id, - thread_id=chat_id, - agent_config=agent_config, - firecrawl_api_key=firecrawl_api_key, - thread_visibility=visibility, - disabled_tools=disabled_tools, - mentioned_document_ids=mentioned_document_ids, - filesystem_selection=filesystem_selection, - ) - - if agent is None: - # Either no preflight was needed, or preflight succeeded — - # in both cases the speculative build is the agent we want. - agent = await agent_build_task _perf_log.info( "[stream_new_chat] Agent created in %.3fs", time.perf_counter() - _t0 ) @@ -2678,25 +2462,6 @@ async def stream_resume_chat( yield streaming_service.format_done() return - # Auto-mode preflight ping (resume path). Mirrors ``stream_new_chat``: - # one cheap probe before the agent is rebuilt so a 429'd pin gets - # repinned without burning planner/classifier/title calls first. - # See ``stream_new_chat`` for the full rationale on the speculative - # parallel build pattern below. 
- preflight_needed = ( - requested_llm_config_id == 0 - and llm_config_id < 0 - and not is_recently_healthy(llm_config_id) - ) - preflight_task: asyncio.Task[None] | None = None - _t_preflight = 0.0 - if preflight_needed: - _t_preflight = time.perf_counter() - preflight_task = asyncio.create_task( - _preflight_llm(llm), - name=f"auto_pin_preflight_resume:{llm_config_id}", - ) - _t0 = time.perf_counter() connector_service = ConnectorService(session, search_space_id=search_space_id) @@ -2726,115 +2491,25 @@ async def stream_resume_chat( if _app_config.MULTI_AGENT_CHAT_ENABLED else create_surfsense_deep_agent ) - agent_build_task = asyncio.create_task( - _build_main_agent_for_thread( - agent_factory, - llm=llm, - search_space_id=search_space_id, - db_session=session, - connector_service=connector_service, - checkpointer=checkpointer, - user_id=user_id, - thread_id=chat_id, - agent_config=agent_config, - firecrawl_api_key=firecrawl_api_key, - thread_visibility=visibility, - filesystem_selection=filesystem_selection, - disabled_tools=disabled_tools, - ), - name="agent_build:stream_resume", + # Build the agent inline. Provider 429s are handled by the + # in-stream recovery loop, which repins to an eligible + # alternative config and rebuilds the agent before the user sees + # any output. 
+ agent = await _build_main_agent_for_thread( + agent_factory, + llm=llm, + search_space_id=search_space_id, + db_session=session, + connector_service=connector_service, + checkpointer=checkpointer, + user_id=user_id, + thread_id=chat_id, + agent_config=agent_config, + firecrawl_api_key=firecrawl_api_key, + thread_visibility=visibility, + filesystem_selection=filesystem_selection, + disabled_tools=disabled_tools, ) - - agent: Any = None - if preflight_task is not None: - try: - await preflight_task - mark_healthy(llm_config_id) - _perf_log.info( - "[stream_resume] auto_pin_preflight ok config_id=%s took=%.3fs (parallel)", - llm_config_id, - time.perf_counter() - _t_preflight, - ) - except Exception as preflight_exc: - # Same session-safety rationale as ``stream_new_chat``. - await _settle_speculative_agent_build(agent_build_task) - if not _is_provider_rate_limited(preflight_exc): - raise - previous_config_id = llm_config_id - mark_runtime_cooldown( - previous_config_id, reason="preflight_rate_limited" - ) - try: - llm_config_id = ( - await resolve_or_get_pinned_llm_config_id( - session, - thread_id=chat_id, - search_space_id=search_space_id, - user_id=user_id, - selected_llm_config_id=0, - exclude_config_ids={previous_config_id}, - ) - ).resolved_llm_config_id - except ValueError as pin_error: - yield _emit_stream_error( - message=str(pin_error), - error_kind="server_error", - error_code="SERVER_ERROR", - ) - yield streaming_service.format_done() - return - - llm, agent_config, llm_load_error = await _load_llm_bundle( - llm_config_id - ) - if llm_load_error or not llm: - yield _emit_stream_error( - message=llm_load_error or "Failed to create LLM instance", - error_kind="server_error", - error_code="SERVER_ERROR", - ) - yield streaming_service.format_done() - return - mark_healthy(llm_config_id) - _log_chat_stream_error( - flow="resume", - error_kind="rate_limited", - error_code="RATE_LIMITED", - severity="info", - is_expected=True, - request_id=request_id, - 
thread_id=chat_id, - search_space_id=search_space_id, - user_id=user_id, - message=( - "Auto-pinned model failed preflight; switched to another " - "eligible model and continuing." - ), - extra={ - "auto_runtime_recover": True, - "preflight": True, - "previous_config_id": previous_config_id, - "fallback_config_id": llm_config_id, - }, - ) - agent = await _build_main_agent_for_thread( - agent_factory, - llm=llm, - search_space_id=search_space_id, - db_session=session, - connector_service=connector_service, - checkpointer=checkpointer, - user_id=user_id, - thread_id=chat_id, - agent_config=agent_config, - firecrawl_api_key=firecrawl_api_key, - thread_visibility=visibility, - filesystem_selection=filesystem_selection, - disabled_tools=disabled_tools, - ) - - if agent is None: - agent = await agent_build_task _perf_log.info( "[stream_resume] Agent created in %.3fs", time.perf_counter() - _t0 ) diff --git a/surfsense_backend/tests/unit/test_stream_new_chat_contract.py b/surfsense_backend/tests/unit/test_stream_new_chat_contract.py index 208204ca9..19b06201f 100644 --- a/surfsense_backend/tests/unit/test_stream_new_chat_contract.py +++ b/surfsense_backend/tests/unit/test_stream_new_chat_contract.py @@ -209,128 +209,6 @@ def test_stream_exception_classifies_openrouter_429_payload(): assert extra is None -@pytest.mark.asyncio -async def test_preflight_swallows_non_rate_limit_errors_and_re_raises_429(monkeypatch): - """``_preflight_llm`` is best-effort. - - - On rate-limit shaped exceptions (provider 429) it MUST re-raise so the - caller can drive the cooldown/repin branch. - - On any other transient failure it MUST swallow the error so the normal - stream path continues without surfacing preflight noise to the user. 
- """ - from types import SimpleNamespace - - from app.tasks.chat.stream_new_chat import _preflight_llm - - class _RateLimitedError(Exception): - """Class-name carries 'RateLimit' so _is_provider_rate_limited triggers.""" - - rate_calls: list[dict] = [] - other_calls: list[dict] = [] - - async def _fake_acompletion_429(**kwargs): - rate_calls.append(kwargs) - raise _RateLimitedError("simulated 429") - - async def _fake_acompletion_other(**kwargs): - other_calls.append(kwargs) - raise RuntimeError("some unrelated transient failure") - - fake_llm = SimpleNamespace( - model="openrouter/google/gemma-4-31b-it:free", - api_key="test", - api_base=None, - ) - - import litellm # type: ignore[import-not-found] - - monkeypatch.setattr(litellm, "acompletion", _fake_acompletion_429) - with pytest.raises(_RateLimitedError): - await _preflight_llm(fake_llm) - assert len(rate_calls) == 1 - assert rate_calls[0]["max_tokens"] == 1 - assert rate_calls[0]["stream"] is False - - monkeypatch.setattr(litellm, "acompletion", _fake_acompletion_other) - # MUST NOT raise: non-rate-limit failures are swallowed. - await _preflight_llm(fake_llm) - assert len(other_calls) == 1 - - -@pytest.mark.asyncio -async def test_preflight_skipped_for_auto_router_model(): - """Router-mode ``model='auto'`` has no single deployment to ping; the - LiteLLM router itself owns per-deployment rate-limit accounting, so the - preflight helper must short-circuit instead of issuing a probe.""" - from types import SimpleNamespace - - from app.tasks.chat.stream_new_chat import _preflight_llm - - fake_llm = SimpleNamespace(model="auto", api_key="x", api_base=None) - # Should return without raising or making any LiteLLM call. - await _preflight_llm(fake_llm) - - -@pytest.mark.asyncio -async def test_settle_speculative_agent_build_swallows_exceptions(): - """``_settle_speculative_agent_build`` MUST always return cleanly so the - caller can safely re-touch the request-scoped session afterwards. 
- - The helper guards the parallel preflight + agent-build path: when the - speculative build is being discarded (429 or non-429 preflight failure) - we await it solely to release any in-flight ``AsyncSession`` usage — - the build's outcome is irrelevant. Any exception (including - ``CancelledError``) leaking out would skip the caller's recovery flow - and re-introduce the very session-concurrency hazard the helper exists - to prevent. - """ - import asyncio - - from app.tasks.chat.stream_new_chat import _settle_speculative_agent_build - - async def _raises() -> None: - raise RuntimeError("speculative build crashed") - - async def _succeeds() -> str: - return "agent" - - async def _slow() -> None: - await asyncio.sleep(0.05) - - for coro in (_raises(), _succeeds(), _slow()): - task = asyncio.create_task(coro) - await _settle_speculative_agent_build(task) - assert task.done() - - -@pytest.mark.asyncio -async def test_settle_speculative_agent_build_handles_already_done_task(): - """Done tasks (success or failure) must still be settled without raising.""" - import asyncio - - from app.tasks.chat.stream_new_chat import _settle_speculative_agent_build - - async def _ok() -> str: - return "ok" - - async def _bad() -> None: - raise ValueError("nope") - - ok_task = asyncio.create_task(_ok()) - bad_task = asyncio.create_task(_bad()) - # Drive both to completion before settling. - await asyncio.sleep(0) - await asyncio.sleep(0) - - await _settle_speculative_agent_build(ok_task) - await _settle_speculative_agent_build(bad_task) - assert ok_task.result() == "ok" - # ``bad_task`` exception was consumed by the settle helper; calling - # ``.exception()`` after the fact must still return the original error - # (the helper observes it but doesn't clear it). 
- assert isinstance(bad_task.exception(), ValueError) - - def test_stream_exception_classifies_thread_busy(): exc = BusyError(request_id="thread-123") kind, code, severity, is_expected, user_message, extra = _classify_stream_exception( From 8c9be9796a93a88022d9c8edbe6eed7d1ac57a4f Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 20 May 2026 15:03:35 +0530 Subject: [PATCH 47/63] feat: add no-update sentinel handling to save_memory function and corresponding unit tests --- .../app/services/memory/service.py | 16 +++++++ .../unit/services/test_memory_service.py | 48 +++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/surfsense_backend/app/services/memory/service.py b/surfsense_backend/app/services/memory/service.py index c33b91679..feca000c9 100644 --- a/surfsense_backend/app/services/memory/service.py +++ b/surfsense_backend/app/services/memory/service.py @@ -29,6 +29,15 @@ from app.services.memory.validation import ( logger = logging.getLogger(__name__) +_NO_UPDATE_SENTINELS = frozenset( + { + "NO_UPDATE", + "NO UPDATE", + "NO_CHANGE", + "NO CHANGE", + } +) + class MemoryScope(StrEnum): USER = "user" @@ -149,6 +158,13 @@ async def save_memory( notice: str | None = None warnings: list[str] = [] + if next_content.upper() in _NO_UPDATE_SENTINELS: + return SaveResult( + status="no_op", + message="No memory update requested.", + memory_md=old_memory, + ) + if len(next_content) > MEMORY_HARD_LIMIT and llm is not None: rewritten = await forced_rewrite(next_content, llm) if rewritten is not None and len(rewritten) < len(next_content): diff --git a/surfsense_backend/tests/unit/services/test_memory_service.py b/surfsense_backend/tests/unit/services/test_memory_service.py index 94918d25b..820e6aa28 100644 --- a/surfsense_backend/tests/unit/services/test_memory_service.py +++ b/surfsense_backend/tests/unit/services/test_memory_service.py @@ -94,6 +94,54 @@ async def 
test_save_memory_rejects_long_no_heading_payload(monkeypatch) -> None: assert target.memory_md.startswith("## Facts") +@pytest.mark.asyncio +async def test_save_memory_no_update_sentinel_is_no_op(monkeypatch) -> None: + existing = "## Preferences\n- 2026-05-20: Existing preference\n" + target = SimpleNamespace(memory_md=existing) + session = _FakeSession() + + async def fake_load_target(**_kwargs): + return target + + monkeypatch.setattr("app.services.memory.service._load_target", fake_load_target) + + result = await save_memory( + scope=MemoryScope.USER, + target_id="00000000-0000-0000-0000-000000000000", + content="NO_UPDATE", + session=session, + ) + + assert result.status == "no_op" + assert result.memory_md == existing + assert target.memory_md == existing + assert session.commit_calls == 0 + + +@pytest.mark.asyncio +async def test_save_memory_no_update_sentinel_is_case_insensitive(monkeypatch) -> None: + existing = "## Preferences\n- 2026-05-20: Existing preference\n" + target = SimpleNamespace(memory_md=existing) + session = _FakeSession() + + async def fake_load_target(**_kwargs): + return target + + monkeypatch.setattr("app.services.memory.service._load_target", fake_load_target) + + result = await save_memory( + scope=MemoryScope.USER, + target_id="00000000-0000-0000-0000-000000000000", + content=" no update ", + session=session, + ) + + assert result.status == "no_op" + assert result.memory_md == existing + assert target.memory_md == existing + assert session.commit_calls == 0 + + @pytest.mark.asyncio async def test_save_memory_grandfathers_existing_team_personal_heading( monkeypatch, From 71dead0406cf93a53029e08c3f6433d056bc6998 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 20 May 2026 11:42:52 +0200 Subject: [PATCH 48/63] perf(kb-planner): route internal planner calls to dedicated small/fast LLM Adds an optional planner LLM role wired through KnowledgePriorityMiddleware so KB query rewriting, date extraction, and recency classification run on a 
cheap model (e.g. gpt-4o-mini, Haiku, Azure nano) instead of the user's chat LLM. Operators opt in by setting is_planner: true on exactly one global config; without it, behavior is unchanged. --- .../main_agent/knowledge_priority.py | 2 + .../app/agents/new_chat/chat_deepagent.py | 2 + .../new_chat/middleware/knowledge_search.py | 30 +++++++---- surfsense_backend/app/config/__init__.py | 13 +++++ .../app/config/global_llm_config.example.yaml | 53 +++++++++++++++++++ surfsense_backend/app/services/llm_service.py | 33 ++++++++++++ 6 files changed, 123 insertions(+), 10 deletions(-) diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/knowledge_priority.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/knowledge_priority.py index fcdb1c61e..27cee8b37 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/knowledge_priority.py +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/knowledge_priority.py @@ -6,6 +6,7 @@ from langchain_core.language_models import BaseChatModel from app.agents.new_chat.filesystem_selection import FilesystemMode from app.agents.new_chat.middleware import KnowledgePriorityMiddleware +from app.services.llm_service import get_planner_llm def build_knowledge_priority_mw( @@ -19,6 +20,7 @@ def build_knowledge_priority_mw( ) -> KnowledgePriorityMiddleware: return KnowledgePriorityMiddleware( llm=llm, + planner_llm=get_planner_llm(), search_space_id=search_space_id, filesystem_mode=filesystem_mode, available_connectors=available_connectors, diff --git a/surfsense_backend/app/agents/new_chat/chat_deepagent.py b/surfsense_backend/app/agents/new_chat/chat_deepagent.py index 605c31416..f8db333ba 100644 --- a/surfsense_backend/app/agents/new_chat/chat_deepagent.py +++ b/surfsense_backend/app/agents/new_chat/chat_deepagent.py @@ -102,6 +102,7 @@ from app.agents.new_chat.tools.registry import ( ) from app.db import ChatVisibility from 
app.services.connector_service import ConnectorService +from app.services.llm_service import get_planner_llm from app.utils.perf import get_perf_logger _perf_log = get_perf_logger() @@ -1077,6 +1078,7 @@ def _build_compiled_agent_blocking( else None, KnowledgePriorityMiddleware( llm=llm, + planner_llm=get_planner_llm(), search_space_id=search_space_id, filesystem_mode=filesystem_mode, available_connectors=available_connectors, diff --git a/surfsense_backend/app/agents/new_chat/middleware/knowledge_search.py b/surfsense_backend/app/agents/new_chat/middleware/knowledge_search.py index 98bbf3bd7..77b413940 100644 --- a/surfsense_backend/app/agents/new_chat/middleware/knowledge_search.py +++ b/surfsense_backend/app/agents/new_chat/middleware/knowledge_search.py @@ -579,6 +579,7 @@ class KnowledgePriorityMiddleware(AgentMiddleware): # type: ignore[type-arg] self, *, llm: BaseChatModel | None = None, + planner_llm: BaseChatModel | None = None, search_space_id: int, filesystem_mode: FilesystemMode = FilesystemMode.CLOUD, available_connectors: list[str] | None = None, @@ -588,6 +589,15 @@ class KnowledgePriorityMiddleware(AgentMiddleware): # type: ignore[type-arg] inject_system_message: bool = True, # For backwards compatibility ) -> None: self.llm = llm + # The planner LLM handles short, structured internal tasks (query + # rewriting, date extraction, recency classification). When an + # operator marks a global config ``is_planner: true`` we route + # those calls to a cheap/fast model (e.g. gpt-4o-mini, Haiku, Azure + # gpt-5.x-nano) instead of the user's chat LLM — those classification + # tasks don't need frontier-tier capability. Falls back to the chat + # LLM when no planner config is wired up so deployments without one + # keep working unchanged. 
+ self.planner_llm = planner_llm or llm self.search_space_id = search_space_id self.filesystem_mode = filesystem_mode self.available_connectors = available_connectors @@ -598,7 +608,7 @@ class KnowledgePriorityMiddleware(AgentMiddleware): # type: ignore[type-arg] # Build the kb-planner private Runnable ONCE here so we don't pay # the ``create_agent`` compile cost (50-200ms) on every turn. # Disabled by default behind ``enable_kb_planner_runnable``; when - # off the planner falls back to the legacy ``self.llm.ainvoke`` + # off the planner falls back to the legacy ``planner_llm.ainvoke`` # path. self._planner: Runnable | None = None self._planner_compile_failed = False @@ -608,7 +618,7 @@ class KnowledgePriorityMiddleware(AgentMiddleware): # type: ignore[type-arg] Returns ``None`` when the feature flag is disabled, when the LLM is unavailable, or when ``create_agent`` raises (we fall back to the - legacy ``self.llm.ainvoke`` path in that case). Compilation happens + legacy ``planner_llm.ainvoke`` path in that case). Compilation happens lazily on first call, then memoized via ``self._planner``. 
The compiled agent is constructed without tools — the planner's @@ -618,7 +628,7 @@ class KnowledgePriorityMiddleware(AgentMiddleware): # type: ignore[type-arg] """ if self._planner is not None or self._planner_compile_failed: return self._planner - if self.llm is None: + if self.planner_llm is None: return None flags = get_flags() if not flags.enable_kb_planner_runnable or flags.disable_new_agent_stack: @@ -628,13 +638,13 @@ class KnowledgePriorityMiddleware(AgentMiddleware): # type: ignore[type-arg] try: self._planner = create_agent( - self.llm, + self.planner_llm, tools=[], middleware=[RetryAfterMiddleware(max_retries=2)], ) except Exception as exc: # pragma: no cover - defensive logger.warning( - "kb-planner Runnable compile failed; falling back to llm.ainvoke: %s", + "kb-planner Runnable compile failed; falling back to planner_llm.ainvoke: %s", exc, ) self._planner_compile_failed = True @@ -647,12 +657,12 @@ class KnowledgePriorityMiddleware(AgentMiddleware): # type: ignore[type-arg] messages: Sequence[BaseMessage], user_text: str, ) -> tuple[str, datetime | None, datetime | None, bool]: - if self.llm is None: + if self.planner_llm is None: return user_text, None, None, False recent_conversation = _render_recent_conversation( messages, - llm=self.llm, + llm=self.planner_llm, user_text=user_text, ) prompt = _build_kb_planner_prompt( @@ -663,8 +673,8 @@ class KnowledgePriorityMiddleware(AgentMiddleware): # type: ignore[type-arg] t0 = loop.time() # Prefer the compiled-once planner Runnable when enabled; otherwise - # fall back to ``self.llm.ainvoke``. The ``surfsense:internal`` tag - # is preserved on both paths so ``_stream_agent_events`` still + # fall back to ``planner_llm.ainvoke``. The ``surfsense:internal`` + # tag is preserved on both paths so ``_stream_agent_events`` still # suppresses the planner's intermediate events from the UI. 
planner = self._build_kb_planner_runnable() try: @@ -684,7 +694,7 @@ class KnowledgePriorityMiddleware(AgentMiddleware): # type: ignore[type-arg] else AIMessage(content="") ) else: - response = await self.llm.ainvoke( + response = await self.planner_llm.ainvoke( [HumanMessage(content=prompt)], config={"tags": ["surfsense:internal"]}, ) diff --git a/surfsense_backend/app/config/__init__.py b/surfsense_backend/app/config/__init__.py index 448818e88..5643c048b 100644 --- a/surfsense_backend/app/config/__init__.py +++ b/surfsense_backend/app/config/__init__.py @@ -110,6 +110,19 @@ def load_global_llm_configs(): except Exception as e: print(f"Warning: Failed to score global LLM configs: {e}") + # Planner LLM is a singleton role. If an operator accidentally + # marks multiple configs ``is_planner: true``, only the first one + # is used at runtime — surface the others at startup so the + # mistake is caught before traffic, not silently buried. + planner_cfgs = [c for c in configs if c.get("is_planner") is True] + if len(planner_cfgs) > 1: + extra_ids = [c.get("id") for c in planner_cfgs[1:]] + print( + "Warning: Multiple global LLM configs marked is_planner=true " + f"(ids {[c.get('id') for c in planner_cfgs]}); using id " + f"{planner_cfgs[0].get('id')} and ignoring {extra_ids}" + ) + return configs except Exception as e: print(f"Warning: Failed to load global LLM configs: {e}") diff --git a/surfsense_backend/app/config/global_llm_config.example.yaml b/surfsense_backend/app/config/global_llm_config.example.yaml index d92640c8d..83d556754 100644 --- a/surfsense_backend/app/config/global_llm_config.example.yaml +++ b/surfsense_backend/app/config/global_llm_config.example.yaml @@ -258,6 +258,45 @@ global_llm_configs: use_default_system_instructions: true citations_enabled: true + # Example: Planner LLM - small, fast model used for internal utility tasks + # + # The PLANNER role handles short, structured internal calls (KB query + # rewriting, date extraction, recency 
classification, etc.) that don't + # need frontier-tier capability. Pointing the planner at a cheap+fast + # model (gpt-4o-mini, Claude Haiku, Azure gpt-5.x-nano, Groq Llama, ...) + # typically saves 500ms-1.5s per turn vs. routing those same internal + # calls through the user's chat model. + # + # Activation: + # - Mark EXACTLY ONE global config with ``is_planner: true``. + # - If multiple are marked, the first one wins and a WARNING is logged. + # - If none is marked, every internal call falls back to the user's + # chat LLM (same behavior as before this flag existed). + # + # This config is operator-only — it is NOT exposed in the user-facing + # model selector, never billed against premium quota, and the + # billing_tier / anonymous_enabled fields below are ignored. + - id: -9 + name: "Global Planner (GPT-4o mini)" + description: "Internal-only planner LLM for query rewriting and classification" + is_planner: true + billing_tier: "free" + anonymous_enabled: false + seo_enabled: false + quota_reserve_tokens: 1000 + provider: "OPENAI" + model_name: "gpt-4o-mini" + api_key: "sk-your-openai-api-key-here" + api_base: "" + rpm: 3500 + tpm: 200000 + litellm_params: + temperature: 0 + max_tokens: 1000 + system_instructions: "" + use_default_system_instructions: true + citations_enabled: false + # ============================================================================= # OpenRouter Integration # ============================================================================= @@ -493,6 +532,20 @@ global_vision_llm_configs: # - Lower temperature (0.3) is recommended for accurate screenshot analysis # - Lower max_tokens (1000) is sufficient since autocomplete produces short suggestions # +# PLANNER LLM NOTES: +# - is_planner: true marks a config as the internal-only planner LLM (small, +# fast model used for KB query rewriting, date extraction, recency +# classification, etc.). 
Only one config may carry this flag — if +# multiple do, the first one wins and a startup WARNING is logged. +# - When no config is marked is_planner, every internal utility call falls +# back to the user's chat LLM (the historical behavior). +# - Planner configs are NOT shown in the user-facing model selector and +# are NOT billed against the user's premium quota. Their billing_tier, +# anonymous_enabled, seo_* fields are ignored. +# - Recommended models: gpt-4o-mini, claude-3-5-haiku, gemini-1.5-flash, +# azure gpt-5.x-nano, groq llama3-8b — anything <200ms p50 on a 1-2k +# prompt. Frontier models here defeat the purpose of the flag. +# # TOKEN QUOTA & ANONYMOUS ACCESS NOTES: # - billing_tier: "free" or "premium". Controls whether registered users need premium token quota. # - anonymous_enabled: true/false. Whether the model appears in the public no-login catalog. diff --git a/surfsense_backend/app/services/llm_service.py b/surfsense_backend/app/services/llm_service.py index ade202c72..fa97fb33a 100644 --- a/surfsense_backend/app/services/llm_service.py +++ b/surfsense_backend/app/services/llm_service.py @@ -659,3 +659,36 @@ async def get_user_long_context_llm( return await get_document_summary_llm( session, search_space_id, disable_streaming=disable_streaming ) + + +def get_planner_llm() -> ChatLiteLLM | None: + """Return a planner LLM instance from the first global config marked + ``is_planner: true``, or ``None`` if no planner config is defined. + + The planner role handles short, structured internal tasks (KB search + planning: query rewriting, date extraction, recency classification). + These tasks are well-served by small/fast models (e.g. gpt-4o-mini, + Claude Haiku, Azure gpt-5.x-nano) — using the user's chat LLM for them + is unnecessarily expensive and slow. 
+ + This helper reads from ``config.GLOBAL_LLM_CONFIGS`` (loaded at import + time from ``global_llm_config.yaml``) so it has no DB cost and can be + called synchronously from middleware/factory code. It returns the same + instance shape as the global path of ``get_search_space_llm_instance``. + + Callers MUST fall back to their chat LLM when this returns ``None`` so + deployments without a planner config keep working unchanged. + """ + from app.agents.new_chat.llm_config import create_chat_litellm_from_config + + planner_cfg = next( + ( + cfg + for cfg in config.GLOBAL_LLM_CONFIGS + if cfg.get("is_planner") is True + ), + None, + ) + if not planner_cfg: + return None + return create_chat_litellm_from_config(planner_cfg) From db8bffab38bc1eb3b39e0c805d29ea960a7c3e08 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 20 May 2026 11:58:15 +0200 Subject: [PATCH 49/63] perf(prompt-cache): enable Azure prompt_cache_key routing hint Splits the OpenAI-family gate into per-param predicates so AZURE and AZURE_OPENAI configs now receive prompt_cache_key for backend routing affinity (Microsoft auto-caches GPT-4o+ deployments at >=1024 tokens; the key clusters same-prefix requests on the same GPU pool and raises hit rate on turn 2+). prompt_cache_retention stays opted out for Azure because litellm 1.83.14's Azure transformer would drop it silently; revisit when Azure's supported params list is updated. 
--- .../app/agents/new_chat/prompt_caching.py | 101 +++++++++++++----- .../agents/new_chat/test_prompt_caching.py | 63 +++++++++-- 2 files changed, 129 insertions(+), 35 deletions(-) diff --git a/surfsense_backend/app/agents/new_chat/prompt_caching.py b/surfsense_backend/app/agents/new_chat/prompt_caching.py index 9fe47cdac..b58a48266 100644 --- a/surfsense_backend/app/agents/new_chat/prompt_caching.py +++ b/surfsense_backend/app/agents/new_chat/prompt_caching.py @@ -39,9 +39,19 @@ For OpenAI-family configs we additionally pass: - ``prompt_cache_key=f"surfsense-thread-{thread_id}"`` — routing hint that raises hit rate by sending requests with a shared prefix to the same - backend. + backend. Supported by ``openai/``, ``deepseek/``, ``xai/``, and + ``azure/`` (added to LiteLLM's Azure transformer in + https://github.com/BerriAI/litellm/pull/20989, Feb 2026; verified + against ``AzureOpenAIConfig.get_supported_openai_params`` in our + installed litellm 1.83.14 for ``azure/gpt-4o``, ``azure/gpt-4o-mini``, + ``azure/gpt-5.4``, ``azure/gpt-5.4-mini``). - ``prompt_cache_retention="24h"`` — extends cache TTL beyond the default - 5-10 min in-memory cache. + 5-10 min in-memory cache. Set ONLY for OpenAI/DeepSeek/xAI: Azure's + server-side support landed in Microsoft's docs on 2026-05-13 but + LiteLLM 1.83.14's Azure transformer still omits it from its supported + params list, so it gets silently dropped by ``litellm.drop_params``. + Azure's default in-memory retention (5-10 min, max 1 h) already + bridges intra-conversation turns; revisit when LiteLLM bumps Azure. Safety net: ``litellm.drop_params=True`` is set globally in ``app.services.llm_service`` at module-load time. Any kwarg the destination @@ -81,13 +91,31 @@ _DEFAULT_INJECTION_POINTS: tuple[dict[str, Any], ...] 
= ( {"location": "message", "index": -1}, ) -# Providers (uppercase ``AgentConfig.provider`` values) that natively expose -# OpenAI-style automatic prompt caching with ``prompt_cache_key`` and -# ``prompt_cache_retention`` kwargs. Strict whitelist — many other providers -# in ``PROVIDER_MAP`` route through litellm's ``openai`` prefix without -# implementing the OpenAI prompt-cache surface (e.g. MOONSHOT, ZHIPU, -# MINIMAX), so we can't infer family from the litellm prefix alone. -_OPENAI_FAMILY_PROVIDERS: frozenset[str] = frozenset({"OPENAI", "DEEPSEEK", "XAI"}) +# Providers (uppercase ``AgentConfig.provider`` values) that accept the +# OpenAI ``prompt_cache_key`` routing hint. Microsoft's Azure OpenAI docs +# (2026-05-13) confirm automatic prompt caching applies to every GPT-4o +# or newer Azure deployment at ≥1024 tokens with no configuration needed, +# and that ``prompt_cache_key`` is combined with the prefix hash to +# improve routing affinity and therefore cache hit rate. LiteLLM's Azure +# transformer ships ``prompt_cache_key`` in its supported params as of +# https://github.com/BerriAI/litellm/pull/20989. +# +# Strict whitelist — many other providers in ``PROVIDER_MAP`` route +# through litellm's ``openai`` prefix without implementing the OpenAI +# prompt-cache surface (e.g. MOONSHOT, ZHIPU, MINIMAX), so we can't infer +# family from the litellm prefix alone. +_PROMPT_CACHE_KEY_PROVIDERS: frozenset[str] = frozenset( + {"OPENAI", "DEEPSEEK", "XAI", "AZURE", "AZURE_OPENAI"} +) + +# Subset of ``_PROMPT_CACHE_KEY_PROVIDERS`` that also accept +# ``prompt_cache_retention="24h"``. Azure is excluded: see module +# docstring — LiteLLM 1.83.14's Azure transformer omits the param so +# ``drop_params`` silently strips it. Re-add Azure once a future LiteLLM +# release wires it into ``AzureOpenAIConfig.get_supported_openai_params``. 
+_PROMPT_CACHE_RETENTION_PROVIDERS: frozenset[str] = frozenset( + {"OPENAI", "DEEPSEEK", "XAI"} +) def _is_router_llm(llm: BaseChatModel) -> bool: @@ -101,13 +129,13 @@ def _is_router_llm(llm: BaseChatModel) -> bool: return type(llm).__name__ == "ChatLiteLLMRouter" -def _is_openai_family_config(agent_config: AgentConfig | None) -> bool: - """Whether the config targets an OpenAI-style prompt-cache surface. +def _provider_supports_prompt_cache_key(agent_config: AgentConfig | None) -> bool: + """Whether the config targets a provider that accepts ``prompt_cache_key``. - Strict — only returns True when the user explicitly chose OPENAI, - DEEPSEEK, or XAI as the provider in their ``NewLLMConfig`` / - ``YAMLConfig``. Auto-mode and custom providers return False because - we can't statically know the destination. + Strict — only returns True for explicitly chosen OPENAI, DEEPSEEK, + XAI, AZURE, or AZURE_OPENAI providers. Auto-mode and custom + providers return False because we can't statically know the + destination and the router fans out across mixed providers. """ if agent_config is None or not agent_config.provider: return False @@ -115,7 +143,25 @@ def _is_openai_family_config(agent_config: AgentConfig | None) -> bool: return False if agent_config.custom_provider: return False - return agent_config.provider.upper() in _OPENAI_FAMILY_PROVIDERS + return agent_config.provider.upper() in _PROMPT_CACHE_KEY_PROVIDERS + + +def _provider_supports_prompt_cache_retention( + agent_config: AgentConfig | None, +) -> bool: + """Whether the config targets a provider that accepts ``prompt_cache_retention``. + + Tighter than :func:`_provider_supports_prompt_cache_key` — Azure + deployments are excluded until LiteLLM ships the param in its Azure + transformer (see module docstring). 
+ """ + if agent_config is None or not agent_config.provider: + return False + if agent_config.is_auto_mode: + return False + if agent_config.custom_provider: + return False + return agent_config.provider.upper() in _PROMPT_CACHE_RETENTION_PROVIDERS def _get_or_init_model_kwargs(llm: BaseChatModel) -> dict[str, Any] | None: @@ -173,16 +219,23 @@ def apply_litellm_prompt_caching( dict(point) for point in _DEFAULT_INJECTION_POINTS ] - # OpenAI-family extras only when we statically know the destination is - # OpenAI / DeepSeek / xAI. Auto-mode router fans out across providers - # so we can't safely set OpenAI-only kwargs there (drop_params would - # strip them but it's wasteful to set them in the first place). + # OpenAI-style extras only when we statically know the destination + # accepts them. Auto-mode router fans out across mixed providers so + # we can't safely set destination-specific kwargs there (drop_params + # would strip them but it's wasteful to set them in the first + # place). if _is_router_llm(llm): return - if not _is_openai_family_config(agent_config): - return - if thread_id is not None and "prompt_cache_key" not in model_kwargs: + if ( + thread_id is not None + and "prompt_cache_key" not in model_kwargs + and _provider_supports_prompt_cache_key(agent_config) + ): model_kwargs["prompt_cache_key"] = f"surfsense-thread-{thread_id}" - if "prompt_cache_retention" not in model_kwargs: + + if ( + "prompt_cache_retention" not in model_kwargs + and _provider_supports_prompt_cache_retention(agent_config) + ): model_kwargs["prompt_cache_retention"] = "24h" diff --git a/surfsense_backend/tests/unit/agents/new_chat/test_prompt_caching.py b/surfsense_backend/tests/unit/agents/new_chat/test_prompt_caching.py index 4cf53969d..c3de15c58 100644 --- a/surfsense_backend/tests/unit/agents/new_chat/test_prompt_caching.py +++ b/surfsense_backend/tests/unit/agents/new_chat/test_prompt_caching.py @@ -12,13 +12,19 @@ prompt caching. 
It mutates ``llm.model_kwargs`` so the kwargs flow to the deepagent stack accumulates multiple ``SystemMessage``\ s in ``state["messages"]`` and ``role: system`` would tag every one of them, blowing past Anthropic's 4-block ``cache_control`` cap. -2. Adds ``prompt_cache_key``/``prompt_cache_retention`` only for - single-model OPENAI/DEEPSEEK/XAI configs (where OpenAI's automatic - prompt-cache surface is available). -3. Treats ``ChatLiteLLMRouter`` (auto-mode) as universal-only — no - OpenAI-only kwargs because the router fans out across providers. -4. Idempotent: user-supplied values in ``model_kwargs`` are preserved. -5. Defensive: LLMs without a writable ``model_kwargs`` are silently +2. Adds ``prompt_cache_key`` for OPENAI/DEEPSEEK/XAI/AZURE/AZURE_OPENAI + configs (Microsoft's Azure transformer was added to LiteLLM in + https://github.com/BerriAI/litellm/pull/20989, Feb 2026). +3. Adds ``prompt_cache_retention="24h"`` ONLY for OPENAI/DEEPSEEK/XAI. + Azure's server-side support landed in Microsoft's docs on 2026-05-13 + but LiteLLM 1.83.14 hasn't wired it through yet, so we let Azure use + its default in-memory retention rather than send a param that + ``litellm.drop_params`` would silently strip. +4. Treats ``ChatLiteLLMRouter`` (auto-mode) as universal-only — no + destination-specific kwargs because the router fans out across + providers. +5. Idempotent: user-supplied values in ``model_kwargs`` are preserved. +6. Defensive: LLMs without a writable ``model_kwargs`` are silently skipped rather than raising. 
""" @@ -191,9 +197,9 @@ def test_does_not_overwrite_user_supplied_prompt_cache_key() -> None: @pytest.mark.parametrize("provider", ["OPENAI", "DEEPSEEK", "XAI"]) def test_sets_openai_family_extras(provider: str) -> None: - """OpenAI-style providers gain ``prompt_cache_key`` (raises hit rate - via routing affinity) and ``prompt_cache_retention="24h"`` (extends - cache TTL beyond the default 5-10 min).""" + """Native OpenAI-style providers gain ``prompt_cache_key`` (raises + hit rate via routing affinity) and ``prompt_cache_retention="24h"`` + (extends cache TTL beyond the default 5-10 min).""" cfg = _make_cfg(provider=provider) llm = _FakeLLM() @@ -203,6 +209,27 @@ def test_sets_openai_family_extras(provider: str) -> None: assert llm.model_kwargs["prompt_cache_retention"] == "24h" +@pytest.mark.parametrize("provider", ["AZURE", "AZURE_OPENAI"]) +def test_azure_gets_prompt_cache_key_only(provider: str) -> None: + """Azure configs gain ``prompt_cache_key`` for routing affinity + (Microsoft auto-caches every GPT-4o+ deployment at ≥1024 tokens; + the key clusters same-prefix requests on the same backend GPU pool + so hit rate climbs). They DO NOT get ``prompt_cache_retention`` + because LiteLLM 1.83.14's Azure transformer omits it from its + supported params list — ``drop_params`` would silently strip it. + Azure's default in-memory retention (5-10 min, max 1 h) is already + enough to cover intra-conversation turns; revisit when LiteLLM + bumps Azure to match its OpenAI surface.""" + cfg = _make_cfg(provider=provider, model_name="gpt-5.4") + llm = _FakeLLM(model="azure/gpt-5.4") + + apply_litellm_prompt_caching(llm, agent_config=cfg, thread_id=42) + + assert llm.model_kwargs["prompt_cache_key"] == "surfsense-thread-42" + assert "prompt_cache_retention" not in llm.model_kwargs + assert "cache_control_injection_points" in llm.model_kwargs + + def test_skips_prompt_cache_key_when_no_thread_id() -> None: """Without a thread id we can't construct a per-thread key. 
Retention is still useful so we set it (it's free).""" @@ -215,12 +242,26 @@ def test_skips_prompt_cache_key_when_no_thread_id() -> None: assert llm.model_kwargs["prompt_cache_retention"] == "24h" +def test_azure_skips_prompt_cache_key_when_no_thread_id() -> None: + """Azure without a thread id ends up with no extras (retention is + Azure-skipped, key needs a thread id) — universal injection points + still land.""" + cfg = _make_cfg(provider="AZURE", model_name="gpt-5.4") + llm = _FakeLLM(model="azure/gpt-5.4") + + apply_litellm_prompt_caching(llm, agent_config=cfg, thread_id=None) + + assert "prompt_cache_key" not in llm.model_kwargs + assert "prompt_cache_retention" not in llm.model_kwargs + assert "cache_control_injection_points" in llm.model_kwargs + + @pytest.mark.parametrize( "provider", ["ANTHROPIC", "BEDROCK", "VERTEX_AI", "GOOGLE_AI_STUDIO", "GROQ", "MOONSHOT"], ) def test_no_openai_extras_for_other_providers(provider: str) -> None: - """Non-OpenAI-family providers don't expose ``prompt_cache_key`` — + """Non-OpenAI-style providers don't expose ``prompt_cache_key`` — skip it. 
``cache_control_injection_points`` is still set (universal).""" cfg = _make_cfg(provider=provider) llm = _FakeLLM() From 39c29d651f50dd958ef9fe20459f0b2de892d820 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 20 May 2026 15:29:41 +0530 Subject: [PATCH 50/63] feat: enhance token display in MessageInfoDropdown with improved visual separation --- .../assistant-ui/assistant-message.tsx | 25 +++++++++++++------ 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/surfsense_web/components/assistant-ui/assistant-message.tsx b/surfsense_web/components/assistant-ui/assistant-message.tsx index ac1732441..f6c91e8bf 100644 --- a/surfsense_web/components/assistant-ui/assistant-message.tsx +++ b/surfsense_web/components/assistant-ui/assistant-message.tsx @@ -14,6 +14,7 @@ import { ClipboardPaste, CopyIcon, DownloadIcon, + Dot, ExternalLink, Globe, MessageCircleReply, @@ -330,9 +331,14 @@ const MessageInfoDropdown: FC<{ chatTurnId: string | null | undefined }> = ({ ch {icon} {name} - - {counts.total_tokens.toLocaleString()} tokens - {costMicros && costMicros > 0 ? ` · ${formatTurnCost(costMicros)}` : ""} + + {counts.total_tokens.toLocaleString()} tokens + {costMicros && costMicros > 0 ? ( + <> + ); @@ -342,11 +348,14 @@ const MessageInfoDropdown: FC<{ chatTurnId: string | null | undefined }> = ({ ch className="focus:bg-accent focus:text-accent-foreground relative flex cursor-default flex-col items-start gap-0.5 rounded-sm px-2 py-1.5 text-sm outline-hidden select-none" onSelect={(e) => e.preventDefault()} > - - {usage.total_tokens.toLocaleString()} tokens - {usage.cost_micros && usage.cost_micros > 0 - ? ` · ${formatTurnCost(usage.cost_micros)}` - : ""} + + {usage.total_tokens.toLocaleString()} tokens + {usage.cost_micros && usage.cost_micros > 0 ? 
( + <> + )} From ff2d621185e1db62dda666845ed6365a3d2b2052 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 20 May 2026 18:37:21 +0530 Subject: [PATCH 51/63] chore: fix the pnpm version in GitHub Actions workflow --- .github/workflows/code-quality.yml | 36 ++++++++++++++---------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/.github/workflows/code-quality.yml b/.github/workflows/code-quality.yml index 0dd2e1809..2590f98b3 100644 --- a/.github/workflows/code-quality.yml +++ b/.github/workflows/code-quality.yml @@ -17,7 +17,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: fetch-depth: 0 @@ -27,7 +27,7 @@ jobs: git fetch origin ${{ github.base_ref }}:${{ github.base_ref }} 2>/dev/null || git fetch origin ${{ github.base_ref }} 2>/dev/null || true - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: '3.12' @@ -35,7 +35,7 @@ jobs: run: pip install pre-commit - name: Cache pre-commit hooks - uses: actions/cache@v4 + uses: actions/cache@v5 with: path: ~/.cache/pre-commit key: pre-commit-${{ hashFiles('.pre-commit-config.yaml') }} @@ -74,7 +74,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: fetch-depth: 0 @@ -83,7 +83,7 @@ jobs: git fetch origin ${{ github.base_ref }}:${{ github.base_ref }} 2>/dev/null || git fetch origin ${{ github.base_ref }} 2>/dev/null || true - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: '3.12' @@ -91,7 +91,7 @@ jobs: run: pip install pre-commit - name: Cache pre-commit hooks - uses: actions/cache@v4 + uses: actions/cache@v5 with: path: ~/.cache/pre-commit key: pre-commit-security-${{ hashFiles('.pre-commit-config.yaml') }} @@ -131,21 +131,21 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: fetch-depth: 
0 - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: '3.12' - name: Install UV - uses: astral-sh/setup-uv@v3 + uses: astral-sh/setup-uv@v8.1.0 - name: Check if backend files changed id: backend-changes - uses: dorny/paths-filter@v3 + uses: dorny/paths-filter@v4 with: filters: | backend: @@ -153,7 +153,7 @@ jobs: - name: Cache dependencies if: steps.backend-changes.outputs.backend == 'true' - uses: actions/cache@v4 + uses: actions/cache@v5 with: path: | ~/.cache/uv @@ -171,7 +171,7 @@ jobs: - name: Cache pre-commit hooks if: steps.backend-changes.outputs.backend == 'true' - uses: actions/cache@v4 + uses: actions/cache@v5 with: path: ~/.cache/pre-commit key: pre-commit-backend-${{ hashFiles('.pre-commit-config.yaml') }} @@ -212,7 +212,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: fetch-depth: 0 @@ -221,18 +221,16 @@ jobs: git fetch origin ${{ github.base_ref }}:${{ github.base_ref }} 2>/dev/null || git fetch origin ${{ github.base_ref }} 2>/dev/null || true - name: Setup Node.js - uses: actions/setup-node@v4 + uses: actions/setup-node@v6 with: node-version: '18' - name: Install pnpm - uses: pnpm/action-setup@v4 - with: - version: latest + uses: pnpm/action-setup@v6 - name: Check if frontend files changed id: frontend-changes - uses: dorny/paths-filter@v3 + uses: dorny/paths-filter@v4 with: filters: | web: @@ -254,7 +252,7 @@ jobs: run: pip install pre-commit - name: Cache pre-commit hooks - uses: actions/cache@v4 + uses: actions/cache@v5 with: path: ~/.cache/pre-commit key: pre-commit-frontend-${{ hashFiles('.pre-commit-config.yaml') }} From 20152b1243ab0650c6c197b67fd9a61d8a2b1639 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 20 May 2026 18:41:05 +0530 Subject: [PATCH 52/63] chore: update Node.js version to 20 in GitHub Actions workflow --- .github/workflows/code-quality.yml | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/code-quality.yml b/.github/workflows/code-quality.yml index 2590f98b3..f9042fee0 100644 --- a/.github/workflows/code-quality.yml +++ b/.github/workflows/code-quality.yml @@ -223,7 +223,7 @@ jobs: - name: Setup Node.js uses: actions/setup-node@v6 with: - node-version: '18' + node-version: '20' - name: Install pnpm uses: pnpm/action-setup@v6 From 2de8ea550155ba17fb15d98f686792417d1e7e92 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 20 May 2026 19:25:39 +0530 Subject: [PATCH 53/63] chore: update biome version in pre-commit configuration --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c02d2068b..1d814b430 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -67,7 +67,7 @@ repos: # Biome check for surfsense_web - id: biome-check-web name: biome-check-web - entry: bash -c 'cd surfsense_web && npx @biomejs/biome check --diagnostic-level=error .' + entry: bash -c 'cd surfsense_web && npx @biomejs/biome@2.4.6 check --diagnostic-level=error .' 
language: system files: ^surfsense_web/ pass_filenames: false From a786574484e223a41cf5c04fb4c8583d9aa3c152 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 20 May 2026 19:29:09 +0530 Subject: [PATCH 54/63] chore: simplify job names in GitHub Actions workflow --- .github/workflows/code-quality.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/code-quality.yml b/.github/workflows/code-quality.yml index f9042fee0..fcecdd23f 100644 --- a/.github/workflows/code-quality.yml +++ b/.github/workflows/code-quality.yml @@ -11,7 +11,7 @@ concurrency: jobs: file-quality: - name: File Quality Checks + name: File Quality runs-on: ubuntu-latest if: github.event.pull_request.draft == false @@ -125,7 +125,7 @@ jobs: exit ${exit_code:-0} python-backend: - name: Python Backend Quality + name: Backend Quality runs-on: ubuntu-latest if: github.event.pull_request.draft == false @@ -206,7 +206,7 @@ jobs: exit ${exit_code:-0} typescript-frontend: - name: TypeScript/JavaScript Quality + name: Frontend Quality runs-on: ubuntu-latest if: github.event.pull_request.draft == false From c0aa4261ac0f8b9444e26ff9888a7c1e22815903 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 20 May 2026 16:11:07 +0200 Subject: [PATCH 55/63] perf(mcp): persist list_tools discovery in connector.config.cached_tools Skip the ~1-3s MCP initialize + list_tools handshake on every cache miss by reading tool definitions from the connector row we already load. Lazy populate on first miss, self-heal on corrupt cache, zero schema migration. 
--- .../app/agents/new_chat/tools/mcp_tool.py | 122 ++++++++++------ .../agents/new_chat/tools/mcp_tools_cache.py | 94 +++++++++++++ .../new_chat/tools/test_mcp_tools_cache.py | 130 ++++++++++++++++++ 3 files changed, 304 insertions(+), 42 deletions(-) create mode 100644 surfsense_backend/app/agents/new_chat/tools/mcp_tools_cache.py create mode 100644 surfsense_backend/tests/unit/agents/new_chat/tools/test_mcp_tools_cache.py diff --git a/surfsense_backend/app/agents/new_chat/tools/mcp_tool.py b/surfsense_backend/app/agents/new_chat/tools/mcp_tool.py index b3c26f331..3d4679fb8 100644 --- a/surfsense_backend/app/agents/new_chat/tools/mcp_tool.py +++ b/surfsense_backend/app/agents/new_chat/tools/mcp_tool.py @@ -36,6 +36,11 @@ from sqlalchemy.ext.asyncio import AsyncSession from app.agents.new_chat.middleware.dedup_tool_calls import dedup_key_full_args from app.agents.new_chat.tools.hitl import request_approval from app.agents.new_chat.tools.mcp_client import MCPClient +from app.agents.new_chat.tools.mcp_tools_cache import ( + CachedMCPTools, + read_cached_tools, + write_cached_tools, +) from app.db import SearchSourceConnector from app.services.mcp_oauth.registry import MCP_SERVICES, get_service_by_connector_type from app.utils.perf import get_perf_logger @@ -516,6 +521,7 @@ async def _load_http_mcp_tools( is_generic_mcp: bool = False, *, bypass_internal_hitl: bool = False, + cached_tools: CachedMCPTools | None = None, ) -> list[StructuredTool]: """Load tools from an HTTP-based MCP server. @@ -526,6 +532,8 @@ async def _load_http_mcp_tools( readonly_tools: Tool names that skip HITL approval (read-only operations). tool_name_prefix: If set, each tool name is prefixed for multi-account disambiguation (e.g. ``linear_25``). + cached_tools: If provided, skip live discovery and rebuild wrappers + from the persisted definitions. 
""" tools: list[StructuredTool] = [] @@ -549,15 +557,23 @@ async def _load_http_mcp_tools( allowed_set = set(allowed_tools) if allowed_tools else None - async def _discover(disc_headers: dict[str, str]) -> list[dict[str, Any]]: - """Connect, initialize, and list tools from the MCP server.""" + async def _discover( + disc_headers: dict[str, str], + ) -> tuple[dict[str, str | None], list[dict[str, Any]]]: + """Connect, initialize, and list tools — returns (serverInfo, tools).""" async with ( streamablehttp_client(url, headers=disc_headers) as (read, write, _), ClientSession(read, write) as session, ): - await session.initialize() + init_result = await session.initialize() + server_info: dict[str, str | None] = {"name": None, "version": None} + si = getattr(init_result, "serverInfo", None) + if si is not None: + server_info["name"] = getattr(si, "name", None) + server_info["version"] = getattr(si, "version", None) + response = await session.list_tools() - return [ + return server_info, [ { "name": tool.name, "description": tool.description or "", @@ -568,47 +584,65 @@ async def _load_http_mcp_tools( for tool in response.tools ] - try: - tool_definitions = await _discover(headers) - except Exception as first_err: - if not _is_auth_error(first_err) or connector_id is None: - logger.exception( - "Failed to connect to HTTP MCP server at '%s' (connector %d): %s", - url, - connector_id, - first_err, - ) - return tools - - logger.warning( - "HTTP MCP discovery for connector %d got 401 — attempting token refresh", - connector_id, - ) - fresh_headers = await _force_refresh_and_get_headers(connector_id) - if fresh_headers is None: - await _mark_connector_auth_expired(connector_id) - logger.error( - "HTTP MCP discovery for connector %d: token refresh failed, marking auth_expired", - connector_id, - ) - return tools - + if cached_tools is not None: + tool_definitions = [ + { + "name": td.name, + "description": td.description, + "input_schema": td.input_schema, + } + for td in 
cached_tools.tools + ] + else: try: - tool_definitions = await _discover(fresh_headers) - headers = fresh_headers - logger.info( - "HTTP MCP discovery for connector %d succeeded after 401 recovery", + server_info, tool_definitions = await _discover(headers) + except Exception as first_err: + if not _is_auth_error(first_err) or connector_id is None: + logger.exception( + "Failed to connect to HTTP MCP server at '%s' (connector %d): %s", + url, + connector_id, + first_err, + ) + return tools + + logger.warning( + "HTTP MCP discovery for connector %d got 401 — attempting token refresh", connector_id, ) - except Exception as retry_err: - logger.exception( - "HTTP MCP discovery for connector %d still failing after refresh: %s", - connector_id, - retry_err, - ) - if _is_auth_error(retry_err): + fresh_headers = await _force_refresh_and_get_headers(connector_id) + if fresh_headers is None: await _mark_connector_auth_expired(connector_id) - return tools + logger.error( + "HTTP MCP discovery for connector %d: token refresh failed, marking auth_expired", + connector_id, + ) + return tools + + try: + server_info, tool_definitions = await _discover(fresh_headers) + headers = fresh_headers + logger.info( + "HTTP MCP discovery for connector %d succeeded after 401 recovery", + connector_id, + ) + except Exception as retry_err: + logger.exception( + "HTTP MCP discovery for connector %d still failing after refresh: %s", + connector_id, + retry_err, + ) + if _is_auth_error(retry_err): + await _mark_connector_auth_expired(connector_id) + return tools + + await write_cached_tools( + connector_id, + tool_definitions, + server_name=server_info.get("name"), + server_version=server_info.get("version"), + transport=server_config.get("transport", "streamable-http"), + ) total_discovered = len(tool_definitions) @@ -1099,6 +1133,7 @@ async def load_mcp_tools( "tool_name_prefix": tool_name_prefix, "transport": server_config.get("transport", "stdio"), "is_generic_mcp": svc_cfg is None, + 
"cached_tools": read_cached_tools(connector), } ) @@ -1112,6 +1147,7 @@ async def load_mcp_tools( async def _discover_one(task: dict[str, Any]) -> list[StructuredTool]: discover_start = time.perf_counter() transport = task["transport"] + cached_tools = task.get("cached_tools") try: if transport in ("streamable-http", "http", "sse"): result = await asyncio.wait_for( @@ -1125,6 +1161,7 @@ async def load_mcp_tools( tool_name_prefix=task["tool_name_prefix"], is_generic_mcp=task.get("is_generic_mcp", False), bypass_internal_hitl=bypass_internal_hitl, + cached_tools=cached_tools, ), timeout=_MCP_DISCOVERY_TIMEOUT_SECONDS, ) @@ -1140,12 +1177,13 @@ async def load_mcp_tools( timeout=_MCP_DISCOVERY_TIMEOUT_SECONDS, ) _perf_log.info( - "[mcp_discover] connector=%s name=%r transport=%s tools=%d elapsed=%.3fs", + "[mcp_discover] connector=%s name=%r transport=%s tools=%d elapsed=%.3fs cache=%s", task["connector_id"], task["connector_name"], transport, len(result), time.perf_counter() - discover_start, + "hit" if cached_tools is not None else "miss", ) return result except TimeoutError: diff --git a/surfsense_backend/app/agents/new_chat/tools/mcp_tools_cache.py b/surfsense_backend/app/agents/new_chat/tools/mcp_tools_cache.py new file mode 100644 index 000000000..3c79ed1d3 --- /dev/null +++ b/surfsense_backend/app/agents/new_chat/tools/mcp_tools_cache.py @@ -0,0 +1,94 @@ +"""Persist MCP ``list_tools`` results in ``SearchSourceConnector.config.cached_tools``.""" + +from __future__ import annotations + +import logging +from datetime import UTC, datetime +from typing import Any + +from pydantic import BaseModel, Field, ValidationError +from sqlalchemy import select +from sqlalchemy.orm.attributes import flag_modified + +from app.db import SearchSourceConnector, async_session_maker + +logger = logging.getLogger(__name__) + + +class CachedMCPToolDef(BaseModel): + name: str + description: str = "" + input_schema: dict[str, Any] = Field(default_factory=dict) + + +class 
CachedMCPTools(BaseModel): + discovered_at: datetime + server_version: str | None = None + server_name: str | None = None + transport: str | None = None + tools: list[CachedMCPToolDef] + + +def read_cached_tools(connector: SearchSourceConnector) -> CachedMCPTools | None: + """Return parsed cached tools or ``None`` if missing / corrupt (caller falls back to live discovery).""" + cfg = connector.config or {} + raw = cfg.get("cached_tools") + if not raw or not isinstance(raw, dict): + return None + + try: + return CachedMCPTools.model_validate(raw) + except ValidationError as exc: + logger.warning( + "MCP connector %d has corrupt cached_tools — falling back to live discovery: %s", + connector.id, + exc, + ) + return None + + +async def write_cached_tools( + connector_id: int, + tool_definitions: list[dict[str, Any]], + *, + server_name: str | None = None, + server_version: str | None = None, + transport: str | None = None, +) -> None: + """Best-effort persist; uses its own session so a write failure cannot poison the caller's transaction.""" + payload = CachedMCPTools( + discovered_at=datetime.now(UTC), + server_version=server_version, + server_name=server_name, + transport=transport, + tools=[CachedMCPToolDef.model_validate(td) for td in tool_definitions], + ) + + try: + async with async_session_maker() as session: + result = await session.execute( + select(SearchSourceConnector).filter( + SearchSourceConnector.id == connector_id, + ) + ) + connector = result.scalars().first() + if connector is None: + return + + cfg = dict(connector.config or {}) + cfg["cached_tools"] = payload.model_dump(mode="json") + connector.config = cfg + flag_modified(connector, "config") + await session.commit() + + logger.info( + "Persisted cached_tools for MCP connector %d (%d tools)", + connector_id, + len(payload.tools), + ) + except Exception: + logger.warning( + "Failed to persist cached_tools for MCP connector %d", + connector_id, + exc_info=True, + ) diff --git 
a/surfsense_backend/tests/unit/agents/new_chat/tools/test_mcp_tools_cache.py b/surfsense_backend/tests/unit/agents/new_chat/tools/test_mcp_tools_cache.py new file mode 100644 index 000000000..bae97ba9f --- /dev/null +++ b/surfsense_backend/tests/unit/agents/new_chat/tools/test_mcp_tools_cache.py @@ -0,0 +1,130 @@ +"""Unit tests for ``mcp_tools_cache``.""" + +from __future__ import annotations + +from datetime import UTC, datetime +from types import SimpleNamespace + +import pytest + +from app.agents.new_chat.tools.mcp_tools_cache import ( + CachedMCPToolDef, + CachedMCPTools, + read_cached_tools, +) + +pytestmark = pytest.mark.unit + + +def _make_connector(config: dict | None) -> SimpleNamespace: + return SimpleNamespace(id=42, config=config) + + +def test_read_returns_none_when_config_is_none() -> None: + assert read_cached_tools(_make_connector(None)) is None + + +def test_read_returns_none_when_cached_tools_missing() -> None: + assert read_cached_tools(_make_connector({"server_config": {}})) is None + + +def test_read_returns_none_when_cached_tools_is_not_a_dict() -> None: + assert read_cached_tools(_make_connector({"cached_tools": []})) is None + assert read_cached_tools(_make_connector({"cached_tools": "stale"})) is None + + +def test_read_parses_minimal_valid_payload() -> None: + parsed = read_cached_tools( + _make_connector( + { + "cached_tools": { + "discovered_at": "2026-05-20T10:00:00+00:00", + "tools": [ + { + "name": "list_issues", + "description": "List Linear issues", + "input_schema": {"type": "object"}, + } + ], + } + } + ) + ) + assert parsed is not None + assert parsed.server_version is None + assert parsed.server_name is None + assert parsed.transport is None + assert len(parsed.tools) == 1 + assert parsed.tools[0].name == "list_issues" + + +def test_read_parses_full_payload_with_serverinfo() -> None: + parsed = read_cached_tools( + _make_connector( + { + "cached_tools": { + "discovered_at": "2026-05-20T10:00:00+00:00", + "server_version": 
"1.2.3", + "server_name": "atlassian-mcp", + "transport": "streamable-http", + "tools": [ + {"name": "create_issue", "input_schema": {}}, + {"name": "list_issues", "input_schema": {}}, + ], + } + } + ) + ) + assert parsed is not None + assert parsed.server_version == "1.2.3" + assert parsed.server_name == "atlassian-mcp" + assert parsed.transport == "streamable-http" + assert [t.name for t in parsed.tools] == ["create_issue", "list_issues"] + + +def test_read_returns_none_for_corrupt_payload(caplog) -> None: + parsed = read_cached_tools( + _make_connector( + { + "cached_tools": { + "discovered_at": "not-a-date", + "tools": "should-be-a-list", + } + } + ) + ) + assert parsed is None + assert any("corrupt cached_tools" in r.getMessage() for r in caplog.records) + + +def test_read_returns_none_when_tools_missing() -> None: + parsed = read_cached_tools( + _make_connector( + {"cached_tools": {"discovered_at": "2026-05-20T10:00:00+00:00"}} + ) + ) + assert parsed is None + + +def test_tool_def_defaults_description_and_schema() -> None: + td = CachedMCPToolDef.model_validate({"name": "ping"}) + assert td.description == "" + assert td.input_schema == {} + + +def test_model_dump_json_mode_is_round_trippable() -> None: + original = CachedMCPTools( + discovered_at=datetime(2026, 5, 20, 10, 0, 0, tzinfo=UTC), + server_version="1.2.3", + server_name="atlassian-mcp", + transport="streamable-http", + tools=[CachedMCPToolDef(name="list_issues")], + ) + payload = original.model_dump(mode="json") + + assert payload["discovered_at"] == "2026-05-20T10:00:00Z" + assert payload["tools"][0]["name"] == "list_issues" + + reparsed = CachedMCPTools.model_validate(payload) + assert reparsed.discovered_at == original.discovered_at + assert reparsed.tools[0].name == "list_issues" From 844b8ba6090408f9ebc9c9eb8a1ce03b291e21e8 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 20 May 2026 19:42:47 +0530 Subject: [PATCH 56/63] chore: refactor 
GitHub Actions workflow to improve backend change detection and job dependencies --- .github/workflows/backend-tests.yml | 82 ++++++++++++++++------------- 1 file changed, 45 insertions(+), 37 deletions(-) diff --git a/.github/workflows/backend-tests.yml b/.github/workflows/backend-tests.yml index 6e0a6db6d..ec8a2e7f6 100644 --- a/.github/workflows/backend-tests.yml +++ b/.github/workflows/backend-tests.yml @@ -10,10 +10,30 @@ concurrency: cancel-in-progress: true jobs: + changes: + name: Changes + runs-on: ubuntu-latest + outputs: + backend: ${{ steps.filter.outputs.backend }} + + steps: + - name: Checkout code + uses: actions/checkout@v6 + + - name: Check changed files + id: filter + uses: dorny/paths-filter@v4 + with: + filters: | + backend: + - 'surfsense_backend/**' + - '.github/workflows/backend-tests.yml' + unit-tests: name: Unit Tests runs-on: ubuntu-latest - if: github.event.pull_request.draft == false + needs: changes + if: ${{ github.event.pull_request.draft == false && needs.changes.outputs.backend == 'true' }} env: EMBEDDING_MODEL: sentence-transformers/all-MiniLM-L6-v2 @@ -21,26 +41,15 @@ jobs: - name: Checkout code uses: actions/checkout@v6 - - name: Check if backend files changed - id: backend-changes - uses: dorny/paths-filter@v3 - with: - filters: | - backend: - - 'surfsense_backend/**' - - name: Set up Python - if: steps.backend-changes.outputs.backend == 'true' - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: '3.12' - name: Install UV - if: steps.backend-changes.outputs.backend == 'true' - uses: astral-sh/setup-uv@v7 + uses: astral-sh/setup-uv@v8.1.0 - name: Cache dependencies - if: steps.backend-changes.outputs.backend == 'true' uses: actions/cache@v5 with: path: | @@ -51,26 +60,24 @@ jobs: python-deps- - name: Cache HuggingFace models - if: steps.backend-changes.outputs.backend == 'true' uses: actions/cache@v5 with: path: ~/.cache/huggingface key: hf-models-${{ env.EMBEDDING_MODEL }} - name: Install 
dependencies - if: steps.backend-changes.outputs.backend == 'true' working-directory: surfsense_backend run: uv sync - name: Run unit tests - if: steps.backend-changes.outputs.backend == 'true' working-directory: surfsense_backend run: uv run pytest -m unit integration-tests: name: Integration Tests runs-on: ubuntu-latest - if: github.event.pull_request.draft == false + needs: changes + if: ${{ github.event.pull_request.draft == false && needs.changes.outputs.backend == 'true' }} env: EMBEDDING_MODEL: sentence-transformers/all-MiniLM-L6-v2 @@ -93,26 +100,15 @@ jobs: - name: Checkout code uses: actions/checkout@v6 - - name: Check if backend files changed - id: backend-changes - uses: dorny/paths-filter@v3 - with: - filters: | - backend: - - 'surfsense_backend/**' - - name: Set up Python - if: steps.backend-changes.outputs.backend == 'true' - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: '3.12' - name: Install UV - if: steps.backend-changes.outputs.backend == 'true' - uses: astral-sh/setup-uv@v7 + uses: astral-sh/setup-uv@v8.1.0 - name: Cache dependencies - if: steps.backend-changes.outputs.backend == 'true' uses: actions/cache@v5 with: path: | @@ -123,19 +119,16 @@ jobs: python-deps- - name: Cache HuggingFace models - if: steps.backend-changes.outputs.backend == 'true' uses: actions/cache@v5 with: path: ~/.cache/huggingface key: hf-models-${{ env.EMBEDDING_MODEL }} - name: Install dependencies - if: steps.backend-changes.outputs.backend == 'true' working-directory: surfsense_backend run: uv sync - name: Run integration tests - if: steps.backend-changes.outputs.backend == 'true' working-directory: surfsense_backend env: TEST_DATABASE_URL: postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense_test @@ -146,14 +139,29 @@ jobs: test-gate: name: Test Gate runs-on: ubuntu-latest - needs: [unit-tests, integration-tests] + needs: [changes, unit-tests, integration-tests] if: always() steps: - name: Check all test jobs run: | - 
if [[ "${{ needs.unit-tests.result }}" == "failure" || - "${{ needs.integration-tests.result }}" == "failure" ]]; then + if [[ "${{ needs.changes.result }}" == "failure" || "${{ needs.changes.result }}" == "cancelled" ]]; then + echo "Backend change detection failed" + exit 1 + fi + + if [[ "${{ github.event.pull_request.draft }}" == "true" ]]; then + echo "Draft PR; backend tests skipped" + exit 0 + fi + + if [[ "${{ needs.changes.outputs.backend }}" != "true" ]]; then + echo "No backend changes detected; backend tests skipped" + exit 0 + fi + + if [[ "${{ needs.unit-tests.result }}" != "success" || + "${{ needs.integration-tests.result }}" != "success" ]]; then echo "Backend tests failed" exit 1 else From a6a0f7a373b039997ba8f1294edd30aa4cc39170 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 20 May 2026 20:03:51 +0530 Subject: [PATCH 57/63] chore: streamline GitHub Actions workflow by removing change detection job and simplifying test conditions --- .github/workflows/backend-tests.yml | 49 +++++------------------------ 1 file changed, 8 insertions(+), 41 deletions(-) diff --git a/.github/workflows/backend-tests.yml b/.github/workflows/backend-tests.yml index ec8a2e7f6..7916dcf50 100644 --- a/.github/workflows/backend-tests.yml +++ b/.github/workflows/backend-tests.yml @@ -4,36 +4,19 @@ on: pull_request: branches: [main, dev] types: [opened, synchronize, reopened, ready_for_review] + paths: + - 'surfsense_backend/**' + - '.github/workflows/backend-tests.yml' concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true jobs: - changes: - name: Changes - runs-on: ubuntu-latest - outputs: - backend: ${{ steps.filter.outputs.backend }} - - steps: - - name: Checkout code - uses: actions/checkout@v6 - - - name: Check changed files - id: filter - uses: dorny/paths-filter@v4 - with: - filters: | - backend: - - 'surfsense_backend/**' - - '.github/workflows/backend-tests.yml' - unit-tests: 
name: Unit Tests runs-on: ubuntu-latest - needs: changes - if: ${{ github.event.pull_request.draft == false && needs.changes.outputs.backend == 'true' }} + if: github.event.pull_request.draft == false env: EMBEDDING_MODEL: sentence-transformers/all-MiniLM-L6-v2 @@ -76,8 +59,7 @@ jobs: integration-tests: name: Integration Tests runs-on: ubuntu-latest - needs: changes - if: ${{ github.event.pull_request.draft == false && needs.changes.outputs.backend == 'true' }} + if: github.event.pull_request.draft == false env: EMBEDDING_MODEL: sentence-transformers/all-MiniLM-L6-v2 @@ -139,29 +121,14 @@ jobs: test-gate: name: Test Gate runs-on: ubuntu-latest - needs: [changes, unit-tests, integration-tests] + needs: [unit-tests, integration-tests] if: always() steps: - name: Check all test jobs run: | - if [[ "${{ needs.changes.result }}" == "failure" || "${{ needs.changes.result }}" == "cancelled" ]]; then - echo "Backend change detection failed" - exit 1 - fi - - if [[ "${{ github.event.pull_request.draft }}" == "true" ]]; then - echo "Draft PR; backend tests skipped" - exit 0 - fi - - if [[ "${{ needs.changes.outputs.backend }}" != "true" ]]; then - echo "No backend changes detected; backend tests skipped" - exit 0 - fi - - if [[ "${{ needs.unit-tests.result }}" != "success" || - "${{ needs.integration-tests.result }}" != "success" ]]; then + if [[ "${{ needs.unit-tests.result }}" == "failure" || + "${{ needs.integration-tests.result }}" == "failure" ]]; then echo "Backend tests failed" exit 1 else From 58a975205da99e105d92924e7ffea4820a48fa8e Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 20 May 2026 20:06:38 +0530 Subject: [PATCH 58/63] chore: add workflow file to change detection filters for backend and frontend jobs --- .github/workflows/code-quality.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/code-quality.yml b/.github/workflows/code-quality.yml index fcecdd23f..27e587794 100644 --- 
a/.github/workflows/code-quality.yml +++ b/.github/workflows/code-quality.yml @@ -150,6 +150,7 @@ jobs: filters: | backend: - 'surfsense_backend/**' + - '.github/workflows/code-quality.yml' - name: Cache dependencies if: steps.backend-changes.outputs.backend == 'true' @@ -235,8 +236,10 @@ jobs: filters: | web: - 'surfsense_web/**' + - '.github/workflows/code-quality.yml' extension: - 'surfsense_browser_extension/**' + - '.github/workflows/code-quality.yml' - name: Install dependencies for web if: steps.frontend-changes.outputs.web == 'true' From 704d1bf18f507025220c269d412fc0da72151f2e Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 20 May 2026 17:43:27 +0200 Subject: [PATCH 59/63] refactor(mcp): per-connector cache refresh on lifecycle events Collapse the invalidate + warmup pair into a single refresh_mcp_tools_cache_for_connector(connector_id, search_space_id) helper and scope live discovery to the one connector that changed instead of the whole search space. - new mcp_tool.discover_single_mcp_connector: load one connector, refresh OAuth if needed, force live MCP discovery so its cached_tools row is rewritten; returned wrappers are discarded since the in-process LRU is rebuilt lazily on the next user query - mcp_tools_cache.refresh_mcp_tools_cache_for_connector: synchronously evicts the per-space LRU (LRU keys cannot scope finer) and schedules the per-connector prefetch via loop.create_task - routes (OAuth callback, MCP POST, MCP PUT) collapse their two back-to-back calls into a single refresh call; DELETE handlers keep using bare invalidate_mcp_tools_cache (nothing to prefetch) No new automated tests: the new functions are I/O glue (DB + network) where mocked unit tests would test implementation rather than behavior. The existing 9 unit tests for the cached_tools data shape are unchanged. 
--- .../app/agents/new_chat/tools/mcp_tool.py | 88 +++++++++++++++++++ .../agents/new_chat/tools/mcp_tools_cache.py | 51 +++++++++++ .../app/routes/mcp_oauth_route.py | 21 +++-- .../routes/search_source_connectors_routes.py | 12 ++- 4 files changed, 161 insertions(+), 11 deletions(-) diff --git a/surfsense_backend/app/agents/new_chat/tools/mcp_tool.py b/surfsense_backend/app/agents/new_chat/tools/mcp_tool.py index 3d4679fb8..6c4cfb6be 100644 --- a/surfsense_backend/app/agents/new_chat/tools/mcp_tool.py +++ b/surfsense_backend/app/agents/new_chat/tools/mcp_tool.py @@ -1007,6 +1007,94 @@ def invalidate_mcp_tools_cache(search_space_id: int | None = None) -> None: _mcp_tools_cache.clear() +async def discover_single_mcp_connector(connector_id: int) -> None: + """Force live MCP discovery for one connector so its ``cached_tools`` row is fresh. + + ``_load_http_mcp_tools`` persists ``cached_tools`` as a side effect of any + live discovery; passing ``cached_tools=None`` here guarantees we go to the + network. The returned wrappers are discarded — the in-process LRU is + rebuilt lazily on the next user query. Stdio connectors are not cached and + are skipped. 
+ """ + from app.db import async_session_maker + + started = time.perf_counter() + try: + async with async_session_maker() as session: + connector = await session.get(SearchSourceConnector, connector_id) + if connector is None: + logger.info( + "discover_single_mcp_connector: connector %d not found", + connector_id, + ) + return + + cfg = connector.config or {} + server_config = cfg.get("server_config", {}) + if not server_config or not isinstance(server_config, dict): + return + + transport = server_config.get("transport", "stdio") + if transport not in ("streamable-http", "http", "sse"): + return + + if cfg.get("mcp_oauth"): + server_config = await _maybe_refresh_mcp_oauth_token( + session, connector, cfg, server_config + ) + cfg = connector.config or {} + server_config = _inject_oauth_headers(cfg, server_config) + if server_config is None: + logger.info( + "discover_single_mcp_connector: OAuth token unavailable for connector %d", + connector_id, + ) + return + + ct = ( + connector.connector_type.value + if hasattr(connector.connector_type, "value") + else str(connector.connector_type) + ) + svc_cfg = get_service_by_connector_type(ct) + allowed_tools = svc_cfg.allowed_tools if svc_cfg else [] + readonly_tools = svc_cfg.readonly_tools if svc_cfg else frozenset() + + await asyncio.wait_for( + _load_http_mcp_tools( + connector.id, + connector.name, + server_config, + trusted_tools=cfg.get("trusted_tools", []), + allowed_tools=allowed_tools, + readonly_tools=readonly_tools, + tool_name_prefix=None, + is_generic_mcp=svc_cfg is None, + bypass_internal_hitl=True, + cached_tools=None, + ), + timeout=_MCP_DISCOVERY_TIMEOUT_SECONDS, + ) + + _perf_log.info( + "[mcp_prefetch] connector=%s elapsed=%.3fs", + connector_id, + time.perf_counter() - started, + ) + except TimeoutError: + logger.warning( + "discover_single_mcp_connector: connector %d timed out after %ds", + connector_id, + _MCP_DISCOVERY_TIMEOUT_SECONDS, + ) + except Exception: + logger.warning( + 
"discover_single_mcp_connector: failed for connector %d", + connector_id, + exc_info=True, + ) + + async def load_mcp_tools( session: AsyncSession, search_space_id: int, diff --git a/surfsense_backend/app/agents/new_chat/tools/mcp_tools_cache.py b/surfsense_backend/app/agents/new_chat/tools/mcp_tools_cache.py index 3c79ed1d3..81027e1c4 100644 --- a/surfsense_backend/app/agents/new_chat/tools/mcp_tools_cache.py +++ b/surfsense_backend/app/agents/new_chat/tools/mcp_tools_cache.py @@ -2,6 +2,7 @@ from __future__ import annotations +import asyncio import logging from datetime import UTC, datetime from typing import Any @@ -14,6 +15,8 @@ from app.db import SearchSourceConnector, async_session_maker logger = logging.getLogger(__name__) +_pending_prefetch_tasks: set[asyncio.Task[None]] = set() + class CachedMCPToolDef(BaseModel): name: str @@ -92,3 +95,51 @@ async def write_cached_tools( connector_id, exc_info=True, ) + + +def refresh_mcp_tools_cache_for_connector( + connector_id: int, + search_space_id: int, +) -> None: + """Maintain the MCP tool cache after a single-connector lifecycle event. + + Synchronously evicts the in-process LRU for the connector's search space + (LRU keys are per-space, so eviction cannot be scoped finer), then schedules + a background live discovery for this connector alone so its persisted + ``cached_tools`` row is refreshed before the next user query. + + Idempotent. Eviction is best-effort; prefetch is best-effort and only runs + when an event loop is available. Neither path raises. 
+ """ + try: + from app.agents.new_chat.tools.mcp_tool import invalidate_mcp_tools_cache + + invalidate_mcp_tools_cache(search_space_id) + except Exception: + logger.debug( + "MCP in-process cache eviction skipped for space %d", + search_space_id, + exc_info=True, + ) + + try: + loop = asyncio.get_running_loop() + except RuntimeError: + return + + task = loop.create_task(_run_connector_prefetch(connector_id)) + _pending_prefetch_tasks.add(task) + task.add_done_callback(_pending_prefetch_tasks.discard) + + +async def _run_connector_prefetch(connector_id: int) -> None: + from app.agents.new_chat.tools.mcp_tool import discover_single_mcp_connector + + try: + await discover_single_mcp_connector(connector_id) + except Exception: + logger.warning( + "MCP background prefetch failed for connector_id=%d", + connector_id, + exc_info=True, + ) diff --git a/surfsense_backend/app/routes/mcp_oauth_route.py b/surfsense_backend/app/routes/mcp_oauth_route.py index 1abc1f1ec..57248d631 100644 --- a/surfsense_backend/app/routes/mcp_oauth_route.py +++ b/surfsense_backend/app/routes/mcp_oauth_route.py @@ -428,7 +428,7 @@ async def mcp_oauth_callback( await session.commit() await session.refresh(db_connector) - _invalidate_cache(space_id) + _refresh_mcp_cache(db_connector.id, space_id) logger.info( "Re-authenticated %s MCP connector %s for user %s", @@ -481,7 +481,7 @@ async def mcp_oauth_callback( detail="A connector for this service already exists.", ) from e - _invalidate_cache(space_id) + _refresh_mcp_cache(new_connector.id, space_id) logger.info( "Created %s MCP connector %s for user %s in space %s", @@ -658,10 +658,17 @@ async def reauth_mcp_service( # --------------------------------------------------------------------------- -def _invalidate_cache(space_id: int) -> None: - try: - from app.agents.new_chat.tools.mcp_tool import invalidate_mcp_tools_cache +def _refresh_mcp_cache(connector_id: int, space_id: int) -> None: + """Evict the in-process MCP tool LRU and schedule 
background prefetch. - invalidate_mcp_tools_cache(space_id) + Wraps :func:`refresh_mcp_tools_cache_for_connector` so any failure is + isolated from the OAuth response flow. + """ + try: + from app.agents.new_chat.tools.mcp_tools_cache import ( + refresh_mcp_tools_cache_for_connector, + ) + + refresh_mcp_tools_cache_for_connector(connector_id, space_id) except Exception: - logger.debug("MCP cache invalidation skipped", exc_info=True) + logger.debug("MCP cache refresh skipped", exc_info=True) diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py index e9ffb7050..1338fe16b 100644 --- a/surfsense_backend/app/routes/search_source_connectors_routes.py +++ b/surfsense_backend/app/routes/search_source_connectors_routes.py @@ -2650,9 +2650,11 @@ async def create_mcp_connector( f"for user {user.id} in search space {search_space_id}" ) - from app.agents.new_chat.tools.mcp_tool import invalidate_mcp_tools_cache + from app.agents.new_chat.tools.mcp_tools_cache import ( + refresh_mcp_tools_cache_for_connector, + ) - invalidate_mcp_tools_cache(search_space_id) + refresh_mcp_tools_cache_for_connector(db_connector.id, search_space_id) connector_read = SearchSourceConnectorRead.model_validate(db_connector) return MCPConnectorRead.from_connector(connector_read) @@ -2828,9 +2830,11 @@ async def update_mcp_connector( logger.info(f"Updated MCP connector {connector_id}") - from app.agents.new_chat.tools.mcp_tool import invalidate_mcp_tools_cache + from app.agents.new_chat.tools.mcp_tools_cache import ( + refresh_mcp_tools_cache_for_connector, + ) - invalidate_mcp_tools_cache(connector.search_space_id) + refresh_mcp_tools_cache_for_connector(connector.id, connector.search_space_id) connector_read = SearchSourceConnectorRead.model_validate(connector) return MCPConnectorRead.from_connector(connector_read) From 2be3f04df50e00c980c6bf388ec8d16cdf7f6aa8 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 20 
May 2026 19:11:00 +0200 Subject: [PATCH 60/63] chore(scripts): drop one-off MCP session lifetime probe The probe answered its question (informing the cached_tools persistence design). Future MCP session-pooling work, if revived, can recreate it. --- .../scripts/probe_mcp_session_lifetime.py | 563 ------------------ 1 file changed, 563 deletions(-) delete mode 100644 surfsense_backend/scripts/probe_mcp_session_lifetime.py diff --git a/surfsense_backend/scripts/probe_mcp_session_lifetime.py b/surfsense_backend/scripts/probe_mcp_session_lifetime.py deleted file mode 100644 index 66be5bc14..000000000 --- a/surfsense_backend/scripts/probe_mcp_session_lifetime.py +++ /dev/null @@ -1,563 +0,0 @@ -"""Probe MCP server session lifetime / staleness behavior — read-only. - -Goal ----- -Empirically answer two questions for our actual third-party MCP servers -(Atlassian, Linear, Slack, ClickUp, Airtable, ...): - -1. How expensive is the initial ``initialize`` handshake (``init=`` cost)? -2. How long can a ``ClientSession`` sit idle and still survive a - subsequent ``list_tools()`` call? - -This script informs the design choice between - -* per-call sessions (current, ~1s init tax per call), -* per-turn session reuse (LangChain-style, holds a session for the - duration of a chat turn), -* a long-lived session pool (IBM-style, sessions reused across turns). - -The probe is read-only: it only ever calls ``session.list_tools()``, -which is the safest MCP method. No tool calls against user data are -performed. - -Usage ------ -Run from the repo root or from ``surfsense_backend/``:: - - uv run python -m scripts.probe_mcp_session_lifetime - uv run python -m scripts.probe_mcp_session_lifetime --quick - uv run python -m scripts.probe_mcp_session_lifetime --connectors 7,19,20 - uv run python -m scripts.probe_mcp_session_lifetime --intervals 5,30,60,300 - -Output ------- -* Live progress to stderr (``[connector=7 t=+30s] OK 0.142s``). -* Final per-connector table to stdout. 
-* Raw results JSON to ``./mcp_session_probe_.json``. - -The default test reaches 1800s of idle (~30 min). Use ``--quick`` to -stop at 60s for fast iteration. All connectors probe concurrently so -total wall-clock time equals the longest interval, not the sum. -""" - -from __future__ import annotations - -import argparse -import asyncio -import json -import logging -import os -import sys -import time -from dataclasses import asdict, dataclass, field -from datetime import datetime -from typing import Any - -_HERE = os.path.dirname(os.path.abspath(__file__)) -_BACKEND_ROOT = os.path.dirname(_HERE) -if _BACKEND_ROOT not in sys.path: - sys.path.insert(0, _BACKEND_ROOT) - -import httpx # noqa: E402 -from mcp import ClientSession # noqa: E402 -from mcp.client.streamable_http import streamable_http_client # noqa: E402 -from sqlalchemy import cast, select # noqa: E402 -from sqlalchemy.dialects.postgresql import JSONB # noqa: E402 - -from app.agents.new_chat.tools.mcp_tool import ( # noqa: E402 - _inject_oauth_headers, - _maybe_refresh_mcp_oauth_token, -) -from app.db import SearchSourceConnector, async_session_maker # noqa: E402 - -logging.basicConfig( - level=logging.WARNING, - format="%(asctime)s | %(levelname)-8s | %(name)s | %(message)s", - stream=sys.stderr, -) -logging.getLogger("httpx").setLevel(logging.ERROR) -logging.getLogger("mcp").setLevel(logging.ERROR) -logger = logging.getLogger("mcp_probe") -logger.setLevel(logging.INFO) - - -DEFAULT_INTERVALS_SECONDS = [5, 30, 60, 300, 900, 1800] -QUICK_INTERVALS_SECONDS = [5, 30, 60] -PER_CALL_TIMEOUT_SECONDS = 60.0 - - -@dataclass -class CheckpointResult: - """One ``list_tools()`` call against a long-lived session.""" - - idle_seconds_target: int - elapsed_since_open_seconds: float - elapsed_since_last_call_seconds: float - success: bool - latency_seconds: float | None - tools_returned: int | None - error_type: str | None - error_message: str | None - - -@dataclass -class ConnectorProbeResult: - """Per-connector 
aggregated probe outcome.""" - - connector_id: int - connector_name: str - connector_type: str - url: str - init_latency_seconds: float | None - first_call_latency_seconds: float | None - checkpoints: list[CheckpointResult] = field(default_factory=list) - fatal_error: str | None = None - - -# --------------------------------------------------------------------------- -# Connector loading + auth -# --------------------------------------------------------------------------- - - -async def _fetch_connectors( - connector_ids: list[int] | None, -) -> list[SearchSourceConnector]: - """Pull every MCP-shaped connector (or only the requested IDs).""" - async with async_session_maker() as session: - stmt = select(SearchSourceConnector).filter( - cast(SearchSourceConnector.config, JSONB).has_key("server_config"), - ) - if connector_ids: - stmt = stmt.filter(SearchSourceConnector.id.in_(connector_ids)) - result = await session.execute(stmt) - connectors = list(result.scalars()) - - if connector_ids: - found_ids = {c.id for c in connectors} - missing = [cid for cid in connector_ids if cid not in found_ids] - if missing: - logger.warning("Requested connector IDs not found: %s", missing) - return connectors - - -async def _resolve_authed_server_config( - connector: SearchSourceConnector, -) -> dict[str, Any] | None: - """Refresh OAuth (if needed) and return a server_config with auth headers. - - Returns ``None`` if the connector cannot be probed (missing url, - decrypt failure, no refresh token, etc.). 
- """ - cfg = connector.config or {} - server_config = cfg.get("server_config", {}) - if not isinstance(server_config, dict): - return None - - if cfg.get("mcp_oauth"): - async with async_session_maker() as session: - attached = await session.get(SearchSourceConnector, connector.id) - if attached is None: - return None - refreshed = await _maybe_refresh_mcp_oauth_token( - session, - attached, - attached.config or {}, - server_config, - ) - attached_cfg = attached.config or {} - server_config = _inject_oauth_headers(attached_cfg, refreshed) - if server_config is None: - return None - return server_config - - -# --------------------------------------------------------------------------- -# The actual probe -# --------------------------------------------------------------------------- - - -def _classify_error(exc: BaseException) -> tuple[str, str]: - """Return ``(short_label, human_message)`` for a failed call.""" - name = type(exc).__name__ - msg = str(exc) or repr(exc) - if isinstance(exc, asyncio.TimeoutError): - return "timeout", f"call exceeded {PER_CALL_TIMEOUT_SECONDS}s" - if "404" in msg or "Not Found" in msg or "session" in msg.lower(): - return "session_expired", msg - if "401" in msg or "Unauthorized" in msg: - return "auth_401", msg - if "ClosedResourceError" in name or "Closed" in name: - return "stream_closed", msg - if "Connection" in name or "ConnectError" in name: - return "connection_error", msg - return name, msg - - -async def _probe_one_connector( - connector: SearchSourceConnector, - intervals: list[int], -) -> ConnectorProbeResult: - """Open a single long-lived session, call ``list_tools`` at each interval.""" - connector_type = ( - connector.connector_type.value - if hasattr(connector.connector_type, "value") - else str(connector.connector_type) - ) - server_config = await _resolve_authed_server_config(connector) - if server_config is None: - return ConnectorProbeResult( - connector_id=connector.id, - connector_name=connector.name, - 
connector_type=connector_type, - url="(unresolved)", - init_latency_seconds=None, - first_call_latency_seconds=None, - fatal_error="failed_to_resolve_server_config", - ) - - url = server_config.get("url") - headers = server_config.get("headers", {}) - if not url: - return ConnectorProbeResult( - connector_id=connector.id, - connector_name=connector.name, - connector_type=connector_type, - url="(missing)", - init_latency_seconds=None, - first_call_latency_seconds=None, - fatal_error="missing_url", - ) - - transport = server_config.get("transport", "streamable-http") - if transport not in ("streamable-http", "http", "sse"): - return ConnectorProbeResult( - connector_id=connector.id, - connector_name=connector.name, - connector_type=connector_type, - url=url, - init_latency_seconds=None, - first_call_latency_seconds=None, - fatal_error=f"unsupported_transport:{transport}", - ) - - result = ConnectorProbeResult( - connector_id=connector.id, - connector_name=connector.name, - connector_type=connector_type, - url=url, - init_latency_seconds=None, - first_call_latency_seconds=None, - ) - - open_started = time.perf_counter() - last_call_at: float | None = None - - # Manually drive the context-manager protocol so the session lives - # across our sleep intervals. ``streamable_http_client`` spawns a - # background task for the SSE receive loop; ``ClientSession`` spawns - # another for request multiplexing. We must close them in reverse order. 
- http_client = httpx.AsyncClient(headers=headers, timeout=PER_CALL_TIMEOUT_SECONDS) - transport_cm = None - session_cm = None - session = None - try: - transport_cm = streamable_http_client(url, http_client=http_client) - read, write, _ = await transport_cm.__aenter__() - session_cm = ClientSession(read, write) - session = await session_cm.__aenter__() - - init_start = time.perf_counter() - await asyncio.wait_for(session.initialize(), timeout=PER_CALL_TIMEOUT_SECONDS) - result.init_latency_seconds = time.perf_counter() - init_start - logger.info( - "[connector=%s name=%r] init=%.3fs", - connector.id, - connector.name, - result.init_latency_seconds, - ) - - first_call_start = time.perf_counter() - first_response = await asyncio.wait_for( - session.list_tools(), timeout=PER_CALL_TIMEOUT_SECONDS - ) - result.first_call_latency_seconds = time.perf_counter() - first_call_start - last_call_at = time.perf_counter() - logger.info( - "[connector=%s name=%r] first_call=%.3fs tools=%d", - connector.id, - connector.name, - result.first_call_latency_seconds, - len(first_response.tools), - ) - - for interval in intervals: - target_elapsed = open_started + ( - result.init_latency_seconds + result.first_call_latency_seconds + interval - ) - sleep_for = max(0.0, target_elapsed - time.perf_counter()) - await asyncio.sleep(sleep_for) - - call_start = time.perf_counter() - elapsed_since_open = call_start - open_started - elapsed_since_last = call_start - (last_call_at or call_start) - try: - response = await asyncio.wait_for( - session.list_tools(), timeout=PER_CALL_TIMEOUT_SECONDS - ) - latency = time.perf_counter() - call_start - last_call_at = time.perf_counter() - checkpoint = CheckpointResult( - idle_seconds_target=interval, - elapsed_since_open_seconds=round(elapsed_since_open, 3), - elapsed_since_last_call_seconds=round(elapsed_since_last, 3), - success=True, - latency_seconds=round(latency, 3), - tools_returned=len(response.tools), - error_type=None, - error_message=None, - ) 
- logger.info( - "[connector=%s t=+%ds] OK %.3fs (tools=%d)", - connector.id, - interval, - latency, - len(response.tools), - ) - result.checkpoints.append(checkpoint) - except Exception as exc: # noqa: BLE001 - label, msg = _classify_error(exc) - latency_at_failure = time.perf_counter() - call_start - checkpoint = CheckpointResult( - idle_seconds_target=interval, - elapsed_since_open_seconds=round(elapsed_since_open, 3), - elapsed_since_last_call_seconds=round(elapsed_since_last, 3), - success=False, - latency_seconds=round(latency_at_failure, 3), - tools_returned=None, - error_type=label, - error_message=msg[:300], - ) - logger.warning( - "[connector=%s t=+%ds] FAILED %s after %.3fs: %s", - connector.id, - interval, - label, - latency_at_failure, - msg[:200], - ) - result.checkpoints.append(checkpoint) - # Session is presumed dead — further checkpoints would all - # fail the same way and just waste wall time. - break - - except Exception as exc: # noqa: BLE001 - label, msg = _classify_error(exc) - result.fatal_error = f"{label}: {msg[:200]}" - logger.exception( - "[connector=%s] fatal during open/init: %s", - connector.id, - exc, - ) - finally: - if session_cm is not None: - try: - await session_cm.__aexit__(None, None, None) - except Exception: - pass - if transport_cm is not None: - try: - await transport_cm.__aexit__(None, None, None) - except Exception: - pass - try: - await http_client.aclose() - except Exception: - pass - - return result - - -# --------------------------------------------------------------------------- -# Reporting -# --------------------------------------------------------------------------- - - -def _render_table(results: list[ConnectorProbeResult]) -> str: - """Pretty-print a per-connector summary suitable for the terminal.""" - lines: list[str] = [] - lines.append("=" * 100) - lines.append("MCP Session Lifetime Probe Results") - lines.append("=" * 100) - - for result in results: - lines.append("") - lines.append( - f"Connector 
{result.connector_id} | {result.connector_type} | " - f"{result.connector_name!r}" - ) - lines.append(f" url: {result.url}") - if result.fatal_error: - lines.append(f" FATAL: {result.fatal_error}") - continue - lines.append( - f" init handshake: " - f"{result.init_latency_seconds:.3f}s" - if result.init_latency_seconds is not None - else " init handshake: (failed)" - ) - lines.append( - f" first list_tools (cold): " - f"{result.first_call_latency_seconds:.3f}s" - if result.first_call_latency_seconds is not None - else " first list_tools: (failed)" - ) - if not result.checkpoints: - lines.append(" (no idle checkpoints recorded)") - continue - lines.append( - f" {'idle_s':>8} | {'since_last':>10} | {'outcome':>16} | " - f"{'latency':>9} | {'tools':>5}" - ) - for cp in result.checkpoints: - outcome = "OK" if cp.success else (cp.error_type or "FAIL") - latency = f"{cp.latency_seconds:.3f}s" if cp.latency_seconds is not None else "-" - tools = str(cp.tools_returned) if cp.tools_returned is not None else "-" - lines.append( - f" {cp.idle_seconds_target:>8} | " - f"{cp.elapsed_since_last_call_seconds:>10.1f} | " - f"{outcome:>16} | " - f"{latency:>9} | " - f"{tools:>5}" - ) - - lines.append("") - lines.append("=" * 100) - lines.append("Summary") - lines.append("=" * 100) - survived: dict[int, list[int]] = {} - for result in results: - for cp in result.checkpoints: - if cp.success: - survived.setdefault(cp.idle_seconds_target, []).append( - result.connector_id - ) - if survived: - for interval in sorted(survived): - ids = sorted(survived[interval]) - lines.append( - f" Idle {interval:>5}s: {len(ids)}/{len(results)} connectors " - f"survived ({ids})" - ) - else: - lines.append(" (no successful checkpoints)") - return "\n".join(lines) - - -# --------------------------------------------------------------------------- -# Entry point -# --------------------------------------------------------------------------- - - -def _parse_int_list(value: str) -> list[int]: - return [int(x) 
for x in value.split(",") if x.strip()] - - -def _parse_args() -> argparse.Namespace: - parser = argparse.ArgumentParser( - description="Probe MCP server session lifetime (read-only)", - ) - parser.add_argument( - "--connectors", - type=_parse_int_list, - default=None, - help="Comma-separated connector IDs to probe. Default: all MCP connectors.", - ) - parser.add_argument( - "--intervals", - type=_parse_int_list, - default=None, - help="Comma-separated idle intervals in seconds. " - f"Default: {DEFAULT_INTERVALS_SECONDS}", - ) - parser.add_argument( - "--quick", - action="store_true", - help=f"Short run (intervals={QUICK_INTERVALS_SECONDS}) for fast iteration.", - ) - parser.add_argument( - "--output", - type=str, - default=None, - help="Optional path for the raw JSON results.", - ) - return parser.parse_args() - - -async def _async_main() -> int: - args = _parse_args() - if args.intervals is not None: - intervals = args.intervals - elif args.quick: - intervals = QUICK_INTERVALS_SECONDS - else: - intervals = DEFAULT_INTERVALS_SECONDS - - longest = max(intervals) if intervals else 0 - logger.info( - "Probing intervals=%s (longest=%ds, ~%dmin total wall time)", - intervals, - longest, - (longest + 30) // 60, - ) - - connectors = await _fetch_connectors(args.connectors) - if not connectors: - logger.error("No MCP connectors found to probe.") - return 2 - logger.info( - "Probing %d connector(s): %s", - len(connectors), - [f"{c.id}:{c.name}" for c in connectors], - ) - - started_at = time.time() - results = await asyncio.gather( - *[_probe_one_connector(c, intervals) for c in connectors], - return_exceptions=False, - ) - elapsed = time.time() - started_at - logger.info("All probes complete in %.1fs", elapsed) - - table = _render_table(results) - print(table) - - output_path = ( - args.output - or f"mcp_session_probe_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json" - ) - with open(output_path, "w", encoding="utf-8") as fp: - json.dump( - { - "started_at": 
datetime.fromtimestamp(started_at).isoformat(), - "elapsed_seconds": round(elapsed, 1), - "intervals_tested": intervals, - "results": [asdict(r) for r in results], - }, - fp, - indent=2, - ) - logger.info("Raw results saved to %s", output_path) - return 0 - - -def main() -> None: - try: - exit_code = asyncio.run(_async_main()) - except KeyboardInterrupt: - logger.warning("Interrupted by user") - exit_code = 130 - sys.exit(exit_code) - - -if __name__ == "__main__": - main() From 1a4400c923ae390dd5fdd21172bb2b905475e2f2 Mon Sep 17 00:00:00 2001 From: "DESKTOP-RTLN3BA\\$punk" Date: Thu, 21 May 2026 13:37:55 -0700 Subject: [PATCH 61/63] refactor(env): streamline BACKEND_URL usage in GoogleLoginButton and DocumentTabContent; update connector-status-config for Composio Google Drive connector maintenance --- surfsense_web/app/(home)/login/GoogleLoginButton.tsx | 5 +---- .../connector-popup/config/connector-status-config.json | 5 +++++ .../components/layout/ui/tabs/DocumentTabContent.tsx | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/surfsense_web/app/(home)/login/GoogleLoginButton.tsx b/surfsense_web/app/(home)/login/GoogleLoginButton.tsx index 2e5785f80..7e703a70c 100644 --- a/surfsense_web/app/(home)/login/GoogleLoginButton.tsx +++ b/surfsense_web/app/(home)/login/GoogleLoginButton.tsx @@ -5,7 +5,7 @@ import { Logo } from "@/components/Logo"; import { Button } from "@/components/ui/button"; import { trackLoginAttempt } from "@/lib/posthog/events"; import { AmbientBackground } from "./AmbientBackground"; -<<<<<<< HEAD +import { BACKEND_URL } from "@/lib/env-config"; function GoogleGLogo({ className }: { className?: string }) { return ( @@ -35,9 +35,6 @@ function GoogleGLogo({ className }: { className?: string }) { ); } -======= -import { BACKEND_URL } from "@/lib/env-config"; ->>>>>>> 1127aedb4 (refactor(env): replace inline process.env reads with BACKEND_URL in editor, chat, dashboard and settings) export function GoogleLoginButton() { const t = 
useTranslations("auth"); const [isRedirecting, setIsRedirecting] = useState(false); diff --git a/surfsense_web/components/assistant-ui/connector-popup/config/connector-status-config.json b/surfsense_web/components/assistant-ui/connector-popup/config/connector-status-config.json index b4e85eab0..466446da9 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/config/connector-status-config.json +++ b/surfsense_web/components/assistant-ui/connector-popup/config/connector-status-config.json @@ -19,6 +19,11 @@ "enabled": false, "status": "maintenance", "statusMessage": "Rework in progress." + }, + "COMPOSIO_GOOGLE_DRIVE_CONNECTOR": { + "enabled": false, + "status": "maintenance", + "statusMessage": "Temporarily unavailable due to an upstream Composio bug (ComposioHQ/composio#3471) that returns malformed presigned URLs for Drive file downloads. Use the native Google Drive connector in the meantime." } }, "globalSettings": { diff --git a/surfsense_web/components/layout/ui/tabs/DocumentTabContent.tsx b/surfsense_web/components/layout/ui/tabs/DocumentTabContent.tsx index 20edc5de3..ef51eee3c 100644 --- a/surfsense_web/components/layout/ui/tabs/DocumentTabContent.tsx +++ b/surfsense_web/components/layout/ui/tabs/DocumentTabContent.tsx @@ -10,7 +10,7 @@ import { Alert, AlertDescription } from "@/components/ui/alert"; import { Button } from "@/components/ui/button"; import { Spinner } from "@/components/ui/spinner"; import { authenticatedFetch, getBearerToken, redirectToLogin } from "@/lib/auth-utils"; -import { BACKEND_URL, BACKEND_URL } from "@/lib/env-config"; +import { BACKEND_URL } from "@/lib/env-config"; const LARGE_DOCUMENT_THRESHOLD = 2 * 1024 * 1024; // 2MB interface DocumentContent { From cacb27e007cfbd45027eb0dad6da4982ed74eabb Mon Sep 17 00:00:00 2001 From: "DESKTOP-RTLN3BA\\$punk" Date: Thu, 21 May 2026 14:41:32 -0700 Subject: [PATCH 62/63] fix: citations in agent responses --- .../system_prompt/prompts/citations/on.md | 45 ++++++++++++++++--- 
.../knowledge_base/description_readonly.md | 2 +- .../knowledge_base/system_prompt_cloud.md | 37 +++++++++++++++ .../knowledge_base/system_prompt_desktop.md | 4 ++ .../system_prompt_readonly_cloud.md | 39 ++++++++++++++++ .../system_prompt_readonly_desktop.md | 4 ++ 6 files changed, 123 insertions(+), 8 deletions(-) diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md index b200f7a9a..e61a0bffb 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md @@ -1,11 +1,42 @@ -Apply chunk citations only when the runtime injects `` / -`` blocks. +Citations reach the answer through two channels. Use whichever applies — and +never invent ids you didn't see. Citation ids are resolved by exact-match +lookup; a wrong id silently breaks the link, so when in doubt, omit. + +### Channel A — chunk blocks injected this turn +When `search_surfsense_docs` or `web_search` returns `` / +`` blocks in this turn: 1. For each factual statement taken from those chunks, add - `[citation:chunk_id]` using the exact id from ``. -2. Multiple chunks → `[citation:id1], [citation:id2]` (comma-separated). -3. Never invent or normalise ids; if unsure, omit. -4. Plain brackets only — no markdown links, no footnote numbering. -5. If no chunk-tagged documents appear this turn, do not fabricate citations. + `[citation:chunk_id]` using the **exact** id from a visible + `` tag. Copy digit-for-digit (or the URL verbatim); + do not retype from memory. +2. `` is the parent doc id, **not** a citation source — + only ids inside `` count. +3. Multiple chunks → `[citation:id1], [citation:id2]` (comma-separated, + each id copied individually). +4. Never invent, normalise, or guess at adjacent ids; if unsure, omit. +5. 
Plain brackets only — no markdown links, no footnote numbering. + +### Channel B — citations relayed by a `task` specialist +A `task(...)` tool message may contain `[citation:chunk_id]` markers +the specialist already attached to its prose. The specialist saw the +underlying `<chunk id='…'>` blocks; you didn't. So: + +1. **Preserve those markers verbatim** in your final answer — do not + reformat, renumber, drop, or wrap them in markdown links. When you + paraphrase a specialist sentence, copy the marker character-for- + character; do not regenerate the id from memory (LLMs reliably + corrupt nearby digits). +2. Keep each marker attached to the sentence the specialist attached + it to. +3. Do **not** add new `[citation:…]` markers of your own to a + specialist's prose; if a fact has no marker, the specialist + couldn't tie it to a chunk and neither can you. +4. When a specialist returns JSON, the citation markers live inside + the prose-bearing fields (e.g. a summary or excerpt). Pull them + along with the surrounding sentence when you quote. + +If neither channel surfaces citation markers this turn, do not fabricate +them. diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/description_readonly.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/description_readonly.md index d6837ec92..e989e3ee6 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/description_readonly.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/description_readonly.md @@ -2,4 +2,4 @@ Read-only specialist for the user's workspace (documents and folders). Use to fi Pass your full question as one string. The specialist runs in isolation: it cannot see this thread, so include any path hints, filters, or constraints it needs. -The specialist returns plain prose with absolute paths. 
+The specialist returns plain prose with absolute paths and `[citation:]` markers when claims came from KB-indexed chunks. Preserve those markers verbatim if you forward the answer. diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md index 514ec6639..2ae21c271 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md @@ -35,6 +35,43 @@ Map outcomes to your `status`: You construct the structured `evidence` fields from your own knowledge of what you called and what you observed — the tools do not return them. Never report values you did not actually see. +## Chunk citations in your prose + +When `read_file` returns a KB-indexed document under `/documents/`, the response includes `` blocks. Whenever a fact in your `action_summary` or `evidence.content_excerpt` came from a specific chunk, append `[citation:]` to the sentence stating that fact, using the **exact** id from the `` tag. The caller relays these markers to the end user verbatim, and the UI resolves each id by exact match against the database, so a wrong id silently breaks the citation. + +### Where chunk ids live in `read_file` output + +A KB document's XML has three numeric attributes — only **one** is a citation source: + +``` + + + 42 ← NOT a citation. Parent doc id; ignore for citations. + ... + + + ← Index hint; the same id also appears below. + + + + ← This is the citation source. + + + +``` + +### Rules + +- Use the **exact** id from a `` tag whose content you actually quoted or paraphrased. Copy digit-for-digit; do **not** retype from memory. 
+- Before emitting `[citation:N]`, confirm the literal substring `<chunk id='N'>` (or its index twin `chunk_id="N"`) appears in the tool result you are summarising this turn. If you can't see it, omit the citation. +- Never cite `<document_id>` — that's the parent doc, not a chunk. +- Never invent, normalise, shorten, or guess at adjacent ids. If unsure between two candidates, omit rather than pick. +- Prefer **fewer accurate citations** over many speculative ones. +- Multiple chunks supporting the same point → comma-separated and copied individually: `[citation:128], [citation:129]`. +- Plain square brackets only — no markdown links, no parentheses, no footnote numbers. +- Tool results without `<chunk id='…'>` (write/edit/move confirmations, `ls` / `glob` / `grep` listings, error strings) carry no chunk id and need none. +- Populate `evidence.chunk_ids` with **only** ids you actually emitted in `[citation:…]` markers — same set, same digits. + ## Examples **Example 1 — happy path write (path discovered from existing convention):** diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md index bfa96ee5b..4e5465aaf 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md @@ -35,6 +35,10 @@ Map outcomes to your `status`: You construct the structured `evidence` fields from your own knowledge of what you called and what you observed — the tools do not return them. `chunk_ids` apply only to `` hits; for local-file operations leave them `null`. Never report values you did not actually see. +## Chunk citations in your prose + +In desktop mode your filesystem tools read local files only, and local-file tool results do **not** carry `<chunk id='…'>` tags. 
Do not emit `[citation:…]` markers in `action_summary` or `evidence.content_excerpt`, and leave `evidence.chunk_ids` `null` — the absolute path is the only reference for local-file work. + ## Examples **Example 1 — happy path write (path discovered from existing convention):** diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md index 3abfcd8b9..c7813e71d 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md @@ -27,3 +27,42 @@ Reply in plain prose: - Cite every claim with an absolute path under `/documents/`. - If the workspace does not contain the requested information, say so explicitly. Do not fabricate paths or content. - If the question is genuinely ambiguous after a thorough lookup, list the candidates with their paths and stop. + +## Chunk citations + +When the evidence for a claim came from a `read_file` response that included `` blocks (i.e. a KB-indexed document under `/documents/`), append `[citation:]` to the sentence stating that claim. The caller passes these markers through to the end user verbatim, and the UI resolves each id by exact match against the database, so a wrong id silently breaks the citation. + +### Where chunk ids live in `read_file` output + +A KB document's XML has three numeric attributes — only **one** is a citation source: + +``` + + + 42 ← NOT a citation. Parent doc id; ignore for citations. + ... + + + ← Index hint; the same id also appears below. + + + + ← This is the citation source. + + + +``` + +### Rules + +- Use the **exact** id from a `` tag whose content you actually quoted or paraphrased. Copy digit-for-digit; do **not** retype from memory. 
+- Before emitting `[citation:N]`, confirm the literal substring `<chunk id='N'>` (or its index twin `chunk_id="N"`) appears in the tool result you are summarising this turn. If you can't see it, omit the citation. +- Never cite `<document_id>` — that's the parent doc, not a chunk. +- Never invent, normalise, shorten, or guess at adjacent ids. If unsure between two candidates, omit rather than pick. +- Prefer **fewer accurate citations** over many speculative ones. One correct `[citation:128]` is more useful than a string of wrong ids. +- Multiple chunks supporting the same point → comma-separated and copied individually: `[citation:128], [citation:129]`. +- Plain square brackets only — no markdown links, no parentheses, no footnote numbers. +- If a claim came from a tool result that did **not** carry a chunk id (`ls`, `glob`, `grep` listings, error strings, or files without `<chunk id='…'>`), skip the citation. +- The absolute path under `/documents/` is always required; chunk citations are additive, they do not replace the path reference. + +Example: `The Q2 roadmap lists three milestones (/documents/planning/q2-roadmap.md) [citation:128], [citation:129].` diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_desktop.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_desktop.md index 1b3d72b64..2ea711e44 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_desktop.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_desktop.md @@ -28,3 +28,7 @@ Reply in plain prose: - Cite every claim with an absolute path. - If the workspace does not contain the requested information, say so explicitly. Do not fabricate paths or content. - If the question is genuinely ambiguous after a thorough lookup, list the candidates with their paths and stop. 
+ +## Chunk citations + +In desktop mode your filesystem tools read local files only, and local-file `read_file` responses do **not** carry `<chunk id='…'>` tags. Cite each claim with the absolute local path; do not emit `[citation:…]` markers — your caller has nothing to resolve them against. From 2e589091d85d9ee3fd3169d7980d31846321b95a Mon Sep 17 00:00:00 2001 From: "DESKTOP-RTLN3BA\\$punk" Date: Thu, 21 May 2026 14:44:33 -0700 Subject: [PATCH 63/63] feat: bumped version to 0.0.25 --- VERSION | 2 +- surfsense_backend/pyproject.toml | 2 +- surfsense_backend/uv.lock | 2 +- surfsense_browser_extension/package.json | 2 +- surfsense_desktop/package.json | 2 +- surfsense_web/package.json | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/VERSION b/VERSION index b056f4120..2678ff8d6 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.0.24 +0.0.25 diff --git a/surfsense_backend/pyproject.toml b/surfsense_backend/pyproject.toml index 26fee1bc3..cd2a6921a 100644 --- a/surfsense_backend/pyproject.toml +++ b/surfsense_backend/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "surf-new-backend" -version = "0.0.24" +version = "0.0.25" description = "SurfSense Backend" requires-python = ">=3.12" dependencies = [ diff --git a/surfsense_backend/uv.lock b/surfsense_backend/uv.lock index c4e6b5c89..953aebbef 100644 --- a/surfsense_backend/uv.lock +++ b/surfsense_backend/uv.lock @@ -7947,7 +7947,7 @@ wheels = [ [[package]] name = "surf-new-backend" -version = "0.0.24" +version = "0.0.25" source = { editable = "." 
} dependencies = [ { name = "alembic" }, diff --git a/surfsense_browser_extension/package.json b/surfsense_browser_extension/package.json index 028e653b3..2f17899a8 100644 --- a/surfsense_browser_extension/package.json +++ b/surfsense_browser_extension/package.json @@ -1,7 +1,7 @@ { "name": "surfsense_browser_extension", "displayName": "Surfsense Browser Extension", - "version": "0.0.24", + "version": "0.0.25", "description": "Extension to collect Browsing History for SurfSense.", "author": "https://github.com/MODSetter", "engines": { diff --git a/surfsense_desktop/package.json b/surfsense_desktop/package.json index 68032e9f4..0ad279ece 100644 --- a/surfsense_desktop/package.json +++ b/surfsense_desktop/package.json @@ -1,6 +1,6 @@ { "name": "surfsense-desktop", - "version": "0.0.24", + "version": "0.0.25", "description": "SurfSense Desktop App", "main": "dist/main.js", "scripts": { diff --git a/surfsense_web/package.json b/surfsense_web/package.json index 640d5c207..213adbaad 100644 --- a/surfsense_web/package.json +++ b/surfsense_web/package.json @@ -1,6 +1,6 @@ { "name": "surfsense_web", - "version": "0.0.24", + "version": "0.0.25", "private": true, "packageManager": "pnpm@10.26.0", "description": "SurfSense Frontend",