diff --git a/surfsense_backend/app/agents/new_chat/memory_extraction.py b/surfsense_backend/app/agents/new_chat/memory_extraction.py deleted file mode 100644 index 5fcd0fb08..000000000 --- a/surfsense_backend/app/agents/new_chat/memory_extraction.py +++ /dev/null @@ -1,223 +0,0 @@ -"""Post-response memory extraction for the SurfSense agent. - -After each agent response, a background task calls a lightweight LLM to decide -whether the user's message contains any long-term information worth persisting -(preferences, background, goals, instructions, etc.). This ensures memory -updates are never missed regardless of whether the main agent called -``update_memory`` during the conversation. - -The function re-reads memory from the database so it always sees the latest -state — including any updates the agent may have already made. -""" - -from __future__ import annotations - -import logging -from typing import Any -from uuid import UUID - -from langchain_core.messages import HumanMessage -from sqlalchemy import select - -from app.agents.new_chat.tools.update_memory import _save_memory -from app.db import ChatVisibility, SearchSpace, User, shielded_async_session - -logger = logging.getLogger(__name__) - -_MEMORY_EXTRACT_PROMPT = """\ -You are a memory extraction assistant. Analyze the user's message and decide \ -if it contains any long-term information worth persisting to memory. - -Worth remembering: preferences, background/identity, goals, projects, \ -instructions, tools/languages they use, decisions, expertise, workplace. - -NOT worth remembering: greetings, one-off factual questions, session \ -logistics, ephemeral requests, follow-up clarifications with no new personal info. - -If the message contains memorizable information, output the FULL updated \ -memory document with the new facts merged into the existing content. Follow \ -these rules: -- Use the same ## section structure as the existing memory. -- Keep entries as single concise bullet points (under 120 chars each). -- Add (YYYY-MM) date suffixes on time-sensitive entries. -- Never remove or modify sections marked with (pinned). -- If a new fact contradicts an existing entry, update the existing entry. -- Do not duplicate information that is already present. -- Standard sections: \ -"## About the user (pinned)", "## Preferences", "## Instructions (pinned)", \ -"## Current context" - -If nothing is worth remembering, output exactly: NO_UPDATE - - -{current_memory} - - - -{user_message} -""" - -_TEAM_MEMORY_EXTRACT_PROMPT = """\ -You are a memory extraction assistant for a team workspace. Analyze the \ -user's message and decide if it contains any long-term team information \ -worth persisting to the shared memory. - -Worth remembering: team decisions, conventions, coding standards, key facts \ -about the project/team, processes, architecture decisions. - -NOT worth remembering: greetings, personal preferences, one-off questions, \ -ephemeral requests. - -If the message contains memorizable information, output the FULL updated \ -memory document with the new facts merged into the existing content. Follow \ -these rules: -- Use the same ## section structure as the existing memory. -- Keep entries as single concise bullet points (under 120 chars each). -- Add (YYYY-MM) date suffixes on time-sensitive entries. -- Never remove or modify sections marked with (pinned). -- Standard sections: \ -"## Team decisions (pinned)", "## Conventions (pinned)", "## Key facts", \ -"## Current priorities" - -If nothing is worth remembering, output exactly: NO_UPDATE - - -{current_memory} - - - -{user_message} -""" - - -async def _call_extraction_llm( - llm: Any, - prompt_template: str, - current_memory: str, - user_message: str, -) -> str | None: - """Run the extraction LLM and return the updated memory, or ``None``.""" - prompt = prompt_template.format( - current_memory=current_memory or "(empty)", - user_message=user_message, - ) - response = await llm.ainvoke( - [HumanMessage(content=prompt)], - config={"tags": ["surfsense:internal", "memory-extraction"]}, - ) - text = ( - response.content if isinstance(response.content, str) else str(response.content) - ).strip() - - if text == "NO_UPDATE" or not text: - return None - return text - - -async def extract_and_save_memory( - *, - user_message: str, - user_id: str | None, - search_space_id: int, - thread_visibility: ChatVisibility | None, - llm: Any, -) -> None: - """Background task: extract memorizable info and persist it. - - This function is designed to be fire-and-forget — it catches all - exceptions internally and never propagates them. - """ - if not user_id: - return - - visibility = thread_visibility or ChatVisibility.PRIVATE - - try: - await _extract_user_memory(user_message, user_id, llm) - except Exception: - logger.exception("Background user memory extraction failed") - - if visibility == ChatVisibility.SEARCH_SPACE: - try: - await _extract_team_memory(user_message, search_space_id, llm) - except Exception: - logger.exception("Background team memory extraction failed") - - -async def _extract_user_memory( - user_message: str, - user_id: str, - llm: Any, -) -> None: - """Extract and persist user memory updates.""" - uid = UUID(user_id) if isinstance(user_id, str) else user_id - - async with shielded_async_session() as session: - result = await session.execute(select(User).where(User.id == uid)) - user = result.scalars().first() - if not user: - return - - old_memory = user.memory_md - updated = await _call_extraction_llm( - llm, _MEMORY_EXTRACT_PROMPT, old_memory or "", user_message - ) - if updated is None: - logger.debug("Memory extraction: no update needed (user %s)", uid) - return - - save_result = await _save_memory( - updated_memory=updated, - old_memory=old_memory, - llm=llm, - apply_fn=lambda content: setattr(user, "memory_md", content), - commit_fn=session.commit, - rollback_fn=session.rollback, - label="memory", - ) - logger.info( - "Background memory extraction for user %s: %s", - uid, - save_result.get("status"), - ) - - -async def _extract_team_memory( - user_message: str, - search_space_id: int, - llm: Any, -) -> None: - """Extract and persist team memory updates.""" - async with shielded_async_session() as session: - result = await session.execute( - select(SearchSpace).where(SearchSpace.id == search_space_id) - ) - space = result.scalars().first() - if not space: - return - - old_memory = space.shared_memory_md - updated = await _call_extraction_llm( - llm, _TEAM_MEMORY_EXTRACT_PROMPT, old_memory or "", user_message - ) - if updated is None: - logger.debug( - "Team memory extraction: no update needed (space %s)", - search_space_id, - ) - return - - save_result = await _save_memory( - updated_memory=updated, - old_memory=old_memory, - llm=llm, - apply_fn=lambda content: setattr(space, "shared_memory_md", content), - commit_fn=session.commit, - rollback_fn=session.rollback, - label="team memory", - ) - logger.info( - "Background team memory extraction for space %s: %s", - search_space_id, - save_result.get("status"), - ) diff --git a/surfsense_backend/app/agents/new_chat/middleware/memory_injection.py b/surfsense_backend/app/agents/new_chat/middleware/memory_injection.py index 8e692dfcb..05b8d2be3 100644 --- a/surfsense_backend/app/agents/new_chat/middleware/memory_injection.py +++ b/surfsense_backend/app/agents/new_chat/middleware/memory_injection.py @@ -59,12 +59,14 @@ class MemoryInjectionMiddleware(AgentMiddleware): # type: ignore[type-arg] async with shielded_async_session() as session: if self.user_id is not None: - user_memory, is_persisted = await self._load_user_memory(session) + user_memory, display_name = await self._load_user_memory(session) + if display_name: + first_name = display_name.split()[0] + memory_blocks.append(f"{first_name}") if user_memory: chars = len(user_memory) - persisted = "true" if is_persisted else "false" memory_blocks.append( - f'\n' + f'\n' f"{user_memory}\n" f"" ) @@ -91,42 +93,19 @@ class MemoryInjectionMiddleware(AgentMiddleware): # type: ignore[type-arg] return {"messages": new_messages} - async def _load_user_memory(self, session: AsyncSession) -> tuple[str | None, bool]: - """Return (memory_content, is_persisted). - - When the user has no saved memory but has a display name, a seed - document is created and **persisted to the database immediately** - so the LLM doesn't need to make a tool call to save it. - """ + async def _load_user_memory(self, session: AsyncSession) -> tuple[str | None, str | None]: + """Return (memory_content, display_name).""" try: result = await session.execute( select(User.memory_md, User.display_name).where(User.id == self.user_id) ) row = result.one_or_none() if row is None: - return None, True - - memory_md, display_name = row - - if memory_md: - return memory_md, True - - if display_name: - first_name = display_name.split()[0] - seed = f"## About the user (pinned)\n- Name: {first_name}" - await session.execute( - User.__table__.update() - .where(User.id == self.user_id) - .values(memory_md=seed) - ) - await session.commit() - logger.info("Auto-persisted memory seed for user %s", self.user_id) - return seed, True - - return None, True + return None, None + return row.memory_md or None, row.display_name except Exception: logger.exception("Failed to load user memory") - return None, True + return None, None async def _load_team_memory(self, session: AsyncSession) -> str | None: try: diff --git a/surfsense_backend/app/agents/new_chat/system_prompt.py b/surfsense_backend/app/agents/new_chat/system_prompt.py index 4b621dd3c..f811deda9 100644 --- a/surfsense_backend/app/agents/new_chat/system_prompt.py +++ b/surfsense_backend/app/agents/new_chat/system_prompt.py @@ -40,6 +40,13 @@ CRITICAL RULE — KNOWLEDGE BASE FIRST, NEVER DEFAULT TO GENERAL KNOWLEDGE: * Tool-usage actions like generating reports, podcasts, images, or scraping webpages + +IMPORTANT — After understanding each user message, ALWAYS check: does this message +reveal durable facts about the user (role, interests, preferences, projects, +background, or standing instructions)? If yes, you MUST call update_memory +alongside your normal response — do not defer this to a later turn. + + """ @@ -71,6 +78,13 @@ CRITICAL RULE — KNOWLEDGE BASE FIRST, NEVER DEFAULT TO GENERAL KNOWLEDGE: * Tool-usage actions like generating reports, podcasts, images, or scraping webpages + +IMPORTANT — After understanding each user message, ALWAYS check: does this message +reveal durable facts about the team (decisions, conventions, architecture, processes, +or key facts)? If yes, you MUST call update_memory alongside your normal response — +do not defer this to a later turn. + + """ @@ -257,56 +271,52 @@ _MEMORY_TOOL_INSTRUCTIONS: dict[str, dict[str, str]] = { `limit` attributes show your current usage and the maximum allowed size. - This is your curated long-term memory — the distilled essence of what you know about the user, not raw conversation logs. - - Note: The system automatically extracts memorizable information from every - conversation in the background. Use this tool primarily for: - * Explicit user requests: "remember this", "keep in mind", "note that", "forget X" - * Restructuring or reorganizing the memory document - * Correcting outdated or wrong entries - * **If has persisted="false"** — you MUST still call update_memory - to persist the seed. - - Skip truly ephemeral info (one-off questions, greetings, session logistics). + - You are the sole mechanism for persisting memory — there is no background extraction. + Call update_memory when: + * The user explicitly asks to remember or forget something + * The user shares durable facts or preferences that will matter in future conversations + - The user's name is already provided via — do not store it in memory. + - Do not store short-lived or ephemeral info: one-off questions, greetings, + session logistics, or things that only matter for the current task. - Args: - updated_memory: The FULL updated markdown document (not a diff). Merge new facts with existing ones, update contradictions, remove outdated entries. Treat every update as a curation pass — consolidate, don't just append. - Include inline dates (YYYY-MM) on entries where temporal context matters (facts that - may change, decisions, context). Skip dates on timeless preferences and instructions. + - Every bullet MUST start with a (YYYY-MM-DD) date prefix indicating when it was recorded or last updated. - Keep it concise and well under the character limit shown in . - You MUST organize memory using these standard sections (add new `##` sections only if none of the standard ones fit): - ## About the user (pinned) — name, role, background, company (with date if it may change) + ## About the user (pinned) — role, background, company ## Preferences — languages, tools, frameworks, response style ## Instructions (pinned) — standing instructions, things to always/never do - ## Current context — ongoing projects, goals, deadlines (with date) - Each entry MUST be a single bullet point. Keep entries concise (aim for under 120 chars each). - - Each time-sensitive entry MUST include a (YYYY-MM) date suffix. - Sections with `(pinned)` in the heading are protected — the system will reject any update that removes them. Users can add `(pinned)` to any `##` heading to protect it. - - During consolidation, prioritize keeping: pinned sections > preferences > current context. + - During consolidation, prioritize keeping: pinned sections > preferences. """, "shared": """ - update_memory: Update the team's shared memory document for this search space. - Your current team memory is already in in your context. The `chars` and `limit` attributes show current usage and the maximum allowed size. - This is the team's curated long-term memory — decisions, conventions, key facts. - - Note: The system automatically extracts memorizable team information from every - conversation in the background. Use this tool primarily for: - * Explicit requests: "let's remember that", "note this decision", "forget X" - * Restructuring or reorganizing the team memory document - * Correcting outdated or wrong entries - - Skip truly ephemeral info (one-off questions, greetings, session logistics). + - You are the sole mechanism for persisting team memory — there is no background extraction. + Call update_memory when: + * A team member explicitly asks to remember or forget something + * The conversation surfaces durable team decisions, conventions, or facts + that will matter in future conversations + - Do not store short-lived or ephemeral info: one-off questions, greetings, + session logistics, or things that only matter for the current task. - Args: - updated_memory: The FULL updated markdown document (not a diff). Merge new facts with existing ones, update contradictions, remove outdated entries. Treat every update as a curation pass — consolidate, don't just append. - Include inline dates (YYYY-MM) on decisions and time-sensitive entries. + - Every bullet MUST start with a (YYYY-MM-DD) date prefix indicating when it was recorded or last updated. - Keep it concise and well under the character limit shown in . - You MUST organize memory using these standard sections (add new `##` sections only if none of the standard ones fit): - ## Team decisions (pinned) — agreed-upon choices with rationale and date + ## Team decisions (pinned) — agreed-upon choices with rationale ## Conventions (pinned) — coding standards, tools, processes, naming patterns ## Key facts — where things are, how things work, team structure ## Current priorities — active projects, deadlines, blockers - Each entry MUST be a single bullet point. Keep entries concise (aim for under 120 chars each). - - Each time-sensitive entry MUST include a (YYYY-MM) date suffix. - Sections with `(pinned)` in the heading are protected — the system will reject any update that removes them. Users can add `(pinned)` to any `##` heading to protect it. - During consolidation, prioritize keeping: pinned sections > key facts > current priorities. @@ -317,28 +327,26 @@ _MEMORY_TOOL_INSTRUCTIONS: dict[str, dict[str, str]] = { _MEMORY_TOOL_EXAMPLES: dict[str, dict[str, str]] = { "update_memory": { "private": """ -- contains "## About the user (pinned)\\n- Name: Alex" - User: "I'm a university student, explain astrophage to me" - - Memory is not yet persisted AND the user casually shared that they are a student. - You MUST call update_memory to persist the seed plus the new fact: - update_memory(updated_memory="## About the user (pinned)\\n- Name: Alex\\n- University student\\n") -- User: "Remember that I prefer TypeScript over JavaScript" - - Timeless preference, no date needed. You see the current and merge: - update_memory(updated_memory="## About the user (pinned)\\n- Senior developer\\n\\n## Preferences\\n- Prefers TypeScript over JavaScript\\n...") -- User: "I actually moved to Google last month" - - Fact that changes over time, include date: - update_memory(updated_memory="## About the user (pinned)\\n- Senior developer at Google (since 2026-03, previously Acme Corp)\\n...") -- User: "I'm building a SaaS app with Next.js and Supabase" - - Implicit project info shared as context. Save it: - update_memory(updated_memory="## About the user (pinned)\\n- Name: Alex\\n\\n## Current context\\n- Building a SaaS app with Next.js and Supabase (2026-04)\\n") +- is empty. User: "I'm a space enthusiast, explain astrophage to me" + - The user casually shared a durable fact about themselves. Save it: + update_memory(updated_memory="## About the user (pinned)\\n- (2025-03-15) Space enthusiast\\n") +- User: "Remember that I prefer concise answers over detailed explanations" + - Durable preference. You see the current and merge: + update_memory(updated_memory="## About the user (pinned)\\n- (2025-03-15) Space enthusiast\\n\\n## Preferences\\n- (2025-03-15) Prefers concise answers over detailed explanations\\n...") +- User: "I actually moved to Tokyo last month" + - Updated fact, date prefix reflects when recorded: + update_memory(updated_memory="## About the user (pinned)\\n- (2025-03-15) Lives in Tokyo (previously London)\\n...") +- User: "I'm a freelance photographer working on a nature documentary" + - Durable background info. Save it under About the user: + update_memory(updated_memory="## About the user (pinned)\\n- (2025-03-15) Freelance photographer\\n- (2025-03-15) Working on a nature documentary\\n") """, "shared": """ -- User: "Let's remember that we decided to use GraphQL" - - Decision with date: - update_memory(updated_memory="## Team decisions (pinned)\\n- 2026-04: Adopted GraphQL over REST for new APIs\\n...") -- User: "Our deploy process uses Railway auto-deploys" - - Key fact, no date needed: - update_memory(updated_memory="## Key facts\\n- Deploy pipeline: git push -> Railway auto-deploys in ~3min\\n...") +- User: "Let's remember that we decided to do weekly standup meetings on Mondays" + - Durable team decision: + update_memory(updated_memory="## Team decisions (pinned)\\n- (2025-03-15) Weekly standup meetings on Mondays\\n...") +- User: "Our office is in downtown Seattle, 5th floor" + - Durable team fact: + update_memory(updated_memory="## Key facts\\n- (2025-03-15) Office location: downtown Seattle, 5th floor\\n...") """, }, } diff --git a/surfsense_backend/app/agents/new_chat/tools/update_memory.py b/surfsense_backend/app/agents/new_chat/tools/update_memory.py index d8172bfcd..991e8338e 100644 --- a/surfsense_backend/app/agents/new_chat/tools/update_memory.py +++ b/surfsense_backend/app/agents/new_chat/tools/update_memory.py @@ -6,9 +6,10 @@ always sees the current memory in / tags injected by MemoryInjectionMiddleware, so it passes the FULL updated document each time. Overflow handling: - - Soft limit (15K chars): advisory warning returned alongside a successful save. - - Hard limit (25K chars): save rejected; an automatic LLM-driven consolidation - is attempted before falling back to the error. + - Soft limit (18K chars): an automatic LLM-driven consolidation is attempted + to proactively keep memory lean. The save always succeeds. + - Hard limit (25K chars): save rejected if memory still exceeds this after + consolidation. - Pinned sections: headings containing ``(pinned)`` are protected — the system rejects any update that drops them and auto-restores them during consolidation. - Diff validation: warns when entire ``##`` sections are dropped or when the @@ -31,7 +32,7 @@ from app.db import SearchSpace, User logger = logging.getLogger(__name__) -MEMORY_SOFT_LIMIT = 15_000 +MEMORY_SOFT_LIMIT = 18_000 MEMORY_HARD_LIMIT = 25_000 _PINNED_RE = re.compile(r"^##\s+.+\(pinned\)", re.MULTILINE) @@ -188,7 +189,7 @@ RULES: preferences > current context. 5. Merge duplicate entries, remove outdated entries, shorten verbose descriptions. 6. Each entry must be a single bullet point. -7. Preserve (YYYY-MM) date suffixes on time-sensitive entries. +7. Every bullet MUST keep its (YYYY-MM-DD) date prefix. 8. Output ONLY the consolidated markdown — no explanations, no wrapping. @@ -259,25 +260,19 @@ async def _save_memory( if pinned_err: return {"status": "error", "message": pinned_err} - # --- hard-limit gate with auto-consolidation fallback --- + # --- auto-consolidate proactively at the soft limit --- + if len(content) > MEMORY_SOFT_LIMIT and llm is not None: + consolidated = await _auto_consolidate(content, llm) + if consolidated is not None: + if old_memory: + consolidated = _restore_missing_pinned(old_memory, consolidated) + if len(consolidated) < len(content): + content = consolidated + + # --- hard-limit gate (reject if still too large after consolidation) --- size_err = _validate_memory_size(content) if size_err: - if llm is None: - return size_err - - consolidated = await _auto_consolidate(content, llm) - if consolidated is None: - return size_err - - # Restore any pinned sections the consolidation LLM may have dropped - if old_memory: - consolidated = _restore_missing_pinned(old_memory, consolidated) - - recheck = _validate_memory_size(consolidated) - if recheck: - return recheck - - content = consolidated + return size_err # --- persist --- try: diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index d49400c18..0a6c34e81 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -37,7 +37,6 @@ from app.agents.new_chat.llm_config import ( load_agent_config, load_llm_config_from_yaml, ) -from app.agents.new_chat.memory_extraction import extract_and_save_memory from app.db import ( ChatVisibility, NewChatMessage, @@ -60,8 +59,6 @@ from app.utils.perf import get_perf_logger, log_system_snapshot, trim_native_hea _perf_log = get_perf_logger() -_background_tasks: set[asyncio.Task] = set() - def format_mentioned_surfsense_docs_as_context( documents: list[SurfsenseDocsDocument], @@ -1525,19 +1522,6 @@ async def stream_new_chat( yield streaming_service.format_done() return - if user_id and llm is not None: - _mem_task = asyncio.create_task( - extract_and_save_memory( - user_message=user_query, - user_id=user_id, - search_space_id=search_space_id, - thread_visibility=visibility, - llm=llm, - ) - ) - _background_tasks.add(_mem_task) - _mem_task.add_done_callback(_background_tasks.discard) - # If the title task didn't finish during streaming, await it now if title_task is not None and not title_emitted: generated_title = await title_task diff --git a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/MemoryContent.tsx b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/MemoryContent.tsx index e8c632eb3..85f2db695 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/MemoryContent.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/MemoryContent.tsx @@ -135,7 +135,7 @@ export function MemoryContent() { onClick={handleClear} disabled={saving || !savedMemory} > - Clear All + Reset Memory