diff --git a/surfsense_backend/app/agents/new_chat/memory_extraction.py b/surfsense_backend/app/agents/new_chat/memory_extraction.py new file mode 100644 index 000000000..12ba560f3 --- /dev/null +++ b/surfsense_backend/app/agents/new_chat/memory_extraction.py @@ -0,0 +1,227 @@ +"""Post-response memory extraction for the SurfSense agent. + +After each agent response, a background task calls a lightweight LLM to decide +whether the user's message contains any long-term information worth persisting +(preferences, background, goals, instructions, etc.). This ensures memory +updates are never missed regardless of whether the main agent called +``update_memory`` during the conversation. + +The function re-reads memory from the database so it always sees the latest +state — including any updates the agent may have already made. +""" + +from __future__ import annotations + +import logging +from typing import Any +from uuid import UUID + +from langchain_core.messages import HumanMessage +from sqlalchemy import select + +from app.agents.new_chat.tools.update_memory import _save_memory +from app.db import ChatVisibility, SearchSpace, User, shielded_async_session + +logger = logging.getLogger(__name__) + +_MEMORY_EXTRACT_PROMPT = """\ +You are a memory extraction assistant. Analyze the user's message and decide \ +if it contains any long-term information worth persisting to memory. + +Worth remembering: preferences, background/identity, goals, projects, \ +instructions, tools/languages they use, decisions, expertise, workplace. + +NOT worth remembering: greetings, one-off factual questions, session \ +logistics, ephemeral requests, follow-up clarifications with no new personal info. + +If the message contains memorizable information, output the FULL updated \ +memory document with the new facts merged into the existing content. Follow \ +these rules: +- Use the same ## section structure as the existing memory. 
+- Keep entries as single concise bullet points (under 120 chars each). +- Add (YYYY-MM) date suffixes on time-sensitive entries. +- Never remove or modify sections marked with (pinned). +- If a new fact contradicts an existing entry, update the existing entry. +- Do not duplicate information that is already present. +- Standard sections: \ +"## About the user (pinned)", "## Preferences", "## Instructions (pinned)", \ +"## Current context" + +If nothing is worth remembering, output exactly: NO_UPDATE + + +{current_memory} + + + +{user_message} +""" + +_TEAM_MEMORY_EXTRACT_PROMPT = """\ +You are a memory extraction assistant for a team workspace. Analyze the \ +user's message and decide if it contains any long-term team information \ +worth persisting to the shared memory. + +Worth remembering: team decisions, conventions, coding standards, key facts \ +about the project/team, processes, architecture decisions. + +NOT worth remembering: greetings, personal preferences, one-off questions, \ +ephemeral requests. + +If the message contains memorizable information, output the FULL updated \ +memory document with the new facts merged into the existing content. Follow \ +these rules: +- Use the same ## section structure as the existing memory. +- Keep entries as single concise bullet points (under 120 chars each). +- Add (YYYY-MM) date suffixes on time-sensitive entries. +- Never remove or modify sections marked with (pinned). 
+- Standard sections: \ +"## Team decisions (pinned)", "## Conventions (pinned)", "## Key facts", \ +"## Current priorities" + +If nothing is worth remembering, output exactly: NO_UPDATE + + +{current_memory} + + + +{user_message} +""" + + +async def _call_extraction_llm( + llm: Any, + prompt_template: str, + current_memory: str, + user_message: str, +) -> str | None: + """Run the extraction LLM and return the updated memory, or ``None``.""" + prompt = prompt_template.format( + current_memory=current_memory or "(empty)", + user_message=user_message, + ) + response = await llm.ainvoke( + [HumanMessage(content=prompt)], + config={"tags": ["surfsense:internal", "memory-extraction"]}, + ) + text = ( + response.content + if isinstance(response.content, str) + else str(response.content) + ).strip() + + if text == "NO_UPDATE" or not text: + return None + return text + + +async def extract_and_save_memory( + *, + user_message: str, + user_id: str | None, + search_space_id: int, + thread_visibility: ChatVisibility | None, + llm: Any, +) -> None: + """Background task: extract memorizable info and persist it. + + This function is designed to be fire-and-forget — it catches all + exceptions internally and never propagates them. 
+ """ + if not user_id: + return + + visibility = thread_visibility or ChatVisibility.PRIVATE + + try: + await _extract_user_memory(user_message, user_id, llm) + except Exception: + logger.exception("Background user memory extraction failed") + + if visibility == ChatVisibility.SEARCH_SPACE: + try: + await _extract_team_memory(user_message, search_space_id, llm) + except Exception: + logger.exception("Background team memory extraction failed") + + +async def _extract_user_memory( + user_message: str, + user_id: str, + llm: Any, +) -> None: + """Extract and persist user memory updates.""" + uid = UUID(user_id) if isinstance(user_id, str) else user_id + + async with shielded_async_session() as session: + result = await session.execute( + select(User).where(User.id == uid) + ) + user = result.scalars().first() + if not user: + return + + old_memory = user.memory_md + updated = await _call_extraction_llm( + llm, _MEMORY_EXTRACT_PROMPT, old_memory or "", user_message + ) + if updated is None: + logger.debug("Memory extraction: no update needed (user %s)", uid) + return + + save_result = await _save_memory( + updated_memory=updated, + old_memory=old_memory, + llm=llm, + apply_fn=lambda content: setattr(user, "memory_md", content), + commit_fn=session.commit, + rollback_fn=session.rollback, + label="memory", + ) + logger.info( + "Background memory extraction for user %s: %s", + uid, + save_result.get("status"), + ) + + +async def _extract_team_memory( + user_message: str, + search_space_id: int, + llm: Any, +) -> None: + """Extract and persist team memory updates.""" + async with shielded_async_session() as session: + result = await session.execute( + select(SearchSpace).where(SearchSpace.id == search_space_id) + ) + space = result.scalars().first() + if not space: + return + + old_memory = space.shared_memory_md + updated = await _call_extraction_llm( + llm, _TEAM_MEMORY_EXTRACT_PROMPT, old_memory or "", user_message + ) + if updated is None: + logger.debug( + "Team 
memory extraction: no update needed (space %s)", + search_space_id, + ) + return + + save_result = await _save_memory( + updated_memory=updated, + old_memory=old_memory, + llm=llm, + apply_fn=lambda content: setattr(space, "shared_memory_md", content), + commit_fn=session.commit, + rollback_fn=session.rollback, + label="team memory", + ) + logger.info( + "Background team memory extraction for space %s: %s", + search_space_id, + save_result.get("status"), + ) diff --git a/surfsense_backend/app/agents/new_chat/system_prompt.py b/surfsense_backend/app/agents/new_chat/system_prompt.py index 64b6406ad..e150d1bec 100644 --- a/surfsense_backend/app/agents/new_chat/system_prompt.py +++ b/surfsense_backend/app/agents/new_chat/system_prompt.py @@ -257,17 +257,13 @@ _MEMORY_TOOL_INSTRUCTIONS: dict[str, dict[str, str]] = { `limit` attributes show your current usage and the maximum allowed size. - This is your curated long-term memory — the distilled essence of what you know about the user, not raw conversation logs. - - **If has persisted="false"**, the memory has NOT been saved to the - database yet (it is a seed). You MUST call update_memory during this conversation to - persist it, merging in any new information you learn from the user. - - Call update_memory when the user shares long-term information about themselves, - whether explicitly or casually as part of a request: - * Identity & background mentioned in passing: "I'm a student", "at my company we...", - "I'm working on a React app", "I'm a data scientist" - * Explicit requests: "remember this", "keep in mind", "note that" - * Preferences: "I prefer X", "I like Y", "always do Z" - * Facts: name, role, company, expertise, current projects - * Standing instructions: response format, communication style + - Note: The system automatically extracts memorizable information from every + conversation in the background. 
Use this tool primarily for:
+      * Explicit user requests: "remember this", "keep in mind", "note that", "forget X"
+      * Restructuring or reorganizing the memory document
+      * Correcting outdated or wrong entries
+      * **If has persisted="false"** — you MUST still call update_memory
+        to persist the seed.
     - Skip truly ephemeral info (one-off questions, greetings, session logistics).
     - Args:
     - updated_memory: The FULL updated markdown document (not a diff).
@@ -292,12 +288,11 @@ _MEMORY_TOOL_INSTRUCTIONS: dict[str, dict[str, str]] = {
-    - Your current team memory is already in in your context. The `chars` and
+    - Your current team memory is already in your context. The `chars` and
       `limit` attributes show current usage and the maximum allowed size.
     - This is the team's curated long-term memory — decisions, conventions, key facts.
-    - Call update_memory when the team shares long-term information, whether explicitly
-      or as part of a broader discussion:
-      * Team decisions: "let's use X", "we decided to go with Y"
-      * Conventions: coding standards, processes, naming patterns
-      * Key facts: where things are, how things work, team structure
-      * Priorities: active projects, deadlines, blockers
+    - Note: The system automatically extracts memorizable team information from every
+      conversation in the background. Use this tool primarily for:
+      * Explicit requests: "let's remember that", "note this decision", "forget X"
+      * Restructuring or reorganizing the team memory document
+      * Correcting outdated or wrong entries
     - Skip truly ephemeral info (one-off questions, greetings, session logistics).
     - Args:
     - updated_memory: The FULL updated markdown document (not a diff).
diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index 5ff907459..1cb858c51 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -29,6 +29,7 @@ from sqlalchemy.future import select from sqlalchemy.orm import selectinload from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent +from app.agents.new_chat.memory_extraction import extract_and_save_memory from app.agents.new_chat.checkpointer import get_checkpointer from app.agents.new_chat.llm_config import ( AgentConfig, @@ -1524,6 +1525,19 @@ async def stream_new_chat( yield streaming_service.format_done() return + if user_id and llm is not None: + _mem_task = asyncio.create_task( + extract_and_save_memory( + user_message=user_query, + user_id=user_id, + search_space_id=search_space_id, + thread_visibility=visibility, + llm=llm, + ) + ) + _background_tasks.add(_mem_task) + _mem_task.add_done_callback(_background_tasks.discard) + # If the title task didn't finish during streaming, await it now if title_task is not None and not title_emitted: generated_title = await title_task