diff --git a/surfsense_backend/app/agents/new_chat/memory_extraction.py b/surfsense_backend/app/agents/new_chat/memory_extraction.py
deleted file mode 100644
index 5fcd0fb08..000000000
--- a/surfsense_backend/app/agents/new_chat/memory_extraction.py
+++ /dev/null
@@ -1,223 +0,0 @@
-"""Post-response memory extraction for the SurfSense agent.
-
-After each agent response, a background task calls a lightweight LLM to decide
-whether the user's message contains any long-term information worth persisting
-(preferences, background, goals, instructions, etc.). This ensures memory
-updates are never missed regardless of whether the main agent called
-``update_memory`` during the conversation.
-
-The function re-reads memory from the database so it always sees the latest
-state — including any updates the agent may have already made.
-"""
-
-from __future__ import annotations
-
-import logging
-from typing import Any
-from uuid import UUID
-
-from langchain_core.messages import HumanMessage
-from sqlalchemy import select
-
-from app.agents.new_chat.tools.update_memory import _save_memory
-from app.db import ChatVisibility, SearchSpace, User, shielded_async_session
-
-logger = logging.getLogger(__name__)
-
-_MEMORY_EXTRACT_PROMPT = """\
-You are a memory extraction assistant. Analyze the user's message and decide \
-if it contains any long-term information worth persisting to memory.
-
-Worth remembering: preferences, background/identity, goals, projects, \
-instructions, tools/languages they use, decisions, expertise, workplace.
-
-NOT worth remembering: greetings, one-off factual questions, session \
-logistics, ephemeral requests, follow-up clarifications with no new personal info.
-
-If the message contains memorizable information, output the FULL updated \
-memory document with the new facts merged into the existing content. Follow \
-these rules:
-- Use the same ## section structure as the existing memory.
-- Keep entries as single concise bullet points (under 120 chars each).
-- Add (YYYY-MM) date suffixes on time-sensitive entries.
-- Never remove or modify sections marked with (pinned).
-- If a new fact contradicts an existing entry, update the existing entry.
-- Do not duplicate information that is already present.
-- Standard sections: \
-"## About the user (pinned)", "## Preferences", "## Instructions (pinned)", \
-"## Current context"
-
-If nothing is worth remembering, output exactly: NO_UPDATE
-
-<current_memory>
-{current_memory}
-</current_memory>
-
-<user_message>
-{user_message}
-"""
-
-_TEAM_MEMORY_EXTRACT_PROMPT = """\
-You are a memory extraction assistant for a team workspace. Analyze the \
-user's message and decide if it contains any long-term team information \
-worth persisting to the shared memory.
-
-Worth remembering: team decisions, conventions, coding standards, key facts \
-about the project/team, processes, architecture decisions.
-
-NOT worth remembering: greetings, personal preferences, one-off questions, \
-ephemeral requests.
-
-If the message contains memorizable information, output the FULL updated \
-memory document with the new facts merged into the existing content. Follow \
-these rules:
-- Use the same ## section structure as the existing memory.
-- Keep entries as single concise bullet points (under 120 chars each).
-- Add (YYYY-MM) date suffixes on time-sensitive entries.
-- Never remove or modify sections marked with (pinned).
-- Standard sections: \
-"## Team decisions (pinned)", "## Conventions (pinned)", "## Key facts", \
-"## Current priorities"
-
-If nothing is worth remembering, output exactly: NO_UPDATE
-
-<current_memory>
-{current_memory}
-</current_memory>
-
-<user_message>
-{user_message}
-"""
-
-
-async def _call_extraction_llm(
- llm: Any,
- prompt_template: str,
- current_memory: str,
- user_message: str,
-) -> str | None:
- """Run the extraction LLM and return the updated memory, or ``None``."""
- prompt = prompt_template.format(
- current_memory=current_memory or "(empty)",
- user_message=user_message,
- )
- response = await llm.ainvoke(
- [HumanMessage(content=prompt)],
- config={"tags": ["surfsense:internal", "memory-extraction"]},
- )
- text = (
- response.content if isinstance(response.content, str) else str(response.content)
- ).strip()
-
- if text == "NO_UPDATE" or not text:
- return None
- return text
-
-
-async def extract_and_save_memory(
- *,
- user_message: str,
- user_id: str | None,
- search_space_id: int,
- thread_visibility: ChatVisibility | None,
- llm: Any,
-) -> None:
- """Background task: extract memorizable info and persist it.
-
- This function is designed to be fire-and-forget — it catches all
- exceptions internally and never propagates them.
- """
- if not user_id:
- return
-
- visibility = thread_visibility or ChatVisibility.PRIVATE
-
- try:
- await _extract_user_memory(user_message, user_id, llm)
- except Exception:
- logger.exception("Background user memory extraction failed")
-
- if visibility == ChatVisibility.SEARCH_SPACE:
- try:
- await _extract_team_memory(user_message, search_space_id, llm)
- except Exception:
- logger.exception("Background team memory extraction failed")
-
-
-async def _extract_user_memory(
- user_message: str,
- user_id: str,
- llm: Any,
-) -> None:
- """Extract and persist user memory updates."""
- uid = UUID(user_id) if isinstance(user_id, str) else user_id
-
- async with shielded_async_session() as session:
- result = await session.execute(select(User).where(User.id == uid))
- user = result.scalars().first()
- if not user:
- return
-
- old_memory = user.memory_md
- updated = await _call_extraction_llm(
- llm, _MEMORY_EXTRACT_PROMPT, old_memory or "", user_message
- )
- if updated is None:
- logger.debug("Memory extraction: no update needed (user %s)", uid)
- return
-
- save_result = await _save_memory(
- updated_memory=updated,
- old_memory=old_memory,
- llm=llm,
- apply_fn=lambda content: setattr(user, "memory_md", content),
- commit_fn=session.commit,
- rollback_fn=session.rollback,
- label="memory",
- )
- logger.info(
- "Background memory extraction for user %s: %s",
- uid,
- save_result.get("status"),
- )
-
-
-async def _extract_team_memory(
- user_message: str,
- search_space_id: int,
- llm: Any,
-) -> None:
- """Extract and persist team memory updates."""
- async with shielded_async_session() as session:
- result = await session.execute(
- select(SearchSpace).where(SearchSpace.id == search_space_id)
- )
- space = result.scalars().first()
- if not space:
- return
-
- old_memory = space.shared_memory_md
- updated = await _call_extraction_llm(
- llm, _TEAM_MEMORY_EXTRACT_PROMPT, old_memory or "", user_message
- )
- if updated is None:
- logger.debug(
- "Team memory extraction: no update needed (space %s)",
- search_space_id,
- )
- return
-
- save_result = await _save_memory(
- updated_memory=updated,
- old_memory=old_memory,
- llm=llm,
- apply_fn=lambda content: setattr(space, "shared_memory_md", content),
- commit_fn=session.commit,
- rollback_fn=session.rollback,
- label="team memory",
- )
- logger.info(
- "Background team memory extraction for space %s: %s",
- search_space_id,
- save_result.get("status"),
- )
diff --git a/surfsense_backend/app/agents/new_chat/middleware/memory_injection.py b/surfsense_backend/app/agents/new_chat/middleware/memory_injection.py
index 8e692dfcb..05b8d2be3 100644
--- a/surfsense_backend/app/agents/new_chat/middleware/memory_injection.py
+++ b/surfsense_backend/app/agents/new_chat/middleware/memory_injection.py
@@ -59,12 +59,14 @@ class MemoryInjectionMiddleware(AgentMiddleware): # type: ignore[type-arg]
async with shielded_async_session() as session:
if self.user_id is not None:
- user_memory, is_persisted = await self._load_user_memory(session)
+ user_memory, display_name = await self._load_user_memory(session)
+ if display_name:
+ first_name = display_name.split()[0]
+ memory_blocks.append(f"<user_name>{first_name}</user_name>")
if user_memory:
chars = len(user_memory)
- persisted = "true" if is_persisted else "false"
memory_blocks.append(
- f'<memory chars="{chars}" persisted="{persisted}">\n'
+ f'<memory chars="{chars}">\n'
f"{user_memory}\n"
f"</memory>"
)
@@ -91,42 +93,19 @@ class MemoryInjectionMiddleware(AgentMiddleware): # type: ignore[type-arg]
return {"messages": new_messages}
- async def _load_user_memory(self, session: AsyncSession) -> tuple[str | None, bool]:
- """Return (memory_content, is_persisted).
-
- When the user has no saved memory but has a display name, a seed
- document is created and **persisted to the database immediately**
- so the LLM doesn't need to make a tool call to save it.
- """
+ async def _load_user_memory(self, session: AsyncSession) -> tuple[str | None, str | None]:
+ """Return (memory_content, display_name)."""
try:
result = await session.execute(
select(User.memory_md, User.display_name).where(User.id == self.user_id)
)
row = result.one_or_none()
if row is None:
- return None, True
-
- memory_md, display_name = row
-
- if memory_md:
- return memory_md, True
-
- if display_name:
- first_name = display_name.split()[0]
- seed = f"## About the user (pinned)\n- Name: {first_name}"
- await session.execute(
- User.__table__.update()
- .where(User.id == self.user_id)
- .values(memory_md=seed)
- )
- await session.commit()
- logger.info("Auto-persisted memory seed for user %s", self.user_id)
- return seed, True
-
- return None, True
+ return None, None
+ return row.memory_md or None, row.display_name
except Exception:
logger.exception("Failed to load user memory")
- return None, True
+ return None, None
async def _load_team_memory(self, session: AsyncSession) -> str | None:
try:
diff --git a/surfsense_backend/app/agents/new_chat/system_prompt.py b/surfsense_backend/app/agents/new_chat/system_prompt.py
index 4b621dd3c..f811deda9 100644
--- a/surfsense_backend/app/agents/new_chat/system_prompt.py
+++ b/surfsense_backend/app/agents/new_chat/system_prompt.py
@@ -40,6 +40,13 @@ CRITICAL RULE — KNOWLEDGE BASE FIRST, NEVER DEFAULT TO GENERAL KNOWLEDGE:
* Tool-usage actions like generating reports, podcasts, images, or scraping webpages
+
+IMPORTANT — After understanding each user message, ALWAYS check: does this message
+reveal durable facts about the user (role, interests, preferences, projects,
+background, or standing instructions)? If yes, you MUST call update_memory
+alongside your normal response — do not defer this to a later turn.
+
+
"""
@@ -71,6 +78,13 @@ CRITICAL RULE — KNOWLEDGE BASE FIRST, NEVER DEFAULT TO GENERAL KNOWLEDGE:
* Tool-usage actions like generating reports, podcasts, images, or scraping webpages
+
+IMPORTANT — After understanding each user message, ALWAYS check: does this message
+reveal durable facts about the team (decisions, conventions, architecture, processes,
+or key facts)? If yes, you MUST call update_memory alongside your normal response —
+do not defer this to a later turn.
+
+
"""
@@ -257,56 +271,52 @@ _MEMORY_TOOL_INSTRUCTIONS: dict[str, dict[str, str]] = {
`limit` attributes show your current usage and the maximum allowed size.
- This is your curated long-term memory — the distilled essence of what you know about
the user, not raw conversation logs.
- - Note: The system automatically extracts memorizable information from every
- conversation in the background. Use this tool primarily for:
- * Explicit user requests: "remember this", "keep in mind", "note that", "forget X"
- * Restructuring or reorganizing the memory document
- * Correcting outdated or wrong entries
- * **If <memory> has persisted="false"** — you MUST still call update_memory
- to persist the seed.
- - Skip truly ephemeral info (one-off questions, greetings, session logistics).
+ - You are the sole mechanism for persisting memory — there is no background extraction.
+ Call update_memory when:
+ * The user explicitly asks to remember or forget something
+ * The user shares durable facts or preferences that will matter in future conversations
+ - The user's name is already provided via <user_name> — do not store it in memory.
+ - Do not store short-lived or ephemeral info: one-off questions, greetings,
+ session logistics, or things that only matter for the current task.
- Args:
- updated_memory: The FULL updated markdown document (not a diff).
Merge new facts with existing ones, update contradictions, remove outdated entries.
Treat every update as a curation pass — consolidate, don't just append.
- Include inline dates (YYYY-MM) on entries where temporal context matters (facts that
- may change, decisions, context). Skip dates on timeless preferences and instructions.
+ - Every bullet MUST start with a (YYYY-MM-DD) date prefix indicating when it was recorded or last updated.
- Keep it concise and well under the character limit shown in <memory>.
- You MUST organize memory using these standard sections (add new `##` sections only if none of the standard ones fit):
- ## About the user (pinned) — name, role, background, company (with date if it may change)
+ ## About the user (pinned) — role, background, company
## Preferences — languages, tools, frameworks, response style
## Instructions (pinned) — standing instructions, things to always/never do
- ## Current context — ongoing projects, goals, deadlines (with date)
- Each entry MUST be a single bullet point. Keep entries concise (aim for under 120 chars each).
- - Each time-sensitive entry MUST include a (YYYY-MM) date suffix.
- Sections with `(pinned)` in the heading are protected — the system will reject any
update that removes them. Users can add `(pinned)` to any `##` heading to protect it.
- - During consolidation, prioritize keeping: pinned sections > preferences > current context.
+ - During consolidation, prioritize keeping: pinned sections > preferences.
""",
"shared": """
- update_memory: Update the team's shared memory document for this search space.
- Your current team memory is already in <team_memory> in your context. The `chars`
and `limit` attributes show current usage and the maximum allowed size.
- This is the team's curated long-term memory — decisions, conventions, key facts.
- - Note: The system automatically extracts memorizable team information from every
- conversation in the background. Use this tool primarily for:
- * Explicit requests: "let's remember that", "note this decision", "forget X"
- * Restructuring or reorganizing the team memory document
- * Correcting outdated or wrong entries
- - Skip truly ephemeral info (one-off questions, greetings, session logistics).
+ - You are the sole mechanism for persisting team memory — there is no background extraction.
+ Call update_memory when:
+ * A team member explicitly asks to remember or forget something
+ * The conversation surfaces durable team decisions, conventions, or facts
+ that will matter in future conversations
+ - Do not store short-lived or ephemeral info: one-off questions, greetings,
+ session logistics, or things that only matter for the current task.
- Args:
- updated_memory: The FULL updated markdown document (not a diff).
Merge new facts with existing ones, update contradictions, remove outdated entries.
Treat every update as a curation pass — consolidate, don't just append.
- Include inline dates (YYYY-MM) on decisions and time-sensitive entries.
+ - Every bullet MUST start with a (YYYY-MM-DD) date prefix indicating when it was recorded or last updated.
- Keep it concise and well under the character limit shown in <team_memory>.
- You MUST organize memory using these standard sections (add new `##` sections only if none of the standard ones fit):
- ## Team decisions (pinned) — agreed-upon choices with rationale and date
+ ## Team decisions (pinned) — agreed-upon choices with rationale
## Conventions (pinned) — coding standards, tools, processes, naming patterns
## Key facts — where things are, how things work, team structure
## Current priorities — active projects, deadlines, blockers
- Each entry MUST be a single bullet point. Keep entries concise (aim for under 120 chars each).
- - Each time-sensitive entry MUST include a (YYYY-MM) date suffix.
- Sections with `(pinned)` in the heading are protected — the system will reject any
update that removes them. Users can add `(pinned)` to any `##` heading to protect it.
- During consolidation, prioritize keeping: pinned sections > key facts > current priorities.
@@ -317,28 +327,26 @@ _MEMORY_TOOL_INSTRUCTIONS: dict[str, dict[str, str]] = {
_MEMORY_TOOL_EXAMPLES: dict[str, dict[str, str]] = {
"update_memory": {
"private": """
-- <memory> contains "## About the user (pinned)\\n- Name: Alex"
- User: "I'm a university student, explain astrophage to me"
- - Memory is not yet persisted AND the user casually shared that they are a student.
- You MUST call update_memory to persist the seed plus the new fact:
- update_memory(updated_memory="## About the user (pinned)\\n- Name: Alex\\n- University student\\n")
-- User: "Remember that I prefer TypeScript over JavaScript"
- - Timeless preference, no date needed. You see the current <memory> and merge:
- update_memory(updated_memory="## About the user (pinned)\\n- Senior developer\\n\\n## Preferences\\n- Prefers TypeScript over JavaScript\\n...")
-- User: "I actually moved to Google last month"
- - Fact that changes over time, include date:
- update_memory(updated_memory="## About the user (pinned)\\n- Senior developer at Google (since 2026-03, previously Acme Corp)\\n...")
-- User: "I'm building a SaaS app with Next.js and Supabase"
- - Implicit project info shared as context. Save it:
- update_memory(updated_memory="## About the user (pinned)\\n- Name: Alex\\n\\n## Current context\\n- Building a SaaS app with Next.js and Supabase (2026-04)\\n")
+- <memory> is empty. User: "I'm a space enthusiast, explain astrophage to me"
+ - The user casually shared a durable fact about themselves. Save it:
+ update_memory(updated_memory="## About the user (pinned)\\n- (2025-03-15) Space enthusiast\\n")
+- User: "Remember that I prefer concise answers over detailed explanations"
+ - Durable preference. You see the current <memory> and merge:
+ update_memory(updated_memory="## About the user (pinned)\\n- (2025-03-15) Space enthusiast\\n\\n## Preferences\\n- (2025-03-15) Prefers concise answers over detailed explanations\\n...")
+- User: "I actually moved to Tokyo last month"
+ - Updated fact, date prefix reflects when recorded:
+ update_memory(updated_memory="## About the user (pinned)\\n- (2025-03-15) Lives in Tokyo (previously London)\\n...")
+- User: "I'm a freelance photographer working on a nature documentary"
+ - Durable background info. Save it under About the user:
+ update_memory(updated_memory="## About the user (pinned)\\n- (2025-03-15) Freelance photographer\\n- (2025-03-15) Working on a nature documentary\\n")
""",
"shared": """
-- User: "Let's remember that we decided to use GraphQL"
- - Decision with date:
- update_memory(updated_memory="## Team decisions (pinned)\\n- 2026-04: Adopted GraphQL over REST for new APIs\\n...")
-- User: "Our deploy process uses Railway auto-deploys"
- - Key fact, no date needed:
- update_memory(updated_memory="## Key facts\\n- Deploy pipeline: git push -> Railway auto-deploys in ~3min\\n...")
+- User: "Let's remember that we decided to do weekly standup meetings on Mondays"
+ - Durable team decision:
+ update_memory(updated_memory="## Team decisions (pinned)\\n- (2025-03-15) Weekly standup meetings on Mondays\\n...")
+- User: "Our office is in downtown Seattle, 5th floor"
+ - Durable team fact:
+ update_memory(updated_memory="## Key facts\\n- (2025-03-15) Office location: downtown Seattle, 5th floor\\n...")
""",
},
}
diff --git a/surfsense_backend/app/agents/new_chat/tools/update_memory.py b/surfsense_backend/app/agents/new_chat/tools/update_memory.py
index d8172bfcd..991e8338e 100644
--- a/surfsense_backend/app/agents/new_chat/tools/update_memory.py
+++ b/surfsense_backend/app/agents/new_chat/tools/update_memory.py
@@ -6,9 +6,10 @@ always sees the current memory in <memory>/<team_memory> tags injected
by MemoryInjectionMiddleware, so it passes the FULL updated document each time.
Overflow handling:
- - Soft limit (15K chars): advisory warning returned alongside a successful save.
- - Hard limit (25K chars): save rejected; an automatic LLM-driven consolidation
- is attempted before falling back to the error.
+ - Soft limit (18K chars): an automatic LLM-driven consolidation is attempted
+ to proactively keep memory lean. The save always succeeds.
+ - Hard limit (25K chars): save rejected if memory still exceeds this after
+ consolidation.
- Pinned sections: headings containing ``(pinned)`` are protected — the system
rejects any update that drops them and auto-restores them during consolidation.
- Diff validation: warns when entire ``##`` sections are dropped or when the
@@ -31,7 +32,7 @@ from app.db import SearchSpace, User
logger = logging.getLogger(__name__)
-MEMORY_SOFT_LIMIT = 15_000
+MEMORY_SOFT_LIMIT = 18_000
MEMORY_HARD_LIMIT = 25_000
_PINNED_RE = re.compile(r"^##\s+.+\(pinned\)", re.MULTILINE)
@@ -188,7 +189,7 @@ RULES:
preferences > current context.
5. Merge duplicate entries, remove outdated entries, shorten verbose descriptions.
6. Each entry must be a single bullet point.
-7. Preserve (YYYY-MM) date suffixes on time-sensitive entries.
+7. Every bullet MUST keep its (YYYY-MM-DD) date prefix.
8. Output ONLY the consolidated markdown — no explanations, no wrapping.
@@ -259,25 +260,19 @@ async def _save_memory(
if pinned_err:
return {"status": "error", "message": pinned_err}
- # --- hard-limit gate with auto-consolidation fallback ---
+ # --- auto-consolidate proactively at the soft limit ---
+ if len(content) > MEMORY_SOFT_LIMIT and llm is not None:
+ consolidated = await _auto_consolidate(content, llm)
+ if consolidated is not None:
+ if old_memory:
+ consolidated = _restore_missing_pinned(old_memory, consolidated)
+ if len(consolidated) < len(content):
+ content = consolidated
+
+ # --- hard-limit gate (reject if still too large after consolidation) ---
size_err = _validate_memory_size(content)
if size_err:
- if llm is None:
- return size_err
-
- consolidated = await _auto_consolidate(content, llm)
- if consolidated is None:
- return size_err
-
- # Restore any pinned sections the consolidation LLM may have dropped
- if old_memory:
- consolidated = _restore_missing_pinned(old_memory, consolidated)
-
- recheck = _validate_memory_size(consolidated)
- if recheck:
- return recheck
-
- content = consolidated
+ return size_err
# --- persist ---
try:
diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py
index d49400c18..0a6c34e81 100644
--- a/surfsense_backend/app/tasks/chat/stream_new_chat.py
+++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py
@@ -37,7 +37,6 @@ from app.agents.new_chat.llm_config import (
load_agent_config,
load_llm_config_from_yaml,
)
-from app.agents.new_chat.memory_extraction import extract_and_save_memory
from app.db import (
ChatVisibility,
NewChatMessage,
@@ -60,8 +59,6 @@ from app.utils.perf import get_perf_logger, log_system_snapshot, trim_native_hea
_perf_log = get_perf_logger()
-_background_tasks: set[asyncio.Task] = set()
-
def format_mentioned_surfsense_docs_as_context(
documents: list[SurfsenseDocsDocument],
@@ -1525,19 +1522,6 @@ async def stream_new_chat(
yield streaming_service.format_done()
return
- if user_id and llm is not None:
- _mem_task = asyncio.create_task(
- extract_and_save_memory(
- user_message=user_query,
- user_id=user_id,
- search_space_id=search_space_id,
- thread_visibility=visibility,
- llm=llm,
- )
- )
- _background_tasks.add(_mem_task)
- _mem_task.add_done_callback(_background_tasks.discard)
-
# If the title task didn't finish during streaming, await it now
if title_task is not None and not title_emitted:
generated_title = await title_task
diff --git a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/MemoryContent.tsx b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/MemoryContent.tsx
index e8c632eb3..85f2db695 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/MemoryContent.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/MemoryContent.tsx
@@ -135,7 +135,7 @@ export function MemoryContent() {
onClick={handleClear}
disabled={saving || !savedMemory}
>
- Clear All
+ Reset Memory