diff --git a/surfsense_backend/app/agents/new_chat/memory_extraction.py b/surfsense_backend/app/agents/new_chat/memory_extraction.py
index ceff6ff41..e9f060457 100644
--- a/surfsense_backend/app/agents/new_chat/memory_extraction.py
+++ b/surfsense_backend/app/agents/new_chat/memory_extraction.py
@@ -1,11 +1,8 @@
"""Background memory extraction for the SurfSense agent.
After each agent response, if the agent did not call ``update_memory`` during
-the turn, this module runs a lightweight LLM call to decide whether the user's
-message contains any long-term information worth persisting.
-
-Only user (personal) memory is handled here — team memory relies on explicit
-agent calls.
+the turn, this module can run a lightweight LLM call to decide whether the
+latest message contains long-term information worth persisting.
"""
from __future__ import annotations
@@ -18,7 +15,7 @@ from langchain_core.messages import HumanMessage
from sqlalchemy import select
from app.agents.new_chat.tools.update_memory import _save_memory
-from app.db import User, shielded_async_session
+from app.db import SearchSpace, User, shielded_async_session
logger = logging.getLogger(__name__)
@@ -55,6 +52,51 @@ If nothing is worth remembering, output exactly: NO_UPDATE
{user_message}
"""
+_TEAM_MEMORY_EXTRACT_PROMPT = """\
+You are a team-memory extraction assistant. Analyze the latest message and \
+decide if it contains durable TEAM-level information worth persisting.
+
+High-precision rule: if uncertain, output NO_UPDATE.
+
+Worth remembering (team-level only):
+- Explicit decisions (e.g. "we decided to use X")
+- Team conventions/standards (naming, review policy, coding norms)
+- Long-lived architecture/process facts
+- Stable project constraints, owners, recurring schedules
+
+NOT worth remembering:
+- Personal preferences or biography of one person
+- Questions, brainstorming, tentative ideas, or speculation
+- One-off requests, status updates, TODOs, logistics for this session
+- Anything not clearly adopted by the team
+
+If the message contains memorizable team information, output the FULL updated \
+team memory document with new facts merged into existing content. Follow rules:
+- Use the same ## section structure as the existing memory.
+- Keep entries as single concise bullet points (under 120 chars each).
+- Every bullet MUST start with a (YYYY-MM-DD) date prefix.
+- If a new fact contradicts an existing entry, update the existing entry.
+- Do not duplicate existing information.
+- NEVER use personal sections like "## About the user", "## Preferences", \
+ or "## Instructions".
+- Preserve neutral team phrasing; avoid person-specific memory unless role-anchored.
+- Standard sections: "## Team decisions", "## Team conventions", \
+"## Key facts", "## Current priorities"
+
+If nothing is worth remembering, output exactly: NO_UPDATE
+
+
+{current_memory}
+
+
+
+{author}
+
+
+
+{user_message}
+"""
+
async def extract_and_save_memory(
*,
@@ -105,6 +147,7 @@ async def extract_and_save_memory(
commit_fn=session.commit,
rollback_fn=session.rollback,
label="memory",
+ scope="user",
)
logger.info(
"Background memory extraction for user %s: %s",
@@ -113,3 +156,69 @@ async def extract_and_save_memory(
)
except Exception:
logger.exception("Background user memory extraction failed")
+
+
+async def extract_and_save_team_memory(
+ *,
+ user_message: str,
+ search_space_id: int | None,
+ llm: Any,
+ author_display_name: str | None = None,
+) -> None:
+ """Background task: extract team-level memory and persist it.
+
+ Runs only for shared threads. Designed to be fire-and-forget and catches
+ exceptions internally.
+ """
+ if not search_space_id:
+ return
+
+ try:
+ async with shielded_async_session() as session:
+ result = await session.execute(
+ select(SearchSpace).where(SearchSpace.id == search_space_id)
+ )
+ space = result.scalars().first()
+ if not space:
+ return
+
+ old_memory = space.shared_memory_md
+ prompt = _TEAM_MEMORY_EXTRACT_PROMPT.format(
+ current_memory=old_memory or "(empty)",
+ author=author_display_name or "Unknown team member",
+ user_message=user_message,
+ )
+ response = await llm.ainvoke(
+ [HumanMessage(content=prompt)],
+ config={"tags": ["surfsense:internal", "team-memory-extraction"]},
+ )
+ text = (
+ response.content
+ if isinstance(response.content, str)
+ else str(response.content)
+ ).strip()
+
+ if text == "NO_UPDATE" or not text:
+ logger.debug(
+ "Team memory extraction: no update needed (space %s)",
+ search_space_id,
+ )
+ return
+
+ save_result = await _save_memory(
+ updated_memory=text,
+ old_memory=old_memory,
+ llm=llm,
+ apply_fn=lambda content: setattr(space, "shared_memory_md", content),
+ commit_fn=session.commit,
+ rollback_fn=session.rollback,
+ label="team memory",
+ scope="team",
+ )
+ logger.info(
+ "Background team memory extraction for space %s: %s",
+ search_space_id,
+ save_result.get("status"),
+ )
+ except Exception:
+ logger.exception("Background team memory extraction failed")
diff --git a/surfsense_backend/app/agents/new_chat/middleware/memory_injection.py b/surfsense_backend/app/agents/new_chat/middleware/memory_injection.py
index 137a71edf..74a9611fd 100644
--- a/surfsense_backend/app/agents/new_chat/middleware/memory_injection.py
+++ b/surfsense_backend/app/agents/new_chat/middleware/memory_injection.py
@@ -1,10 +1,8 @@
"""Memory injection middleware for the SurfSense agent.
-Loads the user's personal memory (User.memory_md) and, for shared threads,
-the team memory (SearchSpace.shared_memory_md) from the database and injects
-them into the system prompt as / XML blocks on
-every turn. This ensures the LLM always has the full memory context without
-requiring a tool call.
+Injects memory markdown into the system prompt on every turn:
+- Private threads: only personal memory ()
+- Shared threads: only team memory ()
"""
from __future__ import annotations
@@ -58,7 +56,25 @@ class MemoryInjectionMiddleware(AgentMiddleware): # type: ignore[type-arg]
memory_blocks: list[str] = []
async with shielded_async_session() as session:
- if self.user_id is not None:
+ if self.visibility == ChatVisibility.SEARCH_SPACE:
+ team_memory = await self._load_team_memory(session)
+ if team_memory:
+ chars = len(team_memory)
+ memory_blocks.append(
+ f'\n'
+ f"{team_memory}\n"
+ f""
+ )
+ if chars > MEMORY_SOFT_LIMIT:
+ memory_blocks.append(
+ f"Team memory is at "
+ f"{chars:,}/{MEMORY_HARD_LIMIT:,} characters and approaching "
+ f"the hard limit. On your next update_memory call, consolidate "
+ f"by merging duplicates, removing outdated entries, and "
+ f"shortening descriptions before adding anything new."
+ f""
+ )
+ elif self.user_id is not None:
user_memory, display_name = await self._load_user_memory(session)
if display_name:
first_name = display_name.split()[0]
@@ -80,25 +96,6 @@ class MemoryInjectionMiddleware(AgentMiddleware): # type: ignore[type-arg]
f""
)
- if self.visibility == ChatVisibility.SEARCH_SPACE:
- team_memory = await self._load_team_memory(session)
- if team_memory:
- chars = len(team_memory)
- memory_blocks.append(
- f'\n'
- f"{team_memory}\n"
- f""
- )
- if chars > MEMORY_SOFT_LIMIT:
- memory_blocks.append(
- f"Team memory is at "
- f"{chars:,}/{MEMORY_HARD_LIMIT:,} characters and approaching "
- f"the hard limit. On your next update_memory call, consolidate "
- f"by merging duplicates, removing outdated entries, and "
- f"shortening descriptions before adding anything new."
- f""
- )
-
if not memory_blocks:
return None
diff --git a/surfsense_backend/app/agents/new_chat/system_prompt.py b/surfsense_backend/app/agents/new_chat/system_prompt.py
index 403019d96..e310d02eb 100644
--- a/surfsense_backend/app/agents/new_chat/system_prompt.py
+++ b/surfsense_backend/app/agents/new_chat/system_prompt.py
@@ -284,9 +284,13 @@ _MEMORY_TOOL_INSTRUCTIONS: dict[str, dict[str, str]] = {
- Every bullet MUST start with a (YYYY-MM-DD) date prefix indicating when it was recorded or last updated.
- Keep it concise and well under the character limit shown in .
- You MUST organize memory using these standard sections (add new `##` sections only if none of the standard ones fit):
- ## About the user — role, background, company
- ## Preferences — languages, tools, frameworks, response style
- ## Instructions — standing instructions, things to always/never do
+ ## About the user
+ ## Preferences
+ ## Instructions
+ - Section guidance:
+ * About the user: role, background, company, durable identity context
+ * Preferences: languages, tools, frameworks, response style preferences
+ * Instructions: standing instructions, things to always/never do
- Each entry MUST be a single bullet point. Keep entries concise (aim for under 120 chars each).
- During consolidation, prioritize keeping: identity/instructions > preferences.
""",
@@ -295,6 +299,8 @@ _MEMORY_TOOL_INSTRUCTIONS: dict[str, dict[str, str]] = {
- Your current team memory is already in in your context. The `chars`
and `limit` attributes show current usage and the maximum allowed size.
- This is the team's curated long-term memory — decisions, conventions, key facts.
+ - NEVER store personal memory in team memory (e.g. personal bio, individual
+ preferences, or user-only standing instructions).
- Call update_memory when:
* A team member explicitly asks to remember or forget something
* The conversation surfaces durable team decisions, conventions, or facts
@@ -308,10 +314,15 @@ _MEMORY_TOOL_INSTRUCTIONS: dict[str, dict[str, str]] = {
- Every bullet MUST start with a (YYYY-MM-DD) date prefix indicating when it was recorded or last updated.
- Keep it concise and well under the character limit shown in .
- You MUST organize memory using these standard sections (add new `##` sections only if none of the standard ones fit):
- ## Team decisions — agreed-upon choices with rationale
- ## Conventions — coding standards, tools, processes, naming patterns
- ## Key facts — where things are, how things work, team structure
- ## Current priorities — active projects, deadlines, blockers
+ ## Team decisions
+ ## Conventions
+ ## Key facts
+ ## Current priorities
+ - Section guidance:
+ * Team decisions: agreed choices and durable technical/product decisions
+ * Conventions: coding standards, tools, processes, naming patterns
+ * Key facts: stable facts about org/team/system setup
+ * Current priorities: active projects, near-term goals, important blockers
- Each entry MUST be a single bullet point. Keep entries concise (aim for under 120 chars each).
- During consolidation, prioritize keeping: decisions/conventions > key facts > current priorities.
""",
diff --git a/surfsense_backend/app/agents/new_chat/tools/update_memory.py b/surfsense_backend/app/agents/new_chat/tools/update_memory.py
index e8eb967fa..24577e975 100644
--- a/surfsense_backend/app/agents/new_chat/tools/update_memory.py
+++ b/surfsense_backend/app/agents/new_chat/tools/update_memory.py
@@ -18,7 +18,7 @@ from __future__ import annotations
import logging
import re
-from typing import Any
+from typing import Any, Literal
from uuid import UUID
from langchain_core.messages import HumanMessage
@@ -34,6 +34,15 @@ MEMORY_SOFT_LIMIT = 18_000
MEMORY_HARD_LIMIT = 25_000
_SECTION_HEADING_RE = re.compile(r"^##\s+(.+)$", re.MULTILINE)
+_HEADING_NORMALIZE_RE = re.compile(r"\s+")
+
+_USER_ONLY_HEADINGS = {"about the user", "preferences", "instructions"}
+_TEAM_ONLY_HEADINGS = {
+ "team decisions",
+ "conventions",
+ "key facts",
+ "current priorities",
+}
# ---------------------------------------------------------------------------
@@ -46,6 +55,45 @@ def _extract_headings(memory: str) -> set[str]:
return set(_SECTION_HEADING_RE.findall(memory))
+def _normalize_heading(heading: str) -> str:
+ """Normalize heading text for robust scope checks."""
+ return _HEADING_NORMALIZE_RE.sub(" ", heading.strip().lower())
+
+
+def _validate_memory_scope(
+ content: str, scope: Literal["user", "team"]
+) -> dict[str, Any] | None:
+ """Reject cross-scope headings (user sections in team memory and vice versa)."""
+ headings = {_normalize_heading(h) for h in _extract_headings(content)}
+ if not headings:
+ return None
+
+ if scope == "team":
+ leaked = sorted(headings & _USER_ONLY_HEADINGS)
+ if leaked:
+ return {
+ "status": "error",
+ "message": (
+ "Team memory cannot include personal sections: "
+ + ", ".join(leaked)
+ + ". Use team sections only."
+ ),
+ }
+ return None
+
+ leaked = sorted(headings & _TEAM_ONLY_HEADINGS)
+ if leaked:
+ return {
+ "status": "error",
+ "message": (
+ "User memory cannot include team sections: "
+ + ", ".join(leaked)
+ + ". Use personal sections only."
+ ),
+ }
+ return None
+
+
def _validate_diff(old_memory: str | None, new_memory: str) -> list[str]:
"""Return a list of warning strings about suspicious changes."""
if not old_memory:
@@ -166,6 +214,7 @@ async def _save_memory(
commit_fn,
rollback_fn,
label: str,
+ scope: Literal["user", "team"],
) -> dict[str, Any]:
"""Validate, optionally force-rewrite if over the hard limit, save, and
return a response dict.
@@ -200,6 +249,10 @@ async def _save_memory(
if size_err:
return size_err
+ scope_err = _validate_memory_scope(content, scope)
+ if scope_err:
+ return scope_err
+
# --- persist ---
try:
apply_fn(content)
@@ -270,6 +323,7 @@ def create_update_memory_tool(
commit_fn=db_session.commit,
rollback_fn=db_session.rollback,
label="memory",
+ scope="user",
)
except Exception as e:
logger.exception("Failed to update user memory: %s", e)
@@ -319,6 +373,7 @@ def create_update_team_memory_tool(
commit_fn=db_session.commit,
rollback_fn=db_session.rollback,
label="team memory",
+ scope="team",
)
except Exception as e:
logger.exception("Failed to update team memory: %s", e)
diff --git a/surfsense_backend/app/routes/memory_routes.py b/surfsense_backend/app/routes/memory_routes.py
index 6ec535626..565656f60 100644
--- a/surfsense_backend/app/routes/memory_routes.py
+++ b/surfsense_backend/app/routes/memory_routes.py
@@ -132,6 +132,7 @@ async def edit_user_memory(
commit_fn=session.commit,
rollback_fn=session.rollback,
label="memory",
+ scope="user",
)
if result.get("status") == "error":
diff --git a/surfsense_backend/app/routes/search_spaces_routes.py b/surfsense_backend/app/routes/search_spaces_routes.py
index d29315c80..6c608c532 100644
--- a/surfsense_backend/app/routes/search_spaces_routes.py
+++ b/surfsense_backend/app/routes/search_spaces_routes.py
@@ -56,7 +56,9 @@ RULES:
2. If the instruction asks to remove something, remove the matching entry.
3. If the instruction asks to change something, update the matching entry.
4. Preserve the existing ## section structure and all other entries.
-5. Output ONLY the updated markdown — no explanations, no wrapping.
+5. NEVER use personal sections like "## About the user", "## Preferences", or
+ "## Instructions". Team memory must stay team-scoped.
+6. Output ONLY the updated markdown — no explanations, no wrapping.
{current_memory}
@@ -372,6 +374,7 @@ async def edit_team_memory(
commit_fn=session.commit,
rollback_fn=session.rollback,
label="team memory",
+ scope="team",
)
if save_result.get("status") == "error":
diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py
index 3c7b9af7e..fd118528e 100644
--- a/surfsense_backend/app/tasks/chat/stream_new_chat.py
+++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py
@@ -37,7 +37,10 @@ from app.agents.new_chat.llm_config import (
load_agent_config,
load_llm_config_from_yaml,
)
-from app.agents.new_chat.memory_extraction import extract_and_save_memory
+from app.agents.new_chat.memory_extraction import (
+ extract_and_save_memory,
+ extract_and_save_team_memory,
+)
from app.db import (
ChatVisibility,
NewChatMessage,
@@ -1545,15 +1548,26 @@ async def stream_new_chat(
chat_id, generated_title
)
- # Fire background memory extraction if the agent didn't handle it
- if not stream_result.agent_called_update_memory and user_id:
- asyncio.create_task(
- extract_and_save_memory(
- user_message=user_query,
- user_id=user_id,
- llm=llm,
+ # Fire background memory extraction if the agent didn't handle it.
+ # Shared threads write to team memory; private threads write to user memory.
+ if not stream_result.agent_called_update_memory:
+ if visibility == ChatVisibility.SEARCH_SPACE:
+ asyncio.create_task(
+ extract_and_save_team_memory(
+ user_message=user_query,
+ search_space_id=search_space_id,
+ llm=llm,
+ author_display_name=current_user_display_name,
+ )
+ )
+ elif user_id:
+ asyncio.create_task(
+ extract_and_save_memory(
+ user_message=user_query,
+ user_id=user_id,
+ llm=llm,
+ )
)
- )
# Finish the step and message
yield streaming_service.format_finish_step()
diff --git a/surfsense_backend/tests/unit/agents/new_chat/tools/test_update_memory_scope.py b/surfsense_backend/tests/unit/agents/new_chat/tools/test_update_memory_scope.py
new file mode 100644
index 000000000..caef29623
--- /dev/null
+++ b/surfsense_backend/tests/unit/agents/new_chat/tools/test_update_memory_scope.py
@@ -0,0 +1,83 @@
+"""Unit tests for memory scope validation."""
+
+import pytest
+
+from app.agents.new_chat.tools.update_memory import _save_memory, _validate_memory_scope
+
+pytestmark = pytest.mark.unit
+
+
+class _Recorder:
+ def __init__(self) -> None:
+ self.applied_content: str | None = None
+ self.commit_calls = 0
+ self.rollback_calls = 0
+
+ def apply(self, content: str) -> None:
+ self.applied_content = content
+
+ async def commit(self) -> None:
+ self.commit_calls += 1
+
+ async def rollback(self) -> None:
+ self.rollback_calls += 1
+
+
+def test_validate_memory_scope_rejects_user_sections_in_team_scope() -> None:
+ content = "## About the user\n- (2026-04-10) Student studying DSA\n"
+ result = _validate_memory_scope(content, "team")
+ assert result is not None
+ assert result["status"] == "error"
+ assert "personal sections" in result["message"]
+
+
+def test_validate_memory_scope_rejects_team_sections_in_user_scope() -> None:
+ content = "## Team decisions\n- (2026-04-10) Python-first backend policy\n"
+ result = _validate_memory_scope(content, "user")
+ assert result is not None
+ assert result["status"] == "error"
+ assert "team sections" in result["message"]
+
+
+def test_validate_memory_scope_normalizes_heading_case_and_spacing() -> None:
+ content = "## About The User \n- (2026-04-10) Student\n"
+ result = _validate_memory_scope(content, "team")
+ assert result is not None
+ assert result["status"] == "error"
+
+
+@pytest.mark.asyncio
+async def test_save_memory_blocks_cross_scope_write_before_commit() -> None:
+ recorder = _Recorder()
+ result = await _save_memory(
+ updated_memory="## About the user\n- (2026-04-10) Student\n",
+ old_memory=None,
+ llm=None,
+ apply_fn=recorder.apply,
+ commit_fn=recorder.commit,
+ rollback_fn=recorder.rollback,
+ label="team memory",
+ scope="team",
+ )
+ assert result["status"] == "error"
+ assert recorder.commit_calls == 0
+ assert recorder.applied_content is None
+
+
+@pytest.mark.asyncio
+async def test_save_memory_allows_valid_scope_and_commits() -> None:
+ recorder = _Recorder()
+ content = "## Team decisions\n- (2026-04-10) Python-first backend policy\n"
+ result = await _save_memory(
+ updated_memory=content,
+ old_memory=None,
+ llm=None,
+ apply_fn=recorder.apply,
+ commit_fn=recorder.commit,
+ rollback_fn=recorder.rollback,
+ label="team memory",
+ scope="team",
+ )
+ assert result["status"] == "saved"
+ assert recorder.commit_calls == 1
+ assert recorder.applied_content == content