diff --git a/surfsense_backend/app/agents/new_chat/memory_extraction.py b/surfsense_backend/app/agents/new_chat/memory_extraction.py
new file mode 100644
index 000000000..ceff6ff41
--- /dev/null
+++ b/surfsense_backend/app/agents/new_chat/memory_extraction.py
@@ -0,0 +1,115 @@
+"""Background memory extraction for the SurfSense agent.
+
+After each agent response, if the agent did not call ``update_memory`` during
+the turn, this module runs a lightweight LLM call to decide whether the user's
+message contains any long-term information worth persisting.
+
+Only user (personal) memory is handled here — team memory relies on explicit
+agent calls.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Any
+from uuid import UUID
+
+from langchain_core.messages import HumanMessage
+from sqlalchemy import select
+
+from app.agents.new_chat.tools.update_memory import _save_memory
+from app.db import User, shielded_async_session
+
+logger = logging.getLogger(__name__)
+
+# Prompt template for the background extraction LLM call. It is rendered with
+# ``str.format`` and expects two fields: ``current_memory`` (the stored memory
+# document, or a stand-in when empty) and ``user_message``. The model is told
+# to reply with either the FULL merged memory document or the literal
+# sentinel ``NO_UPDATE``.
+# NOTE(review): the rules require a (YYYY-MM-DD) prefix on every bullet, but
+# the template has no placeholder for the current date — the caller has to
+# supply the date some other way or the model will guess it.
+_MEMORY_EXTRACT_PROMPT = """\
+You are a memory extraction assistant. Analyze the user's message and decide \
+if it contains any long-term information worth persisting to memory.
+
+Worth remembering: preferences, background/identity, goals, projects, \
+instructions, tools/languages they use, decisions, expertise, workplace — \
+durable facts that will matter in future conversations.
+
+NOT worth remembering: greetings, one-off factual questions, session \
+logistics, ephemeral requests, follow-up clarifications with no new personal \
+info, things that only matter for the current task.
+
+If the message contains memorizable information, output the FULL updated \
+memory document with the new facts merged into the existing content. Follow \
+these rules:
+- Use the same ## section structure as the existing memory.
+- Keep entries as single concise bullet points (under 120 chars each).
+- Every bullet MUST start with a (YYYY-MM-DD) date prefix.
+- If a new fact contradicts an existing entry, update the existing entry.
+- Do not duplicate information that is already present.
+- Standard sections: \
+"## About the user", "## Preferences", "## Instructions"
+
+If nothing is worth remembering, output exactly: NO_UPDATE
+
+
+{current_memory}
+
+
+
+{user_message}
+"""
+
+
+async def extract_and_save_memory(
+ *,
+ user_message: str,
+ user_id: str | None,
+ llm: Any,
+) -> None:
+ """Background task: extract memorizable info and persist it.
+
+ Designed to be fire-and-forget — catches all exceptions internally.
+ """
+ if not user_id:
+ return
+
+ try:
+ uid = UUID(user_id) if isinstance(user_id, str) else user_id
+
+ async with shielded_async_session() as session:
+ result = await session.execute(select(User).where(User.id == uid))
+ user = result.scalars().first()
+ if not user:
+ return
+
+ old_memory = user.memory_md
+ prompt = _MEMORY_EXTRACT_PROMPT.format(
+ current_memory=old_memory or "(empty)",
+ user_message=user_message,
+ )
+ response = await llm.ainvoke(
+ [HumanMessage(content=prompt)],
+ config={"tags": ["surfsense:internal", "memory-extraction"]},
+ )
+ text = (
+ response.content
+ if isinstance(response.content, str)
+ else str(response.content)
+ ).strip()
+
+ if text == "NO_UPDATE" or not text:
+ logger.debug("Memory extraction: no update needed (user %s)", uid)
+ return
+
+ save_result = await _save_memory(
+ updated_memory=text,
+ old_memory=old_memory,
+ llm=llm,
+ apply_fn=lambda content: setattr(user, "memory_md", content),
+ commit_fn=session.commit,
+ rollback_fn=session.rollback,
+ label="memory",
+ )
+ logger.info(
+ "Background memory extraction for user %s: %s",
+ uid,
+ save_result.get("status"),
+ )
+ except Exception:
+ logger.exception("Background user memory extraction failed")
diff --git a/surfsense_backend/app/agents/new_chat/middleware/memory_injection.py b/surfsense_backend/app/agents/new_chat/middleware/memory_injection.py
index 05b8d2be3..cd09e6b45 100644
--- a/surfsense_backend/app/agents/new_chat/middleware/memory_injection.py
+++ b/surfsense_backend/app/agents/new_chat/middleware/memory_injection.py
@@ -19,7 +19,7 @@ from langgraph.runtime import Runtime
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
-from app.agents.new_chat.tools.update_memory import MEMORY_HARD_LIMIT
+from app.agents.new_chat.tools.update_memory import MEMORY_HARD_LIMIT, MEMORY_SOFT_LIMIT
from app.db import ChatVisibility, SearchSpace, User, shielded_async_session
logger = logging.getLogger(__name__)
@@ -70,6 +70,15 @@ class MemoryInjectionMiddleware(AgentMiddleware): # type: ignore[type-arg]
f"{user_memory}\n"
f""
)
+ if chars > MEMORY_SOFT_LIMIT:
+ memory_blocks.append(
+ f"Your personal memory is at "
+ f"{chars:,}/{MEMORY_HARD_LIMIT:,} characters and approaching "
+ f"the hard limit. On your next update_memory call, consolidate "
+ f"by merging duplicates, removing outdated entries, and "
+ f"shortening descriptions before adding anything new."
+ f""
+ )
if self.visibility == ChatVisibility.SEARCH_SPACE:
team_memory = await self._load_team_memory(session)
@@ -80,6 +89,15 @@ class MemoryInjectionMiddleware(AgentMiddleware): # type: ignore[type-arg]
f"{team_memory}\n"
f""
)
+ if chars > MEMORY_SOFT_LIMIT:
+ memory_blocks.append(
+ f"Team memory is at "
+ f"{chars:,}/{MEMORY_HARD_LIMIT:,} characters and approaching "
+ f"the hard limit. On your next update_memory call, consolidate "
+ f"by merging duplicates, removing outdated entries, and "
+ f"shortening descriptions before adding anything new."
+ f""
+ )
if not memory_blocks:
return None
diff --git a/surfsense_backend/app/agents/new_chat/system_prompt.py b/surfsense_backend/app/agents/new_chat/system_prompt.py
index f811deda9..403019d96 100644
--- a/surfsense_backend/app/agents/new_chat/system_prompt.py
+++ b/surfsense_backend/app/agents/new_chat/system_prompt.py
@@ -271,8 +271,7 @@ _MEMORY_TOOL_INSTRUCTIONS: dict[str, dict[str, str]] = {
`limit` attributes show your current usage and the maximum allowed size.
- This is your curated long-term memory — the distilled essence of what you know about
the user, not raw conversation logs.
- - You are the sole mechanism for persisting memory — there is no background extraction.
- Call update_memory when:
+ - Call update_memory when:
* The user explicitly asks to remember or forget something
* The user shares durable facts or preferences that will matter in future conversations
- The user's name is already provided via — do not store it in memory.
@@ -285,21 +284,18 @@ _MEMORY_TOOL_INSTRUCTIONS: dict[str, dict[str, str]] = {
- Every bullet MUST start with a (YYYY-MM-DD) date prefix indicating when it was recorded or last updated.
- Keep it concise and well under the character limit shown in .
- You MUST organize memory using these standard sections (add new `##` sections only if none of the standard ones fit):
- ## About the user (pinned) — role, background, company
+ ## About the user — role, background, company
## Preferences — languages, tools, frameworks, response style
- ## Instructions (pinned) — standing instructions, things to always/never do
+ ## Instructions — standing instructions, things to always/never do
- Each entry MUST be a single bullet point. Keep entries concise (aim for under 120 chars each).
- - Sections with `(pinned)` in the heading are protected — the system will reject any
- update that removes them. Users can add `(pinned)` to any `##` heading to protect it.
- - During consolidation, prioritize keeping: pinned sections > preferences.
+ - During consolidation, prioritize keeping: identity/instructions > preferences.
""",
"shared": """
- update_memory: Update the team's shared memory document for this search space.
- Your current team memory is already in in your context. The `chars`
and `limit` attributes show current usage and the maximum allowed size.
- This is the team's curated long-term memory — decisions, conventions, key facts.
- - You are the sole mechanism for persisting team memory — there is no background extraction.
- Call update_memory when:
+ - Call update_memory when:
* A team member explicitly asks to remember or forget something
* The conversation surfaces durable team decisions, conventions, or facts
that will matter in future conversations
@@ -312,14 +308,12 @@ _MEMORY_TOOL_INSTRUCTIONS: dict[str, dict[str, str]] = {
- Every bullet MUST start with a (YYYY-MM-DD) date prefix indicating when it was recorded or last updated.
- Keep it concise and well under the character limit shown in .
- You MUST organize memory using these standard sections (add new `##` sections only if none of the standard ones fit):
- ## Team decisions (pinned) — agreed-upon choices with rationale
- ## Conventions (pinned) — coding standards, tools, processes, naming patterns
+ ## Team decisions — agreed-upon choices with rationale
+ ## Conventions — coding standards, tools, processes, naming patterns
## Key facts — where things are, how things work, team structure
## Current priorities — active projects, deadlines, blockers
- Each entry MUST be a single bullet point. Keep entries concise (aim for under 120 chars each).
- - Sections with `(pinned)` in the heading are protected — the system will reject any
- update that removes them. Users can add `(pinned)` to any `##` heading to protect it.
- - During consolidation, prioritize keeping: pinned sections > key facts > current priorities.
+ - During consolidation, prioritize keeping: decisions/conventions > key facts > current priorities.
""",
},
}
@@ -329,21 +323,21 @@ _MEMORY_TOOL_EXAMPLES: dict[str, dict[str, str]] = {
"private": """
- is empty. User: "I'm a space enthusiast, explain astrophage to me"
- The user casually shared a durable fact about themselves. Save it:
- update_memory(updated_memory="## About the user (pinned)\\n- (2025-03-15) Space enthusiast\\n")
+ update_memory(updated_memory="## About the user\\n- (2025-03-15) Space enthusiast\\n")
- User: "Remember that I prefer concise answers over detailed explanations"
- Durable preference. You see the current and merge:
- update_memory(updated_memory="## About the user (pinned)\\n- (2025-03-15) Space enthusiast\\n\\n## Preferences\\n- (2025-03-15) Prefers concise answers over detailed explanations\\n...")
+ update_memory(updated_memory="## About the user\\n- (2025-03-15) Space enthusiast\\n\\n## Preferences\\n- (2025-03-15) Prefers concise answers over detailed explanations\\n...")
- User: "I actually moved to Tokyo last month"
- Updated fact, date prefix reflects when recorded:
- update_memory(updated_memory="## About the user (pinned)\\n- (2025-03-15) Lives in Tokyo (previously London)\\n...")
+ update_memory(updated_memory="## About the user\\n- (2025-03-15) Lives in Tokyo (previously London)\\n...")
- User: "I'm a freelance photographer working on a nature documentary"
- Durable background info. Save it under About the user:
- update_memory(updated_memory="## About the user (pinned)\\n- (2025-03-15) Freelance photographer\\n- (2025-03-15) Working on a nature documentary\\n")
+ update_memory(updated_memory="## About the user\\n- (2025-03-15) Freelance photographer\\n- (2025-03-15) Working on a nature documentary\\n")
""",
"shared": """
- User: "Let's remember that we decided to do weekly standup meetings on Mondays"
- Durable team decision:
- update_memory(updated_memory="## Team decisions (pinned)\\n- (2025-03-15) Weekly standup meetings on Mondays\\n...")
+ update_memory(updated_memory="## Team decisions\\n- (2025-03-15) Weekly standup meetings on Mondays\\n...")
- User: "Our office is in downtown Seattle, 5th floor"
- Durable team fact:
update_memory(updated_memory="## Key facts\\n- (2025-03-15) Office location: downtown Seattle, 5th floor\\n...")
diff --git a/surfsense_backend/app/agents/new_chat/tools/update_memory.py b/surfsense_backend/app/agents/new_chat/tools/update_memory.py
index 991e8338e..b487fb4a0 100644
--- a/surfsense_backend/app/agents/new_chat/tools/update_memory.py
+++ b/surfsense_backend/app/agents/new_chat/tools/update_memory.py
@@ -6,12 +6,10 @@ always sees the current memory in / tags injected
by MemoryInjectionMiddleware, so it passes the FULL updated document each time.
Overflow handling:
- - Soft limit (18K chars): an automatic LLM-driven consolidation is attempted
- to proactively keep memory lean. The save always succeeds.
- - Hard limit (25K chars): save rejected if memory still exceeds this after
- consolidation.
- - Pinned sections: headings containing ``(pinned)`` are protected — the system
- rejects any update that drops them and auto-restores them during consolidation.
+ - Soft limit (18K chars): a warning is returned telling the agent to
+ consolidate on the next update.
+ - Hard limit (25K chars): a forced LLM-driven rewrite compresses the document.
+ If it still exceeds the limit after rewriting, the save is rejected.
- Diff validation: warns when entire ``##`` sections are dropped or when the
document shrinks by more than 60%.
"""
@@ -35,74 +33,9 @@ logger = logging.getLogger(__name__)
MEMORY_SOFT_LIMIT = 18_000
MEMORY_HARD_LIMIT = 25_000
-_PINNED_RE = re.compile(r"^##\s+.+\(pinned\)", re.MULTILINE)
_SECTION_HEADING_RE = re.compile(r"^##\s+(.+)$", re.MULTILINE)
-# ---------------------------------------------------------------------------
-# Pinned-section helpers
-# ---------------------------------------------------------------------------
-
-
-def _extract_pinned_headings(memory: str) -> set[str]:
- """Return the set of ``## …`` headings that contain ``(pinned)``."""
- return set(_PINNED_RE.findall(memory))
-
-
-def _extract_section_map(memory: str) -> dict[str, str]:
- """Split *memory* into ``{heading_text: full_section_content}``."""
- sections: dict[str, str] = {}
- parts = _SECTION_HEADING_RE.split(memory)
- # parts: [preamble, heading1, body1, heading2, body2, …]
- for i in range(1, len(parts) - 1, 2):
- heading = parts[i].strip()
- body = parts[i + 1]
- sections[heading] = f"## {heading}\n{body}"
- return sections
-
-
-def _validate_pinned_preserved(old_memory: str | None, new_memory: str) -> str | None:
- """Return an error message if pinned headings from *old_memory* are missing
- in *new_memory*, else ``None``."""
- if not old_memory:
- return None
- old_pinned = _extract_pinned_headings(old_memory)
- if not old_pinned:
- return None
- new_pinned = _extract_pinned_headings(new_memory)
- dropped = old_pinned - new_pinned
- if dropped:
- names = ", ".join(sorted(dropped))
- return (
- f"Cannot remove pinned sections: {names}. "
- "These sections are protected and must be preserved. "
- "Re-include them and call update_memory again."
- )
- return None
-
-
-def _restore_missing_pinned(old_memory: str, consolidated: str) -> str:
- """Prepend any pinned sections from *old_memory* that are absent in
- *consolidated*."""
- old_pinned = _extract_pinned_headings(old_memory)
- if not old_pinned:
- return consolidated
- new_pinned = _extract_pinned_headings(consolidated)
- dropped = old_pinned - new_pinned
- if not dropped:
- return consolidated
-
- old_sections = _extract_section_map(old_memory)
- restored_parts: list[str] = []
- for heading in sorted(dropped):
- raw_heading = heading.removeprefix("## ").strip()
- if raw_heading in old_sections:
- restored_parts.append(old_sections[raw_heading].rstrip())
- if restored_parts:
- return "\n\n".join(restored_parts) + "\n\n" + consolidated
- return consolidated
-
-
# ---------------------------------------------------------------------------
# Diff validation
# ---------------------------------------------------------------------------
@@ -173,37 +106,35 @@ def _soft_warning(content: str) -> str | None:
# ---------------------------------------------------------------------------
-# Auto-consolidation via a separate LLM call
+# Forced rewrite when memory exceeds the hard limit
# ---------------------------------------------------------------------------
-_CONSOLIDATION_PROMPT = """\
+_FORCED_REWRITE_PROMPT = """\
You are a memory curator. The following memory document exceeds the character \
limit and must be shortened.
RULES:
1. Rewrite the document to be under {target} characters.
-2. Sections whose headings contain "(pinned)" MUST be preserved EXACTLY as-is \
- — do not modify, shorten, or remove them.
-3. Only consolidate non-pinned content.
-4. Priority for keeping content: pinned sections > identity/instructions > \
- preferences > current context.
-5. Merge duplicate entries, remove outdated entries, shorten verbose descriptions.
-6. Each entry must be a single bullet point.
-7. Every bullet MUST keep its (YYYY-MM-DD) date prefix.
-8. Output ONLY the consolidated markdown — no explanations, no wrapping.
+2. Preserve all ## section headings.
+3. Priority for keeping content: identity/instructions > preferences > \
+ current context.
+4. Merge duplicate entries, remove outdated entries, shorten verbose descriptions.
+5. Each entry must be a single bullet point.
+6. Every bullet MUST keep its (YYYY-MM-DD) date prefix.
+7. Output ONLY the consolidated markdown — no explanations, no wrapping.
{content}
"""
-async def _auto_consolidate(content: str, llm: Any) -> str | None:
- """Use a focused LLM call to consolidate *content* under the soft limit.
+async def _forced_rewrite(content: str, llm: Any) -> str | None:
+ """Use a focused LLM call to compress *content* under the hard limit.
- Returns the consolidated string, or ``None`` if consolidation fails.
+ Returns the rewritten string, or ``None`` if the call fails.
"""
try:
- prompt = _CONSOLIDATION_PROMPT.format(target=MEMORY_SOFT_LIMIT, content=content)
+ prompt = _FORCED_REWRITE_PROMPT.format(target=MEMORY_HARD_LIMIT, content=content)
response = await llm.ainvoke(
[HumanMessage(content=prompt)],
config={"tags": ["surfsense:internal"]},
@@ -215,7 +146,7 @@ async def _auto_consolidate(content: str, llm: Any) -> str | None:
)
return text.strip()
except Exception:
- logger.exception("Auto-consolidation LLM call failed")
+ logger.exception("Forced rewrite LLM call failed")
return None
@@ -234,16 +165,17 @@ async def _save_memory(
rollback_fn,
label: str,
) -> dict[str, Any]:
- """Validate, optionally auto-consolidate, save, and return a response dict.
+ """Validate, optionally force-rewrite if over the hard limit, save, and
+ return a response dict.
Parameters
----------
updated_memory : str
The new document the agent submitted.
old_memory : str | None
- The previously persisted document (for diff / pinned checks).
+ The previously persisted document (for diff checks).
llm : Any | None
- LLM instance for auto-consolidation (may be ``None``).
+ LLM instance for forced rewrite (may be ``None``).
apply_fn : callable(str) -> None
Callback that sets the new memory on the ORM object.
commit_fn : coroutine
@@ -255,21 +187,13 @@ async def _save_memory(
"""
content = updated_memory
- # --- pinned-section gate (before any size check) ---
- pinned_err = _validate_pinned_preserved(old_memory, content)
- if pinned_err:
- return {"status": "error", "message": pinned_err}
+ # --- forced rewrite if over the hard limit ---
+ if len(content) > MEMORY_HARD_LIMIT and llm is not None:
+ rewritten = await _forced_rewrite(content, llm)
+ if rewritten is not None and len(rewritten) < len(content):
+ content = rewritten
- # --- auto-consolidate proactively at the soft limit ---
- if len(content) > MEMORY_SOFT_LIMIT and llm is not None:
- consolidated = await _auto_consolidate(content, llm)
- if consolidated is not None:
- if old_memory:
- consolidated = _restore_missing_pinned(old_memory, consolidated)
- if len(consolidated) < len(content):
- content = consolidated
-
- # --- hard-limit gate (reject if still too large after consolidation) ---
+ # --- hard-limit gate (reject if still too large after rewrite) ---
size_err = _validate_memory_size(content)
if size_err:
return size_err
@@ -290,7 +214,7 @@ async def _save_memory(
}
if content is not updated_memory:
- resp["notice"] = "Memory was automatically consolidated to fit within limits."
+ resp["notice"] = "Memory was automatically rewritten to fit within limits."
diff_warnings = _validate_diff(old_memory, content)
if diff_warnings:
diff --git a/surfsense_backend/app/routes/memory_routes.py b/surfsense_backend/app/routes/memory_routes.py
index aa8b1be28..6ec535626 100644
--- a/surfsense_backend/app/routes/memory_routes.py
+++ b/surfsense_backend/app/routes/memory_routes.py
@@ -1,13 +1,24 @@
"""Routes for user memory management (personal memory.md)."""
+from __future__ import annotations
+
+import logging
+
from fastapi import APIRouter, Depends, HTTPException
+from langchain_core.messages import HumanMessage
from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncSession
-from app.agents.new_chat.tools.update_memory import MEMORY_HARD_LIMIT
+from app.agents.new_chat.llm_config import (
+ create_chat_litellm_from_agent_config,
+ load_agent_llm_config_for_search_space,
+)
+from app.agents.new_chat.tools.update_memory import MEMORY_HARD_LIMIT, _save_memory
from app.db import User, get_async_session
from app.users import current_active_user
+logger = logging.getLogger(__name__)
+
router = APIRouter()
@@ -19,6 +30,33 @@ class MemoryUpdate(BaseModel):
memory_md: str
+class MemoryEditRequest(BaseModel):
+ """Request body for ``POST /users/me/memory/edit``."""
+
+ # Natural-language edit instruction; substituted into the edit prompt.
+ query: str
+ # Search space whose agent LLM configuration is loaded to perform the edit.
+ search_space_id: int
+
+
+# Prompt template for LLM-driven edits of the personal memory document.
+# Rendered with ``str.format``; expects ``current_memory`` and ``instruction``.
+# The model must answer with the FULL updated markdown and nothing else.
+# NOTE(review): rule 1 refers to "today's date", but the template has no
+# placeholder for it — the caller has to provide the date separately.
+_MEMORY_EDIT_PROMPT = """\
+You are a memory editor. The user wants to modify their memory document. \
+Apply the user's instruction to the existing memory document and output the \
+FULL updated document.
+
+RULES:
+1. If the instruction asks to add something, add it in the appropriate \
+## section with a (YYYY-MM-DD) date prefix using today's date.
+2. If the instruction asks to remove something, remove the matching entry.
+3. If the instruction asks to change something, update the matching entry.
+4. Preserve the existing ## section structure and all other entries.
+5. Output ONLY the updated markdown — no explanations, no wrapping.
+
+
+{current_memory}
+
+
+
+{instruction}
+"""
+
+
@router.get("/users/me/memory", response_model=MemoryRead)
async def get_user_memory(
user: User = Depends(current_active_user),
@@ -44,3 +82,60 @@ async def update_user_memory(
await session.commit()
await session.refresh(user, ["memory_md"])
return MemoryRead(memory_md=user.memory_md or "")
+
+
+@router.post("/users/me/memory/edit", response_model=MemoryRead)
+async def edit_user_memory(
+ body: MemoryEditRequest,
+ user: User = Depends(current_active_user),
+ session: AsyncSession = Depends(get_async_session),
+):
+ """Apply a natural language edit to the user's personal memory via LLM."""
+ agent_config = await load_agent_llm_config_for_search_space(
+ session, body.search_space_id
+ )
+ if not agent_config:
+ raise HTTPException(status_code=500, detail="No LLM configuration available.")
+ llm = create_chat_litellm_from_agent_config(agent_config)
+ if not llm:
+ raise HTTPException(status_code=500, detail="Failed to create LLM instance.")
+
+ await session.refresh(user, ["memory_md"])
+ current_memory = user.memory_md or ""
+
+ prompt = _MEMORY_EDIT_PROMPT.format(
+ current_memory=current_memory or "(empty)",
+ instruction=body.query,
+ )
+ try:
+ response = await llm.ainvoke(
+ [HumanMessage(content=prompt)],
+ config={"tags": ["surfsense:internal", "memory-edit"]},
+ )
+ updated = (
+ response.content
+ if isinstance(response.content, str)
+ else str(response.content)
+ ).strip()
+ except Exception as e:
+ logger.exception("Memory edit LLM call failed: %s", e)
+ raise HTTPException(status_code=500, detail="Memory edit failed.") from e
+
+ if not updated:
+ raise HTTPException(status_code=400, detail="LLM returned empty result.")
+
+ result = await _save_memory(
+ updated_memory=updated,
+ old_memory=current_memory,
+ llm=llm,
+ apply_fn=lambda content: setattr(user, "memory_md", content),
+ commit_fn=session.commit,
+ rollback_fn=session.rollback,
+ label="memory",
+ )
+
+ if result.get("status") == "error":
+ raise HTTPException(status_code=400, detail=result["message"])
+
+ await session.refresh(user, ["memory_md"])
+ return MemoryRead(memory_md=user.memory_md or "")
diff --git a/surfsense_backend/app/routes/search_spaces_routes.py b/surfsense_backend/app/routes/search_spaces_routes.py
index 0650b8dfe..e66fc5f0c 100644
--- a/surfsense_backend/app/routes/search_spaces_routes.py
+++ b/surfsense_backend/app/routes/search_spaces_routes.py
@@ -1,11 +1,17 @@
import logging
from fastapi import APIRouter, Depends, HTTPException
+from langchain_core.messages import HumanMessage
+from pydantic import BaseModel as PydanticBaseModel
from sqlalchemy import func
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
-from app.agents.new_chat.tools.update_memory import MEMORY_HARD_LIMIT
+from app.agents.new_chat.llm_config import (
+ create_chat_litellm_from_agent_config,
+ load_agent_llm_config_for_search_space,
+)
+from app.agents.new_chat.tools.update_memory import MEMORY_HARD_LIMIT, _save_memory
from app.config import config
from app.db import (
ImageGenerationConfig,
@@ -35,6 +41,32 @@ logger = logging.getLogger(__name__)
router = APIRouter()
+class _TeamMemoryEditRequest(PydanticBaseModel):
+ """Request body for ``POST /searchspaces/{search_space_id}/memory/edit``."""
+
+ # Natural-language edit instruction; substituted into the team edit prompt.
+ query: str
+
+
+# Prompt template for LLM-driven edits of a search space's shared memory.
+# Rendered with ``str.format``; expects ``current_memory`` and ``instruction``.
+# The model must answer with the FULL updated markdown and nothing else.
+# NOTE(review): rule 1 refers to "today's date", but the template has no
+# placeholder for it — the caller has to provide the date separately.
+_TEAM_MEMORY_EDIT_PROMPT = """\
+You are a memory editor for a team workspace. The user wants to modify the \
+team's shared memory document. Apply the user's instruction to the existing \
+memory document and output the FULL updated document.
+
+RULES:
+1. If the instruction asks to add something, add it in the appropriate \
+## section with a (YYYY-MM-DD) date prefix using today's date.
+2. If the instruction asks to remove something, remove the matching entry.
+3. If the instruction asks to change something, update the matching entry.
+4. Preserve the existing ## section structure and all other entries.
+5. Output ONLY the updated markdown — no explanations, no wrapping.
+
+
+{current_memory}
+
+
+
+{instruction}
+"""
+
+
async def create_default_roles_and_membership(
session: AsyncSession,
search_space_id: int,
@@ -280,6 +312,79 @@ async def update_search_space(
) from e
+@router.post(
+ "/searchspaces/{search_space_id}/memory/edit",
+ response_model=SearchSpaceRead,
+)
+async def edit_team_memory(
+ search_space_id: int,
+ body: _TeamMemoryEditRequest,
+ session: AsyncSession = Depends(get_async_session),
+ user: User = Depends(current_active_user),
+):
+ """Apply a natural language edit to the team memory via LLM."""
+ await check_search_space_access(session, user, search_space_id)
+
+ agent_config = await load_agent_llm_config_for_search_space(
+ session, search_space_id
+ )
+ if not agent_config:
+ raise HTTPException(status_code=500, detail="No LLM configuration available.")
+ llm = create_chat_litellm_from_agent_config(agent_config)
+ if not llm:
+ raise HTTPException(status_code=500, detail="Failed to create LLM instance.")
+
+ result = await session.execute(
+ select(SearchSpace).filter(SearchSpace.id == search_space_id)
+ )
+ db_search_space = result.scalars().first()
+ if not db_search_space:
+ raise HTTPException(status_code=404, detail="Search space not found")
+
+ current_memory = db_search_space.shared_memory_md or ""
+
+ prompt = _TEAM_MEMORY_EDIT_PROMPT.format(
+ current_memory=current_memory or "(empty)",
+ instruction=body.query,
+ )
+ try:
+ response = await llm.ainvoke(
+ [HumanMessage(content=prompt)],
+ config={"tags": ["surfsense:internal", "memory-edit"]},
+ )
+ updated = (
+ response.content
+ if isinstance(response.content, str)
+ else str(response.content)
+ ).strip()
+ except Exception as e:
+ logger.exception("Team memory edit LLM call failed: %s", e)
+ raise HTTPException(
+ status_code=500, detail="Team memory edit failed."
+ ) from e
+
+ if not updated:
+ raise HTTPException(status_code=400, detail="LLM returned empty result.")
+
+ save_result = await _save_memory(
+ updated_memory=updated,
+ old_memory=current_memory,
+ llm=llm,
+ apply_fn=lambda content: setattr(
+ db_search_space, "shared_memory_md", content
+ ),
+ commit_fn=session.commit,
+ rollback_fn=session.rollback,
+ label="team memory",
+ )
+
+ if save_result.get("status") == "error":
+ raise HTTPException(status_code=400, detail=save_result["message"])
+
+ await session.refresh(db_search_space)
+ return db_search_space
+
+
@router.delete("/searchspaces/{search_space_id}", response_model=dict)
async def delete_search_space(
search_space_id: int,
diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py
index 0a6c34e81..e12d189ac 100644
--- a/surfsense_backend/app/tasks/chat/stream_new_chat.py
+++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py
@@ -30,6 +30,7 @@ from sqlalchemy.orm import selectinload
from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent
from app.agents.new_chat.checkpointer import get_checkpointer
+from app.agents.new_chat.memory_extraction import extract_and_save_memory
from app.agents.new_chat.llm_config import (
AgentConfig,
create_chat_litellm_from_agent_config,
@@ -139,6 +140,7 @@ class StreamResult:
is_interrupted: bool = False
interrupt_value: dict[str, Any] | None = None
sandbox_files: list[str] = field(default_factory=list) # unused, kept for compat
+ agent_called_update_memory: bool = False
async def _stream_agent_events(
@@ -181,6 +183,7 @@ async def _stream_agent_events(
last_active_step_items: list[str] = initial_step_items or []
just_finished_tool: bool = False
active_tool_depth: int = 0 # Track nesting: >0 means we're inside a tool
+ called_update_memory: bool = False
def next_thinking_step_id() -> str:
nonlocal thinking_step_counter
@@ -488,6 +491,9 @@ async def _stream_agent_events(
tool_name = event.get("name", "unknown_tool")
raw_output = event.get("data", {}).get("output", "")
+ if tool_name == "update_memory":
+ called_update_memory = True
+
if hasattr(raw_output, "content"):
content = raw_output.content
if isinstance(content, str):
@@ -1109,6 +1115,7 @@ async def _stream_agent_events(
yield completion_event
result.accumulated_text = accumulated_text
+ result.agent_called_update_memory = called_update_memory
state = await agent.aget_state(config)
is_interrupted = state.tasks and any(task.interrupts for task in state.tasks)
@@ -1538,6 +1545,16 @@ async def stream_new_chat(
chat_id, generated_title
)
+ # Fire background memory extraction if the agent didn't handle it
+ if not stream_result.agent_called_update_memory and user_id:
+ asyncio.create_task(
+ extract_and_save_memory(
+ user_message=user_query,
+ user_id=user_id,
+ llm=llm,
+ )
+ )
+
# Finish the step and message
yield streaming_service.format_finish_step()
yield streaming_service.format_finish()
diff --git a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/MemoryContent.tsx b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/MemoryContent.tsx
index 85f2db695..8403c641c 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/MemoryContent.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/MemoryContent.tsx
@@ -1,13 +1,16 @@
"use client";
-import { Info } from "lucide-react";
-import { useCallback, useEffect, useState } from "react";
+import { useAtomValue } from "jotai";
+import { Info, Send } from "lucide-react";
+import { useCallback, useEffect, useRef, useState } from "react";
import { toast } from "sonner";
import { z } from "zod";
+import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms";
import { PlateEditor } from "@/components/editor/plate-editor";
import { Alert, AlertDescription } from "@/components/ui/alert";
import { Button } from "@/components/ui/button";
import { Spinner } from "@/components/ui/spinner";
+import { Textarea } from "@/components/ui/textarea";
import { baseApiService } from "@/lib/apis/base-api.service";
const MEMORY_HARD_LIMIT = 25_000;
@@ -17,17 +20,19 @@ const MemoryReadSchema = z.object({
});
export function MemoryContent() {
+ const activeSearchSpaceId = useAtomValue(activeSearchSpaceIdAtom);
const [memory, setMemory] = useState("");
- const [savedMemory, setSavedMemory] = useState("");
const [loading, setLoading] = useState(true);
const [saving, setSaving] = useState(false);
+ const [editQuery, setEditQuery] = useState("");
+ const [editing, setEditing] = useState(false);
+ const textareaRef = useRef(null);
const fetchMemory = useCallback(async () => {
try {
setLoading(true);
const data = await baseApiService.get("/api/v1/users/me/memory", MemoryReadSchema);
setMemory(data.memory_md);
- setSavedMemory(data.memory_md);
} catch {
toast.error("Failed to load memory");
} finally {
@@ -39,21 +44,6 @@ export function MemoryContent() {
fetchMemory();
}, [fetchMemory]);
- const handleSave = async () => {
- try {
- setSaving(true);
- const data = await baseApiService.put("/api/v1/users/me/memory", MemoryReadSchema, {
- body: { memory_md: memory },
- });
- setSavedMemory(data.memory_md);
- toast.success("Memory saved");
- } catch {
- toast.error("Failed to save memory");
- } finally {
- setSaving(false);
- }
- };
-
const handleClear = async () => {
try {
setSaving(true);
@@ -61,7 +51,6 @@ export function MemoryContent() {
body: { memory_md: "" },
});
setMemory(data.memory_md);
- setSavedMemory(data.memory_md);
toast.success("Memory cleared");
} catch {
toast.error("Failed to clear memory");
@@ -70,14 +59,33 @@ export function MemoryContent() {
}
};
- const handleMarkdownChange = useCallback((md: string) => {
- const trimmed = md.trim();
- setMemory(trimmed);
- }, []);
+ const handleEdit = async () => {
+ const query = editQuery.trim();
+ if (!query) return;
+
+ try {
+ setEditing(true);
+ const data = await baseApiService.post("/api/v1/users/me/memory/edit", MemoryReadSchema, {
+ body: { query, search_space_id: Number(activeSearchSpaceId) },
+ });
+ setMemory(data.memory_md);
+ setEditQuery("");
+ toast.success("Memory updated");
+ } catch {
+ toast.error("Failed to edit memory");
+ } finally {
+ setEditing(false);
+ }
+ };
+
+ const handleKeyDown = (e: React.KeyboardEvent) => {
+ if (e.key === "Enter" && !e.shiftKey) {
+ e.preventDefault();
+ handleEdit();
+ }
+ };
- const hasChanges = memory !== savedMemory;
const charCount = memory.length;
- const isOverLimit = charCount > MEMORY_HARD_LIMIT;
const getCounterColor = () => {
if (charCount > MEMORY_HARD_LIMIT) return "text-red-500";
@@ -101,18 +109,16 @@ export function MemoryContent() {
SurfSense uses this personal memory to personalize your responses across all
- conversations. Supports Markdown formatting.
+ conversations. Use the input below to add, update, or remove memory entries.
SurfSense uses this shared memory to provide team-wide context across all conversations
- in this search space. Supports Markdown formatting.
+ in this search space. Use the input below to add, update, or remove memory entries.