feat: add background memory extraction and natural-language memory editing for user and team memory

This commit is contained in:
Anish Sarkar 2026-04-10 00:21:55 +05:30
parent cd72fa9a48
commit 84fc72e596
9 changed files with 534 additions and 224 deletions

View file

@ -0,0 +1,115 @@
"""Background memory extraction for the SurfSense agent.
After each agent response, if the agent did not call ``update_memory`` during
the turn, this module runs a lightweight LLM call to decide whether the user's
message contains any long-term information worth persisting.
Only user (personal) memory is handled here team memory relies on explicit
agent calls.
"""
from __future__ import annotations
import logging
from typing import Any
from uuid import UUID
from langchain_core.messages import HumanMessage
from sqlalchemy import select
from app.agents.new_chat.tools.update_memory import _save_memory
from app.db import User, shielded_async_session
logger = logging.getLogger(__name__)
# Prompt for the background extraction call made by ``extract_and_save_memory``.
# Contract: the model must output either the FULL merged memory document
# (same ## sections, dated bullets) or the exact sentinel ``NO_UPDATE``;
# the caller treats any other non-empty output as the new document.
# Format placeholders: {current_memory}, {user_message}.
# NOTE(review): some separators appear lost in transcription (e.g.
# "workplace durable facts") — confirm the intended wording.
_MEMORY_EXTRACT_PROMPT = """\
You are a memory extraction assistant. Analyze the user's message and decide \
if it contains any long-term information worth persisting to memory.
Worth remembering: preferences, background/identity, goals, projects, \
instructions, tools/languages they use, decisions, expertise, workplace \
durable facts that will matter in future conversations.
NOT worth remembering: greetings, one-off factual questions, session \
logistics, ephemeral requests, follow-up clarifications with no new personal \
info, things that only matter for the current task.
If the message contains memorizable information, output the FULL updated \
memory document with the new facts merged into the existing content. Follow \
these rules:
- Use the same ## section structure as the existing memory.
- Keep entries as single concise bullet points (under 120 chars each).
- Every bullet MUST start with a (YYYY-MM-DD) date prefix.
- If a new fact contradicts an existing entry, update the existing entry.
- Do not duplicate information that is already present.
- Standard sections: \
"## About the user", "## Preferences", "## Instructions"
If nothing is worth remembering, output exactly: NO_UPDATE
<current_memory>
{current_memory}
</current_memory>
<user_message>
{user_message}
</user_message>"""
async def extract_and_save_memory(
    *,
    user_message: str,
    user_id: str | None,
    llm: Any,
) -> None:
    """Inspect *user_message* for durable facts and persist them to user memory.

    Intended to run as a detached (fire-and-forget) background task: every
    exception is caught and logged here so a failure never reaches the caller.
    """
    if not user_id:
        return
    try:
        key = UUID(user_id) if isinstance(user_id, str) else user_id
        async with shielded_async_session() as db:
            row = await db.execute(select(User).where(User.id == key))
            account = row.scalars().first()
            if account is None:
                return
            previous = account.memory_md
            extraction_prompt = _MEMORY_EXTRACT_PROMPT.format(
                current_memory=previous or "(empty)",
                user_message=user_message,
            )
            reply = await llm.ainvoke(
                [HumanMessage(content=extraction_prompt)],
                config={"tags": ["surfsense:internal", "memory-extraction"]},
            )
            raw = reply.content
            decision = (raw if isinstance(raw, str) else str(raw)).strip()
            # The prompt instructs the model to emit exactly NO_UPDATE when
            # nothing is worth persisting; an empty reply means the same.
            if not decision or decision == "NO_UPDATE":
                logger.debug("Memory extraction: no update needed (user %s)", key)
                return
            # _save_memory performs validation and commit/rollback itself.
            outcome = await _save_memory(
                updated_memory=decision,
                old_memory=previous,
                llm=llm,
                apply_fn=lambda content: setattr(account, "memory_md", content),
                commit_fn=db.commit,
                rollback_fn=db.rollback,
                label="memory",
            )
            logger.info(
                "Background memory extraction for user %s: %s",
                key,
                outcome.get("status"),
            )
    except Exception:
        logger.exception("Background user memory extraction failed")

View file

@ -19,7 +19,7 @@ from langgraph.runtime import Runtime
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.agents.new_chat.tools.update_memory import MEMORY_HARD_LIMIT
from app.agents.new_chat.tools.update_memory import MEMORY_HARD_LIMIT, MEMORY_SOFT_LIMIT
from app.db import ChatVisibility, SearchSpace, User, shielded_async_session
logger = logging.getLogger(__name__)
@ -70,6 +70,15 @@ class MemoryInjectionMiddleware(AgentMiddleware): # type: ignore[type-arg]
f"{user_memory}\n"
f"</user_memory>"
)
if chars > MEMORY_SOFT_LIMIT:
memory_blocks.append(
f"<memory_warning>Your personal memory is at "
f"{chars:,}/{MEMORY_HARD_LIMIT:,} characters and approaching "
f"the hard limit. On your next update_memory call, consolidate "
f"by merging duplicates, removing outdated entries, and "
f"shortening descriptions before adding anything new."
f"</memory_warning>"
)
if self.visibility == ChatVisibility.SEARCH_SPACE:
team_memory = await self._load_team_memory(session)
@ -80,6 +89,15 @@ class MemoryInjectionMiddleware(AgentMiddleware): # type: ignore[type-arg]
f"{team_memory}\n"
f"</team_memory>"
)
if chars > MEMORY_SOFT_LIMIT:
memory_blocks.append(
f"<memory_warning>Team memory is at "
f"{chars:,}/{MEMORY_HARD_LIMIT:,} characters and approaching "
f"the hard limit. On your next update_memory call, consolidate "
f"by merging duplicates, removing outdated entries, and "
f"shortening descriptions before adding anything new."
f"</memory_warning>"
)
if not memory_blocks:
return None

View file

@ -271,8 +271,7 @@ _MEMORY_TOOL_INSTRUCTIONS: dict[str, dict[str, str]] = {
`limit` attributes show your current usage and the maximum allowed size.
- This is your curated long-term memory the distilled essence of what you know about
the user, not raw conversation logs.
- You are the sole mechanism for persisting memory there is no background extraction.
Call update_memory when:
- Call update_memory when:
* The user explicitly asks to remember or forget something
* The user shares durable facts or preferences that will matter in future conversations
- The user's name is already provided via <user_name> — do not store it in memory.
@ -285,21 +284,18 @@ _MEMORY_TOOL_INSTRUCTIONS: dict[str, dict[str, str]] = {
- Every bullet MUST start with a (YYYY-MM-DD) date prefix indicating when it was recorded or last updated.
- Keep it concise and well under the character limit shown in <user_memory>.
- You MUST organize memory using these standard sections (add new `##` sections only if none of the standard ones fit):
## About the user (pinned) — role, background, company
## About the user — role, background, company
## Preferences — languages, tools, frameworks, response style
## Instructions (pinned) — standing instructions, things to always/never do
## Instructions — standing instructions, things to always/never do
- Each entry MUST be a single bullet point. Keep entries concise (aim for under 120 chars each).
- Sections with `(pinned)` in the heading are protected the system will reject any
update that removes them. Users can add `(pinned)` to any `##` heading to protect it.
- During consolidation, prioritize keeping: pinned sections > preferences.
- During consolidation, prioritize keeping: identity/instructions > preferences.
""",
"shared": """
- update_memory: Update the team's shared memory document for this search space.
- Your current team memory is already in <team_memory> in your context. The `chars`
and `limit` attributes show current usage and the maximum allowed size.
- This is the team's curated long-term memory — decisions, conventions, key facts.
- You are the sole mechanism for persisting team memory there is no background extraction.
Call update_memory when:
- Call update_memory when:
* A team member explicitly asks to remember or forget something
* The conversation surfaces durable team decisions, conventions, or facts
that will matter in future conversations
@ -312,14 +308,12 @@ _MEMORY_TOOL_INSTRUCTIONS: dict[str, dict[str, str]] = {
- Every bullet MUST start with a (YYYY-MM-DD) date prefix indicating when it was recorded or last updated.
- Keep it concise and well under the character limit shown in <team_memory>.
- You MUST organize memory using these standard sections (add new `##` sections only if none of the standard ones fit):
## Team decisions (pinned) — agreed-upon choices with rationale
## Conventions (pinned) — coding standards, tools, processes, naming patterns
## Team decisions — agreed-upon choices with rationale
## Conventions — coding standards, tools, processes, naming patterns
## Key facts — where things are, how things work, team structure
## Current priorities — active projects, deadlines, blockers
- Each entry MUST be a single bullet point. Keep entries concise (aim for under 120 chars each).
- Sections with `(pinned)` in the heading are protected the system will reject any
update that removes them. Users can add `(pinned)` to any `##` heading to protect it.
- During consolidation, prioritize keeping: pinned sections > key facts > current priorities.
- During consolidation, prioritize keeping: decisions/conventions > key facts > current priorities.
""",
},
}
@ -329,21 +323,21 @@ _MEMORY_TOOL_EXAMPLES: dict[str, dict[str, str]] = {
"private": """
- <user_memory> is empty. User: "I'm a space enthusiast, explain astrophage to me"
- The user casually shared a durable fact about themselves. Save it:
update_memory(updated_memory="## About the user (pinned)\\n- (2025-03-15) Space enthusiast\\n")
update_memory(updated_memory="## About the user\\n- (2025-03-15) Space enthusiast\\n")
- User: "Remember that I prefer concise answers over detailed explanations"
- Durable preference. You see the current <user_memory> and merge:
update_memory(updated_memory="## About the user (pinned)\\n- (2025-03-15) Space enthusiast\\n\\n## Preferences\\n- (2025-03-15) Prefers concise answers over detailed explanations\\n...")
update_memory(updated_memory="## About the user\\n- (2025-03-15) Space enthusiast\\n\\n## Preferences\\n- (2025-03-15) Prefers concise answers over detailed explanations\\n...")
- User: "I actually moved to Tokyo last month"
- Updated fact, date prefix reflects when recorded:
update_memory(updated_memory="## About the user (pinned)\\n- (2025-03-15) Lives in Tokyo (previously London)\\n...")
update_memory(updated_memory="## About the user\\n- (2025-03-15) Lives in Tokyo (previously London)\\n...")
- User: "I'm a freelance photographer working on a nature documentary"
- Durable background info. Save it under About the user:
update_memory(updated_memory="## About the user (pinned)\\n- (2025-03-15) Freelance photographer\\n- (2025-03-15) Working on a nature documentary\\n")
update_memory(updated_memory="## About the user\\n- (2025-03-15) Freelance photographer\\n- (2025-03-15) Working on a nature documentary\\n")
""",
"shared": """
- User: "Let's remember that we decided to do weekly standup meetings on Mondays"
- Durable team decision:
update_memory(updated_memory="## Team decisions (pinned)\\n- (2025-03-15) Weekly standup meetings on Mondays\\n...")
update_memory(updated_memory="## Team decisions\\n- (2025-03-15) Weekly standup meetings on Mondays\\n...")
- User: "Our office is in downtown Seattle, 5th floor"
- Durable team fact:
update_memory(updated_memory="## Key facts\\n- (2025-03-15) Office location: downtown Seattle, 5th floor\\n...")

View file

@ -6,12 +6,10 @@ always sees the current memory in <user_memory> / <team_memory> tags injected
by MemoryInjectionMiddleware, so it passes the FULL updated document each time.
Overflow handling:
- Soft limit (18K chars): an automatic LLM-driven consolidation is attempted
to proactively keep memory lean. The save always succeeds.
- Hard limit (25K chars): save rejected if memory still exceeds this after
consolidation.
- Pinned sections: headings containing ``(pinned)`` are protected the system
rejects any update that drops them and auto-restores them during consolidation.
- Soft limit (18K chars): a warning is returned telling the agent to
consolidate on the next update.
- Hard limit (25K chars): a forced LLM-driven rewrite compresses the document.
If it still exceeds the limit after rewriting, the save is rejected.
- Diff validation: warns when entire ``##`` sections are dropped or when the
document shrinks by more than 60%.
"""
@ -35,74 +33,9 @@ logger = logging.getLogger(__name__)
MEMORY_SOFT_LIMIT = 18_000
MEMORY_HARD_LIMIT = 25_000
_PINNED_RE = re.compile(r"^##\s+.+\(pinned\)", re.MULTILINE)
_SECTION_HEADING_RE = re.compile(r"^##\s+(.+)$", re.MULTILINE)


# ---------------------------------------------------------------------------
# Pinned-section helpers
# ---------------------------------------------------------------------------
def _extract_pinned_headings(memory: str) -> set[str]:
    """Collect every ``## …`` heading in *memory* that is marked ``(pinned)``."""
    found = _PINNED_RE.findall(memory)
    return {heading for heading in found}


def _extract_section_map(memory: str) -> dict[str, str]:
    """Map each ``##`` heading text to its full section (heading + body)."""
    pieces = _SECTION_HEADING_RE.split(memory)
    # pieces: [preamble, heading1, body1, heading2, body2, …]
    headings = pieces[1::2]
    bodies = pieces[2::2]
    return {
        heading.strip(): f"## {heading.strip()}\n{body}"
        for heading, body in zip(headings, bodies)
    }
def _validate_pinned_preserved(old_memory: str | None, new_memory: str) -> str | None:
    """Check that every pinned heading in *old_memory* survives in *new_memory*.

    Returns a human-readable error message when one or more pinned sections
    were dropped, and ``None`` when the update is acceptable (including when
    there was no previous memory or nothing was pinned).
    """
    if not old_memory:
        return None
    previously_pinned = _extract_pinned_headings(old_memory)
    if not previously_pinned:
        return None
    missing = previously_pinned - _extract_pinned_headings(new_memory)
    if not missing:
        return None
    names = ", ".join(sorted(missing))
    return (
        f"Cannot remove pinned sections: {names}. "
        "These sections are protected and must be preserved. "
        "Re-include them and call update_memory again."
    )
def _restore_missing_pinned(old_memory: str, consolidated: str) -> str:
    """Prepend any pinned sections from *old_memory* that are absent in
    *consolidated*."""
    old_pinned = _extract_pinned_headings(old_memory)
    if not old_pinned:
        return consolidated
    new_pinned = _extract_pinned_headings(consolidated)
    # Pinned headings present before but missing after consolidation.
    dropped = old_pinned - new_pinned
    if not dropped:
        return consolidated
    old_sections = _extract_section_map(old_memory)
    restored_parts: list[str] = []
    for heading in sorted(dropped):
        # _extract_pinned_headings returns full "## …" match strings, while
        # _extract_section_map keys are bare heading text — strip the prefix
        # so the two agree. NOTE(review): this assumes exactly "## " (one
        # space) before the heading; other whitespace (e.g. a tab) would make
        # the lookup miss and the section silently stay dropped — confirm.
        raw_heading = heading.removeprefix("## ").strip()
        if raw_heading in old_sections:
            restored_parts.append(old_sections[raw_heading].rstrip())
    if restored_parts:
        # Re-attach the lost pinned sections ahead of the consolidated text.
        return "\n\n".join(restored_parts) + "\n\n" + consolidated
    return consolidated
# ---------------------------------------------------------------------------
# Diff validation
# ---------------------------------------------------------------------------
@ -173,37 +106,35 @@ def _soft_warning(content: str) -> str | None:
# ---------------------------------------------------------------------------
# Auto-consolidation via a separate LLM call
# Forced rewrite when memory exceeds the hard limit
# ---------------------------------------------------------------------------
_CONSOLIDATION_PROMPT = """\
_FORCED_REWRITE_PROMPT = """\
You are a memory curator. The following memory document exceeds the character \
limit and must be shortened.
RULES:
1. Rewrite the document to be under {target} characters.
2. Sections whose headings contain "(pinned)" MUST be preserved EXACTLY as-is \
do not modify, shorten, or remove them.
3. Only consolidate non-pinned content.
4. Priority for keeping content: pinned sections > identity/instructions > \
preferences > current context.
5. Merge duplicate entries, remove outdated entries, shorten verbose descriptions.
6. Each entry must be a single bullet point.
7. Every bullet MUST keep its (YYYY-MM-DD) date prefix.
8. Output ONLY the consolidated markdown no explanations, no wrapping.
2. Preserve all ## section headings.
3. Priority for keeping content: identity/instructions > preferences > \
current context.
4. Merge duplicate entries, remove outdated entries, shorten verbose descriptions.
5. Each entry must be a single bullet point.
6. Every bullet MUST keep its (YYYY-MM-DD) date prefix.
7. Output ONLY the consolidated markdown no explanations, no wrapping.
<memory_document>
{content}
</memory_document>"""
async def _auto_consolidate(content: str, llm: Any) -> str | None:
"""Use a focused LLM call to consolidate *content* under the soft limit.
async def _forced_rewrite(content: str, llm: Any) -> str | None:
"""Use a focused LLM call to compress *content* under the hard limit.
Returns the consolidated string, or ``None`` if consolidation fails.
Returns the rewritten string, or ``None`` if the call fails.
"""
try:
prompt = _CONSOLIDATION_PROMPT.format(target=MEMORY_SOFT_LIMIT, content=content)
prompt = _FORCED_REWRITE_PROMPT.format(target=MEMORY_HARD_LIMIT, content=content)
response = await llm.ainvoke(
[HumanMessage(content=prompt)],
config={"tags": ["surfsense:internal"]},
@ -215,7 +146,7 @@ async def _auto_consolidate(content: str, llm: Any) -> str | None:
)
return text.strip()
except Exception:
logger.exception("Auto-consolidation LLM call failed")
logger.exception("Forced rewrite LLM call failed")
return None
@ -234,16 +165,17 @@ async def _save_memory(
rollback_fn,
label: str,
) -> dict[str, Any]:
"""Validate, optionally auto-consolidate, save, and return a response dict.
"""Validate, optionally force-rewrite if over the hard limit, save, and
return a response dict.
Parameters
----------
updated_memory : str
The new document the agent submitted.
old_memory : str | None
The previously persisted document (for diff / pinned checks).
The previously persisted document (for diff checks).
llm : Any | None
LLM instance for auto-consolidation (may be ``None``).
LLM instance for forced rewrite (may be ``None``).
apply_fn : callable(str) -> None
Callback that sets the new memory on the ORM object.
commit_fn : coroutine
@ -255,21 +187,13 @@ async def _save_memory(
"""
content = updated_memory
# --- pinned-section gate (before any size check) ---
pinned_err = _validate_pinned_preserved(old_memory, content)
if pinned_err:
return {"status": "error", "message": pinned_err}
# --- forced rewrite if over the hard limit ---
if len(content) > MEMORY_HARD_LIMIT and llm is not None:
rewritten = await _forced_rewrite(content, llm)
if rewritten is not None and len(rewritten) < len(content):
content = rewritten
# --- auto-consolidate proactively at the soft limit ---
if len(content) > MEMORY_SOFT_LIMIT and llm is not None:
consolidated = await _auto_consolidate(content, llm)
if consolidated is not None:
if old_memory:
consolidated = _restore_missing_pinned(old_memory, consolidated)
if len(consolidated) < len(content):
content = consolidated
# --- hard-limit gate (reject if still too large after consolidation) ---
# --- hard-limit gate (reject if still too large after rewrite) ---
size_err = _validate_memory_size(content)
if size_err:
return size_err
@ -290,7 +214,7 @@ async def _save_memory(
}
if content is not updated_memory:
resp["notice"] = "Memory was automatically consolidated to fit within limits."
resp["notice"] = "Memory was automatically rewritten to fit within limits."
diff_warnings = _validate_diff(old_memory, content)
if diff_warnings:

View file

@ -1,13 +1,24 @@
"""Routes for user memory management (personal memory.md)."""
from __future__ import annotations
import logging
from fastapi import APIRouter, Depends, HTTPException
from langchain_core.messages import HumanMessage
from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncSession
from app.agents.new_chat.tools.update_memory import MEMORY_HARD_LIMIT
from app.agents.new_chat.llm_config import (
create_chat_litellm_from_agent_config,
load_agent_llm_config_for_search_space,
)
from app.agents.new_chat.tools.update_memory import MEMORY_HARD_LIMIT, _save_memory
from app.db import User, get_async_session
from app.users import current_active_user
logger = logging.getLogger(__name__)
router = APIRouter()
@ -19,6 +30,33 @@ class MemoryUpdate(BaseModel):
memory_md: str
class MemoryEditRequest(BaseModel):
    """Request body for the personal-memory natural-language edit endpoint."""

    # Natural-language instruction describing the desired memory change.
    query: str
    # Search space used to resolve the LLM configuration for the edit call.
    search_space_id: int
# Prompt for the user-facing memory edit endpoint: the model applies the
# user's instruction to the current document and must return the FULL updated
# markdown (no commentary). Format placeholders: {current_memory}, {instruction}.
_MEMORY_EDIT_PROMPT = """\
You are a memory editor. The user wants to modify their memory document. \
Apply the user's instruction to the existing memory document and output the \
FULL updated document.
RULES:
1. If the instruction asks to add something, add it in the appropriate \
## section with a (YYYY-MM-DD) date prefix using today's date.
2. If the instruction asks to remove something, remove the matching entry.
3. If the instruction asks to change something, update the matching entry.
4. Preserve the existing ## section structure and all other entries.
5. Output ONLY the updated markdown no explanations, no wrapping.
<current_memory>
{current_memory}
</current_memory>
<user_instruction>
{instruction}
</user_instruction>"""
@router.get("/users/me/memory", response_model=MemoryRead)
async def get_user_memory(
user: User = Depends(current_active_user),
@ -44,3 +82,60 @@ async def update_user_memory(
await session.commit()
await session.refresh(user, ["memory_md"])
return MemoryRead(memory_md=user.memory_md or "")
@router.post("/users/me/memory/edit", response_model=MemoryRead)
async def edit_user_memory(
    body: MemoryEditRequest,
    user: User = Depends(current_active_user),
    session: AsyncSession = Depends(get_async_session),
):
    """Apply a natural language edit to the user's personal memory via LLM."""
    # Resolve an LLM from the search space's agent configuration.
    agent_config = await load_agent_llm_config_for_search_space(
        session, body.search_space_id
    )
    if not agent_config:
        raise HTTPException(status_code=500, detail="No LLM configuration available.")
    llm = create_chat_litellm_from_agent_config(agent_config)
    if not llm:
        raise HTTPException(status_code=500, detail="Failed to create LLM instance.")
    # Re-read memory_md so the edit applies to the freshest persisted value.
    await session.refresh(user, ["memory_md"])
    current_memory = user.memory_md or ""
    edit_prompt = _MEMORY_EDIT_PROMPT.format(
        current_memory=current_memory or "(empty)",
        instruction=body.query,
    )
    try:
        llm_response = await llm.ainvoke(
            [HumanMessage(content=edit_prompt)],
            config={"tags": ["surfsense:internal", "memory-edit"]},
        )
        raw_content = llm_response.content
        updated = (
            raw_content if isinstance(raw_content, str) else str(raw_content)
        ).strip()
    except Exception as e:
        logger.exception("Memory edit LLM call failed: %s", e)
        raise HTTPException(status_code=500, detail="Memory edit failed.") from e
    if not updated:
        raise HTTPException(status_code=400, detail="LLM returned empty result.")
    # _save_memory validates the document and commits (or rolls back) itself.
    outcome = await _save_memory(
        updated_memory=updated,
        old_memory=current_memory,
        llm=llm,
        apply_fn=lambda content: setattr(user, "memory_md", content),
        commit_fn=session.commit,
        rollback_fn=session.rollback,
        label="memory",
    )
    if outcome.get("status") == "error":
        raise HTTPException(status_code=400, detail=outcome["message"])
    await session.refresh(user, ["memory_md"])
    return MemoryRead(memory_md=user.memory_md or "")

View file

@ -1,11 +1,17 @@
import logging
from fastapi import APIRouter, Depends, HTTPException
from langchain_core.messages import HumanMessage
from pydantic import BaseModel as PydanticBaseModel
from sqlalchemy import func
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from app.agents.new_chat.tools.update_memory import MEMORY_HARD_LIMIT
from app.agents.new_chat.llm_config import (
create_chat_litellm_from_agent_config,
load_agent_llm_config_for_search_space,
)
from app.agents.new_chat.tools.update_memory import MEMORY_HARD_LIMIT, _save_memory
from app.config import config
from app.db import (
ImageGenerationConfig,
@ -35,6 +41,32 @@ logger = logging.getLogger(__name__)
router = APIRouter()
class _TeamMemoryEditRequest(PydanticBaseModel):
    """Request body for the team-memory natural-language edit endpoint."""

    # Natural-language instruction describing the desired memory change.
    query: str
# Prompt for the team-memory edit endpoint: the model applies a member's
# instruction to the shared document and must return the FULL updated
# markdown (no commentary). Format placeholders: {current_memory}, {instruction}.
_TEAM_MEMORY_EDIT_PROMPT = """\
You are a memory editor for a team workspace. The user wants to modify the \
team's shared memory document. Apply the user's instruction to the existing \
memory document and output the FULL updated document.
RULES:
1. If the instruction asks to add something, add it in the appropriate \
## section with a (YYYY-MM-DD) date prefix using today's date.
2. If the instruction asks to remove something, remove the matching entry.
3. If the instruction asks to change something, update the matching entry.
4. Preserve the existing ## section structure and all other entries.
5. Output ONLY the updated markdown no explanations, no wrapping.
<current_memory>
{current_memory}
</current_memory>
<user_instruction>
{instruction}
</user_instruction>"""
async def create_default_roles_and_membership(
session: AsyncSession,
search_space_id: int,
@ -280,6 +312,79 @@ async def update_search_space(
) from e
@router.post(
    "/searchspaces/{search_space_id}/memory/edit",
    response_model=SearchSpaceRead,
)
async def edit_team_memory(
    search_space_id: int,
    body: _TeamMemoryEditRequest,
    session: AsyncSession = Depends(get_async_session),
    user: User = Depends(current_active_user),
):
    """Apply a natural language edit to the team memory via LLM."""
    # Authorization gate: must run before touching any search-space data.
    await check_search_space_access(session, user, search_space_id)
    # Resolve an LLM from the search space's agent configuration.
    agent_config = await load_agent_llm_config_for_search_space(
        session, search_space_id
    )
    if not agent_config:
        raise HTTPException(status_code=500, detail="No LLM configuration available.")
    llm = create_chat_litellm_from_agent_config(agent_config)
    if not llm:
        raise HTTPException(status_code=500, detail="Failed to create LLM instance.")
    result = await session.execute(
        select(SearchSpace).filter(SearchSpace.id == search_space_id)
    )
    db_search_space = result.scalars().first()
    if not db_search_space:
        raise HTTPException(status_code=404, detail="Search space not found")
    current_memory = db_search_space.shared_memory_md or ""
    prompt = _TEAM_MEMORY_EDIT_PROMPT.format(
        current_memory=current_memory or "(empty)",
        instruction=body.query,
    )
    try:
        response = await llm.ainvoke(
            [HumanMessage(content=prompt)],
            config={"tags": ["surfsense:internal", "memory-edit"]},
        )
        # LLM content may be a string or a structured payload; normalize to str.
        updated = (
            response.content
            if isinstance(response.content, str)
            else str(response.content)
        ).strip()
    except Exception as e:
        logger.exception("Team memory edit LLM call failed: %s", e)
        raise HTTPException(
            status_code=500, detail="Team memory edit failed."
        ) from e
    if not updated:
        raise HTTPException(status_code=400, detail="LLM returned empty result.")
    # _save_memory validates the document and commits (or rolls back) itself.
    save_result = await _save_memory(
        updated_memory=updated,
        old_memory=current_memory,
        llm=llm,
        apply_fn=lambda content: setattr(
            db_search_space, "shared_memory_md", content
        ),
        commit_fn=session.commit,
        rollback_fn=session.rollback,
        label="team memory",
    )
    if save_result.get("status") == "error":
        raise HTTPException(status_code=400, detail=save_result["message"])
    # Refresh so the response reflects the just-committed memory document.
    await session.refresh(db_search_space)
    return db_search_space
@router.delete("/searchspaces/{search_space_id}", response_model=dict)
async def delete_search_space(
search_space_id: int,

View file

@ -30,6 +30,7 @@ from sqlalchemy.orm import selectinload
from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent
from app.agents.new_chat.checkpointer import get_checkpointer
from app.agents.new_chat.memory_extraction import extract_and_save_memory
from app.agents.new_chat.llm_config import (
AgentConfig,
create_chat_litellm_from_agent_config,
@ -139,6 +140,7 @@ class StreamResult:
is_interrupted: bool = False
interrupt_value: dict[str, Any] | None = None
sandbox_files: list[str] = field(default_factory=list) # unused, kept for compat
agent_called_update_memory: bool = False
async def _stream_agent_events(
@ -181,6 +183,7 @@ async def _stream_agent_events(
last_active_step_items: list[str] = initial_step_items or []
just_finished_tool: bool = False
active_tool_depth: int = 0 # Track nesting: >0 means we're inside a tool
called_update_memory: bool = False
def next_thinking_step_id() -> str:
nonlocal thinking_step_counter
@ -488,6 +491,9 @@ async def _stream_agent_events(
tool_name = event.get("name", "unknown_tool")
raw_output = event.get("data", {}).get("output", "")
if tool_name == "update_memory":
called_update_memory = True
if hasattr(raw_output, "content"):
content = raw_output.content
if isinstance(content, str):
@ -1109,6 +1115,7 @@ async def _stream_agent_events(
yield completion_event
result.accumulated_text = accumulated_text
result.agent_called_update_memory = called_update_memory
state = await agent.aget_state(config)
is_interrupted = state.tasks and any(task.interrupts for task in state.tasks)
@ -1538,6 +1545,16 @@ async def stream_new_chat(
chat_id, generated_title
)
# Fire background memory extraction if the agent didn't handle it
if not stream_result.agent_called_update_memory and user_id:
asyncio.create_task(
extract_and_save_memory(
user_message=user_query,
user_id=user_id,
llm=llm,
)
)
# Finish the step and message
yield streaming_service.format_finish_step()
yield streaming_service.format_finish()

View file

@ -1,13 +1,16 @@
"use client";
import { Info } from "lucide-react";
import { useCallback, useEffect, useState } from "react";
import { useAtomValue } from "jotai";
import { Info, Send } from "lucide-react";
import { useCallback, useEffect, useRef, useState } from "react";
import { toast } from "sonner";
import { z } from "zod";
import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms";
import { PlateEditor } from "@/components/editor/plate-editor";
import { Alert, AlertDescription } from "@/components/ui/alert";
import { Button } from "@/components/ui/button";
import { Spinner } from "@/components/ui/spinner";
import { Textarea } from "@/components/ui/textarea";
import { baseApiService } from "@/lib/apis/base-api.service";
const MEMORY_HARD_LIMIT = 25_000;
@ -17,17 +20,19 @@ const MemoryReadSchema = z.object({
});
export function MemoryContent() {
const activeSearchSpaceId = useAtomValue(activeSearchSpaceIdAtom);
const [memory, setMemory] = useState("");
const [savedMemory, setSavedMemory] = useState("");
const [loading, setLoading] = useState(true);
const [saving, setSaving] = useState(false);
const [editQuery, setEditQuery] = useState("");
const [editing, setEditing] = useState(false);
const textareaRef = useRef<HTMLTextAreaElement>(null);
const fetchMemory = useCallback(async () => {
try {
setLoading(true);
const data = await baseApiService.get("/api/v1/users/me/memory", MemoryReadSchema);
setMemory(data.memory_md);
setSavedMemory(data.memory_md);
} catch {
toast.error("Failed to load memory");
} finally {
@ -39,21 +44,6 @@ export function MemoryContent() {
fetchMemory();
}, [fetchMemory]);
const handleSave = async () => {
try {
setSaving(true);
const data = await baseApiService.put("/api/v1/users/me/memory", MemoryReadSchema, {
body: { memory_md: memory },
});
setSavedMemory(data.memory_md);
toast.success("Memory saved");
} catch {
toast.error("Failed to save memory");
} finally {
setSaving(false);
}
};
const handleClear = async () => {
try {
setSaving(true);
@ -61,7 +51,6 @@ export function MemoryContent() {
body: { memory_md: "" },
});
setMemory(data.memory_md);
setSavedMemory(data.memory_md);
toast.success("Memory cleared");
} catch {
toast.error("Failed to clear memory");
@ -70,14 +59,33 @@ export function MemoryContent() {
}
};
// Track editor changes with trimmed markdown so the character counter and
// saved/unsaved comparison ignore leading/trailing whitespace.
const handleMarkdownChange = useCallback((md: string) => {
	const trimmed = md.trim();
	setMemory(trimmed);
}, []);
// Send the natural-language instruction to the memory-edit endpoint and
// replace the displayed memory with the LLM-updated document.
const handleEdit = async () => {
	const query = editQuery.trim();
	// Ignore empty submissions (button is also disabled in this case).
	if (!query) return;
	try {
		setEditing(true);
		const data = await baseApiService.post("/api/v1/users/me/memory/edit", MemoryReadSchema, {
			// NOTE(review): assumes an active search space id is set here;
			// Number(null) would send 0 — confirm an upstream guard exists.
			body: { query, search_space_id: Number(activeSearchSpaceId) },
		});
		setMemory(data.memory_md);
		setEditQuery("");
		toast.success("Memory updated");
	} catch {
		toast.error("Failed to edit memory");
	} finally {
		setEditing(false);
	}
};
// Enter submits the edit; Shift+Enter falls through to insert a newline.
const handleKeyDown = (e: React.KeyboardEvent<HTMLTextAreaElement>) => {
	if (e.key === "Enter" && !e.shiftKey) {
		e.preventDefault();
		handleEdit();
	}
};
const hasChanges = memory !== savedMemory;
const charCount = memory.length;
const isOverLimit = charCount > MEMORY_HARD_LIMIT;
const getCounterColor = () => {
if (charCount > MEMORY_HARD_LIMIT) return "text-red-500";
@ -101,18 +109,16 @@ export function MemoryContent() {
<AlertDescription className="text-xs md:text-sm">
<p>
SurfSense uses this personal memory to personalize your responses across all
conversations. Supports <span className="font-medium">Markdown</span> formatting.
conversations. Use the input below to add, update, or remove memory entries.
</p>
</AlertDescription>
</Alert>
<div className="h-[340px] overflow-y-auto rounded-md border">
<PlateEditor
markdown={savedMemory}
onMarkdownChange={handleMarkdownChange}
preset="minimal"
defaultEditing
placeholder="Add personal context here, such as your preferences, instructions, or facts about you"
markdown={memory}
readOnly
preset="readonly"
variant="default"
editorVariant="none"
className="px-4 py-4 text-xs min-h-full"
@ -123,30 +129,43 @@ export function MemoryContent() {
<span className={`text-xs ${getCounterColor()}`}>
{charCount.toLocaleString()} / {MEMORY_HARD_LIMIT.toLocaleString()} characters
{charCount > 15_000 && charCount <= MEMORY_HARD_LIMIT && " - Approaching limit"}
{isOverLimit && " - Exceeds limit"}
{charCount > MEMORY_HARD_LIMIT && " - Exceeds limit"}
</span>
</div>
<div className="flex justify-between">
<div className="relative">
<Textarea
ref={textareaRef}
value={editQuery}
onChange={(e) => setEditQuery(e.target.value)}
onKeyDown={handleKeyDown}
placeholder="e.g. &quot;I prefer TypeScript over JavaScript&quot; or &quot;Remove the entry about Tokyo&quot;"
disabled={editing}
rows={2}
className="pr-12 resize-none text-sm"
/>
<Button
type="button"
size="icon"
variant="ghost"
onClick={handleEdit}
disabled={editing || !editQuery.trim()}
className="absolute right-2 bottom-2 h-7 w-7"
>
{editing ? <Spinner size="sm" /> : <Send className="h-4 w-4" />}
</Button>
</div>
<div className="flex justify-start">
<Button
type="button"
variant="destructive"
size="sm"
onClick={handleClear}
disabled={saving || !savedMemory}
disabled={saving || editing || !memory}
>
Reset Memory
</Button>
<Button
type="button"
variant="outline"
onClick={handleSave}
disabled={saving || !hasChanges || isOverLimit}
className="relative gap-2 bg-white text-black hover:bg-neutral-100 dark:bg-white dark:text-black dark:hover:bg-neutral-200 items-center justify-center"
>
<span className={saving ? "opacity-0" : ""}>Save</span>
{saving && <Spinner size="sm" className="absolute" />}
</Button>
</div>
</div>
);

View file

@ -1,25 +1,33 @@
"use client";
import { useQuery } from "@tanstack/react-query";
import { useQuery, useQueryClient } from "@tanstack/react-query";
import { useAtomValue } from "jotai";
import { Info } from "lucide-react";
import { useCallback, useEffect, useState } from "react";
import { Info, Send } from "lucide-react";
import { useRef, useState } from "react";
import { toast } from "sonner";
import { z } from "zod";
import { updateSearchSpaceMutationAtom } from "@/atoms/search-spaces/search-space-mutation.atoms";
import { PlateEditor } from "@/components/editor/plate-editor";
import { Alert, AlertDescription } from "@/components/ui/alert";
import { Button } from "@/components/ui/button";
import { Spinner } from "@/components/ui/spinner";
import { Textarea } from "@/components/ui/textarea";
import { baseApiService } from "@/lib/apis/base-api.service";
import { searchSpacesApiService } from "@/lib/apis/search-spaces-api.service";
import { cacheKeys } from "@/lib/query-client/cache-keys";
// Hard cap on memory-document length; the UI warns as it approaches this limit.
const MEMORY_HARD_LIMIT = 25_000;
// Minimal response shape for search-space memory endpoints; only the shared
// memory markdown is read here, and unrecognized fields pass through untouched.
const SearchSpaceSchema = z.object({
shared_memory_md: z.string().optional().default(""),
}).passthrough();
// Identifies which search space's shared (team) memory this panel manages.
interface TeamMemoryManagerProps {
searchSpaceId: number;
}
export function TeamMemoryManager({ searchSpaceId }: TeamMemoryManagerProps) {
const queryClient = useQueryClient();
const { data: searchSpace, isLoading: loading } = useQuery({
queryKey: cacheKeys.searchSpaces.detail(searchSpaceId.toString()),
queryFn: () => searchSpacesApiService.getSearchSpace({ id: searchSpaceId }),
@ -28,36 +36,12 @@ export function TeamMemoryManager({ searchSpaceId }: TeamMemoryManagerProps) {
const { mutateAsync: updateSearchSpace } = useAtomValue(updateSearchSpaceMutationAtom);
const [memory, setMemory] = useState("");
const [saving, setSaving] = useState(false);
const [editQuery, setEditQuery] = useState("");
const [editing, setEditing] = useState(false);
const textareaRef = useRef<HTMLTextAreaElement>(null);
useEffect(() => {
if (searchSpace) {
setMemory(searchSpace.shared_memory_md || "");
}
}, [searchSpace?.shared_memory_md]);
const handleMarkdownChange = useCallback((md: string) => {
const trimmed = md.trim();
setMemory(trimmed);
}, []);
const hasChanges = !!searchSpace && (searchSpace.shared_memory_md || "") !== memory;
const handleSave = async () => {
try {
setSaving(true);
await updateSearchSpace({
id: searchSpaceId,
data: { shared_memory_md: memory },
});
toast.success("Team memory saved");
} catch {
toast.error("Failed to save team memory");
} finally {
setSaving(false);
}
};
const memory = searchSpace?.shared_memory_md || "";
const handleClear = async () => {
try {
@ -66,7 +50,6 @@ export function TeamMemoryManager({ searchSpaceId }: TeamMemoryManagerProps) {
id: searchSpaceId,
data: { shared_memory_md: "" },
});
setMemory("");
toast.success("Team memory cleared");
} catch {
toast.error("Failed to clear team memory");
@ -75,8 +58,37 @@ export function TeamMemoryManager({ searchSpaceId }: TeamMemoryManagerProps) {
}
};
// Submit a natural-language edit instruction for the shared team memory, then
// invalidate the cached search-space detail so the updated memory is refetched.
const handleEdit = async () => {
	const instruction = editQuery.trim();
	if (!instruction) return;
	setEditing(true);
	try {
		await baseApiService.post(
			`/api/v1/searchspaces/${searchSpaceId}/memory/edit`,
			SearchSpaceSchema,
			{ body: { query: instruction } },
		);
		setEditQuery("");
		await queryClient.invalidateQueries({
			queryKey: cacheKeys.searchSpaces.detail(searchSpaceId.toString()),
		});
		toast.success("Team memory updated");
	} catch {
		toast.error("Failed to edit team memory");
	} finally {
		setEditing(false);
	}
};
// Submit on plain Enter; Shift+Enter falls through to insert a newline.
const handleKeyDown = (e: React.KeyboardEvent<HTMLTextAreaElement>) => {
	if (e.key !== "Enter" || e.shiftKey) return;
	e.preventDefault();
	handleEdit();
};
const charCount = memory.length;
const isOverLimit = charCount > MEMORY_HARD_LIMIT;
const getCounterColor = () => {
if (charCount > MEMORY_HARD_LIMIT) return "text-red-500";
@ -100,18 +112,16 @@ export function TeamMemoryManager({ searchSpaceId }: TeamMemoryManagerProps) {
<AlertDescription className="text-xs md:text-sm">
<p>
SurfSense uses this shared memory to provide team-wide context across all conversations
in this search space. Supports <span className="font-medium">Markdown</span> formatting.
in this search space. Use the input below to add, update, or remove memory entries.
</p>
</AlertDescription>
</Alert>
<div className="h-[340px] overflow-y-auto rounded-md border">
<PlateEditor
markdown={searchSpace?.shared_memory_md || ""}
onMarkdownChange={handleMarkdownChange}
preset="minimal"
defaultEditing
placeholder="Add team context here, such as decisions, conventions, key facts, or current priorities"
markdown={memory}
readOnly
preset="readonly"
variant="default"
editorVariant="none"
className="px-4 py-4 text-xs min-h-full"
@ -122,30 +132,43 @@ export function TeamMemoryManager({ searchSpaceId }: TeamMemoryManagerProps) {
<span className={`text-xs ${getCounterColor()}`}>
{charCount.toLocaleString()} / {MEMORY_HARD_LIMIT.toLocaleString()} characters
{charCount > 15_000 && charCount <= MEMORY_HARD_LIMIT && " - Approaching limit"}
{isOverLimit && " - Exceeds limit"}
{charCount > MEMORY_HARD_LIMIT && " - Exceeds limit"}
</span>
</div>
<div className="flex justify-between">
<div className="relative">
<Textarea
ref={textareaRef}
value={editQuery}
onChange={(e) => setEditQuery(e.target.value)}
onKeyDown={handleKeyDown}
placeholder="e.g. &quot;We decided to use PostgreSQL&quot; or &quot;Remove the standup entry&quot;"
disabled={editing}
rows={2}
className="pr-12 resize-none text-sm"
/>
<Button
type="button"
size="icon"
variant="ghost"
onClick={handleEdit}
disabled={editing || !editQuery.trim()}
className="absolute right-2 bottom-2 h-7 w-7"
>
{editing ? <Spinner size="sm" /> : <Send className="h-4 w-4" />}
</Button>
</div>
<div className="flex justify-start">
<Button
type="button"
variant="destructive"
size="sm"
onClick={handleClear}
disabled={saving || !searchSpace?.shared_memory_md}
disabled={saving || editing || !memory}
>
Clear Memory
</Button>
<Button
type="button"
variant="outline"
onClick={handleSave}
disabled={saving || !hasChanges || isOverLimit}
className="relative gap-2 bg-white text-black hover:bg-neutral-100 dark:bg-white dark:text-black dark:hover:bg-neutral-200 items-center justify-center"
>
<span className={saving ? "opacity-0" : ""}>Save</span>
{saving && <Spinner size="sm" className="absolute" />}
</Button>
</div>
</div>
);