feat: add background memory extraction and natural-language memory editing for user and team memory

This commit is contained in:
Anish Sarkar 2026-04-10 00:21:55 +05:30
parent cd72fa9a48
commit 84fc72e596
9 changed files with 534 additions and 224 deletions

View file

@ -0,0 +1,115 @@
"""Background memory extraction for the SurfSense agent.
After each agent response, if the agent did not call ``update_memory`` during
the turn, this module runs a lightweight LLM call to decide whether the user's
message contains any long-term information worth persisting.
Only user (personal) memory is handled here team memory relies on explicit
agent calls.
"""
from __future__ import annotations
import logging
from typing import Any
from uuid import UUID
from langchain_core.messages import HumanMessage
from sqlalchemy import select
from app.agents.new_chat.tools.update_memory import _save_memory
from app.db import User, shielded_async_session
logger = logging.getLogger(__name__)
# Prompt for the background extraction call made by ``extract_and_save_memory``.
# Contract: the model must output either the FULL merged memory document
# (same ## sections, dated bullets) or the exact sentinel ``NO_UPDATE``;
# the caller treats any other non-empty output as the new document.
# Format placeholders: {current_memory}, {user_message}.
# NOTE(review): some separators appear lost in transcription (e.g.
# "workplace durable facts") — confirm the intended wording.
_MEMORY_EXTRACT_PROMPT = """\
You are a memory extraction assistant. Analyze the user's message and decide \
if it contains any long-term information worth persisting to memory.
Worth remembering: preferences, background/identity, goals, projects, \
instructions, tools/languages they use, decisions, expertise, workplace \
durable facts that will matter in future conversations.
NOT worth remembering: greetings, one-off factual questions, session \
logistics, ephemeral requests, follow-up clarifications with no new personal \
info, things that only matter for the current task.
If the message contains memorizable information, output the FULL updated \
memory document with the new facts merged into the existing content. Follow \
these rules:
- Use the same ## section structure as the existing memory.
- Keep entries as single concise bullet points (under 120 chars each).
- Every bullet MUST start with a (YYYY-MM-DD) date prefix.
- If a new fact contradicts an existing entry, update the existing entry.
- Do not duplicate information that is already present.
- Standard sections: \
"## About the user", "## Preferences", "## Instructions"
If nothing is worth remembering, output exactly: NO_UPDATE
<current_memory>
{current_memory}
</current_memory>
<user_message>
{user_message}
</user_message>"""
async def extract_and_save_memory(
    *,
    user_message: str,
    user_id: str | None,
    llm: Any,
) -> None:
    """Inspect *user_message* for durable facts and persist them to user memory.

    Intended to run as a detached (fire-and-forget) background task: every
    exception is caught and logged here so a failure never reaches the caller.
    """
    if not user_id:
        return
    try:
        key = UUID(user_id) if isinstance(user_id, str) else user_id
        async with shielded_async_session() as db:
            row = await db.execute(select(User).where(User.id == key))
            account = row.scalars().first()
            if account is None:
                return
            previous = account.memory_md
            extraction_prompt = _MEMORY_EXTRACT_PROMPT.format(
                current_memory=previous or "(empty)",
                user_message=user_message,
            )
            reply = await llm.ainvoke(
                [HumanMessage(content=extraction_prompt)],
                config={"tags": ["surfsense:internal", "memory-extraction"]},
            )
            raw = reply.content
            decision = (raw if isinstance(raw, str) else str(raw)).strip()
            # The prompt instructs the model to emit exactly NO_UPDATE when
            # nothing is worth persisting; an empty reply means the same.
            if not decision or decision == "NO_UPDATE":
                logger.debug("Memory extraction: no update needed (user %s)", key)
                return
            # _save_memory performs validation and commit/rollback itself.
            outcome = await _save_memory(
                updated_memory=decision,
                old_memory=previous,
                llm=llm,
                apply_fn=lambda content: setattr(account, "memory_md", content),
                commit_fn=db.commit,
                rollback_fn=db.rollback,
                label="memory",
            )
            logger.info(
                "Background memory extraction for user %s: %s",
                key,
                outcome.get("status"),
            )
    except Exception:
        logger.exception("Background user memory extraction failed")

View file

@ -19,7 +19,7 @@ from langgraph.runtime import Runtime
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.agents.new_chat.tools.update_memory import MEMORY_HARD_LIMIT
from app.agents.new_chat.tools.update_memory import MEMORY_HARD_LIMIT, MEMORY_SOFT_LIMIT
from app.db import ChatVisibility, SearchSpace, User, shielded_async_session
logger = logging.getLogger(__name__)
@ -70,6 +70,15 @@ class MemoryInjectionMiddleware(AgentMiddleware): # type: ignore[type-arg]
f"{user_memory}\n"
f"</user_memory>"
)
if chars > MEMORY_SOFT_LIMIT:
memory_blocks.append(
f"<memory_warning>Your personal memory is at "
f"{chars:,}/{MEMORY_HARD_LIMIT:,} characters and approaching "
f"the hard limit. On your next update_memory call, consolidate "
f"by merging duplicates, removing outdated entries, and "
f"shortening descriptions before adding anything new."
f"</memory_warning>"
)
if self.visibility == ChatVisibility.SEARCH_SPACE:
team_memory = await self._load_team_memory(session)
@ -80,6 +89,15 @@ class MemoryInjectionMiddleware(AgentMiddleware): # type: ignore[type-arg]
f"{team_memory}\n"
f"</team_memory>"
)
if chars > MEMORY_SOFT_LIMIT:
memory_blocks.append(
f"<memory_warning>Team memory is at "
f"{chars:,}/{MEMORY_HARD_LIMIT:,} characters and approaching "
f"the hard limit. On your next update_memory call, consolidate "
f"by merging duplicates, removing outdated entries, and "
f"shortening descriptions before adding anything new."
f"</memory_warning>"
)
if not memory_blocks:
return None

View file

@ -271,8 +271,7 @@ _MEMORY_TOOL_INSTRUCTIONS: dict[str, dict[str, str]] = {
`limit` attributes show your current usage and the maximum allowed size.
- This is your curated long-term memory the distilled essence of what you know about
the user, not raw conversation logs.
- You are the sole mechanism for persisting memory there is no background extraction.
Call update_memory when:
- Call update_memory when:
* The user explicitly asks to remember or forget something
* The user shares durable facts or preferences that will matter in future conversations
- The user's name is already provided via <user_name> — do not store it in memory.
@ -285,21 +284,18 @@ _MEMORY_TOOL_INSTRUCTIONS: dict[str, dict[str, str]] = {
- Every bullet MUST start with a (YYYY-MM-DD) date prefix indicating when it was recorded or last updated.
- Keep it concise and well under the character limit shown in <user_memory>.
- You MUST organize memory using these standard sections (add new `##` sections only if none of the standard ones fit):
## About the user (pinned) — role, background, company
## About the user — role, background, company
## Preferences — languages, tools, frameworks, response style
## Instructions (pinned) — standing instructions, things to always/never do
## Instructions — standing instructions, things to always/never do
- Each entry MUST be a single bullet point. Keep entries concise (aim for under 120 chars each).
- Sections with `(pinned)` in the heading are protected the system will reject any
update that removes them. Users can add `(pinned)` to any `##` heading to protect it.
- During consolidation, prioritize keeping: pinned sections > preferences.
- During consolidation, prioritize keeping: identity/instructions > preferences.
""",
"shared": """
- update_memory: Update the team's shared memory document for this search space.
- Your current team memory is already in <team_memory> in your context. The `chars`
and `limit` attributes show current usage and the maximum allowed size.
- This is the team's curated long-term memory — decisions, conventions, key facts.
- You are the sole mechanism for persisting team memory there is no background extraction.
Call update_memory when:
- Call update_memory when:
* A team member explicitly asks to remember or forget something
* The conversation surfaces durable team decisions, conventions, or facts
that will matter in future conversations
@ -312,14 +308,12 @@ _MEMORY_TOOL_INSTRUCTIONS: dict[str, dict[str, str]] = {
- Every bullet MUST start with a (YYYY-MM-DD) date prefix indicating when it was recorded or last updated.
- Keep it concise and well under the character limit shown in <team_memory>.
- You MUST organize memory using these standard sections (add new `##` sections only if none of the standard ones fit):
## Team decisions (pinned) — agreed-upon choices with rationale
## Conventions (pinned) — coding standards, tools, processes, naming patterns
## Team decisions — agreed-upon choices with rationale
## Conventions — coding standards, tools, processes, naming patterns
## Key facts — where things are, how things work, team structure
## Current priorities — active projects, deadlines, blockers
- Each entry MUST be a single bullet point. Keep entries concise (aim for under 120 chars each).
- Sections with `(pinned)` in the heading are protected the system will reject any
update that removes them. Users can add `(pinned)` to any `##` heading to protect it.
- During consolidation, prioritize keeping: pinned sections > key facts > current priorities.
- During consolidation, prioritize keeping: decisions/conventions > key facts > current priorities.
""",
},
}
@ -329,21 +323,21 @@ _MEMORY_TOOL_EXAMPLES: dict[str, dict[str, str]] = {
"private": """
- <user_memory> is empty. User: "I'm a space enthusiast, explain astrophage to me"
- The user casually shared a durable fact about themselves. Save it:
update_memory(updated_memory="## About the user (pinned)\\n- (2025-03-15) Space enthusiast\\n")
update_memory(updated_memory="## About the user\\n- (2025-03-15) Space enthusiast\\n")
- User: "Remember that I prefer concise answers over detailed explanations"
- Durable preference. You see the current <user_memory> and merge:
update_memory(updated_memory="## About the user (pinned)\\n- (2025-03-15) Space enthusiast\\n\\n## Preferences\\n- (2025-03-15) Prefers concise answers over detailed explanations\\n...")
update_memory(updated_memory="## About the user\\n- (2025-03-15) Space enthusiast\\n\\n## Preferences\\n- (2025-03-15) Prefers concise answers over detailed explanations\\n...")
- User: "I actually moved to Tokyo last month"
- Updated fact, date prefix reflects when recorded:
update_memory(updated_memory="## About the user (pinned)\\n- (2025-03-15) Lives in Tokyo (previously London)\\n...")
update_memory(updated_memory="## About the user\\n- (2025-03-15) Lives in Tokyo (previously London)\\n...")
- User: "I'm a freelance photographer working on a nature documentary"
- Durable background info. Save it under About the user:
update_memory(updated_memory="## About the user (pinned)\\n- (2025-03-15) Freelance photographer\\n- (2025-03-15) Working on a nature documentary\\n")
update_memory(updated_memory="## About the user\\n- (2025-03-15) Freelance photographer\\n- (2025-03-15) Working on a nature documentary\\n")
""",
"shared": """
- User: "Let's remember that we decided to do weekly standup meetings on Mondays"
- Durable team decision:
update_memory(updated_memory="## Team decisions (pinned)\\n- (2025-03-15) Weekly standup meetings on Mondays\\n...")
update_memory(updated_memory="## Team decisions\\n- (2025-03-15) Weekly standup meetings on Mondays\\n...")
- User: "Our office is in downtown Seattle, 5th floor"
- Durable team fact:
update_memory(updated_memory="## Key facts\\n- (2025-03-15) Office location: downtown Seattle, 5th floor\\n...")

View file

@ -6,12 +6,10 @@ always sees the current memory in <user_memory> / <team_memory> tags injected
by MemoryInjectionMiddleware, so it passes the FULL updated document each time.
Overflow handling:
- Soft limit (18K chars): an automatic LLM-driven consolidation is attempted
to proactively keep memory lean. The save always succeeds.
- Hard limit (25K chars): save rejected if memory still exceeds this after
consolidation.
- Pinned sections: headings containing ``(pinned)`` are protected the system
rejects any update that drops them and auto-restores them during consolidation.
- Soft limit (18K chars): a warning is returned telling the agent to
consolidate on the next update.
- Hard limit (25K chars): a forced LLM-driven rewrite compresses the document.
If it still exceeds the limit after rewriting, the save is rejected.
- Diff validation: warns when entire ``##`` sections are dropped or when the
document shrinks by more than 60%.
"""
@ -35,74 +33,9 @@ logger = logging.getLogger(__name__)
MEMORY_SOFT_LIMIT = 18_000
MEMORY_HARD_LIMIT = 25_000
_PINNED_RE = re.compile(r"^##\s+.+\(pinned\)", re.MULTILINE)
_SECTION_HEADING_RE = re.compile(r"^##\s+(.+)$", re.MULTILINE)


# ---------------------------------------------------------------------------
# Pinned-section helpers
# ---------------------------------------------------------------------------
def _extract_pinned_headings(memory: str) -> set[str]:
    """Collect every ``## …`` heading in *memory* that is marked ``(pinned)``."""
    found = _PINNED_RE.findall(memory)
    return {heading for heading in found}


def _extract_section_map(memory: str) -> dict[str, str]:
    """Map each ``##`` heading text to its full section (heading + body)."""
    pieces = _SECTION_HEADING_RE.split(memory)
    # pieces: [preamble, heading1, body1, heading2, body2, …]
    headings = pieces[1::2]
    bodies = pieces[2::2]
    return {
        heading.strip(): f"## {heading.strip()}\n{body}"
        for heading, body in zip(headings, bodies)
    }
def _validate_pinned_preserved(old_memory: str | None, new_memory: str) -> str | None:
    """Check that every pinned heading in *old_memory* survives in *new_memory*.

    Returns a human-readable error message when one or more pinned sections
    were dropped, and ``None`` when the update is acceptable (including when
    there was no previous memory or nothing was pinned).
    """
    if not old_memory:
        return None
    previously_pinned = _extract_pinned_headings(old_memory)
    if not previously_pinned:
        return None
    missing = previously_pinned - _extract_pinned_headings(new_memory)
    if not missing:
        return None
    names = ", ".join(sorted(missing))
    return (
        f"Cannot remove pinned sections: {names}. "
        "These sections are protected and must be preserved. "
        "Re-include them and call update_memory again."
    )
def _restore_missing_pinned(old_memory: str, consolidated: str) -> str:
    """Prepend any pinned sections from *old_memory* that are absent in
    *consolidated*."""
    old_pinned = _extract_pinned_headings(old_memory)
    if not old_pinned:
        return consolidated
    new_pinned = _extract_pinned_headings(consolidated)
    # Pinned headings present before but missing after consolidation.
    dropped = old_pinned - new_pinned
    if not dropped:
        return consolidated
    old_sections = _extract_section_map(old_memory)
    restored_parts: list[str] = []
    for heading in sorted(dropped):
        # _extract_pinned_headings returns full "## …" match strings, while
        # _extract_section_map keys are bare heading text — strip the prefix
        # so the two agree. NOTE(review): this assumes exactly "## " (one
        # space) before the heading; other whitespace (e.g. a tab) would make
        # the lookup miss and the section silently stay dropped — confirm.
        raw_heading = heading.removeprefix("## ").strip()
        if raw_heading in old_sections:
            restored_parts.append(old_sections[raw_heading].rstrip())
    if restored_parts:
        # Re-attach the lost pinned sections ahead of the consolidated text.
        return "\n\n".join(restored_parts) + "\n\n" + consolidated
    return consolidated
# ---------------------------------------------------------------------------
# Diff validation
# ---------------------------------------------------------------------------
@ -173,37 +106,35 @@ def _soft_warning(content: str) -> str | None:
# ---------------------------------------------------------------------------
# Auto-consolidation via a separate LLM call
# Forced rewrite when memory exceeds the hard limit
# ---------------------------------------------------------------------------
_CONSOLIDATION_PROMPT = """\
_FORCED_REWRITE_PROMPT = """\
You are a memory curator. The following memory document exceeds the character \
limit and must be shortened.
RULES:
1. Rewrite the document to be under {target} characters.
2. Sections whose headings contain "(pinned)" MUST be preserved EXACTLY as-is \
do not modify, shorten, or remove them.
3. Only consolidate non-pinned content.
4. Priority for keeping content: pinned sections > identity/instructions > \
preferences > current context.
5. Merge duplicate entries, remove outdated entries, shorten verbose descriptions.
6. Each entry must be a single bullet point.
7. Every bullet MUST keep its (YYYY-MM-DD) date prefix.
8. Output ONLY the consolidated markdown no explanations, no wrapping.
2. Preserve all ## section headings.
3. Priority for keeping content: identity/instructions > preferences > \
current context.
4. Merge duplicate entries, remove outdated entries, shorten verbose descriptions.
5. Each entry must be a single bullet point.
6. Every bullet MUST keep its (YYYY-MM-DD) date prefix.
7. Output ONLY the consolidated markdown no explanations, no wrapping.
<memory_document>
{content}
</memory_document>"""
async def _auto_consolidate(content: str, llm: Any) -> str | None:
"""Use a focused LLM call to consolidate *content* under the soft limit.
async def _forced_rewrite(content: str, llm: Any) -> str | None:
"""Use a focused LLM call to compress *content* under the hard limit.
Returns the consolidated string, or ``None`` if consolidation fails.
Returns the rewritten string, or ``None`` if the call fails.
"""
try:
prompt = _CONSOLIDATION_PROMPT.format(target=MEMORY_SOFT_LIMIT, content=content)
prompt = _FORCED_REWRITE_PROMPT.format(target=MEMORY_HARD_LIMIT, content=content)
response = await llm.ainvoke(
[HumanMessage(content=prompt)],
config={"tags": ["surfsense:internal"]},
@ -215,7 +146,7 @@ async def _auto_consolidate(content: str, llm: Any) -> str | None:
)
return text.strip()
except Exception:
logger.exception("Auto-consolidation LLM call failed")
logger.exception("Forced rewrite LLM call failed")
return None
@ -234,16 +165,17 @@ async def _save_memory(
rollback_fn,
label: str,
) -> dict[str, Any]:
"""Validate, optionally auto-consolidate, save, and return a response dict.
"""Validate, optionally force-rewrite if over the hard limit, save, and
return a response dict.
Parameters
----------
updated_memory : str
The new document the agent submitted.
old_memory : str | None
The previously persisted document (for diff / pinned checks).
The previously persisted document (for diff checks).
llm : Any | None
LLM instance for auto-consolidation (may be ``None``).
LLM instance for forced rewrite (may be ``None``).
apply_fn : callable(str) -> None
Callback that sets the new memory on the ORM object.
commit_fn : coroutine
@ -255,21 +187,13 @@ async def _save_memory(
"""
content = updated_memory
# --- pinned-section gate (before any size check) ---
pinned_err = _validate_pinned_preserved(old_memory, content)
if pinned_err:
return {"status": "error", "message": pinned_err}
# --- forced rewrite if over the hard limit ---
if len(content) > MEMORY_HARD_LIMIT and llm is not None:
rewritten = await _forced_rewrite(content, llm)
if rewritten is not None and len(rewritten) < len(content):
content = rewritten
# --- auto-consolidate proactively at the soft limit ---
if len(content) > MEMORY_SOFT_LIMIT and llm is not None:
consolidated = await _auto_consolidate(content, llm)
if consolidated is not None:
if old_memory:
consolidated = _restore_missing_pinned(old_memory, consolidated)
if len(consolidated) < len(content):
content = consolidated
# --- hard-limit gate (reject if still too large after consolidation) ---
# --- hard-limit gate (reject if still too large after rewrite) ---
size_err = _validate_memory_size(content)
if size_err:
return size_err
@ -290,7 +214,7 @@ async def _save_memory(
}
if content is not updated_memory:
resp["notice"] = "Memory was automatically consolidated to fit within limits."
resp["notice"] = "Memory was automatically rewritten to fit within limits."
diff_warnings = _validate_diff(old_memory, content)
if diff_warnings:

View file

@ -1,13 +1,24 @@
"""Routes for user memory management (personal memory.md)."""
from __future__ import annotations
import logging
from fastapi import APIRouter, Depends, HTTPException
from langchain_core.messages import HumanMessage
from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncSession
from app.agents.new_chat.tools.update_memory import MEMORY_HARD_LIMIT
from app.agents.new_chat.llm_config import (
create_chat_litellm_from_agent_config,
load_agent_llm_config_for_search_space,
)
from app.agents.new_chat.tools.update_memory import MEMORY_HARD_LIMIT, _save_memory
from app.db import User, get_async_session
from app.users import current_active_user
logger = logging.getLogger(__name__)
router = APIRouter()
@ -19,6 +30,33 @@ class MemoryUpdate(BaseModel):
memory_md: str
class MemoryEditRequest(BaseModel):
    """Request body for the personal-memory natural-language edit endpoint."""

    # Natural-language instruction describing the desired memory change.
    query: str
    # Search space used to resolve the LLM configuration for the edit call.
    search_space_id: int
# Prompt for the user-facing memory edit endpoint: the model applies the
# user's instruction to the current document and must return the FULL updated
# markdown (no commentary). Format placeholders: {current_memory}, {instruction}.
_MEMORY_EDIT_PROMPT = """\
You are a memory editor. The user wants to modify their memory document. \
Apply the user's instruction to the existing memory document and output the \
FULL updated document.
RULES:
1. If the instruction asks to add something, add it in the appropriate \
## section with a (YYYY-MM-DD) date prefix using today's date.
2. If the instruction asks to remove something, remove the matching entry.
3. If the instruction asks to change something, update the matching entry.
4. Preserve the existing ## section structure and all other entries.
5. Output ONLY the updated markdown no explanations, no wrapping.
<current_memory>
{current_memory}
</current_memory>
<user_instruction>
{instruction}
</user_instruction>"""
@router.get("/users/me/memory", response_model=MemoryRead)
async def get_user_memory(
user: User = Depends(current_active_user),
@ -44,3 +82,60 @@ async def update_user_memory(
await session.commit()
await session.refresh(user, ["memory_md"])
return MemoryRead(memory_md=user.memory_md or "")
@router.post("/users/me/memory/edit", response_model=MemoryRead)
async def edit_user_memory(
    body: MemoryEditRequest,
    user: User = Depends(current_active_user),
    session: AsyncSession = Depends(get_async_session),
):
    """Apply a natural language edit to the user's personal memory via LLM."""
    # Resolve an LLM from the search space's agent configuration.
    agent_config = await load_agent_llm_config_for_search_space(
        session, body.search_space_id
    )
    if not agent_config:
        raise HTTPException(status_code=500, detail="No LLM configuration available.")
    llm = create_chat_litellm_from_agent_config(agent_config)
    if not llm:
        raise HTTPException(status_code=500, detail="Failed to create LLM instance.")
    # Re-read memory_md so the edit applies to the freshest persisted value.
    await session.refresh(user, ["memory_md"])
    current_memory = user.memory_md or ""
    edit_prompt = _MEMORY_EDIT_PROMPT.format(
        current_memory=current_memory or "(empty)",
        instruction=body.query,
    )
    try:
        llm_response = await llm.ainvoke(
            [HumanMessage(content=edit_prompt)],
            config={"tags": ["surfsense:internal", "memory-edit"]},
        )
        raw_content = llm_response.content
        updated = (
            raw_content if isinstance(raw_content, str) else str(raw_content)
        ).strip()
    except Exception as e:
        logger.exception("Memory edit LLM call failed: %s", e)
        raise HTTPException(status_code=500, detail="Memory edit failed.") from e
    if not updated:
        raise HTTPException(status_code=400, detail="LLM returned empty result.")
    # _save_memory validates the document and commits (or rolls back) itself.
    outcome = await _save_memory(
        updated_memory=updated,
        old_memory=current_memory,
        llm=llm,
        apply_fn=lambda content: setattr(user, "memory_md", content),
        commit_fn=session.commit,
        rollback_fn=session.rollback,
        label="memory",
    )
    if outcome.get("status") == "error":
        raise HTTPException(status_code=400, detail=outcome["message"])
    await session.refresh(user, ["memory_md"])
    return MemoryRead(memory_md=user.memory_md or "")

View file

@ -1,11 +1,17 @@
import logging
from fastapi import APIRouter, Depends, HTTPException
from langchain_core.messages import HumanMessage
from pydantic import BaseModel as PydanticBaseModel
from sqlalchemy import func
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from app.agents.new_chat.tools.update_memory import MEMORY_HARD_LIMIT
from app.agents.new_chat.llm_config import (
create_chat_litellm_from_agent_config,
load_agent_llm_config_for_search_space,
)
from app.agents.new_chat.tools.update_memory import MEMORY_HARD_LIMIT, _save_memory
from app.config import config
from app.db import (
ImageGenerationConfig,
@ -35,6 +41,32 @@ logger = logging.getLogger(__name__)
router = APIRouter()
class _TeamMemoryEditRequest(PydanticBaseModel):
    """Request body for the team-memory natural-language edit endpoint."""

    # Natural-language instruction describing the desired memory change.
    query: str
# Prompt for the team-memory edit endpoint: the model applies a member's
# instruction to the shared document and must return the FULL updated
# markdown (no commentary). Format placeholders: {current_memory}, {instruction}.
_TEAM_MEMORY_EDIT_PROMPT = """\
You are a memory editor for a team workspace. The user wants to modify the \
team's shared memory document. Apply the user's instruction to the existing \
memory document and output the FULL updated document.
RULES:
1. If the instruction asks to add something, add it in the appropriate \
## section with a (YYYY-MM-DD) date prefix using today's date.
2. If the instruction asks to remove something, remove the matching entry.
3. If the instruction asks to change something, update the matching entry.
4. Preserve the existing ## section structure and all other entries.
5. Output ONLY the updated markdown no explanations, no wrapping.
<current_memory>
{current_memory}
</current_memory>
<user_instruction>
{instruction}
</user_instruction>"""
async def create_default_roles_and_membership(
session: AsyncSession,
search_space_id: int,
@ -280,6 +312,79 @@ async def update_search_space(
) from e
@router.post(
    "/searchspaces/{search_space_id}/memory/edit",
    response_model=SearchSpaceRead,
)
async def edit_team_memory(
    search_space_id: int,
    body: _TeamMemoryEditRequest,
    session: AsyncSession = Depends(get_async_session),
    user: User = Depends(current_active_user),
):
    """Apply a natural language edit to the team memory via LLM."""
    # Authorization gate: must run before touching any search-space data.
    await check_search_space_access(session, user, search_space_id)
    # Resolve an LLM from the search space's agent configuration.
    agent_config = await load_agent_llm_config_for_search_space(
        session, search_space_id
    )
    if not agent_config:
        raise HTTPException(status_code=500, detail="No LLM configuration available.")
    llm = create_chat_litellm_from_agent_config(agent_config)
    if not llm:
        raise HTTPException(status_code=500, detail="Failed to create LLM instance.")
    result = await session.execute(
        select(SearchSpace).filter(SearchSpace.id == search_space_id)
    )
    db_search_space = result.scalars().first()
    if not db_search_space:
        raise HTTPException(status_code=404, detail="Search space not found")
    current_memory = db_search_space.shared_memory_md or ""
    prompt = _TEAM_MEMORY_EDIT_PROMPT.format(
        current_memory=current_memory or "(empty)",
        instruction=body.query,
    )
    try:
        response = await llm.ainvoke(
            [HumanMessage(content=prompt)],
            config={"tags": ["surfsense:internal", "memory-edit"]},
        )
        # LLM content may be a string or a structured payload; normalize to str.
        updated = (
            response.content
            if isinstance(response.content, str)
            else str(response.content)
        ).strip()
    except Exception as e:
        logger.exception("Team memory edit LLM call failed: %s", e)
        raise HTTPException(
            status_code=500, detail="Team memory edit failed."
        ) from e
    if not updated:
        raise HTTPException(status_code=400, detail="LLM returned empty result.")
    # _save_memory validates the document and commits (or rolls back) itself.
    save_result = await _save_memory(
        updated_memory=updated,
        old_memory=current_memory,
        llm=llm,
        apply_fn=lambda content: setattr(
            db_search_space, "shared_memory_md", content
        ),
        commit_fn=session.commit,
        rollback_fn=session.rollback,
        label="team memory",
    )
    if save_result.get("status") == "error":
        raise HTTPException(status_code=400, detail=save_result["message"])
    # Refresh so the response reflects the just-committed memory document.
    await session.refresh(db_search_space)
    return db_search_space
@router.delete("/searchspaces/{search_space_id}", response_model=dict)
async def delete_search_space(
search_space_id: int,

View file

@ -30,6 +30,7 @@ from sqlalchemy.orm import selectinload
from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent
from app.agents.new_chat.checkpointer import get_checkpointer
from app.agents.new_chat.memory_extraction import extract_and_save_memory
from app.agents.new_chat.llm_config import (
AgentConfig,
create_chat_litellm_from_agent_config,
@ -139,6 +140,7 @@ class StreamResult:
is_interrupted: bool = False
interrupt_value: dict[str, Any] | None = None
sandbox_files: list[str] = field(default_factory=list) # unused, kept for compat
agent_called_update_memory: bool = False
async def _stream_agent_events(
@ -181,6 +183,7 @@ async def _stream_agent_events(
last_active_step_items: list[str] = initial_step_items or []
just_finished_tool: bool = False
active_tool_depth: int = 0 # Track nesting: >0 means we're inside a tool
called_update_memory: bool = False
def next_thinking_step_id() -> str:
nonlocal thinking_step_counter
@ -488,6 +491,9 @@ async def _stream_agent_events(
tool_name = event.get("name", "unknown_tool")
raw_output = event.get("data", {}).get("output", "")
if tool_name == "update_memory":
called_update_memory = True
if hasattr(raw_output, "content"):
content = raw_output.content
if isinstance(content, str):
@ -1109,6 +1115,7 @@ async def _stream_agent_events(
yield completion_event
result.accumulated_text = accumulated_text
result.agent_called_update_memory = called_update_memory
state = await agent.aget_state(config)
is_interrupted = state.tasks and any(task.interrupts for task in state.tasks)
@ -1538,6 +1545,16 @@ async def stream_new_chat(
chat_id, generated_title
)
# Fire background memory extraction if the agent didn't handle it
if not stream_result.agent_called_update_memory and user_id:
asyncio.create_task(
extract_and_save_memory(
user_message=user_query,
user_id=user_id,
llm=llm,
)
)
# Finish the step and message
yield streaming_service.format_finish_step()
yield streaming_service.format_finish()

View file

@ -1,13 +1,16 @@
"use client";
import { Info } from "lucide-react";
import { useCallback, useEffect, useState } from "react";
import { useAtomValue } from "jotai";
import { Info, Send } from "lucide-react";
import { useCallback, useEffect, useRef, useState } from "react";
import { toast } from "sonner";
import { z } from "zod";
import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms";
import { PlateEditor } from "@/components/editor/plate-editor";
import { Alert, AlertDescription } from "@/components/ui/alert";
import { Button } from "@/components/ui/button";
import { Spinner } from "@/components/ui/spinner";
import { Textarea } from "@/components/ui/textarea";
import { baseApiService } from "@/lib/apis/base-api.service";
const MEMORY_HARD_LIMIT = 25_000;
@ -17,17 +20,19 @@ const MemoryReadSchema = z.object({
});
export function MemoryContent() {
const activeSearchSpaceId = useAtomValue(activeSearchSpaceIdAtom);
const [memory, setMemory] = useState("");
const [savedMemory, setSavedMemory] = useState("");
const [loading, setLoading] = useState(true);
const [saving, setSaving] = useState(false);
const [editQuery, setEditQuery] = useState("");
const [editing, setEditing] = useState(false);
const textareaRef = useRef<HTMLTextAreaElement>(null);
const fetchMemory = useCallback(async () => {
try {
setLoading(true);
const data = await baseApiService.get("/api/v1/users/me/memory", MemoryReadSchema);
setMemory(data.memory_md);
setSavedMemory(data.memory_md);
} catch {
toast.error("Failed to load memory");
} finally {
@ -39,21 +44,6 @@ export function MemoryContent() {
fetchMemory();
}, [fetchMemory]);
const handleSave = async () => {
try {
setSaving(true);
const data = await baseApiService.put("/api/v1/users/me/memory", MemoryReadSchema, {
body: { memory_md: memory },
});
setSavedMemory(data.memory_md);
toast.success("Memory saved");
} catch {
toast.error("Failed to save memory");
} finally {
setSaving(false);
}
};
const handleClear = async () => {
try {
setSaving(true);
@ -61,7 +51,6 @@ export function MemoryContent() {
body: { memory_md: "" },
});
setMemory(data.memory_md);
setSavedMemory(data.memory_md);
toast.success("Memory cleared");
} catch {
toast.error("Failed to clear memory");
@ -70,14 +59,33 @@ export function MemoryContent() {
}
};
// Track editor changes with trimmed markdown so the character counter and
// saved/unsaved comparison ignore leading/trailing whitespace.
const handleMarkdownChange = useCallback((md: string) => {
	const trimmed = md.trim();
	setMemory(trimmed);
}, []);
// Send the natural-language instruction to the memory-edit endpoint and
// replace the displayed memory with the LLM-updated document.
const handleEdit = async () => {
	const query = editQuery.trim();
	// Ignore empty submissions (button is also disabled in this case).
	if (!query) return;
	try {
		setEditing(true);
		const data = await baseApiService.post("/api/v1/users/me/memory/edit", MemoryReadSchema, {
			// NOTE(review): assumes an active search space id is set here;
			// Number(null) would send 0 — confirm an upstream guard exists.
			body: { query, search_space_id: Number(activeSearchSpaceId) },
		});
		setMemory(data.memory_md);
		setEditQuery("");
		toast.success("Memory updated");
	} catch {
		toast.error("Failed to edit memory");
	} finally {
		setEditing(false);
	}
};
// Enter submits the edit; Shift+Enter falls through to insert a newline.
const handleKeyDown = (e: React.KeyboardEvent<HTMLTextAreaElement>) => {
	if (e.key === "Enter" && !e.shiftKey) {
		e.preventDefault();
		handleEdit();
	}
};
const hasChanges = memory !== savedMemory;
const charCount = memory.length;
const isOverLimit = charCount > MEMORY_HARD_LIMIT;
const getCounterColor = () => {
if (charCount > MEMORY_HARD_LIMIT) return "text-red-500";
@ -101,18 +109,16 @@ export function MemoryContent() {
<AlertDescription className="text-xs md:text-sm">
<p>
SurfSense uses this personal memory to personalize your responses across all
conversations. Supports <span className="font-medium">Markdown</span> formatting.
conversations. Use the input below to add, update, or remove memory entries.
</p>
</AlertDescription>
</Alert>
<div className="h-[340px] overflow-y-auto rounded-md border">
<PlateEditor
markdown={savedMemory}
onMarkdownChange={handleMarkdownChange}
preset="minimal"
defaultEditing
placeholder="Add personal context here, such as your preferences, instructions, or facts about you"
markdown={memory}
readOnly
preset="readonly"
variant="default"
editorVariant="none"
className="px-4 py-4 text-xs min-h-full"
@ -123,30 +129,43 @@ export function MemoryContent() {
<span className={`text-xs ${getCounterColor()}`}>
{charCount.toLocaleString()} / {MEMORY_HARD_LIMIT.toLocaleString()} characters
{charCount > 15_000 && charCount <= MEMORY_HARD_LIMIT && " - Approaching limit"}
{isOverLimit && " - Exceeds limit"}
{charCount > MEMORY_HARD_LIMIT && " - Exceeds limit"}
</span>
</div>
<div className="flex justify-between">
<div className="relative">
<Textarea
ref={textareaRef}
value={editQuery}
onChange={(e) => setEditQuery(e.target.value)}
onKeyDown={handleKeyDown}
placeholder="e.g. &quot;I prefer TypeScript over JavaScript&quot; or &quot;Remove the entry about Tokyo&quot;"
disabled={editing}
rows={2}
className="pr-12 resize-none text-sm"
/>
<Button
type="button"
size="icon"
variant="ghost"
onClick={handleEdit}
disabled={editing || !editQuery.trim()}
className="absolute right-2 bottom-2 h-7 w-7"
>
{editing ? <Spinner size="sm" /> : <Send className="h-4 w-4" />}
</Button>
</div>
<div className="flex justify-start">
<Button
type="button"
variant="destructive"
size="sm"
onClick={handleClear}
disabled={saving || !savedMemory}
disabled={saving || editing || !memory}
>
Reset Memory
</Button>
<Button
type="button"
variant="outline"
onClick={handleSave}
disabled={saving || !hasChanges || isOverLimit}
className="relative gap-2 bg-white text-black hover:bg-neutral-100 dark:bg-white dark:text-black dark:hover:bg-neutral-200 items-center justify-center"
>
<span className={saving ? "opacity-0" : ""}>Save</span>
{saving && <Spinner size="sm" className="absolute" />}
</Button>
</div>
</div>
);

View file

@ -1,25 +1,33 @@
"use client";
import { useQuery } from "@tanstack/react-query";
import { useQuery, useQueryClient } from "@tanstack/react-query";
import { useAtomValue } from "jotai";
import { Info } from "lucide-react";
import { useCallback, useEffect, useState } from "react";
import { Info, Send } from "lucide-react";
import { useRef, useState } from "react";
import { toast } from "sonner";
import { z } from "zod";
import { updateSearchSpaceMutationAtom } from "@/atoms/search-spaces/search-space-mutation.atoms";
import { PlateEditor } from "@/components/editor/plate-editor";
import { Alert, AlertDescription } from "@/components/ui/alert";
import { Button } from "@/components/ui/button";
import { Spinner } from "@/components/ui/spinner";
import { Textarea } from "@/components/ui/textarea";
import { baseApiService } from "@/lib/apis/base-api.service";
import { searchSpacesApiService } from "@/lib/apis/search-spaces-api.service";
import { cacheKeys } from "@/lib/query-client/cache-keys";
// Hard cap on memory-document length; the UI warns as it approaches this limit.
const MEMORY_HARD_LIMIT = 25_000;
// Minimal response shape for search-space memory endpoints; only the shared
// memory markdown is read here, and unrecognized fields pass through untouched.
const SearchSpaceSchema = z.object({
shared_memory_md: z.string().optional().default(""),
}).passthrough();
// Identifies which search space's shared (team) memory this panel manages.
interface TeamMemoryManagerProps {
searchSpaceId: number;
}
export function TeamMemoryManager({ searchSpaceId }: TeamMemoryManagerProps) {
const queryClient = useQueryClient();
const { data: searchSpace, isLoading: loading } = useQuery({
queryKey: cacheKeys.searchSpaces.detail(searchSpaceId.toString()),
queryFn: () => searchSpacesApiService.getSearchSpace({ id: searchSpaceId }),
@ -28,36 +36,12 @@ export function TeamMemoryManager({ searchSpaceId }: TeamMemoryManagerProps) {
const { mutateAsync: updateSearchSpace } = useAtomValue(updateSearchSpaceMutationAtom);
const [memory, setMemory] = useState("");
const [saving, setSaving] = useState(false);
const [editQuery, setEditQuery] = useState("");
const [editing, setEditing] = useState(false);
const textareaRef = useRef<HTMLTextAreaElement>(null);
useEffect(() => {
if (searchSpace) {
setMemory(searchSpace.shared_memory_md || "");
}
}, [searchSpace?.shared_memory_md]);
const handleMarkdownChange = useCallback((md: string) => {
const trimmed = md.trim();
setMemory(trimmed);
}, []);
const hasChanges = !!searchSpace && (searchSpace.shared_memory_md || "") !== memory;
const handleSave = async () => {
try {
setSaving(true);
await updateSearchSpace({
id: searchSpaceId,
data: { shared_memory_md: memory },
});
toast.success("Team memory saved");
} catch {
toast.error("Failed to save team memory");
} finally {
setSaving(false);
}
};
const memory = searchSpace?.shared_memory_md || "";
const handleClear = async () => {
try {
@ -66,7 +50,6 @@ export function TeamMemoryManager({ searchSpaceId }: TeamMemoryManagerProps) {
id: searchSpaceId,
data: { shared_memory_md: "" },
});
setMemory("");
toast.success("Team memory cleared");
} catch {
toast.error("Failed to clear team memory");
@ -75,8 +58,37 @@ export function TeamMemoryManager({ searchSpaceId }: TeamMemoryManagerProps) {
}
};
// Submit a natural-language edit instruction for the shared team memory, then
// invalidate the cached search-space detail so the updated memory is refetched.
const handleEdit = async () => {
	const instruction = editQuery.trim();
	if (!instruction) return;
	setEditing(true);
	try {
		await baseApiService.post(
			`/api/v1/searchspaces/${searchSpaceId}/memory/edit`,
			SearchSpaceSchema,
			{ body: { query: instruction } },
		);
		setEditQuery("");
		await queryClient.invalidateQueries({
			queryKey: cacheKeys.searchSpaces.detail(searchSpaceId.toString()),
		});
		toast.success("Team memory updated");
	} catch {
		toast.error("Failed to edit team memory");
	} finally {
		setEditing(false);
	}
};
// Submit on plain Enter; Shift+Enter falls through to insert a newline.
const handleKeyDown = (e: React.KeyboardEvent<HTMLTextAreaElement>) => {
	if (e.key !== "Enter" || e.shiftKey) return;
	e.preventDefault();
	handleEdit();
};
const charCount = memory.length;
const isOverLimit = charCount > MEMORY_HARD_LIMIT;
const getCounterColor = () => {
if (charCount > MEMORY_HARD_LIMIT) return "text-red-500";
@ -100,18 +112,16 @@ export function TeamMemoryManager({ searchSpaceId }: TeamMemoryManagerProps) {
<AlertDescription className="text-xs md:text-sm">
<p>
SurfSense uses this shared memory to provide team-wide context across all conversations
in this search space. Supports <span className="font-medium">Markdown</span> formatting.
in this search space. Use the input below to add, update, or remove memory entries.
</p>
</AlertDescription>
</Alert>
<div className="h-[340px] overflow-y-auto rounded-md border">
<PlateEditor
markdown={searchSpace?.shared_memory_md || ""}
onMarkdownChange={handleMarkdownChange}
preset="minimal"
defaultEditing
placeholder="Add team context here, such as decisions, conventions, key facts, or current priorities"
markdown={memory}
readOnly
preset="readonly"
variant="default"
editorVariant="none"
className="px-4 py-4 text-xs min-h-full"
@ -122,30 +132,43 @@ export function TeamMemoryManager({ searchSpaceId }: TeamMemoryManagerProps) {
<span className={`text-xs ${getCounterColor()}`}>
{charCount.toLocaleString()} / {MEMORY_HARD_LIMIT.toLocaleString()} characters
{charCount > 15_000 && charCount <= MEMORY_HARD_LIMIT && " - Approaching limit"}
{isOverLimit && " - Exceeds limit"}
{charCount > MEMORY_HARD_LIMIT && " - Exceeds limit"}
</span>
</div>
<div className="flex justify-between">
<div className="relative">
<Textarea
ref={textareaRef}
value={editQuery}
onChange={(e) => setEditQuery(e.target.value)}
onKeyDown={handleKeyDown}
placeholder="e.g. &quot;We decided to use PostgreSQL&quot; or &quot;Remove the standup entry&quot;"
disabled={editing}
rows={2}
className="pr-12 resize-none text-sm"
/>
<Button
type="button"
size="icon"
variant="ghost"
onClick={handleEdit}
disabled={editing || !editQuery.trim()}
className="absolute right-2 bottom-2 h-7 w-7"
>
{editing ? <Spinner size="sm" /> : <Send className="h-4 w-4" />}
</Button>
</div>
<div className="flex justify-start">
<Button
type="button"
variant="destructive"
size="sm"
onClick={handleClear}
disabled={saving || !searchSpace?.shared_memory_md}
disabled={saving || editing || !memory}
>
Clear Memory
</Button>
<Button
type="button"
variant="outline"
onClick={handleSave}
disabled={saving || !hasChanges || isOverLimit}
className="relative gap-2 bg-white text-black hover:bg-neutral-100 dark:bg-white dark:text-black dark:hover:bg-neutral-200 items-center justify-center"
>
<span className={saving ? "opacity-0" : ""}>Save</span>
{saving && <Spinner size="sm" className="absolute" />}
</Button>
</div>
</div>
);