diff --git a/surfsense_backend/app/agents/new_chat/memory_extraction.py b/surfsense_backend/app/agents/new_chat/memory_extraction.py new file mode 100644 index 000000000..ceff6ff41 --- /dev/null +++ b/surfsense_backend/app/agents/new_chat/memory_extraction.py @@ -0,0 +1,115 @@ +"""Background memory extraction for the SurfSense agent. + +After each agent response, if the agent did not call ``update_memory`` during +the turn, this module runs a lightweight LLM call to decide whether the user's +message contains any long-term information worth persisting. + +Only user (personal) memory is handled here — team memory relies on explicit +agent calls. +""" + +from __future__ import annotations + +import logging +from typing import Any +from uuid import UUID + +from langchain_core.messages import HumanMessage +from sqlalchemy import select + +from app.agents.new_chat.tools.update_memory import _save_memory +from app.db import User, shielded_async_session + +logger = logging.getLogger(__name__) + +_MEMORY_EXTRACT_PROMPT = """\ +You are a memory extraction assistant. Analyze the user's message and decide \ +if it contains any long-term information worth persisting to memory. + +Worth remembering: preferences, background/identity, goals, projects, \ +instructions, tools/languages they use, decisions, expertise, workplace — \ +durable facts that will matter in future conversations. + +NOT worth remembering: greetings, one-off factual questions, session \ +logistics, ephemeral requests, follow-up clarifications with no new personal \ +info, things that only matter for the current task. + +If the message contains memorizable information, output the FULL updated \ +memory document with the new facts merged into the existing content. Follow \ +these rules: +- Use the same ## section structure as the existing memory. +- Keep entries as single concise bullet points (under 120 chars each). +- Every bullet MUST start with a (YYYY-MM-DD) date prefix. +- If a new fact contradicts an existing entry, update the existing entry. +- Do not duplicate information that is already present. +- Standard sections: \ +"## About the user", "## Preferences", "## Instructions" + +If nothing is worth remembering, output exactly: NO_UPDATE + + +{current_memory} + + + +{user_message} +""" + + +async def extract_and_save_memory( + *, + user_message: str, + user_id: str | None, + llm: Any, +) -> None: + """Background task: extract memorizable info and persist it. + + Designed to be fire-and-forget — catches all exceptions internally. + """ + if not user_id: + return + + try: + uid = UUID(user_id) if isinstance(user_id, str) else user_id + + async with shielded_async_session() as session: + result = await session.execute(select(User).where(User.id == uid)) + user = result.scalars().first() + if not user: + return + + old_memory = user.memory_md + prompt = _MEMORY_EXTRACT_PROMPT.format( + current_memory=old_memory or "(empty)", + user_message=user_message, + ) + response = await llm.ainvoke( + [HumanMessage(content=prompt)], + config={"tags": ["surfsense:internal", "memory-extraction"]}, + ) + text = ( + response.content + if isinstance(response.content, str) + else str(response.content) + ).strip() + + if text == "NO_UPDATE" or not text: + logger.debug("Memory extraction: no update needed (user %s)", uid) + return + + save_result = await _save_memory( + updated_memory=text, + old_memory=old_memory, + llm=llm, + apply_fn=lambda content: setattr(user, "memory_md", content), + commit_fn=session.commit, + rollback_fn=session.rollback, + label="memory", + ) + logger.info( + "Background memory extraction for user %s: %s", + uid, + save_result.get("status"), + ) + except Exception: + logger.exception("Background user memory extraction failed") diff --git a/surfsense_backend/app/agents/new_chat/middleware/memory_injection.py b/surfsense_backend/app/agents/new_chat/middleware/memory_injection.py index 05b8d2be3..cd09e6b45 100644 --- a/surfsense_backend/app/agents/new_chat/middleware/memory_injection.py +++ b/surfsense_backend/app/agents/new_chat/middleware/memory_injection.py @@ -19,7 +19,7 @@ from langgraph.runtime import Runtime from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession -from app.agents.new_chat.tools.update_memory import MEMORY_HARD_LIMIT +from app.agents.new_chat.tools.update_memory import MEMORY_HARD_LIMIT, MEMORY_SOFT_LIMIT from app.db import ChatVisibility, SearchSpace, User, shielded_async_session logger = logging.getLogger(__name__) @@ -70,6 +70,15 @@ class MemoryInjectionMiddleware(AgentMiddleware): # type: ignore[type-arg] f"{user_memory}\n" f"" ) + if chars > MEMORY_SOFT_LIMIT: + memory_blocks.append( + f"Your personal memory is at " + f"{chars:,}/{MEMORY_HARD_LIMIT:,} characters and approaching " + f"the hard limit. On your next update_memory call, consolidate " + f"by merging duplicates, removing outdated entries, and " + f"shortening descriptions before adding anything new." + f"" + ) if self.visibility == ChatVisibility.SEARCH_SPACE: team_memory = await self._load_team_memory(session) @@ -80,6 +89,15 @@ class MemoryInjectionMiddleware(AgentMiddleware): # type: ignore[type-arg] f"{team_memory}\n" f"" ) + if chars > MEMORY_SOFT_LIMIT: + memory_blocks.append( + f"Team memory is at " + f"{chars:,}/{MEMORY_HARD_LIMIT:,} characters and approaching " + f"the hard limit. On your next update_memory call, consolidate " + f"by merging duplicates, removing outdated entries, and " + f"shortening descriptions before adding anything new." + f"" + ) if not memory_blocks: return None diff --git a/surfsense_backend/app/agents/new_chat/system_prompt.py b/surfsense_backend/app/agents/new_chat/system_prompt.py index f811deda9..403019d96 100644 --- a/surfsense_backend/app/agents/new_chat/system_prompt.py +++ b/surfsense_backend/app/agents/new_chat/system_prompt.py @@ -271,8 +271,7 @@ _MEMORY_TOOL_INSTRUCTIONS: dict[str, dict[str, str]] = { `limit` attributes show your current usage and the maximum allowed size. - This is your curated long-term memory — the distilled essence of what you know about the user, not raw conversation logs. - - You are the sole mechanism for persisting memory — there is no background extraction. - Call update_memory when: + - Call update_memory when: * The user explicitly asks to remember or forget something * The user shares durable facts or preferences that will matter in future conversations - The user's name is already provided via — do not store it in memory. @@ -285,21 +284,18 @@ _MEMORY_TOOL_INSTRUCTIONS: dict[str, dict[str, str]] = { - Every bullet MUST start with a (YYYY-MM-DD) date prefix indicating when it was recorded or last updated. - Keep it concise and well under the character limit shown in . - You MUST organize memory using these standard sections (add new `##` sections only if none of the standard ones fit): - ## About the user (pinned) — role, background, company + ## About the user — role, background, company ## Preferences — languages, tools, frameworks, response style - ## Instructions (pinned) — standing instructions, things to always/never do + ## Instructions — standing instructions, things to always/never do - Each entry MUST be a single bullet point. Keep entries concise (aim for under 120 chars each). - - Sections with `(pinned)` in the heading are protected — the system will reject any - update that removes them. Users can add `(pinned)` to any `##` heading to protect it. - - During consolidation, prioritize keeping: pinned sections > preferences. + - During consolidation, prioritize keeping: identity/instructions > preferences. """, "shared": """ - update_memory: Update the team's shared memory document for this search space. - Your current team memory is already in in your context. The `chars` and `limit` attributes show current usage and the maximum allowed size. - This is the team's curated long-term memory — decisions, conventions, key facts. - - You are the sole mechanism for persisting team memory — there is no background extraction. - Call update_memory when: + - Call update_memory when: * A team member explicitly asks to remember or forget something * The conversation surfaces durable team decisions, conventions, or facts that will matter in future conversations @@ -312,14 +308,12 @@ _MEMORY_TOOL_INSTRUCTIONS: dict[str, dict[str, str]] = { - Every bullet MUST start with a (YYYY-MM-DD) date prefix indicating when it was recorded or last updated. - Keep it concise and well under the character limit shown in . - You MUST organize memory using these standard sections (add new `##` sections only if none of the standard ones fit): - ## Team decisions (pinned) — agreed-upon choices with rationale - ## Conventions (pinned) — coding standards, tools, processes, naming patterns + ## Team decisions — agreed-upon choices with rationale + ## Conventions — coding standards, tools, processes, naming patterns ## Key facts — where things are, how things work, team structure ## Current priorities — active projects, deadlines, blockers - Each entry MUST be a single bullet point. Keep entries concise (aim for under 120 chars each). - - Sections with `(pinned)` in the heading are protected — the system will reject any - update that removes them. Users can add `(pinned)` to any `##` heading to protect it. - - During consolidation, prioritize keeping: pinned sections > key facts > current priorities. + - During consolidation, prioritize keeping: decisions/conventions > key facts > current priorities. """, }, } @@ -329,21 +323,21 @@ _MEMORY_TOOL_EXAMPLES: dict[str, dict[str, str]] = { "private": """ - is empty. User: "I'm a space enthusiast, explain astrophage to me" - The user casually shared a durable fact about themselves. Save it: - update_memory(updated_memory="## About the user (pinned)\\n- (2025-03-15) Space enthusiast\\n") + update_memory(updated_memory="## About the user\\n- (2025-03-15) Space enthusiast\\n") - User: "Remember that I prefer concise answers over detailed explanations" - Durable preference. You see the current and merge: - update_memory(updated_memory="## About the user (pinned)\\n- (2025-03-15) Space enthusiast\\n\\n## Preferences\\n- (2025-03-15) Prefers concise answers over detailed explanations\\n...") + update_memory(updated_memory="## About the user\\n- (2025-03-15) Space enthusiast\\n\\n## Preferences\\n- (2025-03-15) Prefers concise answers over detailed explanations\\n...") - User: "I actually moved to Tokyo last month" - Updated fact, date prefix reflects when recorded: - update_memory(updated_memory="## About the user (pinned)\\n- (2025-03-15) Lives in Tokyo (previously London)\\n...") + update_memory(updated_memory="## About the user\\n- (2025-03-15) Lives in Tokyo (previously London)\\n...") - User: "I'm a freelance photographer working on a nature documentary" - Durable background info. Save it under About the user: - update_memory(updated_memory="## About the user (pinned)\\n- (2025-03-15) Freelance photographer\\n- (2025-03-15) Working on a nature documentary\\n") + update_memory(updated_memory="## About the user\\n- (2025-03-15) Freelance photographer\\n- (2025-03-15) Working on a nature documentary\\n") """, "shared": """ - User: "Let's remember that we decided to do weekly standup meetings on Mondays" - Durable team decision: - update_memory(updated_memory="## Team decisions (pinned)\\n- (2025-03-15) Weekly standup meetings on Mondays\\n...") + update_memory(updated_memory="## Team decisions\\n- (2025-03-15) Weekly standup meetings on Mondays\\n...") - User: "Our office is in downtown Seattle, 5th floor" - Durable team fact: update_memory(updated_memory="## Key facts\\n- (2025-03-15) Office location: downtown Seattle, 5th floor\\n...") diff --git a/surfsense_backend/app/agents/new_chat/tools/update_memory.py b/surfsense_backend/app/agents/new_chat/tools/update_memory.py index 991e8338e..b487fb4a0 100644 --- a/surfsense_backend/app/agents/new_chat/tools/update_memory.py +++ b/surfsense_backend/app/agents/new_chat/tools/update_memory.py @@ -6,12 +6,10 @@ always sees the current memory in / tags injected by MemoryInjectionMiddleware, so it passes the FULL updated document each time. Overflow handling: - - Soft limit (18K chars): an automatic LLM-driven consolidation is attempted - to proactively keep memory lean. The save always succeeds. - - Hard limit (25K chars): save rejected if memory still exceeds this after - consolidation. - - Pinned sections: headings containing ``(pinned)`` are protected — the system - rejects any update that drops them and auto-restores them during consolidation. + - Soft limit (18K chars): a warning is returned telling the agent to + consolidate on the next update. + - Hard limit (25K chars): a forced LLM-driven rewrite compresses the document. + If it still exceeds the limit after rewriting, the save is rejected. - Diff validation: warns when entire ``##`` sections are dropped or when the document shrinks by more than 60%. """ @@ -35,74 +33,9 @@ logger = logging.getLogger(__name__) MEMORY_SOFT_LIMIT = 18_000 MEMORY_HARD_LIMIT = 25_000 -_PINNED_RE = re.compile(r"^##\s+.+\(pinned\)", re.MULTILINE) _SECTION_HEADING_RE = re.compile(r"^##\s+(.+)$", re.MULTILINE) -# --------------------------------------------------------------------------- -# Pinned-section helpers -# --------------------------------------------------------------------------- - - -def _extract_pinned_headings(memory: str) -> set[str]: - """Return the set of ``## …`` headings that contain ``(pinned)``.""" - return set(_PINNED_RE.findall(memory)) - - -def _extract_section_map(memory: str) -> dict[str, str]: - """Split *memory* into ``{heading_text: full_section_content}``.""" - sections: dict[str, str] = {} - parts = _SECTION_HEADING_RE.split(memory) - # parts: [preamble, heading1, body1, heading2, body2, …] - for i in range(1, len(parts) - 1, 2): - heading = parts[i].strip() - body = parts[i + 1] - sections[heading] = f"## {heading}\n{body}" - return sections - - -def _validate_pinned_preserved(old_memory: str | None, new_memory: str) -> str | None: - """Return an error message if pinned headings from *old_memory* are missing - in *new_memory*, else ``None``.""" - if not old_memory: - return None - old_pinned = _extract_pinned_headings(old_memory) - if not old_pinned: - return None - new_pinned = _extract_pinned_headings(new_memory) - dropped = old_pinned - new_pinned - if dropped: - names = ", ".join(sorted(dropped)) - return ( - f"Cannot remove pinned sections: {names}. " - "These sections are protected and must be preserved. " - "Re-include them and call update_memory again." - ) - return None - - -def _restore_missing_pinned(old_memory: str, consolidated: str) -> str: - """Prepend any pinned sections from *old_memory* that are absent in - *consolidated*.""" - old_pinned = _extract_pinned_headings(old_memory) - if not old_pinned: - return consolidated - new_pinned = _extract_pinned_headings(consolidated) - dropped = old_pinned - new_pinned - if not dropped: - return consolidated - - old_sections = _extract_section_map(old_memory) - restored_parts: list[str] = [] - for heading in sorted(dropped): - raw_heading = heading.removeprefix("## ").strip() - if raw_heading in old_sections: - restored_parts.append(old_sections[raw_heading].rstrip()) - if restored_parts: - return "\n\n".join(restored_parts) + "\n\n" + consolidated - return consolidated - - # --------------------------------------------------------------------------- # Diff validation # --------------------------------------------------------------------------- @@ -173,37 +106,35 @@ def _soft_warning(content: str) -> str | None: # --------------------------------------------------------------------------- -# Auto-consolidation via a separate LLM call +# Forced rewrite when memory exceeds the hard limit # --------------------------------------------------------------------------- -_CONSOLIDATION_PROMPT = """\ +_FORCED_REWRITE_PROMPT = """\ You are a memory curator. The following memory document exceeds the character \ limit and must be shortened. RULES: 1. Rewrite the document to be under {target} characters. -2. Sections whose headings contain "(pinned)" MUST be preserved EXACTLY as-is \ - — do not modify, shorten, or remove them. -3. Only consolidate non-pinned content. -4. Priority for keeping content: pinned sections > identity/instructions > \ - preferences > current context. -5. Merge duplicate entries, remove outdated entries, shorten verbose descriptions. -6. Each entry must be a single bullet point. -7. Every bullet MUST keep its (YYYY-MM-DD) date prefix. -8. Output ONLY the consolidated markdown — no explanations, no wrapping. +2. Preserve all ## section headings. +3. Priority for keeping content: identity/instructions > preferences > \ + current context. +4. Merge duplicate entries, remove outdated entries, shorten verbose descriptions. +5. Each entry must be a single bullet point. +6. Every bullet MUST keep its (YYYY-MM-DD) date prefix. +7. Output ONLY the consolidated markdown — no explanations, no wrapping. {content} """ -async def _auto_consolidate(content: str, llm: Any) -> str | None: - """Use a focused LLM call to consolidate *content* under the soft limit. +async def _forced_rewrite(content: str, llm: Any) -> str | None: + """Use a focused LLM call to compress *content* under the hard limit. - Returns the consolidated string, or ``None`` if consolidation fails. + Returns the rewritten string, or ``None`` if the call fails. """ try: - prompt = _CONSOLIDATION_PROMPT.format(target=MEMORY_SOFT_LIMIT, content=content) + prompt = _FORCED_REWRITE_PROMPT.format(target=MEMORY_HARD_LIMIT, content=content) response = await llm.ainvoke( [HumanMessage(content=prompt)], config={"tags": ["surfsense:internal"]}, @@ -215,7 +146,7 @@ async def _auto_consolidate(content: str, llm: Any) -> str | None: ) return text.strip() except Exception: - logger.exception("Auto-consolidation LLM call failed") + logger.exception("Forced rewrite LLM call failed") return None @@ -234,16 +165,17 @@ async def _save_memory( rollback_fn, label: str, ) -> dict[str, Any]: - """Validate, optionally auto-consolidate, save, and return a response dict. + """Validate, optionally force-rewrite if over the hard limit, save, and + return a response dict. Parameters ---------- updated_memory : str The new document the agent submitted. old_memory : str | None - The previously persisted document (for diff / pinned checks). + The previously persisted document (for diff checks). llm : Any | None - LLM instance for auto-consolidation (may be ``None``). + LLM instance for forced rewrite (may be ``None``). apply_fn : callable(str) -> None Callback that sets the new memory on the ORM object. commit_fn : coroutine @@ -255,21 +187,13 @@ async def _save_memory( """ content = updated_memory - # --- pinned-section gate (before any size check) --- - pinned_err = _validate_pinned_preserved(old_memory, content) - if pinned_err: - return {"status": "error", "message": pinned_err} + # --- forced rewrite if over the hard limit --- + if len(content) > MEMORY_HARD_LIMIT and llm is not None: + rewritten = await _forced_rewrite(content, llm) + if rewritten is not None and len(rewritten) < len(content): + content = rewritten - # --- auto-consolidate proactively at the soft limit --- - if len(content) > MEMORY_SOFT_LIMIT and llm is not None: - consolidated = await _auto_consolidate(content, llm) - if consolidated is not None: - if old_memory: - consolidated = _restore_missing_pinned(old_memory, consolidated) - if len(consolidated) < len(content): - content = consolidated - - # --- hard-limit gate (reject if still too large after consolidation) --- + # --- hard-limit gate (reject if still too large after rewrite) --- size_err = _validate_memory_size(content) if size_err: return size_err @@ -290,7 +214,7 @@ async def _save_memory( } if content is not updated_memory: - resp["notice"] = "Memory was automatically consolidated to fit within limits." + resp["notice"] = "Memory was automatically rewritten to fit within limits." diff_warnings = _validate_diff(old_memory, content) if diff_warnings: diff --git a/surfsense_backend/app/routes/memory_routes.py b/surfsense_backend/app/routes/memory_routes.py index aa8b1be28..6ec535626 100644 --- a/surfsense_backend/app/routes/memory_routes.py +++ b/surfsense_backend/app/routes/memory_routes.py @@ -1,13 +1,24 @@ """Routes for user memory management (personal memory.md).""" +from __future__ import annotations + +import logging + from fastapi import APIRouter, Depends, HTTPException +from langchain_core.messages import HumanMessage from pydantic import BaseModel from sqlalchemy.ext.asyncio import AsyncSession -from app.agents.new_chat.tools.update_memory import MEMORY_HARD_LIMIT +from app.agents.new_chat.llm_config import ( + create_chat_litellm_from_agent_config, + load_agent_llm_config_for_search_space, +) +from app.agents.new_chat.tools.update_memory import MEMORY_HARD_LIMIT, _save_memory from app.db import User, get_async_session from app.users import current_active_user +logger = logging.getLogger(__name__) + router = APIRouter() @@ -19,6 +30,33 @@ class MemoryUpdate(BaseModel): memory_md: str +class MemoryEditRequest(BaseModel): + query: str + search_space_id: int + + +_MEMORY_EDIT_PROMPT = """\ +You are a memory editor. The user wants to modify their memory document. \ +Apply the user's instruction to the existing memory document and output the \ +FULL updated document. + +RULES: +1. If the instruction asks to add something, add it in the appropriate \ +## section with a (YYYY-MM-DD) date prefix using today's date. +2. If the instruction asks to remove something, remove the matching entry. +3. If the instruction asks to change something, update the matching entry. +4. Preserve the existing ## section structure and all other entries. +5. Output ONLY the updated markdown — no explanations, no wrapping. + + +{current_memory} + + + +{instruction} +""" + + @router.get("/users/me/memory", response_model=MemoryRead) async def get_user_memory( user: User = Depends(current_active_user), @@ -44,3 +82,60 @@ async def update_user_memory( await session.commit() await session.refresh(user, ["memory_md"]) return MemoryRead(memory_md=user.memory_md or "") + + +@router.post("/users/me/memory/edit", response_model=MemoryRead) +async def edit_user_memory( + body: MemoryEditRequest, + user: User = Depends(current_active_user), + session: AsyncSession = Depends(get_async_session), +): + """Apply a natural language edit to the user's personal memory via LLM.""" + agent_config = await load_agent_llm_config_for_search_space( + session, body.search_space_id + ) + if not agent_config: + raise HTTPException(status_code=500, detail="No LLM configuration available.") + llm = create_chat_litellm_from_agent_config(agent_config) + if not llm: + raise HTTPException(status_code=500, detail="Failed to create LLM instance.") + + await session.refresh(user, ["memory_md"]) + current_memory = user.memory_md or "" + + prompt = _MEMORY_EDIT_PROMPT.format( + current_memory=current_memory or "(empty)", + instruction=body.query, + ) + try: + response = await llm.ainvoke( + [HumanMessage(content=prompt)], + config={"tags": ["surfsense:internal", "memory-edit"]}, + ) + updated = ( + response.content + if isinstance(response.content, str) + else str(response.content) + ).strip() + except Exception as e: + logger.exception("Memory edit LLM call failed: %s", e) + raise HTTPException(status_code=500, detail="Memory edit failed.") from e + + if not updated: + raise HTTPException(status_code=400, detail="LLM returned empty result.") + + result = await _save_memory( + updated_memory=updated, + old_memory=current_memory, + llm=llm, + apply_fn=lambda content: setattr(user, "memory_md", content), + commit_fn=session.commit, + rollback_fn=session.rollback, + label="memory", + ) + + if result.get("status") == "error": + raise HTTPException(status_code=400, detail=result["message"]) + + await session.refresh(user, ["memory_md"]) + return MemoryRead(memory_md=user.memory_md or "") diff --git a/surfsense_backend/app/routes/search_spaces_routes.py b/surfsense_backend/app/routes/search_spaces_routes.py index 0650b8dfe..e66fc5f0c 100644 --- a/surfsense_backend/app/routes/search_spaces_routes.py +++ b/surfsense_backend/app/routes/search_spaces_routes.py @@ -1,11 +1,17 @@ import logging from fastapi import APIRouter, Depends, HTTPException +from langchain_core.messages import HumanMessage +from pydantic import BaseModel as PydanticBaseModel from sqlalchemy import func from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select -from app.agents.new_chat.tools.update_memory import MEMORY_HARD_LIMIT +from app.agents.new_chat.llm_config import ( + create_chat_litellm_from_agent_config, + load_agent_llm_config_for_search_space, +) +from app.agents.new_chat.tools.update_memory import MEMORY_HARD_LIMIT, _save_memory from app.config import config from app.db import ( ImageGenerationConfig, @@ -35,6 +41,32 @@ logger = logging.getLogger(__name__) router = APIRouter() +class _TeamMemoryEditRequest(PydanticBaseModel): + query: str + + +_TEAM_MEMORY_EDIT_PROMPT = """\ +You are a memory editor for a team workspace. The user wants to modify the \ +team's shared memory document. Apply the user's instruction to the existing \ +memory document and output the FULL updated document. + +RULES: +1. If the instruction asks to add something, add it in the appropriate \ +## section with a (YYYY-MM-DD) date prefix using today's date. +2. If the instruction asks to remove something, remove the matching entry. +3. If the instruction asks to change something, update the matching entry. +4. Preserve the existing ## section structure and all other entries. +5. Output ONLY the updated markdown — no explanations, no wrapping. + + +{current_memory} + + + +{instruction} +""" + + async def create_default_roles_and_membership( session: AsyncSession, search_space_id: int, @@ -280,6 +312,79 @@ async def update_search_space( ) from e +@router.post( + "/searchspaces/{search_space_id}/memory/edit", + response_model=SearchSpaceRead, +) +async def edit_team_memory( + search_space_id: int, + body: _TeamMemoryEditRequest, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """Apply a natural language edit to the team memory via LLM.""" + await check_search_space_access(session, user, search_space_id) + + agent_config = await load_agent_llm_config_for_search_space( + session, search_space_id + ) + if not agent_config: + raise HTTPException(status_code=500, detail="No LLM configuration available.") + llm = create_chat_litellm_from_agent_config(agent_config) + if not llm: + raise HTTPException(status_code=500, detail="Failed to create LLM instance.") + + result = await session.execute( + select(SearchSpace).filter(SearchSpace.id == search_space_id) + ) + db_search_space = result.scalars().first() + if not db_search_space: + raise HTTPException(status_code=404, detail="Search space not found") + + current_memory = db_search_space.shared_memory_md or "" + + prompt = _TEAM_MEMORY_EDIT_PROMPT.format( + current_memory=current_memory or "(empty)", + instruction=body.query, + ) + try: + response = await llm.ainvoke( + [HumanMessage(content=prompt)], + config={"tags": ["surfsense:internal", "memory-edit"]}, + ) + updated = ( + response.content + if isinstance(response.content, str) + else str(response.content) + ).strip() + except Exception as e: + logger.exception("Team memory edit LLM call failed: %s", e) + raise HTTPException( + status_code=500, detail="Team memory edit failed." + ) from e + + if not updated: + raise HTTPException(status_code=400, detail="LLM returned empty result.") + + save_result = await _save_memory( + updated_memory=updated, + old_memory=current_memory, + llm=llm, + apply_fn=lambda content: setattr( + db_search_space, "shared_memory_md", content + ), + commit_fn=session.commit, + rollback_fn=session.rollback, + label="team memory", + ) + + if save_result.get("status") == "error": + raise HTTPException(status_code=400, detail=save_result["message"]) + + await session.refresh(db_search_space) + return db_search_space + + @router.delete("/searchspaces/{search_space_id}", response_model=dict) async def delete_search_space( search_space_id: int, diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index 0a6c34e81..e12d189ac 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -30,6 +30,7 @@ from sqlalchemy.orm import selectinload from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent from app.agents.new_chat.checkpointer import get_checkpointer +from app.agents.new_chat.memory_extraction import extract_and_save_memory from app.agents.new_chat.llm_config import ( AgentConfig, create_chat_litellm_from_agent_config, @@ -139,6 +140,7 @@ class StreamResult: is_interrupted: bool = False interrupt_value: dict[str, Any] | None = None sandbox_files: list[str] = field(default_factory=list) # unused, kept for compat + agent_called_update_memory: bool = False async def _stream_agent_events( @@ -181,6 +183,7 @@ async def _stream_agent_events( last_active_step_items: list[str] = initial_step_items or [] just_finished_tool: bool = False active_tool_depth: int = 0 # Track nesting: >0 means we're inside a tool + called_update_memory: bool = False def next_thinking_step_id() -> str: nonlocal thinking_step_counter @@ -488,6 +491,9 @@ async def _stream_agent_events( tool_name = event.get("name", "unknown_tool") raw_output = event.get("data", {}).get("output", "") + if tool_name == "update_memory": + called_update_memory = True + if hasattr(raw_output, "content"): content = raw_output.content if isinstance(content, str): @@ -1109,6 +1115,7 @@ async def _stream_agent_events( yield completion_event result.accumulated_text = accumulated_text + result.agent_called_update_memory = called_update_memory state = await agent.aget_state(config) is_interrupted = state.tasks and any(task.interrupts for task in state.tasks) @@ -1538,6 +1545,16 @@ async def stream_new_chat( chat_id, generated_title ) + # Fire background memory extraction if the agent didn't handle it + if not stream_result.agent_called_update_memory and user_id: + asyncio.create_task( + extract_and_save_memory( + user_message=user_query, + user_id=user_id, + llm=llm, + ) + ) + # Finish the step and message yield streaming_service.format_finish_step() yield streaming_service.format_finish() diff --git a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/MemoryContent.tsx b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/MemoryContent.tsx index 85f2db695..8403c641c 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/MemoryContent.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/MemoryContent.tsx @@ -1,13 +1,16 @@ "use client"; -import { Info } from "lucide-react"; -import { useCallback, useEffect, useState } from "react"; +import { useAtomValue } from "jotai"; +import { Info, Send } from "lucide-react"; +import { useCallback, useEffect, useRef, useState } from "react"; import { toast } from "sonner"; import { z } from "zod"; +import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms"; import { PlateEditor } from "@/components/editor/plate-editor"; import { Alert, AlertDescription } from "@/components/ui/alert"; import { Button } from "@/components/ui/button"; import { Spinner } from "@/components/ui/spinner"; +import { Textarea } from "@/components/ui/textarea"; import { baseApiService } from "@/lib/apis/base-api.service"; const MEMORY_HARD_LIMIT = 25_000; @@ -17,17 +20,19 @@ const MemoryReadSchema = z.object({ }); export function MemoryContent() { + const activeSearchSpaceId = useAtomValue(activeSearchSpaceIdAtom); const [memory, setMemory] = useState(""); - const [savedMemory, setSavedMemory] = useState(""); const [loading, setLoading] = useState(true); const [saving, setSaving] = useState(false); + const [editQuery, setEditQuery] = useState(""); + const [editing, setEditing] = useState(false); + const textareaRef = useRef(null); const fetchMemory = useCallback(async () => { try { setLoading(true); const data = await baseApiService.get("/api/v1/users/me/memory", MemoryReadSchema); setMemory(data.memory_md); - setSavedMemory(data.memory_md); } catch { toast.error("Failed to load memory"); } finally { @@ -39,21 +44,6 @@ export function MemoryContent() { fetchMemory(); }, [fetchMemory]); - const handleSave = async () => { - try { - setSaving(true); - const data = await baseApiService.put("/api/v1/users/me/memory", MemoryReadSchema, { - body: { memory_md: memory }, - }); - setSavedMemory(data.memory_md); - toast.success("Memory saved"); - } catch { - toast.error("Failed to save memory"); - } finally { - setSaving(false); - } - }; - const handleClear = async () => { try { setSaving(true); @@ -61,7 +51,6 @@ export function MemoryContent() { body: { memory_md: "" }, }); setMemory(data.memory_md); - setSavedMemory(data.memory_md); toast.success("Memory cleared"); } catch { toast.error("Failed to clear memory"); @@ -70,14 +59,33 @@ export function MemoryContent() { } }; - const handleMarkdownChange = useCallback((md: string) => { - const trimmed = md.trim(); - setMemory(trimmed); - }, []); + const handleEdit = async () => { + const query = editQuery.trim(); + if (!query) return; + + try { + setEditing(true); + const data = await baseApiService.post("/api/v1/users/me/memory/edit", MemoryReadSchema, { + body: { query, search_space_id: Number(activeSearchSpaceId) }, + }); + setMemory(data.memory_md); + setEditQuery(""); + toast.success("Memory updated"); + } catch { + toast.error("Failed to edit memory"); + } finally { + setEditing(false); + } + }; + + const handleKeyDown = (e: React.KeyboardEvent) => { + if (e.key === "Enter" && !e.shiftKey) { + e.preventDefault(); + handleEdit(); + } + }; - const hasChanges = memory !== savedMemory; const charCount = memory.length; - const isOverLimit = charCount > MEMORY_HARD_LIMIT; const getCounterColor = () => { if (charCount > MEMORY_HARD_LIMIT) return "text-red-500"; @@ -101,18 +109,16 @@ export function MemoryContent() {

SurfSense uses this personal memory to personalize your responses across all - conversations. Supports Markdown formatting. + conversations. Use the input below to add, update, or remove memory entries.

{charCount.toLocaleString()} / {MEMORY_HARD_LIMIT.toLocaleString()} characters {charCount > 15_000 && charCount <= MEMORY_HARD_LIMIT && " - Approaching limit"} - {isOverLimit && " - Exceeds limit"} + {charCount > MEMORY_HARD_LIMIT && " - Exceeds limit"}
-
+
+