mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-25 19:15:18 +02:00
refactor: remove memory extraction functionality and update memory management protocols to ensure immediate updates for user and team interactions
This commit is contained in:
parent
f38ea77940
commit
cd72fa9a48
7 changed files with 80 additions and 337 deletions
|
|
@ -1,223 +0,0 @@
|
||||||
"""Post-response memory extraction for the SurfSense agent.
|
|
||||||
|
|
||||||
After each agent response, a background task calls a lightweight LLM to decide
|
|
||||||
whether the user's message contains any long-term information worth persisting
|
|
||||||
(preferences, background, goals, instructions, etc.). This ensures memory
|
|
||||||
updates are never missed regardless of whether the main agent called
|
|
||||||
``update_memory`` during the conversation.
|
|
||||||
|
|
||||||
The function re-reads memory from the database so it always sees the latest
|
|
||||||
state — including any updates the agent may have already made.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import logging
|
|
||||||
from typing import Any
|
|
||||||
from uuid import UUID
|
|
||||||
|
|
||||||
from langchain_core.messages import HumanMessage
|
|
||||||
from sqlalchemy import select
|
|
||||||
|
|
||||||
from app.agents.new_chat.tools.update_memory import _save_memory
|
|
||||||
from app.db import ChatVisibility, SearchSpace, User, shielded_async_session
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
_MEMORY_EXTRACT_PROMPT = """\
|
|
||||||
You are a memory extraction assistant. Analyze the user's message and decide \
|
|
||||||
if it contains any long-term information worth persisting to memory.
|
|
||||||
|
|
||||||
Worth remembering: preferences, background/identity, goals, projects, \
|
|
||||||
instructions, tools/languages they use, decisions, expertise, workplace.
|
|
||||||
|
|
||||||
NOT worth remembering: greetings, one-off factual questions, session \
|
|
||||||
logistics, ephemeral requests, follow-up clarifications with no new personal info.
|
|
||||||
|
|
||||||
If the message contains memorizable information, output the FULL updated \
|
|
||||||
memory document with the new facts merged into the existing content. Follow \
|
|
||||||
these rules:
|
|
||||||
- Use the same ## section structure as the existing memory.
|
|
||||||
- Keep entries as single concise bullet points (under 120 chars each).
|
|
||||||
- Add (YYYY-MM) date suffixes on time-sensitive entries.
|
|
||||||
- Never remove or modify sections marked with (pinned).
|
|
||||||
- If a new fact contradicts an existing entry, update the existing entry.
|
|
||||||
- Do not duplicate information that is already present.
|
|
||||||
- Standard sections: \
|
|
||||||
"## About the user (pinned)", "## Preferences", "## Instructions (pinned)", \
|
|
||||||
"## Current context"
|
|
||||||
|
|
||||||
If nothing is worth remembering, output exactly: NO_UPDATE
|
|
||||||
|
|
||||||
<current_memory>
|
|
||||||
{current_memory}
|
|
||||||
</current_memory>
|
|
||||||
|
|
||||||
<user_message>
|
|
||||||
{user_message}
|
|
||||||
</user_message>"""
|
|
||||||
|
|
||||||
_TEAM_MEMORY_EXTRACT_PROMPT = """\
|
|
||||||
You are a memory extraction assistant for a team workspace. Analyze the \
|
|
||||||
user's message and decide if it contains any long-term team information \
|
|
||||||
worth persisting to the shared memory.
|
|
||||||
|
|
||||||
Worth remembering: team decisions, conventions, coding standards, key facts \
|
|
||||||
about the project/team, processes, architecture decisions.
|
|
||||||
|
|
||||||
NOT worth remembering: greetings, personal preferences, one-off questions, \
|
|
||||||
ephemeral requests.
|
|
||||||
|
|
||||||
If the message contains memorizable information, output the FULL updated \
|
|
||||||
memory document with the new facts merged into the existing content. Follow \
|
|
||||||
these rules:
|
|
||||||
- Use the same ## section structure as the existing memory.
|
|
||||||
- Keep entries as single concise bullet points (under 120 chars each).
|
|
||||||
- Add (YYYY-MM) date suffixes on time-sensitive entries.
|
|
||||||
- Never remove or modify sections marked with (pinned).
|
|
||||||
- Standard sections: \
|
|
||||||
"## Team decisions (pinned)", "## Conventions (pinned)", "## Key facts", \
|
|
||||||
"## Current priorities"
|
|
||||||
|
|
||||||
If nothing is worth remembering, output exactly: NO_UPDATE
|
|
||||||
|
|
||||||
<current_memory>
|
|
||||||
{current_memory}
|
|
||||||
</current_memory>
|
|
||||||
|
|
||||||
<user_message>
|
|
||||||
{user_message}
|
|
||||||
</user_message>"""
|
|
||||||
|
|
||||||
|
|
||||||
async def _call_extraction_llm(
|
|
||||||
llm: Any,
|
|
||||||
prompt_template: str,
|
|
||||||
current_memory: str,
|
|
||||||
user_message: str,
|
|
||||||
) -> str | None:
|
|
||||||
"""Run the extraction LLM and return the updated memory, or ``None``."""
|
|
||||||
prompt = prompt_template.format(
|
|
||||||
current_memory=current_memory or "(empty)",
|
|
||||||
user_message=user_message,
|
|
||||||
)
|
|
||||||
response = await llm.ainvoke(
|
|
||||||
[HumanMessage(content=prompt)],
|
|
||||||
config={"tags": ["surfsense:internal", "memory-extraction"]},
|
|
||||||
)
|
|
||||||
text = (
|
|
||||||
response.content if isinstance(response.content, str) else str(response.content)
|
|
||||||
).strip()
|
|
||||||
|
|
||||||
if text == "NO_UPDATE" or not text:
|
|
||||||
return None
|
|
||||||
return text
|
|
||||||
|
|
||||||
|
|
||||||
async def extract_and_save_memory(
|
|
||||||
*,
|
|
||||||
user_message: str,
|
|
||||||
user_id: str | None,
|
|
||||||
search_space_id: int,
|
|
||||||
thread_visibility: ChatVisibility | None,
|
|
||||||
llm: Any,
|
|
||||||
) -> None:
|
|
||||||
"""Background task: extract memorizable info and persist it.
|
|
||||||
|
|
||||||
This function is designed to be fire-and-forget — it catches all
|
|
||||||
exceptions internally and never propagates them.
|
|
||||||
"""
|
|
||||||
if not user_id:
|
|
||||||
return
|
|
||||||
|
|
||||||
visibility = thread_visibility or ChatVisibility.PRIVATE
|
|
||||||
|
|
||||||
try:
|
|
||||||
await _extract_user_memory(user_message, user_id, llm)
|
|
||||||
except Exception:
|
|
||||||
logger.exception("Background user memory extraction failed")
|
|
||||||
|
|
||||||
if visibility == ChatVisibility.SEARCH_SPACE:
|
|
||||||
try:
|
|
||||||
await _extract_team_memory(user_message, search_space_id, llm)
|
|
||||||
except Exception:
|
|
||||||
logger.exception("Background team memory extraction failed")
|
|
||||||
|
|
||||||
|
|
||||||
async def _extract_user_memory(
|
|
||||||
user_message: str,
|
|
||||||
user_id: str,
|
|
||||||
llm: Any,
|
|
||||||
) -> None:
|
|
||||||
"""Extract and persist user memory updates."""
|
|
||||||
uid = UUID(user_id) if isinstance(user_id, str) else user_id
|
|
||||||
|
|
||||||
async with shielded_async_session() as session:
|
|
||||||
result = await session.execute(select(User).where(User.id == uid))
|
|
||||||
user = result.scalars().first()
|
|
||||||
if not user:
|
|
||||||
return
|
|
||||||
|
|
||||||
old_memory = user.memory_md
|
|
||||||
updated = await _call_extraction_llm(
|
|
||||||
llm, _MEMORY_EXTRACT_PROMPT, old_memory or "", user_message
|
|
||||||
)
|
|
||||||
if updated is None:
|
|
||||||
logger.debug("Memory extraction: no update needed (user %s)", uid)
|
|
||||||
return
|
|
||||||
|
|
||||||
save_result = await _save_memory(
|
|
||||||
updated_memory=updated,
|
|
||||||
old_memory=old_memory,
|
|
||||||
llm=llm,
|
|
||||||
apply_fn=lambda content: setattr(user, "memory_md", content),
|
|
||||||
commit_fn=session.commit,
|
|
||||||
rollback_fn=session.rollback,
|
|
||||||
label="memory",
|
|
||||||
)
|
|
||||||
logger.info(
|
|
||||||
"Background memory extraction for user %s: %s",
|
|
||||||
uid,
|
|
||||||
save_result.get("status"),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
async def _extract_team_memory(
|
|
||||||
user_message: str,
|
|
||||||
search_space_id: int,
|
|
||||||
llm: Any,
|
|
||||||
) -> None:
|
|
||||||
"""Extract and persist team memory updates."""
|
|
||||||
async with shielded_async_session() as session:
|
|
||||||
result = await session.execute(
|
|
||||||
select(SearchSpace).where(SearchSpace.id == search_space_id)
|
|
||||||
)
|
|
||||||
space = result.scalars().first()
|
|
||||||
if not space:
|
|
||||||
return
|
|
||||||
|
|
||||||
old_memory = space.shared_memory_md
|
|
||||||
updated = await _call_extraction_llm(
|
|
||||||
llm, _TEAM_MEMORY_EXTRACT_PROMPT, old_memory or "", user_message
|
|
||||||
)
|
|
||||||
if updated is None:
|
|
||||||
logger.debug(
|
|
||||||
"Team memory extraction: no update needed (space %s)",
|
|
||||||
search_space_id,
|
|
||||||
)
|
|
||||||
return
|
|
||||||
|
|
||||||
save_result = await _save_memory(
|
|
||||||
updated_memory=updated,
|
|
||||||
old_memory=old_memory,
|
|
||||||
llm=llm,
|
|
||||||
apply_fn=lambda content: setattr(space, "shared_memory_md", content),
|
|
||||||
commit_fn=session.commit,
|
|
||||||
rollback_fn=session.rollback,
|
|
||||||
label="team memory",
|
|
||||||
)
|
|
||||||
logger.info(
|
|
||||||
"Background team memory extraction for space %s: %s",
|
|
||||||
search_space_id,
|
|
||||||
save_result.get("status"),
|
|
||||||
)
|
|
||||||
|
|
@ -59,12 +59,14 @@ class MemoryInjectionMiddleware(AgentMiddleware): # type: ignore[type-arg]
|
||||||
|
|
||||||
async with shielded_async_session() as session:
|
async with shielded_async_session() as session:
|
||||||
if self.user_id is not None:
|
if self.user_id is not None:
|
||||||
user_memory, is_persisted = await self._load_user_memory(session)
|
user_memory, display_name = await self._load_user_memory(session)
|
||||||
|
if display_name:
|
||||||
|
first_name = display_name.split()[0]
|
||||||
|
memory_blocks.append(f"<user_name>{first_name}</user_name>")
|
||||||
if user_memory:
|
if user_memory:
|
||||||
chars = len(user_memory)
|
chars = len(user_memory)
|
||||||
persisted = "true" if is_persisted else "false"
|
|
||||||
memory_blocks.append(
|
memory_blocks.append(
|
||||||
f'<user_memory chars="{chars}" limit="{MEMORY_HARD_LIMIT}" persisted="{persisted}">\n'
|
f'<user_memory chars="{chars}" limit="{MEMORY_HARD_LIMIT}">\n'
|
||||||
f"{user_memory}\n"
|
f"{user_memory}\n"
|
||||||
f"</user_memory>"
|
f"</user_memory>"
|
||||||
)
|
)
|
||||||
|
|
@ -91,42 +93,19 @@ class MemoryInjectionMiddleware(AgentMiddleware): # type: ignore[type-arg]
|
||||||
|
|
||||||
return {"messages": new_messages}
|
return {"messages": new_messages}
|
||||||
|
|
||||||
async def _load_user_memory(self, session: AsyncSession) -> tuple[str | None, bool]:
|
async def _load_user_memory(self, session: AsyncSession) -> tuple[str | None, str | None]:
|
||||||
"""Return (memory_content, is_persisted).
|
"""Return (memory_content, display_name)."""
|
||||||
|
|
||||||
When the user has no saved memory but has a display name, a seed
|
|
||||||
document is created and **persisted to the database immediately**
|
|
||||||
so the LLM doesn't need to make a tool call to save it.
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
result = await session.execute(
|
result = await session.execute(
|
||||||
select(User.memory_md, User.display_name).where(User.id == self.user_id)
|
select(User.memory_md, User.display_name).where(User.id == self.user_id)
|
||||||
)
|
)
|
||||||
row = result.one_or_none()
|
row = result.one_or_none()
|
||||||
if row is None:
|
if row is None:
|
||||||
return None, True
|
return None, None
|
||||||
|
return row.memory_md or None, row.display_name
|
||||||
memory_md, display_name = row
|
|
||||||
|
|
||||||
if memory_md:
|
|
||||||
return memory_md, True
|
|
||||||
|
|
||||||
if display_name:
|
|
||||||
first_name = display_name.split()[0]
|
|
||||||
seed = f"## About the user (pinned)\n- Name: {first_name}"
|
|
||||||
await session.execute(
|
|
||||||
User.__table__.update()
|
|
||||||
.where(User.id == self.user_id)
|
|
||||||
.values(memory_md=seed)
|
|
||||||
)
|
|
||||||
await session.commit()
|
|
||||||
logger.info("Auto-persisted memory seed for user %s", self.user_id)
|
|
||||||
return seed, True
|
|
||||||
|
|
||||||
return None, True
|
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.exception("Failed to load user memory")
|
logger.exception("Failed to load user memory")
|
||||||
return None, True
|
return None, None
|
||||||
|
|
||||||
async def _load_team_memory(self, session: AsyncSession) -> str | None:
|
async def _load_team_memory(self, session: AsyncSession) -> str | None:
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@ -40,6 +40,13 @@ CRITICAL RULE — KNOWLEDGE BASE FIRST, NEVER DEFAULT TO GENERAL KNOWLEDGE:
|
||||||
* Tool-usage actions like generating reports, podcasts, images, or scraping webpages
|
* Tool-usage actions like generating reports, podcasts, images, or scraping webpages
|
||||||
</knowledge_base_only_policy>
|
</knowledge_base_only_policy>
|
||||||
|
|
||||||
|
<memory_protocol>
|
||||||
|
IMPORTANT — After understanding each user message, ALWAYS check: does this message
|
||||||
|
reveal durable facts about the user (role, interests, preferences, projects,
|
||||||
|
background, or standing instructions)? If yes, you MUST call update_memory
|
||||||
|
alongside your normal response — do not defer this to a later turn.
|
||||||
|
</memory_protocol>
|
||||||
|
|
||||||
</system_instruction>
|
</system_instruction>
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
@ -71,6 +78,13 @@ CRITICAL RULE — KNOWLEDGE BASE FIRST, NEVER DEFAULT TO GENERAL KNOWLEDGE:
|
||||||
* Tool-usage actions like generating reports, podcasts, images, or scraping webpages
|
* Tool-usage actions like generating reports, podcasts, images, or scraping webpages
|
||||||
</knowledge_base_only_policy>
|
</knowledge_base_only_policy>
|
||||||
|
|
||||||
|
<memory_protocol>
|
||||||
|
IMPORTANT — After understanding each user message, ALWAYS check: does this message
|
||||||
|
reveal durable facts about the team (decisions, conventions, architecture, processes,
|
||||||
|
or key facts)? If yes, you MUST call update_memory alongside your normal response —
|
||||||
|
do not defer this to a later turn.
|
||||||
|
</memory_protocol>
|
||||||
|
|
||||||
</system_instruction>
|
</system_instruction>
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
@ -257,56 +271,52 @@ _MEMORY_TOOL_INSTRUCTIONS: dict[str, dict[str, str]] = {
|
||||||
`limit` attributes show your current usage and the maximum allowed size.
|
`limit` attributes show your current usage and the maximum allowed size.
|
||||||
- This is your curated long-term memory — the distilled essence of what you know about
|
- This is your curated long-term memory — the distilled essence of what you know about
|
||||||
the user, not raw conversation logs.
|
the user, not raw conversation logs.
|
||||||
- Note: The system automatically extracts memorizable information from every
|
- You are the sole mechanism for persisting memory — there is no background extraction.
|
||||||
conversation in the background. Use this tool primarily for:
|
Call update_memory when:
|
||||||
* Explicit user requests: "remember this", "keep in mind", "note that", "forget X"
|
* The user explicitly asks to remember or forget something
|
||||||
* Restructuring or reorganizing the memory document
|
* The user shares durable facts or preferences that will matter in future conversations
|
||||||
* Correcting outdated or wrong entries
|
- The user's name is already provided via <user_name> — do not store it in memory.
|
||||||
* **If <user_memory> has persisted="false"** — you MUST still call update_memory
|
- Do not store short-lived or ephemeral info: one-off questions, greetings,
|
||||||
to persist the seed.
|
session logistics, or things that only matter for the current task.
|
||||||
- Skip truly ephemeral info (one-off questions, greetings, session logistics).
|
|
||||||
- Args:
|
- Args:
|
||||||
- updated_memory: The FULL updated markdown document (not a diff).
|
- updated_memory: The FULL updated markdown document (not a diff).
|
||||||
Merge new facts with existing ones, update contradictions, remove outdated entries.
|
Merge new facts with existing ones, update contradictions, remove outdated entries.
|
||||||
Treat every update as a curation pass — consolidate, don't just append.
|
Treat every update as a curation pass — consolidate, don't just append.
|
||||||
Include inline dates (YYYY-MM) on entries where temporal context matters (facts that
|
- Every bullet MUST start with a (YYYY-MM-DD) date prefix indicating when it was recorded or last updated.
|
||||||
may change, decisions, context). Skip dates on timeless preferences and instructions.
|
|
||||||
- Keep it concise and well under the character limit shown in <user_memory>.
|
- Keep it concise and well under the character limit shown in <user_memory>.
|
||||||
- You MUST organize memory using these standard sections (add new `##` sections only if none of the standard ones fit):
|
- You MUST organize memory using these standard sections (add new `##` sections only if none of the standard ones fit):
|
||||||
## About the user (pinned) — name, role, background, company (with date if it may change)
|
## About the user (pinned) — role, background, company
|
||||||
## Preferences — languages, tools, frameworks, response style
|
## Preferences — languages, tools, frameworks, response style
|
||||||
## Instructions (pinned) — standing instructions, things to always/never do
|
## Instructions (pinned) — standing instructions, things to always/never do
|
||||||
## Current context — ongoing projects, goals, deadlines (with date)
|
|
||||||
- Each entry MUST be a single bullet point. Keep entries concise (aim for under 120 chars each).
|
- Each entry MUST be a single bullet point. Keep entries concise (aim for under 120 chars each).
|
||||||
- Each time-sensitive entry MUST include a (YYYY-MM) date suffix.
|
|
||||||
- Sections with `(pinned)` in the heading are protected — the system will reject any
|
- Sections with `(pinned)` in the heading are protected — the system will reject any
|
||||||
update that removes them. Users can add `(pinned)` to any `##` heading to protect it.
|
update that removes them. Users can add `(pinned)` to any `##` heading to protect it.
|
||||||
- During consolidation, prioritize keeping: pinned sections > preferences > current context.
|
- During consolidation, prioritize keeping: pinned sections > preferences.
|
||||||
""",
|
""",
|
||||||
"shared": """
|
"shared": """
|
||||||
- update_memory: Update the team's shared memory document for this search space.
|
- update_memory: Update the team's shared memory document for this search space.
|
||||||
- Your current team memory is already in <team_memory> in your context. The `chars`
|
- Your current team memory is already in <team_memory> in your context. The `chars`
|
||||||
and `limit` attributes show current usage and the maximum allowed size.
|
and `limit` attributes show current usage and the maximum allowed size.
|
||||||
- This is the team's curated long-term memory — decisions, conventions, key facts.
|
- This is the team's curated long-term memory — decisions, conventions, key facts.
|
||||||
- Note: The system automatically extracts memorizable team information from every
|
- You are the sole mechanism for persisting team memory — there is no background extraction.
|
||||||
conversation in the background. Use this tool primarily for:
|
Call update_memory when:
|
||||||
* Explicit requests: "let's remember that", "note this decision", "forget X"
|
* A team member explicitly asks to remember or forget something
|
||||||
* Restructuring or reorganizing the team memory document
|
* The conversation surfaces durable team decisions, conventions, or facts
|
||||||
* Correcting outdated or wrong entries
|
that will matter in future conversations
|
||||||
- Skip truly ephemeral info (one-off questions, greetings, session logistics).
|
- Do not store short-lived or ephemeral info: one-off questions, greetings,
|
||||||
|
session logistics, or things that only matter for the current task.
|
||||||
- Args:
|
- Args:
|
||||||
- updated_memory: The FULL updated markdown document (not a diff).
|
- updated_memory: The FULL updated markdown document (not a diff).
|
||||||
Merge new facts with existing ones, update contradictions, remove outdated entries.
|
Merge new facts with existing ones, update contradictions, remove outdated entries.
|
||||||
Treat every update as a curation pass — consolidate, don't just append.
|
Treat every update as a curation pass — consolidate, don't just append.
|
||||||
Include inline dates (YYYY-MM) on decisions and time-sensitive entries.
|
- Every bullet MUST start with a (YYYY-MM-DD) date prefix indicating when it was recorded or last updated.
|
||||||
- Keep it concise and well under the character limit shown in <team_memory>.
|
- Keep it concise and well under the character limit shown in <team_memory>.
|
||||||
- You MUST organize memory using these standard sections (add new `##` sections only if none of the standard ones fit):
|
- You MUST organize memory using these standard sections (add new `##` sections only if none of the standard ones fit):
|
||||||
## Team decisions (pinned) — agreed-upon choices with rationale and date
|
## Team decisions (pinned) — agreed-upon choices with rationale
|
||||||
## Conventions (pinned) — coding standards, tools, processes, naming patterns
|
## Conventions (pinned) — coding standards, tools, processes, naming patterns
|
||||||
## Key facts — where things are, how things work, team structure
|
## Key facts — where things are, how things work, team structure
|
||||||
## Current priorities — active projects, deadlines, blockers
|
## Current priorities — active projects, deadlines, blockers
|
||||||
- Each entry MUST be a single bullet point. Keep entries concise (aim for under 120 chars each).
|
- Each entry MUST be a single bullet point. Keep entries concise (aim for under 120 chars each).
|
||||||
- Each time-sensitive entry MUST include a (YYYY-MM) date suffix.
|
|
||||||
- Sections with `(pinned)` in the heading are protected — the system will reject any
|
- Sections with `(pinned)` in the heading are protected — the system will reject any
|
||||||
update that removes them. Users can add `(pinned)` to any `##` heading to protect it.
|
update that removes them. Users can add `(pinned)` to any `##` heading to protect it.
|
||||||
- During consolidation, prioritize keeping: pinned sections > key facts > current priorities.
|
- During consolidation, prioritize keeping: pinned sections > key facts > current priorities.
|
||||||
|
|
@ -317,28 +327,26 @@ _MEMORY_TOOL_INSTRUCTIONS: dict[str, dict[str, str]] = {
|
||||||
_MEMORY_TOOL_EXAMPLES: dict[str, dict[str, str]] = {
|
_MEMORY_TOOL_EXAMPLES: dict[str, dict[str, str]] = {
|
||||||
"update_memory": {
|
"update_memory": {
|
||||||
"private": """
|
"private": """
|
||||||
- <user_memory persisted="false"> contains "## About the user (pinned)\\n- Name: Alex"
|
- <user_memory> is empty. User: "I'm a space enthusiast, explain astrophage to me"
|
||||||
User: "I'm a university student, explain astrophage to me"
|
- The user casually shared a durable fact about themselves. Save it:
|
||||||
- Memory is not yet persisted AND the user casually shared that they are a student.
|
update_memory(updated_memory="## About the user (pinned)\\n- (2025-03-15) Space enthusiast\\n")
|
||||||
You MUST call update_memory to persist the seed plus the new fact:
|
- User: "Remember that I prefer concise answers over detailed explanations"
|
||||||
update_memory(updated_memory="## About the user (pinned)\\n- Name: Alex\\n- University student\\n")
|
- Durable preference. You see the current <user_memory> and merge:
|
||||||
- User: "Remember that I prefer TypeScript over JavaScript"
|
update_memory(updated_memory="## About the user (pinned)\\n- (2025-03-15) Space enthusiast\\n\\n## Preferences\\n- (2025-03-15) Prefers concise answers over detailed explanations\\n...")
|
||||||
- Timeless preference, no date needed. You see the current <user_memory> and merge:
|
- User: "I actually moved to Tokyo last month"
|
||||||
update_memory(updated_memory="## About the user (pinned)\\n- Senior developer\\n\\n## Preferences\\n- Prefers TypeScript over JavaScript\\n...")
|
- Updated fact, date prefix reflects when recorded:
|
||||||
- User: "I actually moved to Google last month"
|
update_memory(updated_memory="## About the user (pinned)\\n- (2025-03-15) Lives in Tokyo (previously London)\\n...")
|
||||||
- Fact that changes over time, include date:
|
- User: "I'm a freelance photographer working on a nature documentary"
|
||||||
update_memory(updated_memory="## About the user (pinned)\\n- Senior developer at Google (since 2026-03, previously Acme Corp)\\n...")
|
- Durable background info. Save it under About the user:
|
||||||
- User: "I'm building a SaaS app with Next.js and Supabase"
|
update_memory(updated_memory="## About the user (pinned)\\n- (2025-03-15) Freelance photographer\\n- (2025-03-15) Working on a nature documentary\\n")
|
||||||
- Implicit project info shared as context. Save it:
|
|
||||||
update_memory(updated_memory="## About the user (pinned)\\n- Name: Alex\\n\\n## Current context\\n- Building a SaaS app with Next.js and Supabase (2026-04)\\n")
|
|
||||||
""",
|
""",
|
||||||
"shared": """
|
"shared": """
|
||||||
- User: "Let's remember that we decided to use GraphQL"
|
- User: "Let's remember that we decided to do weekly standup meetings on Mondays"
|
||||||
- Decision with date:
|
- Durable team decision:
|
||||||
update_memory(updated_memory="## Team decisions (pinned)\\n- 2026-04: Adopted GraphQL over REST for new APIs\\n...")
|
update_memory(updated_memory="## Team decisions (pinned)\\n- (2025-03-15) Weekly standup meetings on Mondays\\n...")
|
||||||
- User: "Our deploy process uses Railway auto-deploys"
|
- User: "Our office is in downtown Seattle, 5th floor"
|
||||||
- Key fact, no date needed:
|
- Durable team fact:
|
||||||
update_memory(updated_memory="## Key facts\\n- Deploy pipeline: git push -> Railway auto-deploys in ~3min\\n...")
|
update_memory(updated_memory="## Key facts\\n- (2025-03-15) Office location: downtown Seattle, 5th floor\\n...")
|
||||||
""",
|
""",
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -6,9 +6,10 @@ always sees the current memory in <user_memory> / <team_memory> tags injected
|
||||||
by MemoryInjectionMiddleware, so it passes the FULL updated document each time.
|
by MemoryInjectionMiddleware, so it passes the FULL updated document each time.
|
||||||
|
|
||||||
Overflow handling:
|
Overflow handling:
|
||||||
- Soft limit (15K chars): advisory warning returned alongside a successful save.
|
- Soft limit (18K chars): an automatic LLM-driven consolidation is attempted
|
||||||
- Hard limit (25K chars): save rejected; an automatic LLM-driven consolidation
|
to proactively keep memory lean. The save always succeeds.
|
||||||
is attempted before falling back to the error.
|
- Hard limit (25K chars): save rejected if memory still exceeds this after
|
||||||
|
consolidation.
|
||||||
- Pinned sections: headings containing ``(pinned)`` are protected — the system
|
- Pinned sections: headings containing ``(pinned)`` are protected — the system
|
||||||
rejects any update that drops them and auto-restores them during consolidation.
|
rejects any update that drops them and auto-restores them during consolidation.
|
||||||
- Diff validation: warns when entire ``##`` sections are dropped or when the
|
- Diff validation: warns when entire ``##`` sections are dropped or when the
|
||||||
|
|
@ -31,7 +32,7 @@ from app.db import SearchSpace, User
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
MEMORY_SOFT_LIMIT = 15_000
|
MEMORY_SOFT_LIMIT = 18_000
|
||||||
MEMORY_HARD_LIMIT = 25_000
|
MEMORY_HARD_LIMIT = 25_000
|
||||||
|
|
||||||
_PINNED_RE = re.compile(r"^##\s+.+\(pinned\)", re.MULTILINE)
|
_PINNED_RE = re.compile(r"^##\s+.+\(pinned\)", re.MULTILINE)
|
||||||
|
|
@ -188,7 +189,7 @@ RULES:
|
||||||
preferences > current context.
|
preferences > current context.
|
||||||
5. Merge duplicate entries, remove outdated entries, shorten verbose descriptions.
|
5. Merge duplicate entries, remove outdated entries, shorten verbose descriptions.
|
||||||
6. Each entry must be a single bullet point.
|
6. Each entry must be a single bullet point.
|
||||||
7. Preserve (YYYY-MM) date suffixes on time-sensitive entries.
|
7. Every bullet MUST keep its (YYYY-MM-DD) date prefix.
|
||||||
8. Output ONLY the consolidated markdown — no explanations, no wrapping.
|
8. Output ONLY the consolidated markdown — no explanations, no wrapping.
|
||||||
|
|
||||||
<memory_document>
|
<memory_document>
|
||||||
|
|
@ -259,25 +260,19 @@ async def _save_memory(
|
||||||
if pinned_err:
|
if pinned_err:
|
||||||
return {"status": "error", "message": pinned_err}
|
return {"status": "error", "message": pinned_err}
|
||||||
|
|
||||||
# --- hard-limit gate with auto-consolidation fallback ---
|
# --- auto-consolidate proactively at the soft limit ---
|
||||||
|
if len(content) > MEMORY_SOFT_LIMIT and llm is not None:
|
||||||
|
consolidated = await _auto_consolidate(content, llm)
|
||||||
|
if consolidated is not None:
|
||||||
|
if old_memory:
|
||||||
|
consolidated = _restore_missing_pinned(old_memory, consolidated)
|
||||||
|
if len(consolidated) < len(content):
|
||||||
|
content = consolidated
|
||||||
|
|
||||||
|
# --- hard-limit gate (reject if still too large after consolidation) ---
|
||||||
size_err = _validate_memory_size(content)
|
size_err = _validate_memory_size(content)
|
||||||
if size_err:
|
if size_err:
|
||||||
if llm is None:
|
return size_err
|
||||||
return size_err
|
|
||||||
|
|
||||||
consolidated = await _auto_consolidate(content, llm)
|
|
||||||
if consolidated is None:
|
|
||||||
return size_err
|
|
||||||
|
|
||||||
# Restore any pinned sections the consolidation LLM may have dropped
|
|
||||||
if old_memory:
|
|
||||||
consolidated = _restore_missing_pinned(old_memory, consolidated)
|
|
||||||
|
|
||||||
recheck = _validate_memory_size(consolidated)
|
|
||||||
if recheck:
|
|
||||||
return recheck
|
|
||||||
|
|
||||||
content = consolidated
|
|
||||||
|
|
||||||
# --- persist ---
|
# --- persist ---
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@ -37,7 +37,6 @@ from app.agents.new_chat.llm_config import (
|
||||||
load_agent_config,
|
load_agent_config,
|
||||||
load_llm_config_from_yaml,
|
load_llm_config_from_yaml,
|
||||||
)
|
)
|
||||||
from app.agents.new_chat.memory_extraction import extract_and_save_memory
|
|
||||||
from app.db import (
|
from app.db import (
|
||||||
ChatVisibility,
|
ChatVisibility,
|
||||||
NewChatMessage,
|
NewChatMessage,
|
||||||
|
|
@ -60,8 +59,6 @@ from app.utils.perf import get_perf_logger, log_system_snapshot, trim_native_hea
|
||||||
|
|
||||||
_perf_log = get_perf_logger()
|
_perf_log = get_perf_logger()
|
||||||
|
|
||||||
_background_tasks: set[asyncio.Task] = set()
|
|
||||||
|
|
||||||
|
|
||||||
def format_mentioned_surfsense_docs_as_context(
|
def format_mentioned_surfsense_docs_as_context(
|
||||||
documents: list[SurfsenseDocsDocument],
|
documents: list[SurfsenseDocsDocument],
|
||||||
|
|
@ -1525,19 +1522,6 @@ async def stream_new_chat(
|
||||||
yield streaming_service.format_done()
|
yield streaming_service.format_done()
|
||||||
return
|
return
|
||||||
|
|
||||||
if user_id and llm is not None:
|
|
||||||
_mem_task = asyncio.create_task(
|
|
||||||
extract_and_save_memory(
|
|
||||||
user_message=user_query,
|
|
||||||
user_id=user_id,
|
|
||||||
search_space_id=search_space_id,
|
|
||||||
thread_visibility=visibility,
|
|
||||||
llm=llm,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
_background_tasks.add(_mem_task)
|
|
||||||
_mem_task.add_done_callback(_background_tasks.discard)
|
|
||||||
|
|
||||||
# If the title task didn't finish during streaming, await it now
|
# If the title task didn't finish during streaming, await it now
|
||||||
if title_task is not None and not title_emitted:
|
if title_task is not None and not title_emitted:
|
||||||
generated_title = await title_task
|
generated_title = await title_task
|
||||||
|
|
|
||||||
|
|
@ -135,7 +135,7 @@ export function MemoryContent() {
|
||||||
onClick={handleClear}
|
onClick={handleClear}
|
||||||
disabled={saving || !savedMemory}
|
disabled={saving || !savedMemory}
|
||||||
>
|
>
|
||||||
Clear All
|
Reset Memory
|
||||||
</Button>
|
</Button>
|
||||||
<Button
|
<Button
|
||||||
type="button"
|
type="button"
|
||||||
|
|
|
||||||
|
|
@ -134,7 +134,7 @@ export function TeamMemoryManager({ searchSpaceId }: TeamMemoryManagerProps) {
|
||||||
onClick={handleClear}
|
onClick={handleClear}
|
||||||
disabled={saving || !searchSpace?.shared_memory_md}
|
disabled={saving || !searchSpace?.shared_memory_md}
|
||||||
>
|
>
|
||||||
Clear All
|
Clear Memory
|
||||||
</Button>
|
</Button>
|
||||||
<Button
|
<Button
|
||||||
type="button"
|
type="button"
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue