refactor: remove memory extraction functions and related components from the new chat agent

This commit is contained in:
Anish Sarkar 2026-05-20 14:03:28 +05:30
parent a0ff86e0e8
commit 132e7b3c44
12 changed files with 2 additions and 375 deletions

View file

@ -1,78 +0,0 @@
"""Background memory extraction for the SurfSense agent."""
from __future__ import annotations
import logging
from typing import Any
from uuid import UUID
from app.db import User, shielded_async_session
from app.services.memory import MemoryScope, extract_and_save
logger = logging.getLogger(__name__)
async def extract_and_save_memory(
*,
user_message: str,
user_id: str | None,
llm: Any,
) -> None:
"""Fire-and-forget personal memory extraction.
The service uses structured output, so free-form ``NO_UPDATE`` text can no
longer be accidentally persisted as memory.
"""
if not user_id:
return
try:
uid = UUID(user_id) if isinstance(user_id, str) else user_id
async with shielded_async_session() as session:
user = await session.get(User, uid)
actor_display_name = user.display_name if user else None
result = await extract_and_save(
scope=MemoryScope.USER,
target_id=uid,
user_message=user_message,
actor_display_name=actor_display_name,
session=session,
llm=llm,
)
logger.info(
"Background memory extraction for user %s: %s",
uid,
result.status,
)
except Exception:
logger.exception("Background user memory extraction failed")
async def extract_and_save_team_memory(
*,
user_message: str,
search_space_id: int | None,
llm: Any,
author_display_name: str | None = None,
) -> None:
"""Fire-and-forget team-level memory extraction."""
if not search_space_id:
return
try:
async with shielded_async_session() as session:
result = await extract_and_save(
scope=MemoryScope.TEAM,
target_id=search_space_id,
user_message=user_message,
actor_display_name=author_display_name,
session=session,
llm=llm,
)
logger.info(
"Background team memory extraction for space %s: %s",
search_space_id,
result.status,
)
except Exception:
logger.exception("Background team memory extraction failed")

View file

@ -4,7 +4,6 @@ from .schemas import MemoryLimits, MemoryRead
from .service import ( from .service import (
MemoryScope, MemoryScope,
SaveResult, SaveResult,
extract_and_save,
memory_limits, memory_limits,
read_memory, read_memory,
reset_memory, reset_memory,
@ -24,7 +23,6 @@ __all__ = [
"MemoryRead", "MemoryRead",
"MemoryScope", "MemoryScope",
"SaveResult", "SaveResult",
"extract_and_save",
"memory_limits", "memory_limits",
"read_memory", "read_memory",
"reset_memory", "reset_memory",

View file

@ -18,93 +18,3 @@ RULES:
<memory_document> <memory_document>
{content} {content}
</memory_document>""" </memory_document>"""
USER_MEMORY_EXTRACT_PROMPT = """\
You are a memory extraction assistant. Analyze the user's message and decide \
if it contains any long-term information worth persisting to personal memory.
Worth remembering: preferences, background/identity, goals, projects, \
instructions, tools/languages they use, decisions, expertise, workplace \
durable facts that will matter in future conversations.
NOT worth remembering: greetings, one-off factual questions, session \
logistics, ephemeral requests, follow-up clarifications with no new personal \
info, things that only matter for the current task.
If there is nothing durable to remember, choose `action = no_update`.
If the message contains memorizable information, choose `action = save` and \
return the FULL updated memory document with the new information merged into \
existing content.
FORMAT RULES FOR `updated_memory`:
- Markdown only.
- Every entry should be under a `##` heading.
- Recommended headings: `## Facts`, `## Preferences`, `## Instructions`.
- New bullets should use: `- YYYY-MM-DD: memory text`.
- If current memory uses legacy `(YYYY-MM-DD) [fact|pref|instr]` markers,
preserve the information but write the updated document in the new
heading-based format.
- Use the user's first name from `<user_name>` when helpful, not "the user".
- Do not duplicate existing information.
<user_name>{user_name}</user_name>
<current_memory>
{current_memory}
</current_memory>
<user_message>
{user_message}
</user_message>"""
TEAM_MEMORY_EXTRACT_PROMPT = """\
You are a team-memory extraction assistant. Analyze the latest message and \
decide if it contains durable TEAM-level information worth persisting.
Decision policy:
- Prioritize recall for durable team context, while avoiding personal-only facts.
- Do NOT require explicit consensus language. A direct team-level statement can
be stored if it is stable and broadly useful for future team chats.
- If evidence is weak or clearly tentative, choose `action = no_update`.
Worth remembering (team-level only):
- Decisions and defaults that guide future team work
- Team conventions/standards (naming, review policy, coding norms)
- Stable org/project facts (locations, ownership, constraints)
- Long-lived architecture/process facts
- Ongoing priorities that are likely relevant beyond this turn
NOT worth remembering:
- Personal preferences or biography of one person
- Questions, brainstorming, tentative ideas, or speculation
- One-off requests, status updates, TODOs, logistics for this session
- Information scoped only to a single ephemeral task
If the message contains memorizable team information, choose `action = save` \
and return the FULL updated team memory document with new facts merged into \
existing content.
FORMAT RULES FOR `updated_memory`:
- Markdown only.
- Every entry should be under a `##` heading.
- Recommended headings: `## Product Decisions`, `## Engineering Conventions`,
`## Project Facts`, `## Open Questions`.
- New bullets should use: `- YYYY-MM-DD: memory text`.
- If current memory uses legacy `(YYYY-MM-DD) [fact]` markers, preserve the
information but write the updated document in the new heading-based format.
- Do not create personal headings such as `## Preferences`, `## Instructions`,
or `## Personal Notes`.
- Preserve neutral team phrasing; avoid person-specific memory unless role-anchored.
<current_team_memory>
{current_memory}
</current_team_memory>
<latest_message_author>
{author}
</latest_message_author>
<latest_message>
{user_message}
</latest_message>"""

View file

@ -2,9 +2,7 @@
from __future__ import annotations from __future__ import annotations
from typing import Literal from pydantic import BaseModel
from pydantic import BaseModel, Field
class MemoryLimits(BaseModel): class MemoryLimits(BaseModel):
@ -19,19 +17,3 @@ class MemoryRead(BaseModel):
memory_md: str memory_md: str
limits: MemoryLimits limits: MemoryLimits
class MemoryExtractionDecision(BaseModel):
"""Structured extraction result; avoids string sentinel parsing."""
action: Literal["no_update", "save"] = Field(
description="Choose no_update when nothing durable should be saved; choose save otherwise."
)
reason: str | None = Field(
default=None,
description="Short reason for no_update, or brief summary of the memory update.",
)
updated_memory: str | None = Field(
default=None,
description="The full updated markdown memory document when action is save.",
)

View file

@ -8,18 +8,13 @@ from enum import StrEnum
from typing import Any, Literal from typing import Any, Literal
from uuid import UUID from uuid import UUID
from langchain_core.messages import HumanMessage
from sqlalchemy import select from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from app.db import SearchSpace, User from app.db import SearchSpace, User
from app.services.memory.document import parse_memory_document, render_memory_document from app.services.memory.document import parse_memory_document, render_memory_document
from app.services.memory.prompts import (
TEAM_MEMORY_EXTRACT_PROMPT,
USER_MEMORY_EXTRACT_PROMPT,
)
from app.services.memory.rewrite import forced_rewrite from app.services.memory.rewrite import forced_rewrite
from app.services.memory.schemas import MemoryExtractionDecision, MemoryLimits from app.services.memory.schemas import MemoryLimits
from app.services.memory.validation import ( from app.services.memory.validation import (
MEMORY_HARD_LIMIT, MEMORY_HARD_LIMIT,
MEMORY_SOFT_LIMIT, MEMORY_SOFT_LIMIT,
@ -234,74 +229,3 @@ async def reset_memory(
session=session, session=session,
llm=None, llm=None,
) )
async def extract_and_save(
*,
scope: MemoryScope | str,
target_id: str | int | UUID,
user_message: str,
actor_display_name: str | None,
session: AsyncSession,
llm: Any,
) -> SaveResult:
normalized = _normalize_scope(scope)
current_memory = await read_memory(
scope=normalized,
target_id=target_id,
session=session,
)
if normalized is MemoryScope.USER:
first_name = (
actor_display_name.strip().split()[0]
if actor_display_name and actor_display_name.strip()
else "The user"
)
prompt = USER_MEMORY_EXTRACT_PROMPT.format(
current_memory=current_memory or "(empty)",
user_message=user_message,
user_name=first_name,
)
else:
prompt = TEAM_MEMORY_EXTRACT_PROMPT.format(
current_memory=current_memory or "(empty)",
author=actor_display_name or "Unknown team member",
user_message=user_message,
)
try:
structured = llm.with_structured_output(MemoryExtractionDecision)
decision = await structured.ainvoke(
[HumanMessage(content=prompt)],
config={"tags": ["surfsense:internal", "memory-extraction"]},
)
except Exception:
logger.exception("Structured memory extraction failed")
return SaveResult(
status="error",
message="Structured memory extraction failed.",
memory_md=current_memory,
)
if decision.action == "no_update":
return SaveResult(
status="no_op",
message=decision.reason or "No durable memory to persist.",
memory_md=current_memory,
)
if not decision.updated_memory:
return SaveResult(
status="error",
message="Structured memory extraction chose save without updated_memory.",
memory_md=current_memory,
)
return await save_memory(
scope=normalized,
target_id=target_id,
content=decision.updated_memory,
session=session,
llm=llm,
)

View file

@ -39,10 +39,6 @@ from app.agents.new_chat.llm_config import (
load_agent_config, load_agent_config,
load_global_llm_config_by_id, load_global_llm_config_by_id,
) )
from app.agents.new_chat.memory_extraction import (
extract_and_save_memory,
extract_and_save_team_memory,
)
from app.agents.new_chat.mention_resolver import resolve_mentions, substitute_in_text from app.agents.new_chat.mention_resolver import resolve_mentions, substitute_in_text
from app.agents.new_chat.middleware.busy_mutex import ( from app.agents.new_chat.middleware.busy_mutex import (
end_turn, end_turn,
@ -283,7 +279,6 @@ class StreamResult:
accumulated_text: str = "" accumulated_text: str = ""
is_interrupted: bool = False is_interrupted: bool = False
sandbox_files: list[str] = field(default_factory=list) sandbox_files: list[str] = field(default_factory=list)
agent_called_update_memory: bool = False
request_id: str | None = None request_id: str | None = None
turn_id: str = "" turn_id: str = ""
filesystem_mode: str = "cloud" filesystem_mode: str = "cloud"
@ -2208,36 +2203,6 @@ async def stream_new_chat(
}, },
) )
# Fire background memory extraction if the agent didn't handle it.
# Shared threads write to team memory; private threads write to user memory.
if not stream_result.agent_called_update_memory:
memory_seed = user_query.strip() or (
f"[{len(user_image_data_urls or [])} image(s)]"
if user_image_data_urls
else "(message)"
)
if visibility == ChatVisibility.SEARCH_SPACE:
task = asyncio.create_task(
extract_and_save_team_memory(
user_message=memory_seed,
search_space_id=search_space_id,
llm=llm,
author_display_name=current_user_display_name,
)
)
_background_tasks.add(task)
task.add_done_callback(_background_tasks.discard)
elif user_id:
task = asyncio.create_task(
extract_and_save_memory(
user_message=memory_seed,
user_id=user_id,
llm=llm,
)
)
_background_tasks.add(task)
task.add_done_callback(_background_tasks.discard)
# Finish the step and message # Finish the step and message
yield streaming_service.format_data("turn-status", {"status": "idle"}) yield streaming_service.format_data("turn-status", {"status": "idle"})
yield streaming_service.format_finish_step() yield streaming_service.format_finish_step()

View file

@ -48,4 +48,3 @@ async def stream_output(
yield frame yield frame
result.accumulated_text = state.accumulated_text result.accumulated_text = state.accumulated_text
result.agent_called_update_memory = state.called_update_memory

View file

@ -11,7 +11,6 @@ class StreamingResult:
accumulated_text: str = "" accumulated_text: str = ""
is_interrupted: bool = False is_interrupted: bool = False
sandbox_files: list[str] = field(default_factory=list) sandbox_files: list[str] = field(default_factory=list)
agent_called_update_memory: bool = False
request_id: str | None = None request_id: str | None = None
turn_id: str = "" turn_id: str = ""
filesystem_mode: str = "cloud" filesystem_mode: str = "cloud"

View file

@ -36,9 +36,6 @@ def iter_tool_end_frames(
raw_output = event.get("data", {}).get("output", "") raw_output = event.get("data", {}).get("output", "")
staged_file_path = state.file_path_by_run.pop(run_id, None) if run_id else None staged_file_path = state.file_path_by_run.pop(run_id, None) if run_id else None
if tool_name == "update_memory":
state.called_update_memory = True
if hasattr(raw_output, "content"): if hasattr(raw_output, "content"):
content = raw_output.content content = raw_output.content
if isinstance(content, str): if isinstance(content, str):

View file

@ -32,7 +32,6 @@ class AgentEventRelayState:
last_active_step_items: list[str] = field(default_factory=list) last_active_step_items: list[str] = field(default_factory=list)
just_finished_tool: bool = False just_finished_tool: bool = False
active_tool_depth: int = 0 active_tool_depth: int = 0
called_update_memory: bool = False
current_reasoning_id: str | None = None current_reasoning_id: str | None = None
pending_tool_call_chunks: list[dict[str, Any]] = field(default_factory=list) pending_tool_call_chunks: list[dict[str, Any]] = field(default_factory=list)
lc_tool_call_id_by_run: dict[str, str] = field(default_factory=dict) lc_tool_call_id_by_run: dict[str, str] = field(default_factory=dict)

View file

@ -6,11 +6,9 @@ import pytest
from app.services.memory import ( from app.services.memory import (
MemoryScope, MemoryScope,
extract_and_save,
reset_memory, reset_memory,
save_memory, save_memory,
) )
from app.services.memory.schemas import MemoryExtractionDecision
pytestmark = pytest.mark.unit pytestmark = pytest.mark.unit
@ -31,17 +29,6 @@ class _FakeSession:
self.rollback_calls += 1 self.rollback_calls += 1
class _StructuredLLM:
def __init__(self, decision: MemoryExtractionDecision) -> None:
self.decision = decision
def with_structured_output(self, _schema):
return self
async def ainvoke(self, *_args, **_kwargs):
return self.decision
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_save_memory_saves_heading_based_memory(monkeypatch) -> None: async def test_save_memory_saves_heading_based_memory(monkeypatch) -> None:
target = SimpleNamespace(memory_md="") target = SimpleNamespace(memory_md="")
@ -150,57 +137,3 @@ async def test_reset_memory_clears_memory(monkeypatch) -> None:
assert result.status == "saved" assert result.status == "saved"
assert target.memory_md == "" assert target.memory_md == ""
@pytest.mark.asyncio
async def test_extract_and_save_no_update_does_not_commit(monkeypatch) -> None:
target = SimpleNamespace(memory_md="## Facts\n- 2026-05-19: Existing\n")
session = _FakeSession()
async def fake_load_target(**_kwargs):
return target
monkeypatch.setattr("app.services.memory.service._load_target", fake_load_target)
result = await extract_and_save(
scope=MemoryScope.USER,
target_id="00000000-0000-0000-0000-000000000000",
user_message="hello",
actor_display_name="Anish",
session=session,
llm=_StructuredLLM(
MemoryExtractionDecision(action="no_update", reason="Greeting only")
),
)
assert result.status == "no_op"
assert session.commit_calls == 0
@pytest.mark.asyncio
async def test_extract_and_save_persists_structured_update(monkeypatch) -> None:
target = SimpleNamespace(memory_md="")
session = _FakeSession()
async def fake_load_target(**_kwargs):
return target
monkeypatch.setattr("app.services.memory.service._load_target", fake_load_target)
result = await extract_and_save(
scope=MemoryScope.USER,
target_id="00000000-0000-0000-0000-000000000000",
user_message="I work on SurfSense",
actor_display_name="Anish",
session=session,
llm=_StructuredLLM(
MemoryExtractionDecision(
action="save",
updated_memory="## Facts\n- 2026-05-19: Anish works on SurfSense\n",
)
),
)
assert result.status == "saved"
assert "SurfSense" in target.memory_md
assert session.commit_calls == 1

View file

@ -89,7 +89,6 @@ async def test_stream_output_emits_text_lifecycle_and_updates_result() -> None:
"text_end:text-1", "text_end:text-1",
] ]
assert result.accumulated_text == "Hello world" assert result.accumulated_text == "Hello world"
assert result.agent_called_update_memory is False
async def test_stream_output_passes_runtime_context_to_agent() -> None: async def test_stream_output_passes_runtime_context_to_agent() -> None: