refactor: remove memory extraction functions and related components from the new chat agent

2026-07-12 22:42:13 +02:00 · 2026-05-20 14:03:28 +05:30 · 2026-05-20 14:03:28 +05:30 · 132e7b3c44
commit 132e7b3c44
parent a0ff86e0e8
12 changed files with 2 additions and 375 deletions
--- a/surfsense_backend/app/agents/new_chat/memory_extraction.py
+++ b/surfsense_backend/app/agents/new_chat/memory_extraction.py
@ -1,78 +0,0 @@
-"""Background memory extraction for the SurfSense agent."""
-
-from __future__ import annotations
-
-import logging
-from typing import Any
-from uuid import UUID
-
-from app.db import User, shielded_async_session
-from app.services.memory import MemoryScope, extract_and_save
-
-logger = logging.getLogger(__name__)
-
-
-async def extract_and_save_memory(
-    *,
-    user_message: str,
-    user_id: str | None,
-    llm: Any,
-) -> None:
-    """Fire-and-forget personal memory extraction.
-
-    The service uses structured output, so free-form ``NO_UPDATE`` text can no
-    longer be accidentally persisted as memory.
-    """
-    if not user_id:
-        return
-
-    try:
-        uid = UUID(user_id) if isinstance(user_id, str) else user_id
-        async with shielded_async_session() as session:
-            user = await session.get(User, uid)
-            actor_display_name = user.display_name if user else None
-            result = await extract_and_save(
-                scope=MemoryScope.USER,
-                target_id=uid,
-                user_message=user_message,
-                actor_display_name=actor_display_name,
-                session=session,
-                llm=llm,
-            )
-            logger.info(
-                "Background memory extraction for user %s: %s",
-                uid,
-                result.status,
-            )
-    except Exception:
-        logger.exception("Background user memory extraction failed")
-
-
-async def extract_and_save_team_memory(
-    *,
-    user_message: str,
-    search_space_id: int | None,
-    llm: Any,
-    author_display_name: str | None = None,
-) -> None:
-    """Fire-and-forget team-level memory extraction."""
-    if not search_space_id:
-        return
-
-    try:
-        async with shielded_async_session() as session:
-            result = await extract_and_save(
-                scope=MemoryScope.TEAM,
-                target_id=search_space_id,
-                user_message=user_message,
-                actor_display_name=author_display_name,
-                session=session,
-                llm=llm,
-            )
-            logger.info(
-                "Background team memory extraction for space %s: %s",
-                search_space_id,
-                result.status,
-            )
-    except Exception:
-        logger.exception("Background team memory extraction failed")
--- a/surfsense_backend/app/services/memory/init.py
+++ b/surfsense_backend/app/services/memory/init.py
@ -4,7 +4,6 @@ from .schemas import MemoryLimits, MemoryRead
 from .service import (
    MemoryScope,
    SaveResult,
-    extract_and_save,
    memory_limits,
    read_memory,
    reset_memory,
@ -24,7 +23,6 @@ __all__ = [
    "MemoryRead",
    "MemoryScope",
    "SaveResult",
-    "extract_and_save",
    "memory_limits",
    "read_memory",
    "reset_memory",
--- a/surfsense_backend/app/services/memory/prompts.py
+++ b/surfsense_backend/app/services/memory/prompts.py
@ -18,93 +18,3 @@ RULES:
 <memory_document>
 {content}
 </memory_document>"""
-
-USER_MEMORY_EXTRACT_PROMPT = """\
-You are a memory extraction assistant. Analyze the user's message and decide \
-if it contains any long-term information worth persisting to personal memory.
-
-Worth remembering: preferences, background/identity, goals, projects, \
-instructions, tools/languages they use, decisions, expertise, workplace — \
-durable facts that will matter in future conversations.
-
-NOT worth remembering: greetings, one-off factual questions, session \
-logistics, ephemeral requests, follow-up clarifications with no new personal \
-info, things that only matter for the current task.
-
-If there is nothing durable to remember, choose `action = no_update`.
-
-If the message contains memorizable information, choose `action = save` and \
-return the FULL updated memory document with the new information merged into \
-existing content.
-
-FORMAT RULES FOR `updated_memory`:
- Markdown only.
- Every entry should be under a `##` heading.
- Recommended headings: `## Facts`, `## Preferences`, `## Instructions`.
- New bullets should use: `- YYYY-MM-DD: memory text`.
- If current memory uses legacy `(YYYY-MM-DD) [fact|pref|instr]` markers,
-  preserve the information but write the updated document in the new
-  heading-based format.
- Use the user's first name from `<user_name>` when helpful, not "the user".
- Do not duplicate existing information.
-
-<user_name>{user_name}</user_name>
-
-<current_memory>
-{current_memory}
-</current_memory>
-
-<user_message>
-{user_message}
-</user_message>"""
-
-TEAM_MEMORY_EXTRACT_PROMPT = """\
-You are a team-memory extraction assistant. Analyze the latest message and \
-decide if it contains durable TEAM-level information worth persisting.
-
-Decision policy:
- Prioritize recall for durable team context, while avoiding personal-only facts.
- Do NOT require explicit consensus language. A direct team-level statement can
-  be stored if it is stable and broadly useful for future team chats.
- If evidence is weak or clearly tentative, choose `action = no_update`.
-
-Worth remembering (team-level only):
- Decisions and defaults that guide future team work
- Team conventions/standards (naming, review policy, coding norms)
- Stable org/project facts (locations, ownership, constraints)
- Long-lived architecture/process facts
- Ongoing priorities that are likely relevant beyond this turn
-
-NOT worth remembering:
- Personal preferences or biography of one person
- Questions, brainstorming, tentative ideas, or speculation
- One-off requests, status updates, TODOs, logistics for this session
- Information scoped only to a single ephemeral task
-
-If the message contains memorizable team information, choose `action = save` \
-and return the FULL updated team memory document with new facts merged into \
-existing content.
-
-FORMAT RULES FOR `updated_memory`:
- Markdown only.
- Every entry should be under a `##` heading.
- Recommended headings: `## Product Decisions`, `## Engineering Conventions`,
-  `## Project Facts`, `## Open Questions`.
- New bullets should use: `- YYYY-MM-DD: memory text`.
- If current memory uses legacy `(YYYY-MM-DD) [fact]` markers, preserve the
-  information but write the updated document in the new heading-based format.
- Do not create personal headings such as `## Preferences`, `## Instructions`,
-  or `## Personal Notes`.
- Preserve neutral team phrasing; avoid person-specific memory unless role-anchored.
-
-<current_team_memory>
-{current_memory}
-</current_team_memory>
-
-<latest_message_author>
-{author}
-</latest_message_author>
-
-<latest_message>
-{user_message}
-</latest_message>"""
--- a/surfsense_backend/app/services/memory/schemas.py
+++ b/surfsense_backend/app/services/memory/schemas.py
@ -2,9 +2,7 @@

 from __future__ import annotations

-from typing import Literal
-
-from pydantic import BaseModel, Field
+from pydantic import BaseModel


 class MemoryLimits(BaseModel):
@ -19,19 +17,3 @@ class MemoryRead(BaseModel):

    memory_md: str
    limits: MemoryLimits
-
-
-class MemoryExtractionDecision(BaseModel):
-    """Structured extraction result; avoids string sentinel parsing."""
-
-    action: Literal["no_update", "save"] = Field(
-        description="Choose no_update when nothing durable should be saved; choose save otherwise."
-    )
-    reason: str | None = Field(
-        default=None,
-        description="Short reason for no_update, or brief summary of the memory update.",
-    )
-    updated_memory: str | None = Field(
-        default=None,
-        description="The full updated markdown memory document when action is save.",
-    )
--- a/surfsense_backend/app/services/memory/service.py
+++ b/surfsense_backend/app/services/memory/service.py
@ -8,18 +8,13 @@ from enum import StrEnum
 from typing import Any, Literal
 from uuid import UUID

-from langchain_core.messages import HumanMessage
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession

 from app.db import SearchSpace, User
 from app.services.memory.document import parse_memory_document, render_memory_document
-from app.services.memory.prompts import (
-    TEAM_MEMORY_EXTRACT_PROMPT,
-    USER_MEMORY_EXTRACT_PROMPT,
-)
 from app.services.memory.rewrite import forced_rewrite
-from app.services.memory.schemas import MemoryExtractionDecision, MemoryLimits
+from app.services.memory.schemas import MemoryLimits
 from app.services.memory.validation import (
    MEMORY_HARD_LIMIT,
    MEMORY_SOFT_LIMIT,
@ -234,74 +229,3 @@ async def reset_memory(
        session=session,
        llm=None,
    )
-
-
-async def extract_and_save(
-    *,
-    scope: MemoryScope | str,
-    target_id: str | int | UUID,
-    user_message: str,
-    actor_display_name: str | None,
-    session: AsyncSession,
-    llm: Any,
-) -> SaveResult:
-    normalized = _normalize_scope(scope)
-    current_memory = await read_memory(
-        scope=normalized,
-        target_id=target_id,
-        session=session,
-    )
-
-    if normalized is MemoryScope.USER:
-        first_name = (
-            actor_display_name.strip().split()[0]
-            if actor_display_name and actor_display_name.strip()
-            else "The user"
-        )
-        prompt = USER_MEMORY_EXTRACT_PROMPT.format(
-            current_memory=current_memory or "(empty)",
-            user_message=user_message,
-            user_name=first_name,
-        )
-    else:
-        prompt = TEAM_MEMORY_EXTRACT_PROMPT.format(
-            current_memory=current_memory or "(empty)",
-            author=actor_display_name or "Unknown team member",
-            user_message=user_message,
-        )
-
-    try:
-        structured = llm.with_structured_output(MemoryExtractionDecision)
-        decision = await structured.ainvoke(
-            [HumanMessage(content=prompt)],
-            config={"tags": ["surfsense:internal", "memory-extraction"]},
-        )
-    except Exception:
-        logger.exception("Structured memory extraction failed")
-        return SaveResult(
-            status="error",
-            message="Structured memory extraction failed.",
-            memory_md=current_memory,
-        )
-
-    if decision.action == "no_update":
-        return SaveResult(
-            status="no_op",
-            message=decision.reason or "No durable memory to persist.",
-            memory_md=current_memory,
-        )
-
-    if not decision.updated_memory:
-        return SaveResult(
-            status="error",
-            message="Structured memory extraction chose save without updated_memory.",
-            memory_md=current_memory,
-        )
-
-    return await save_memory(
-        scope=normalized,
-        target_id=target_id,
-        content=decision.updated_memory,
-        session=session,
-        llm=llm,
-    )
--- a/surfsense_backend/app/tasks/chat/stream_new_chat.py
+++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py
@ -39,10 +39,6 @@ from app.agents.new_chat.llm_config import (
    load_agent_config,
    load_global_llm_config_by_id,
 )
-from app.agents.new_chat.memory_extraction import (
-    extract_and_save_memory,
-    extract_and_save_team_memory,
-)
 from app.agents.new_chat.mention_resolver import resolve_mentions, substitute_in_text
 from app.agents.new_chat.middleware.busy_mutex import (
    end_turn,
@ -283,7 +279,6 @@ class StreamResult:
    accumulated_text: str = ""
    is_interrupted: bool = False
    sandbox_files: list[str] = field(default_factory=list)
-    agent_called_update_memory: bool = False
    request_id: str | None = None
    turn_id: str = ""
    filesystem_mode: str = "cloud"
@ -2208,36 +2203,6 @@ async def stream_new_chat(
                },
            )

-        # Fire background memory extraction if the agent didn't handle it.
-        # Shared threads write to team memory; private threads write to user memory.
-        if not stream_result.agent_called_update_memory:
-            memory_seed = user_query.strip() or (
-                f"[{len(user_image_data_urls or [])} image(s)]"
-                if user_image_data_urls
-                else "(message)"
-            )
-            if visibility == ChatVisibility.SEARCH_SPACE:
-                task = asyncio.create_task(
-                    extract_and_save_team_memory(
-                        user_message=memory_seed,
-                        search_space_id=search_space_id,
-                        llm=llm,
-                        author_display_name=current_user_display_name,
-                    )
-                )
-                _background_tasks.add(task)
-                task.add_done_callback(_background_tasks.discard)
-            elif user_id:
-                task = asyncio.create_task(
-                    extract_and_save_memory(
-                        user_message=memory_seed,
-                        user_id=user_id,
-                        llm=llm,
-                    )
-                )
-                _background_tasks.add(task)
-                task.add_done_callback(_background_tasks.discard)
-
        # Finish the step and message
        yield streaming_service.format_data("turn-status", {"status": "idle"})
        yield streaming_service.format_finish_step()
--- a/surfsense_backend/app/tasks/chat/streaming/graph_stream/event_stream.py
+++ b/surfsense_backend/app/tasks/chat/streaming/graph_stream/event_stream.py
@ -48,4 +48,3 @@ async def stream_output(
        yield frame

    result.accumulated_text = state.accumulated_text
-    result.agent_called_update_memory = state.called_update_memory
--- a/surfsense_backend/app/tasks/chat/streaming/graph_stream/result.py
+++ b/surfsense_backend/app/tasks/chat/streaming/graph_stream/result.py
@ -11,7 +11,6 @@ class StreamingResult:
    accumulated_text: str = ""
    is_interrupted: bool = False
    sandbox_files: list[str] = field(default_factory=list)
-    agent_called_update_memory: bool = False
    request_id: str | None = None
    turn_id: str = ""
    filesystem_mode: str = "cloud"
--- a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py
+++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py
@ -36,9 +36,6 @@ def iter_tool_end_frames(
    raw_output = event.get("data", {}).get("output", "")
    staged_file_path = state.file_path_by_run.pop(run_id, None) if run_id else None

-    if tool_name == "update_memory":
-        state.called_update_memory = True
-
    if hasattr(raw_output, "content"):
        content = raw_output.content
        if isinstance(content, str):
--- a/surfsense_backend/app/tasks/chat/streaming/relay/state.py
+++ b/surfsense_backend/app/tasks/chat/streaming/relay/state.py
@ -32,7 +32,6 @@ class AgentEventRelayState:
    last_active_step_items: list[str] = field(default_factory=list)
    just_finished_tool: bool = False
    active_tool_depth: int = 0
-    called_update_memory: bool = False
    current_reasoning_id: str | None = None
    pending_tool_call_chunks: list[dict[str, Any]] = field(default_factory=list)
    lc_tool_call_id_by_run: dict[str, str] = field(default_factory=dict)
--- a/surfsense_backend/tests/unit/services/test_memory_service.py
+++ b/surfsense_backend/tests/unit/services/test_memory_service.py
@ -6,11 +6,9 @@ import pytest

 from app.services.memory import (
    MemoryScope,
-    extract_and_save,
    reset_memory,
    save_memory,
 )
-from app.services.memory.schemas import MemoryExtractionDecision

 pytestmark = pytest.mark.unit

@ -31,17 +29,6 @@ class _FakeSession:
        self.rollback_calls += 1


-class _StructuredLLM:
-    def __init__(self, decision: MemoryExtractionDecision) -> None:
-        self.decision = decision
-
-    def with_structured_output(self, _schema):
-        return self
-
-    async def ainvoke(self, *_args, **_kwargs):
-        return self.decision
-
-
@pytest.mark.asyncio
 async def test_save_memory_saves_heading_based_memory(monkeypatch) -> None:
    target = SimpleNamespace(memory_md="")
@ -150,57 +137,3 @@ async def test_reset_memory_clears_memory(monkeypatch) -> None:

    assert result.status == "saved"
    assert target.memory_md == ""
-
-
-@pytest.mark.asyncio
-async def test_extract_and_save_no_update_does_not_commit(monkeypatch) -> None:
-    target = SimpleNamespace(memory_md="## Facts\n- 2026-05-19: Existing\n")
-    session = _FakeSession()
-
-    async def fake_load_target(**_kwargs):
-        return target
-
-    monkeypatch.setattr("app.services.memory.service._load_target", fake_load_target)
-
-    result = await extract_and_save(
-        scope=MemoryScope.USER,
-        target_id="00000000-0000-0000-0000-000000000000",
-        user_message="hello",
-        actor_display_name="Anish",
-        session=session,
-        llm=_StructuredLLM(
-            MemoryExtractionDecision(action="no_update", reason="Greeting only")
-        ),
-    )
-
-    assert result.status == "no_op"
-    assert session.commit_calls == 0
-
-
-@pytest.mark.asyncio
-async def test_extract_and_save_persists_structured_update(monkeypatch) -> None:
-    target = SimpleNamespace(memory_md="")
-    session = _FakeSession()
-
-    async def fake_load_target(**_kwargs):
-        return target
-
-    monkeypatch.setattr("app.services.memory.service._load_target", fake_load_target)
-
-    result = await extract_and_save(
-        scope=MemoryScope.USER,
-        target_id="00000000-0000-0000-0000-000000000000",
-        user_message="I work on SurfSense",
-        actor_display_name="Anish",
-        session=session,
-        llm=_StructuredLLM(
-            MemoryExtractionDecision(
-                action="save",
-                updated_memory="## Facts\n- 2026-05-19: Anish works on SurfSense\n",
-            )
-        ),
-    )
-
-    assert result.status == "saved"
-    assert "SurfSense" in target.memory_md
-    assert session.commit_calls == 1
--- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_stream_output.py
+++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_stream_output.py
@ -89,7 +89,6 @@ async def test_stream_output_emits_text_lifecycle_and_updates_result() -> None:
        "text_end:text-1",
    ]
    assert result.accumulated_text == "Hello world"
-    assert result.agent_called_update_memory is False


 async def test_stream_output_passes_runtime_context_to_agent() -> None: