refactor: remove memory extraction functions and related components from the new chat agent

2026-05-27 19:25:15 +02:00 · 2026-05-20 14:03:28 +05:30 · 2026-05-20 14:03:28 +05:30 · 132e7b3c44
commit 132e7b3c44
parent a0ff86e0e8
12 changed files with 2 additions and 375 deletions
--- a/surfsense_backend/app/agents/new_chat/memory_extraction.py
+++ b/surfsense_backend/app/agents/new_chat/memory_extraction.py
@ -1,78 +0,0 @@
 """Background memory extraction for the SurfSense agent."""
 from __future__ import annotations
 import logging
 from typing import Any
 from uuid import UUID
 from app.db import User, shielded_async_session
 from app.services.memory import MemoryScope, extract_and_save
 logger = logging.getLogger(__name__)
 async def extract_and_save_memory(
    *,
    user_message: str,
    user_id: str | None,
    llm: Any,
 ) -> None:
    """Run personal (user-scope) memory extraction for one message.
    Returns immediately when ``user_id`` is falsy. Otherwise the user row is
    looked up inside a shielded session so its display name can be passed to
    ``extract_and_save``, and the resulting status is logged.
    Extraction goes through the structured-output service, so a free-form
    ``NO_UPDATE`` reply cannot end up stored as memory.
    Args:
        user_message: Text of the message to mine for durable facts.
        user_id: Owner of the memory; a string value is parsed as a UUID.
        llm: Model object forwarded unchanged to ``extract_and_save``.
    Any ``Exception`` raised along the way is logged and suppressed.
    """
    if not user_id:
        return
    try:
        if isinstance(user_id, str):
            owner_id = UUID(user_id)
        else:
            owner_id = user_id
        async with shielded_async_session() as db:
            owner = await db.get(User, owner_id)
            # No matching row means no display name to offer the prompt.
            display_name = None
            if owner:
                display_name = owner.display_name
            outcome = await extract_and_save(
                scope=MemoryScope.USER,
                target_id=owner_id,
                user_message=user_message,
                actor_display_name=display_name,
                session=db,
                llm=llm,
            )
            logger.info(
                "Background memory extraction for user %s: %s",
                owner_id,
                outcome.status,
            )
    except Exception:
        logger.exception("Background user memory extraction failed")
 async def extract_and_save_team_memory(
    *,
    user_message: str,
    search_space_id: int | None,
    llm: Any,
    author_display_name: str | None = None,
 ) -> None:
    """Run team (search-space scope) memory extraction for one message.
    Returns immediately when ``search_space_id`` is falsy. Otherwise
    ``extract_and_save`` is called with ``MemoryScope.TEAM`` inside a shielded
    session and the resulting status is logged.
    Args:
        user_message: Text of the message to mine for durable team facts.
        search_space_id: Identifier of the search space that owns the memory.
        llm: Model object forwarded unchanged to ``extract_and_save``.
        author_display_name: Optional name of the message author.
    Any ``Exception`` raised along the way is logged and suppressed.
    """
    if not search_space_id:
        return
    try:
        async with shielded_async_session() as db:
            outcome = await extract_and_save(
                scope=MemoryScope.TEAM,
                target_id=search_space_id,
                user_message=user_message,
                actor_display_name=author_display_name,
                session=db,
                llm=llm,
            )
            logger.info(
                "Background team memory extraction for space %s: %s",
                search_space_id,
                outcome.status,
            )
    except Exception:
        logger.exception("Background team memory extraction failed")
--- a/surfsense_backend/app/services/memory/__init__.py
+++ b/surfsense_backend/app/services/memory/__init__.py
@ -4,7 +4,6 @@ from .schemas import MemoryLimits, MemoryRead
 from .service import (
    MemoryScope,
    SaveResult,
    extract_and_save,
    memory_limits,
    read_memory,
    reset_memory,
@ -24,7 +23,6 @@ __all__ = [
    "MemoryRead",
    "MemoryScope",
    "SaveResult",
    "extract_and_save",
    "memory_limits",
    "read_memory",
    "reset_memory",
--- a/surfsense_backend/app/services/memory/prompts.py
+++ b/surfsense_backend/app/services/memory/prompts.py
@ -18,93 +18,3 @@ RULES:
 <memory_document>
 {content}
 </memory_document>"""
 # Prompt template for personal (user-scope) memory extraction. It carries three
 # ``str.format`` placeholders: ``user_name``, ``current_memory`` and
 # ``user_message``. The model is told to answer with ``action = no_update`` or
 # ``action = save`` and, for a save, to return the full merged markdown
 # document as ``updated_memory``.
 USER_MEMORY_EXTRACT_PROMPT = """\
 You are a memory extraction assistant. Analyze the user's message and decide \
 if it contains any long-term information worth persisting to personal memory.
 Worth remembering: preferences, background/identity, goals, projects, \
 instructions, tools/languages they use, decisions, expertise, workplace — \
 durable facts that will matter in future conversations.
 NOT worth remembering: greetings, one-off factual questions, session \
 logistics, ephemeral requests, follow-up clarifications with no new personal \
 info, things that only matter for the current task.
 If there is nothing durable to remember, choose `action = no_update`.
 If the message contains memorizable information, choose `action = save` and \
 return the FULL updated memory document with the new information merged into \
 existing content.
 FORMAT RULES FOR `updated_memory`:
 - Markdown only.
 - Every entry should be under a `##` heading.
 - Recommended headings: `## Facts`, `## Preferences`, `## Instructions`.
 - New bullets should use: `- YYYY-MM-DD: memory text`.
 - If current memory uses legacy `(YYYY-MM-DD) [fact|pref|instr]` markers,
  preserve the information but write the updated document in the new
  heading-based format.
 - Use the user's first name from `<user_name>` when helpful, not "the user".
 - Do not duplicate existing information.
 <user_name>{user_name}</user_name>
 <current_memory>
 {current_memory}
 </current_memory>
 <user_message>
 {user_message}
 </user_message>"""
 # Prompt template for team-level memory extraction. It carries three
 # ``str.format`` placeholders: ``current_memory``, ``author`` and
 # ``user_message``. The model is told to keep only durable team context, to
 # answer with ``action = no_update`` or ``action = save``, and for a save to
 # return the full merged markdown document as ``updated_memory``.
 TEAM_MEMORY_EXTRACT_PROMPT = """\
 You are a team-memory extraction assistant. Analyze the latest message and \
 decide if it contains durable TEAM-level information worth persisting.
 Decision policy:
 - Prioritize recall for durable team context, while avoiding personal-only facts.
 - Do NOT require explicit consensus language. A direct team-level statement can
  be stored if it is stable and broadly useful for future team chats.
 - If evidence is weak or clearly tentative, choose `action = no_update`.
 Worth remembering (team-level only):
 - Decisions and defaults that guide future team work
 - Team conventions/standards (naming, review policy, coding norms)
 - Stable org/project facts (locations, ownership, constraints)
 - Long-lived architecture/process facts
 - Ongoing priorities that are likely relevant beyond this turn
 NOT worth remembering:
 - Personal preferences or biography of one person
 - Questions, brainstorming, tentative ideas, or speculation
 - One-off requests, status updates, TODOs, logistics for this session
 - Information scoped only to a single ephemeral task
 If the message contains memorizable team information, choose `action = save` \
 and return the FULL updated team memory document with new facts merged into \
 existing content.
 FORMAT RULES FOR `updated_memory`:
 - Markdown only.
 - Every entry should be under a `##` heading.
 - Recommended headings: `## Product Decisions`, `## Engineering Conventions`,
  `## Project Facts`, `## Open Questions`.
 - New bullets should use: `- YYYY-MM-DD: memory text`.
 - If current memory uses legacy `(YYYY-MM-DD) [fact]` markers, preserve the
 information but write the updated document in the new heading-based format.
 - Do not create personal headings such as `## Preferences`, `## Instructions`,
  or `## Personal Notes`.
 - Preserve neutral team phrasing; avoid person-specific memory unless role-anchored.
 <current_team_memory>
 {current_memory}
 </current_team_memory>
 <latest_message_author>
 {author}
 </latest_message_author>
 <latest_message>
 {user_message}
 </latest_message>"""
--- a/surfsense_backend/app/services/memory/schemas.py
+++ b/surfsense_backend/app/services/memory/schemas.py
@ -2,9 +2,7 @@
 from __future__ import annotations
-from typing import Literal
+from pydantic import BaseModel
 from pydantic import BaseModel, Field
 class MemoryLimits(BaseModel):
@ -19,19 +17,3 @@ class MemoryRead(BaseModel):
    memory_md: str
    limits: MemoryLimits
 # Typed answer the extraction model must produce: a two-valued ``action`` plus
 # an optional explanation and an optional replacement memory document.
 # NOTE(review): the docstring and Field descriptions are left verbatim;
 # presumably they are surfaced to the model through the generated schema, so
 # confirm before rewording them.
 class MemoryExtractionDecision(BaseModel):
    """Structured extraction result; avoids string sentinel parsing."""
    # Required; restricted to exactly "no_update" or "save".
    action: Literal["no_update", "save"] = Field(
        description="Choose no_update when nothing durable should be saved; choose save otherwise."
    )
    # Optional free-text justification; defaults to None.
    reason: str | None = Field(
        default=None,
        description="Short reason for no_update, or brief summary of the memory update.",
    )
    # Optional full markdown document; defaults to None, so a "save" answer can
    # still arrive without it and callers have to handle that case.
    updated_memory: str | None = Field(
        default=None,
        description="The full updated markdown memory document when action is save.",
    )
--- a/surfsense_backend/app/services/memory/service.py
+++ b/surfsense_backend/app/services/memory/service.py
@ -8,18 +8,13 @@ from enum import StrEnum
 from typing import Any, Literal
 from uuid import UUID
 from langchain_core.messages import HumanMessage
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession
 from app.db import SearchSpace, User
 from app.services.memory.document import parse_memory_document, render_memory_document
 from app.services.memory.prompts import (
    TEAM_MEMORY_EXTRACT_PROMPT,
    USER_MEMORY_EXTRACT_PROMPT,
 )
 from app.services.memory.rewrite import forced_rewrite
-from app.services.memory.schemas import MemoryExtractionDecision, MemoryLimits
+from app.services.memory.schemas import MemoryLimits
 from app.services.memory.validation import (
    MEMORY_HARD_LIMIT,
    MEMORY_SOFT_LIMIT,
@ -234,74 +229,3 @@ async def reset_memory(
        session=session,
        llm=None,
    )
 async def extract_and_save(
    *,
    scope: MemoryScope | str,
    target_id: str | int | UUID,
    user_message: str,
    actor_display_name: str | None,
    session: AsyncSession,
    llm: Any,
 ) -> SaveResult:
    """Ask the LLM whether ``user_message`` holds durable memory and persist it.
    The memory currently stored for ``scope``/``target_id`` is read, embedded
    in the user- or team-level extraction prompt, and sent to ``llm`` with
    ``MemoryExtractionDecision`` as the structured-output schema.
    Returns:
        A ``SaveResult`` whose status is ``"error"`` when the structured call
        raises or the model answers ``save`` without ``updated_memory``,
        ``"no_op"`` when the model answers ``no_update``, and otherwise
        whatever ``save_memory`` returns for the updated document. In the
        error and no-op cases ``memory_md`` is the memory read at the start.
    """
    resolved_scope = _normalize_scope(scope)
    existing_memory = await read_memory(
        scope=resolved_scope,
        target_id=target_id,
        session=session,
    )
    # Both templates show the same stand-in when nothing is stored yet.
    memory_for_prompt = existing_memory or "(empty)"
    if resolved_scope is MemoryScope.USER:
        # First whitespace-separated token of the display name, if there is one.
        name_parts = actor_display_name.strip().split() if actor_display_name else []
        prompt = USER_MEMORY_EXTRACT_PROMPT.format(
            current_memory=memory_for_prompt,
            user_message=user_message,
            user_name=name_parts[0] if name_parts else "The user",
        )
    else:
        prompt = TEAM_MEMORY_EXTRACT_PROMPT.format(
            current_memory=memory_for_prompt,
            author=actor_display_name or "Unknown team member",
            user_message=user_message,
        )
    try:
        decision = await llm.with_structured_output(MemoryExtractionDecision).ainvoke(
            [HumanMessage(content=prompt)],
            config={"tags": ["surfsense:internal", "memory-extraction"]},
        )
    except Exception:
        logger.exception("Structured memory extraction failed")
        return SaveResult(
            status="error",
            message="Structured memory extraction failed.",
            memory_md=existing_memory,
        )
    if decision.action == "no_update":
        return SaveResult(
            status="no_op",
            message=decision.reason or "No durable memory to persist.",
            memory_md=existing_memory,
        )
    if decision.updated_memory:
        return await save_memory(
            scope=resolved_scope,
            target_id=target_id,
            content=decision.updated_memory,
            session=session,
            llm=llm,
        )
    # A "save" answer with no document attached cannot be persisted.
    return SaveResult(
        status="error",
        message="Structured memory extraction chose save without updated_memory.",
        memory_md=existing_memory,
    )
--- a/surfsense_backend/app/tasks/chat/stream_new_chat.py
+++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py
@ -39,10 +39,6 @@ from app.agents.new_chat.llm_config import (
    load_agent_config,
    load_global_llm_config_by_id,
 )
 from app.agents.new_chat.memory_extraction import (
    extract_and_save_memory,
    extract_and_save_team_memory,
 )
 from app.agents.new_chat.mention_resolver import resolve_mentions, substitute_in_text
 from app.agents.new_chat.middleware.busy_mutex import (
    end_turn,
@ -283,7 +279,6 @@ class StreamResult:
    accumulated_text: str = ""
    is_interrupted: bool = False
    sandbox_files: list[str] = field(default_factory=list)
    agent_called_update_memory: bool = False
    request_id: str | None = None
    turn_id: str = ""
    filesystem_mode: str = "cloud"
@ -2208,36 +2203,6 @@ async def stream_new_chat(
                },
            )
        # Fire background memory extraction if the agent didn't handle it.
        # Shared threads write to team memory; private threads write to user memory.
        if not stream_result.agent_called_update_memory:
            memory_seed = user_query.strip() or (
                f"[{len(user_image_data_urls or [])} image(s)]"
                if user_image_data_urls
                else "(message)"
            )
            if visibility == ChatVisibility.SEARCH_SPACE:
                task = asyncio.create_task(
                    extract_and_save_team_memory(
                        user_message=memory_seed,
                        search_space_id=search_space_id,
                        llm=llm,
                        author_display_name=current_user_display_name,
                    )
                )
                _background_tasks.add(task)
                task.add_done_callback(_background_tasks.discard)
            elif user_id:
                task = asyncio.create_task(
                    extract_and_save_memory(
                        user_message=memory_seed,
                        user_id=user_id,
                        llm=llm,
                    )
                )
                _background_tasks.add(task)
                task.add_done_callback(_background_tasks.discard)
        # Finish the step and message
        yield streaming_service.format_data("turn-status", {"status": "idle"})
        yield streaming_service.format_finish_step()
--- a/surfsense_backend/app/tasks/chat/streaming/graph_stream/event_stream.py
+++ b/surfsense_backend/app/tasks/chat/streaming/graph_stream/event_stream.py
@ -48,4 +48,3 @@ async def stream_output(
        yield frame
    result.accumulated_text = state.accumulated_text
    result.agent_called_update_memory = state.called_update_memory
--- a/surfsense_backend/app/tasks/chat/streaming/graph_stream/result.py
+++ b/surfsense_backend/app/tasks/chat/streaming/graph_stream/result.py
@ -11,7 +11,6 @@ class StreamingResult:
    accumulated_text: str = ""
    is_interrupted: bool = False
    sandbox_files: list[str] = field(default_factory=list)
    agent_called_update_memory: bool = False
    request_id: str | None = None
    turn_id: str = ""
    filesystem_mode: str = "cloud"
--- a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py
+++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py
@ -36,9 +36,6 @@ def iter_tool_end_frames(
    raw_output = event.get("data", {}).get("output", "")
    staged_file_path = state.file_path_by_run.pop(run_id, None) if run_id else None
    if tool_name == "update_memory":
        state.called_update_memory = True
    if hasattr(raw_output, "content"):
        content = raw_output.content
        if isinstance(content, str):
--- a/surfsense_backend/app/tasks/chat/streaming/relay/state.py
+++ b/surfsense_backend/app/tasks/chat/streaming/relay/state.py
@ -32,7 +32,6 @@ class AgentEventRelayState:
    last_active_step_items: list[str] = field(default_factory=list)
    just_finished_tool: bool = False
    active_tool_depth: int = 0
    called_update_memory: bool = False
    current_reasoning_id: str | None = None
    pending_tool_call_chunks: list[dict[str, Any]] = field(default_factory=list)
    lc_tool_call_id_by_run: dict[str, str] = field(default_factory=dict)
--- a/surfsense_backend/tests/unit/services/test_memory_service.py
+++ b/surfsense_backend/tests/unit/services/test_memory_service.py
@ -6,11 +6,9 @@ import pytest
 from app.services.memory import (
    MemoryScope,
    extract_and_save,
    reset_memory,
    save_memory,
 )
 from app.services.memory.schemas import MemoryExtractionDecision
 pytestmark = pytest.mark.unit
@ -31,17 +29,6 @@ class _FakeSession:
        self.rollback_calls += 1
 class _StructuredLLM:
    """Test double that answers every invocation with one canned decision."""
    def __init__(self, decision: MemoryExtractionDecision) -> None:
        # Handed back verbatim by ``ainvoke``.
        self.decision = decision
    def with_structured_output(self, _schema):
        """Ignore the schema and act as the structured runnable itself."""
        return self
    async def ainvoke(self, *_positional, **_named):
        """Return the stored decision regardless of the arguments given."""
        return self.decision
@pytest.mark.asyncio
 async def test_save_memory_saves_heading_based_memory(monkeypatch) -> None:
    target = SimpleNamespace(memory_md="")
@ -150,57 +137,3 @@ async def test_reset_memory_clears_memory(monkeypatch) -> None:
    assert result.status == "saved"
    assert target.memory_md == ""
@pytest.mark.asyncio
 async def test_extract_and_save_no_update_does_not_commit(monkeypatch) -> None:
    target = SimpleNamespace(memory_md="## Facts\n- 2026-05-19: Existing\n")
    session = _FakeSession()
    async def fake_load_target(**_kwargs):
        return target
    monkeypatch.setattr("app.services.memory.service._load_target", fake_load_target)
    result = await extract_and_save(
        scope=MemoryScope.USER,
        target_id="00000000-0000-0000-0000-000000000000",
        user_message="hello",
        actor_display_name="Anish",
        session=session,
        llm=_StructuredLLM(
            MemoryExtractionDecision(action="no_update", reason="Greeting only")
        ),
    )
    assert result.status == "no_op"
    assert session.commit_calls == 0
@pytest.mark.asyncio
 async def test_extract_and_save_persists_structured_update(monkeypatch) -> None:
    target = SimpleNamespace(memory_md="")
    session = _FakeSession()
    async def fake_load_target(**_kwargs):
        return target
    monkeypatch.setattr("app.services.memory.service._load_target", fake_load_target)
    result = await extract_and_save(
        scope=MemoryScope.USER,
        target_id="00000000-0000-0000-0000-000000000000",
        user_message="I work on SurfSense",
        actor_display_name="Anish",
        session=session,
        llm=_StructuredLLM(
            MemoryExtractionDecision(
                action="save",
                updated_memory="## Facts\n- 2026-05-19: Anish works on SurfSense\n",
            )
        ),
    )
    assert result.status == "saved"
    assert "SurfSense" in target.memory_md
    assert session.commit_calls == 1
--- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_stream_output.py
+++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_stream_output.py
@ -89,7 +89,6 @@ async def test_stream_output_emits_text_lifecycle_and_updates_result() -> None:
        "text_end:text-1",
    ]
    assert result.accumulated_text == "Hello world"
    assert result.agent_called_update_memory is False
 async def test_stream_output_passes_runtime_context_to_agent() -> None: