Merge remote-tracking branch 'upstream/dev' into improvement-agent-speed

Resolves: surfsense_backend/app/agents/new_chat/middleware/memory_injection.py - Took both imports: upstream moved MEMORY_HARD_LIMIT/SOFT_LIMIT to app.services.memory; kept our perf-logger import for timing. Pulls in upstream changes: - Memory document feature (services/memory refactor, removal of app.agents.new_chat.memory_extraction and background extraction in stream_new_chat — agent now drives memory via update_memory tool). - BACKEND_URL env refactor across web tool-ui/editor/chat/dashboard/lib. - GitHub Actions backend test workflow + pre-commit biome bump. - Token-display polish in MessageInfoDropdown; save_memory no-update sentinel. Verified: 1723 unit tests pass, ruff clean. No semantic regression in stream_new_chat (their memory-extraction deletion and our preflight removal touch different functions).
2026-05-25 19:15:18 +02:00 · 2026-05-20 21:23:48 +02:00 · 2026-05-20 21:23:48 +02:00 · 49da7a57df
commit 49da7a57df
parent d5ee8cc4cd 883ac81ce1
79 changed files with 1992 additions and 2296 deletions
--- a/surfsense_backend/app/services/memory/__init__.py
+++ b/surfsense_backend/app/services/memory/__init__.py
@ -0,0 +1,32 @@
+"""First-class memory service for user and team markdown memory."""
+
+from .schemas import MemoryLimits, MemoryRead
+from .service import (
+    MemoryScope,
+    SaveResult,
+    memory_limits,
+    read_memory,
+    reset_memory,
+    save_memory,
+)
+from .validation import (
+    MEMORY_HARD_LIMIT,
+    MEMORY_SOFT_LIMIT,
+    validate_bullet_format,
+    validate_memory_scope,
+)
+
+__all__ = [
+    "MEMORY_HARD_LIMIT",
+    "MEMORY_SOFT_LIMIT",
+    "MemoryLimits",
+    "MemoryRead",
+    "MemoryScope",
+    "SaveResult",
+    "memory_limits",
+    "read_memory",
+    "reset_memory",
+    "save_memory",
+    "validate_bullet_format",
+    "validate_memory_scope",
+]
--- a/surfsense_backend/app/services/memory/document.py
+++ b/surfsense_backend/app/services/memory/document.py
@ -0,0 +1,200 @@
+"""Memory-specific markdown document model and canonical renderer.
+
+This intentionally parses only SurfSense memory's small markdown contract:
+``##`` sections with dated bullet items. Unknown lines are preserved so user
+edits are not lost, while legacy marker bullets are normalized on render.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from datetime import date
+
+# Heading assigned to bullets that appear before any explicit ``##`` heading.
+DEFAULT_LEGACY_SECTION = "Memory"
+# Inline markers accepted in legacy ``(YYYY-MM-DD) [marker] text`` bullets.
+LEGACY_MARKERS = frozenset({"fact", "pref", "instr"})
+
+
+@dataclass(frozen=True)
+class MemoryBullet:
+    """A dated memory item; rendered canonically as ``- YYYY-MM-DD: text``."""
+
+    entry_date: date
+    text: str
+
+
+@dataclass(frozen=True)
+class MemoryRawLine:
+    """A section line that is not a recognized bullet; its text is rendered unchanged."""
+
+    text: str
+
+
+# A section line is either a parsed bullet or preserved raw text.
+MemoryLine = MemoryBullet | MemoryRawLine
+
+
+@dataclass(frozen=True)
+class MemorySection:
+    """A heading together with the lines that follow it."""
+
+    heading: str
+    lines: list[MemoryLine] = field(default_factory=list)
+    # False when the parser synthesized the heading (DEFAULT_LEGACY_SECTION)
+    # for bullets found before any real ``##`` heading.
+    explicit_heading: bool = True
+
+
+@dataclass(frozen=True)
+class MemoryDocument:
+    """Parsed memory markdown: an ordered list of sections."""
+
+    sections: list[MemorySection] = field(default_factory=list)
+
+    @property
+    def has_explicit_heading(self) -> bool:
+        """Whether at least one section has ``explicit_heading`` set."""
+        for section in self.sections:
+            if section.explicit_heading:
+                return True
+        return False
+
+
+def is_section_heading(line: str) -> bool:
+    """Return True when *line* is ``## `` followed by non-blank heading text."""
+    if not line.startswith("## "):
+        return False
+    title = line[3:].strip()
+    return title != ""
+
+
+def heading_text(line: str) -> str:
+    """Return *line* without its first three characters, whitespace-trimmed."""
+    title = line[3:]
+    return title.strip()
+
+
+def normalize_heading(heading: str) -> str:
+    """Lower-case *heading* and collapse each non-alphanumeric run to one space.
+
+    Leading and trailing separators are dropped, so ``"  Personal-Notes!! "``
+    becomes ``"personal notes"``.
+    """
+    lowered = heading.strip().lower()
+    # Map every separator to a space, then let split/join collapse the runs.
+    spaced = "".join(char if char.isalnum() else " " for char in lowered)
+    return " ".join(spaced.split())
+
+
+def parse_bullet_line(line: str) -> MemoryBullet | None:
+    """Parse one ``- `` bullet, trying the canonical format before the legacy one.
+
+    Returns None when *line* is not a bullet or matches neither format.
+    """
+    candidate = line.strip()
+    if candidate[:2] != "- ":
+        return None
+
+    payload = candidate[2:]
+    canonical = _parse_canonical_bullet(payload)
+    return canonical if canonical is not None else _parse_legacy_bullet(payload)
+
+
+def _parse_canonical_bullet(body: str) -> MemoryBullet | None:
+    """Parse a ``YYYY-MM-DD: text`` bullet body; return None on any mismatch."""
+    date_part, separator, remainder = body[:10], body[10:12], body[12:]
+    if len(body) < 13 or separator != ": ":
+        return None
+    try:
+        parsed_date = date.fromisoformat(date_part)
+    except ValueError:
+        return None
+    memory_text = remainder.strip()
+    if memory_text:
+        return MemoryBullet(entry_date=parsed_date, text=memory_text)
+    return None
+
+
+def _parse_legacy_bullet(body: str) -> MemoryBullet | None:
+    """Parse a legacy ``(YYYY-MM-DD) [marker] text`` bullet body.
+
+    Returns None unless the date parses as ISO, the marker is one of
+    ``LEGACY_MARKERS``, and non-blank text follows the marker.
+    """
+    # Bodies shorter than 20 characters are rejected first, so the fixed-offset
+    # index and slices below never run past the end.
+    if len(body) < 20 or body[0] != "(" or body[11:14] != ") [":
+        return None
+    try:
+        parsed_date = date.fromisoformat(body[1:11])
+    except ValueError:
+        return None
+
+    close_at = body.find("] ", 14)
+    if close_at == -1 or body[14:close_at] not in LEGACY_MARKERS:
+        return None
+
+    memory_text = body[close_at + 2 :].strip()
+    if memory_text:
+        return MemoryBullet(entry_date=parsed_date, text=memory_text)
+    return None
+
+
+def parse_memory_document(content: str | None) -> MemoryDocument:
+    """Parse memory markdown into sections of bullets and raw lines.
+
+    A ``##`` heading opens a new section. Inside a section, lines that parse as
+    bullets become ``MemoryBullet`` and everything else is kept as
+    ``MemoryRawLine``. Before the first heading, non-bullet lines are skipped
+    and the first bullet opens an implicit ``DEFAULT_LEGACY_SECTION`` section
+    with ``explicit_heading=False``.
+    """
+    if not content:
+        return MemoryDocument()
+
+    sections: list[MemorySection] = []
+    heading: str | None = None
+    explicit = True
+    lines: list[MemoryLine] = []
+
+    for raw in content.strip().splitlines():
+        line = raw.rstrip()
+
+        if is_section_heading(line):
+            # Close the section in progress, if any, before opening the next one.
+            if heading is not None:
+                sections.append(
+                    MemorySection(heading=heading, lines=lines, explicit_heading=explicit)
+                )
+            heading, explicit, lines = heading_text(line), True, []
+            continue
+
+        bullet = parse_bullet_line(line)
+        if heading is not None:
+            lines.append(MemoryRawLine(text=line) if bullet is None else bullet)
+        elif bullet is not None:
+            # First bullet seen with no heading yet: start the implicit section.
+            heading, explicit, lines = DEFAULT_LEGACY_SECTION, False, [bullet]
+
+    if heading is not None:
+        sections.append(
+            MemorySection(heading=heading, lines=lines, explicit_heading=explicit)
+        )
+    return MemoryDocument(sections=sections)
+
+
+def render_memory_document(document: MemoryDocument) -> str:
+    """Render *document* as canonical markdown.
+
+    Each section becomes ``## heading`` followed by its lines; bullets are
+    written as ``- YYYY-MM-DD: text`` and raw lines verbatim. Sections are
+    separated by one blank line.
+    """
+
+    def render_line(line: MemoryLine) -> str:
+        if isinstance(line, MemoryBullet):
+            return f"- {line.entry_date.isoformat()}: {line.text}"
+        return line.text
+
+    blocks: list[str] = []
+    for section in document.sections:
+        parts = [f"## {section.heading}", *(render_line(line) for line in section.lines)]
+        block = "\n".join(parts).strip()
+        if block:
+            blocks.append(block)
+    return "\n\n".join(blocks).strip()
+
+
+def extract_headings(memory: str | None) -> set[str]:
+    """Return the normalized text of every explicit ``##`` heading in *memory*."""
+    headings: set[str] = set()
+    for section in parse_memory_document(memory).sections:
+        if section.explicit_heading:
+            headings.add(normalize_heading(section.heading))
+    return headings
+
+
+def has_explicit_heading(content: str) -> bool:
+    """Return True when *content* parses to at least one explicitly headed section."""
+    document = parse_memory_document(content)
+    return document.has_explicit_heading
+
+
+def nonstandard_bullets(content: str) -> list[str]:
+    """Return one warning per ``- `` line that matches neither bullet format.
+
+    Each warning quotes the offending line, truncated to 80 characters.
+    """
+
+    def describe(bullet: str) -> str:
+        preview = bullet[:80]
+        if len(bullet) > 80:
+            preview += "..."
+        return f"Non-standard memory bullet: {preview}"
+
+    candidates = (raw.strip() for raw in content.splitlines())
+    return [
+        describe(candidate)
+        for candidate in candidates
+        if candidate.startswith("- ") and parse_bullet_line(candidate) is None
+    ]
--- a/surfsense_backend/app/services/memory/prompts.py
+++ b/surfsense_backend/app/services/memory/prompts.py
@ -0,0 +1,20 @@
+"""Prompts used by the memory service."""
+
+# Template for the forced-compression LLM call. It is filled with
+# ``str.format``, which substitutes the ``{target}`` and ``{content}``
+# placeholders.
+FORCED_REWRITE_PROMPT = """\
+You are a memory curator. The following memory document exceeds the character \
+limit and must be shortened.
+
+RULES:
+1. Rewrite the document to be under {target} characters.
+2. Output Markdown only. Use clear `##` headings and concise bullet points.
+3. New-format bullets should look like: `- YYYY-MM-DD: memory text`.
+4. If the input contains legacy markers like `(YYYY-MM-DD) [fact]`, preserve the
+   information but remove the inline marker in the output.
+5. Preserve durable instructions and preferences before generic facts when
+   compressing personal memory.
+6. Preserve existing headings when useful; merge duplicate headings and bullets.
+7. Output ONLY the consolidated markdown — no explanations, no wrapping.
+
+<memory_document>
+{content}
+</memory_document>"""
--- a/surfsense_backend/app/services/memory/rewrite.py
+++ b/surfsense_backend/app/services/memory/rewrite.py
@ -0,0 +1,35 @@
+"""LLM-backed memory rewrite helpers."""
+
+from __future__ import annotations
+
+import logging
+from typing import Any
+
+from langchain_core.messages import HumanMessage
+
+from app.services.memory.prompts import FORCED_REWRITE_PROMPT
+from app.services.memory.validation import MEMORY_HARD_LIMIT
+from app.utils.content_utils import extract_text_content
+
+logger = logging.getLogger(__name__)
+
+
+async def forced_rewrite(content: str, llm: Any) -> str | None:
+    """Ask *llm* to compress *content* to fit ``MEMORY_HARD_LIMIT`` characters.
+
+    Returns the stripped response text, or None when the response is empty or
+    the call raises an ``Exception`` (which is logged and swallowed).
+    """
+    try:
+        request = HumanMessage(
+            content=FORCED_REWRITE_PROMPT.format(
+                target=MEMORY_HARD_LIMIT,
+                content=content,
+            )
+        )
+        reply = await llm.ainvoke(
+            [request],
+            config={"tags": ["surfsense:internal", "memory-rewrite"]},
+        )
+        rewritten = extract_text_content(reply.content).strip()
+        if rewritten:
+            return rewritten
+        logger.warning("Forced memory rewrite returned empty text")
+        return None
+    except Exception:
+        logger.exception("Forced memory rewrite LLM call failed")
+        return None
--- a/surfsense_backend/app/services/memory/schemas.py
+++ b/surfsense_backend/app/services/memory/schemas.py
@ -0,0 +1,19 @@
+"""Schemas for memory API responses and structured extraction."""
+
+from __future__ import annotations
+
+from pydantic import BaseModel
+
+
+class MemoryLimits(BaseModel):
+    """Canonical memory size limits exposed to clients."""
+
+    # Filled from MEMORY_SOFT_LIMIT by the service's memory_limits() helper.
+    soft: int
+    # Filled from MEMORY_HARD_LIMIT by the service's memory_limits() helper.
+    hard: int
+
+
+class MemoryRead(BaseModel):
+    """Memory document payload returned by user and team memory APIs."""
+
+    # Presumably the stored markdown document — confirm against the API routes.
+    memory_md: str
+    # Size limits reported alongside the document.
+    limits: MemoryLimits
--- a/surfsense_backend/app/services/memory/service.py
+++ b/surfsense_backend/app/services/memory/service.py
@ -0,0 +1,247 @@
+"""Canonical read/write/reset/extract service for markdown memory."""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass, field
+from enum import StrEnum
+from typing import Any, Literal
+from uuid import UUID
+
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.db import SearchSpace, User
+from app.services.memory.document import parse_memory_document, render_memory_document
+from app.services.memory.rewrite import forced_rewrite
+from app.services.memory.schemas import MemoryLimits
+from app.services.memory.validation import (
+    MEMORY_HARD_LIMIT,
+    MEMORY_SOFT_LIMIT,
+    soft_limit_warning,
+    strip_preamble_to_first_heading,
+    validate_bullet_format,
+    validate_diff,
+    validate_heading_sanity,
+    validate_memory_scope,
+    validate_memory_size,
+)
+
+logger = logging.getLogger(__name__)
+
+# Payloads that mean "nothing to save". save_memory upper-cases the incoming
+# content before comparing, so matching is case-insensitive.
+_NO_UPDATE_SENTINELS = frozenset(
+    {
+        "NO_UPDATE",
+        "NO UPDATE",
+        "NO_CHANGE",
+        "NO CHANGE",
+    }
+)
+
+
+class MemoryScope(StrEnum):
+    """Which memory document is addressed: a user's own or a team's shared one."""
+
+    USER = "user"
+    TEAM = "team"
+
+
+@dataclass(frozen=True)
+class SaveResult:
+    """Outcome of a memory save attempt, convertible to a plain dict."""
+
+    status: Literal["saved", "error", "no_op"]
+    message: str
+    memory_md: str = ""
+    warnings: list[str] = field(default_factory=list)
+    diff_warnings: list[str] = field(default_factory=list)
+    format_warnings: list[str] = field(default_factory=list)
+    notice: str | None = None
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize to a dict, leaving out empty optional fields.
+
+        ``status``, ``message`` and ``memory_md`` are always present. When
+        there is exactly one warning it is also exposed under the singular
+        ``warning`` key.
+        """
+        payload: dict[str, Any] = dict(
+            status=self.status,
+            message=self.message,
+            memory_md=self.memory_md,
+        )
+        if self.notice:
+            payload["notice"] = self.notice
+        if self.warnings:
+            payload["warnings"] = self.warnings
+            if len(self.warnings) == 1:
+                payload["warning"] = self.warnings[0]
+        for key in ("diff_warnings", "format_warnings"):
+            values = getattr(self, key)
+            if values:
+                payload[key] = values
+        return payload
+
+
+def memory_limits() -> MemoryLimits:
+    """Return the soft and hard size limits as a ``MemoryLimits`` model."""
+    limits = MemoryLimits(soft=MEMORY_SOFT_LIMIT, hard=MEMORY_HARD_LIMIT)
+    return limits
+
+
+def _normalize_scope(scope: MemoryScope | str) -> MemoryScope:
+    """Coerce *scope* to a ``MemoryScope``; enum members pass through unchanged."""
+    if isinstance(scope, MemoryScope):
+        return scope
+    return MemoryScope(scope)
+
+
+def _normalize_user_id(target_id: str | UUID) -> UUID:
+    """Convert a string id to ``UUID``; any non-string value is returned as-is."""
+    if isinstance(target_id, str):
+        return UUID(target_id)
+    return target_id
+
+
+async def _load_target(
+    *,
+    scope: MemoryScope | str,
+    target_id: str | int | UUID,
+    session: AsyncSession,
+) -> User | SearchSpace | None:
+    """Fetch the row that holds the memory for *scope*.
+
+    User scope selects a ``User`` by UUID; any other scope selects a
+    ``SearchSpace`` by integer id. Returns None when no row matches.
+    """
+    if _normalize_scope(scope) is MemoryScope.USER:
+        query = select(User).where(User.id == _normalize_user_id(target_id))  # type: ignore[arg-type]
+    else:
+        query = select(SearchSpace).where(SearchSpace.id == int(target_id))
+    rows = await session.execute(query)
+    return rows.scalars().first()
+
+
+def _get_memory(target: User | SearchSpace, scope: MemoryScope) -> str:
+    """Read the memory markdown from *target*; missing or empty yields ``""``.
+
+    User scope reads ``memory_md``; any other scope reads ``shared_memory_md``.
+    """
+    attribute = "memory_md" if scope is MemoryScope.USER else "shared_memory_md"
+    return getattr(target, attribute, None) or ""
+
+
+def _set_memory(target: User | SearchSpace, scope: MemoryScope, content: str) -> None:
+    """Write *content* to the attribute matching *scope*.
+
+    User scope writes ``memory_md``; any other scope writes ``shared_memory_md``.
+    """
+    attribute = "memory_md" if scope is MemoryScope.USER else "shared_memory_md"
+    setattr(target, attribute, content)
+
+
+async def read_memory(
+    *,
+    scope: MemoryScope | str,
+    target_id: str | int | UUID,
+    session: AsyncSession,
+) -> str:
+    """Return the stored memory markdown, or ``""`` when the target row is missing."""
+    resolved_scope = _normalize_scope(scope)
+    row = await _load_target(
+        scope=resolved_scope,
+        target_id=target_id,
+        session=session,
+    )
+    return "" if row is None else _get_memory(row, resolved_scope)
+
+
+async def save_memory(
+    *,
+    scope: MemoryScope | str,
+    target_id: str | int | UUID,
+    content: str,
+    session: AsyncSession,
+    llm: Any | None = None,
+) -> SaveResult:
+    """Validate, canonicalize, and persist a memory document.
+
+    Steps, in order: drop any preamble before the first ``##`` heading; return
+    ``no_op`` for a no-update sentinel; if the payload exceeds the hard limit
+    and *llm* is given, attempt one LLM rewrite; validate size, heading sanity
+    and scope; re-render through the document model; commit.
+
+    Args:
+        scope: ``MemoryScope`` member or its string value.
+        target_id: User UUID (user scope) or search-space id (team scope).
+        content: Proposed markdown; an empty string clears the memory.
+        session: Async DB session; committed on success, rolled back on failure.
+        llm: Optional model used only for the over-limit rewrite.
+
+    Returns:
+        A ``SaveResult``. Once the target row is loaded, ``error`` and
+        ``no_op`` results carry the previously stored memory in ``memory_md``;
+        a ``saved`` result carries the canonical document that was written.
+    """
+    normalized = _normalize_scope(scope)
+    if not isinstance(content, str):
+        return SaveResult(
+            status="error",
+            message="Internal error: memory payload must be a string.",
+        )
+
+    target = await _load_target(scope=normalized, target_id=target_id, session=session)
+    if target is None:
+        return SaveResult(
+            status="error",
+            message="User not found."
+            if normalized is MemoryScope.USER
+            else "Search space not found.",
+        )
+
+    old_memory = _get_memory(target, normalized)
+    next_content = strip_preamble_to_first_heading(content.strip())
+    notice: str | None = None
+    warnings: list[str] = []
+
+    if next_content.upper() in _NO_UPDATE_SENTINELS:
+        return SaveResult(
+            status="no_op",
+            message="No memory update requested.",
+            memory_md=old_memory,
+        )
+
+    if len(next_content) > MEMORY_HARD_LIMIT and llm is not None:
+        rewritten = await forced_rewrite(next_content, llm)
+        # Only accept a rewrite that actually made the document shorter.
+        if rewritten is not None and len(rewritten) < len(next_content):
+            next_content = strip_preamble_to_first_heading(rewritten)
+            notice = "Memory was automatically rewritten to fit within limits."
+
+    for validation in (
+        validate_memory_size(next_content),
+        validate_heading_sanity(next_content),
+    ):
+        if validation:
+            return SaveResult(
+                status="error",
+                message=validation["message"],
+                memory_md=old_memory,
+            )
+
+    scope_error, scope_warnings = validate_memory_scope(
+        next_content,
+        normalized.value,
+        old_memory=old_memory,
+    )
+    warnings.extend(scope_warnings)
+    if scope_error:
+        return SaveResult(
+            status="error",
+            message=scope_error["message"],
+            memory_md=old_memory,
+            warnings=warnings,
+        )
+
+    canonical = render_memory_document(parse_memory_document(next_content))
+    if next_content and not canonical:
+        # validate_heading_sanity lets heading-less text of 40 characters or
+        # fewer through, but the parser keeps nothing from it. Saving the empty
+        # render would silently erase the stored memory, so reject instead.
+        return SaveResult(
+            status="error",
+            message="Memory must be markdown with at least one ## heading.",
+            memory_md=old_memory,
+            warnings=warnings,
+        )
+    next_content = canonical
+
+    try:
+        _set_memory(target, normalized, next_content)
+        session.add(target)
+        await session.commit()
+    except Exception as e:
+        logger.exception("Failed to update %s memory: %s", normalized.value, e)
+        await session.rollback()
+        return SaveResult(
+            status="error",
+            message=f"Failed to update {normalized.value} memory: {e}",
+            memory_md=old_memory,
+        )
+
+    # Advisory checks run after the write; they only annotate the result.
+    diff_warnings = validate_diff(old_memory, next_content)
+    format_warnings = validate_bullet_format(next_content)
+    warning = soft_limit_warning(next_content)
+    if warning:
+        warnings.append(warning)
+
+    return SaveResult(
+        status="saved",
+        message=(
+            "Memory updated."
+            if normalized is MemoryScope.USER
+            else "Team memory updated."
+        ),
+        memory_md=next_content,
+        warnings=warnings,
+        diff_warnings=diff_warnings,
+        format_warnings=format_warnings,
+        notice=notice,
+    )
+
+
+async def reset_memory(
+    *,
+    scope: MemoryScope | str,
+    target_id: str | int | UUID,
+    session: AsyncSession,
+) -> SaveResult:
+    """Clear the memory by saving an empty document through ``save_memory``."""
+    cleared = await save_memory(
+        scope=scope,
+        target_id=target_id,
+        content="",
+        session=session,
+        llm=None,
+    )
+    return cleared
--- a/surfsense_backend/app/services/memory/validation.py
+++ b/surfsense_backend/app/services/memory/validation.py
@ -0,0 +1,140 @@
+"""Validation helpers for markdown-backed memory."""
+
+from __future__ import annotations
+
+from typing import Literal
+
+from app.services.memory.document import (
+    extract_headings,
+    has_explicit_heading,
+    nonstandard_bullets,
+    parse_memory_document,
+)
+
+# Character count above which soft_limit_warning() returns a consolidation warning.
+MEMORY_SOFT_LIMIT = 18_000
+# Character count above which validate_memory_size() reports an error.
+MEMORY_HARD_LIMIT = 25_000
+
+# Normalized headings that team memory may not newly introduce; see
+# validate_memory_scope().
+_FORBIDDEN_TEAM_HEADINGS = {
+    "preferences",
+    "instructions",
+    "personal notes",
+    "personal instructions",
+}
+
+
+def has_markdown_heading(content: str) -> bool:
+    """Return True when *content* contains at least one explicit ``##`` heading."""
+    found = has_explicit_heading(content)
+    return found
+
+
+def strip_preamble_to_first_heading(content: str) -> str:
+    """Return *content* from its first ``## `` heading onward, trimmed.
+
+    When no heading with non-blank text exists, the whole content is returned
+    trimmed.
+    """
+    rows = content.splitlines()
+    first_heading = next(
+        (
+            position
+            for position, row in enumerate(rows)
+            if row.startswith("## ") and row[3:].strip()
+        ),
+        None,
+    )
+    if first_heading is None:
+        return content.strip()
+    return "\n".join(rows[first_heading:]).strip()
+
+
+def validate_memory_size(content: str) -> dict[str, str] | None:
+    """Return an error dict when *content* exceeds ``MEMORY_HARD_LIMIT``, else None."""
+    size = len(content)
+    if size <= MEMORY_HARD_LIMIT:
+        return None
+    message = (
+        f"Memory exceeds {MEMORY_HARD_LIMIT:,} character limit "
+        f"({size:,} chars). Consolidate by merging related items, "
+        "removing outdated entries, and shortening descriptions."
+    )
+    return {"status": "error", "message": message}
+
+
+def validate_heading_sanity(content: str) -> dict[str, str] | None:
+    """Reject long heading-less text unless it parses into legacy bullets.
+
+    Empty content, content with a ``##`` heading, content of at most 40
+    characters, and content that parses into at least one section all pass
+    (None). Anything else yields an error dict.
+    """
+    text = content.strip()
+    # Checks are ordered cheapest-first; the full parse only runs as a last resort.
+    acceptable = (
+        not text
+        or has_markdown_heading(text)
+        or len(text) <= 40
+        or bool(parse_memory_document(text).sections)
+    )
+    if acceptable:
+        return None
+    return {
+        "status": "error",
+        "message": "Memory must be markdown with at least one ## heading.",
+    }
+
+
+def validate_memory_scope(
+    content: str,
+    scope: Literal["user", "team"],
+    *,
+    old_memory: str | None = None,
+) -> tuple[dict[str, str] | None, list[str]]:
+    """Check team memory for personal headings.
+
+    Forbidden headings already present in *old_memory* are grandfathered and
+    only produce a warning; forbidden headings that *content* newly introduces
+    produce an error. Non-team scopes always pass.
+
+    Returns:
+        ``(error_or_None, warnings)``.
+    """
+    if scope != "team":
+        return None, []
+
+    previous = extract_headings(old_memory) & _FORBIDDEN_TEAM_HEADINGS
+    proposed = extract_headings(content) & _FORBIDDEN_TEAM_HEADINGS
+
+    notes: list[str] = []
+    kept = sorted(proposed & previous)
+    if kept:
+        notes.append(
+            "Team memory contains legacy personal headings: "
+            + ", ".join(kept)
+            + ". Please consolidate them into team-safe headings."
+        )
+
+    added = sorted(proposed - previous)
+    if not added:
+        return None, notes
+
+    error = {
+        "status": "error",
+        "message": (
+            "Team memory cannot introduce personal headings: "
+            + ", ".join(added)
+            + ". Use team-safe headings instead."
+        ),
+    }
+    return error, notes
+
+
+def validate_bullet_format(content: str) -> list[str]:
+    """Return a warning for each bullet in *content* that matches no known format."""
+    bullet_warnings = nonstandard_bullets(content)
+    return bullet_warnings
+
+
+def validate_diff(old_memory: str | None, new_memory: str) -> list[str]:
+    """Warn about likely accidental data loss between two memory versions.
+
+    Reports explicit headings that disappeared and a new length below 40% of
+    the old one. Returns no warnings when there was no previous memory.
+    """
+    if not old_memory:
+        return []
+
+    notes: list[str] = []
+
+    missing = sorted(extract_headings(old_memory) - extract_headings(new_memory))
+    if missing:
+        notes.append(
+            f"Sections removed: {', '.join(missing)}. If unintentional, restore them from the memory document."
+        )
+
+    # old_memory is non-empty here, so its length is always positive.
+    before, after = len(old_memory), len(new_memory)
+    if after < before * 0.4:
+        notes.append(
+            f"Memory shrank significantly ({before:,} -> {after:,} chars). Possible data loss."
+        )
+    return notes
+
+
+def soft_limit_warning(content: str) -> str | None:
+    """Return a consolidation hint when *content* exceeds ``MEMORY_SOFT_LIMIT``, else None."""
+    size = len(content)
+    if size <= MEMORY_SOFT_LIMIT:
+        return None
+    return (
+        f"Memory is at {size:,}/{MEMORY_HARD_LIMIT:,} characters. "
+        "Consolidate by merging related items and removing less important entries."
+    )