Merge remote-tracking branch 'upstream/dev' into improvement-agent-speed

Resolves: surfsense_backend/app/agents/new_chat/middleware/memory_injection.py
- Took both imports: upstream moved MEMORY_HARD_LIMIT/SOFT_LIMIT to
  app.services.memory; kept our perf-logger import for timing.

Pulls in upstream changes:
- Memory document feature (services/memory refactor, removal of
  app.agents.new_chat.memory_extraction and background extraction in
  stream_new_chat — agent now drives memory via update_memory tool).
- BACKEND_URL env refactor across web tool-ui/editor/chat/dashboard/lib.
- GitHub Actions backend test workflow + pre-commit biome bump.
- Token-display polish in MessageInfoDropdown; save_memory no-update
  sentinel.

Verified: 1723 unit tests pass, ruff clean. No semantic regression in
stream_new_chat (their memory-extraction deletion and our preflight
removal touch different functions).
This commit is contained in:
CREDO23 2026-05-20 21:23:48 +02:00
commit 49da7a57df
79 changed files with 1992 additions and 2296 deletions

View file

@ -0,0 +1,32 @@
"""First-class memory service for user and team markdown memory."""
from .schemas import MemoryLimits, MemoryRead
from .service import (
MemoryScope,
SaveResult,
memory_limits,
read_memory,
reset_memory,
save_memory,
)
from .validation import (
MEMORY_HARD_LIMIT,
MEMORY_SOFT_LIMIT,
validate_bullet_format,
validate_memory_scope,
)
# Public API of the memory package: the names re-exported from the
# ``schemas``, ``service`` and ``validation`` submodules imported above.
__all__ = [
    "MEMORY_HARD_LIMIT",
    "MEMORY_SOFT_LIMIT",
    "MemoryLimits",
    "MemoryRead",
    "MemoryScope",
    "SaveResult",
    "memory_limits",
    "read_memory",
    "reset_memory",
    "save_memory",
    "validate_bullet_format",
    "validate_memory_scope",
]

View file

@ -0,0 +1,200 @@
"""Memory-specific markdown document model and canonical renderer.
This intentionally parses only SurfSense memory's small markdown contract:
``##`` sections with dated bullet items. Unknown lines are preserved so user
edits are not lost, while legacy marker bullets are normalized on render.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import date
# Heading given to bullets that appear before any explicit ``##`` heading.
DEFAULT_LEGACY_SECTION = "Memory"
# Bracketed markers accepted in legacy ``(YYYY-MM-DD) [marker] text`` bullets.
LEGACY_MARKERS = frozenset({"fact", "pref", "instr"})
@dataclass(frozen=True)
class MemoryBullet:
    """A dated memory entry parsed from a ``- ...`` bullet line."""

    # Date parsed from the bullet's ISO date prefix.
    entry_date: date
    # Entry text with surrounding whitespace removed.
    text: str
@dataclass(frozen=True)
class MemoryRawLine:
    """A section line that is not a recognized bullet; rendered back as-is."""

    # Line content as stored by the parser (trailing whitespace removed).
    text: str
# A parsed line inside a section: either a recognized bullet or raw text.
MemoryLine = MemoryBullet | MemoryRawLine
@dataclass(frozen=True)
class MemorySection:
    """One ``##`` section of a memory document and the lines beneath it."""

    # Heading text without the leading ``## `` marker.
    heading: str
    # Bullets and raw lines in document order.
    lines: list[MemoryLine] = field(default_factory=list)
    # False when the parser synthesized the heading for heading-less bullets.
    explicit_heading: bool = True
@dataclass(frozen=True)
class MemoryDocument:
    """Parsed memory document: an ordered list of sections."""

    sections: list[MemorySection] = field(default_factory=list)

    @property
    def has_explicit_heading(self) -> bool:
        """Return True when at least one section has an explicit heading."""
        for section in self.sections:
            if section.explicit_heading:
                return True
        return False
def is_section_heading(line: str) -> bool:
    """Return True for a ``## `` line that has non-blank text after the marker."""
    if not line.startswith("## "):
        return False
    title = line[3:].strip()
    return title != ""
def heading_text(line: str) -> str:
    """Return the text after the first three characters of ``line``, trimmed.

    Intended for lines already accepted by ``is_section_heading``; no check
    that the first three characters are ``## `` is made here.
    """
    title = line[3:]
    return title.strip()
def normalize_heading(heading: str) -> str:
    """Return a comparison key for ``heading``.

    The heading is trimmed and lowercased, every non-alphanumeric character
    is treated as a separator, and runs of separators collapse to a single
    space with none left at either end.
    """
    lowered = heading.strip().lower()
    # Turn every separator into a plain space, then let split() collapse runs
    # and drop the leading/trailing ones.
    spaced = "".join(char if char.isalnum() else " " for char in lowered)
    return " ".join(spaced.split())
def parse_bullet_line(line: str) -> MemoryBullet | None:
    """Parse a ``- `` bullet line into a ``MemoryBullet``.

    Returns None when the trimmed line is not a ``- `` bullet or when its
    body matches neither the canonical nor the legacy bullet form.
    """
    candidate = line.strip()
    if not candidate.startswith("- "):
        return None
    payload = candidate[2:]
    # Canonical form is tried first; the legacy form is the fallback.
    for parser in (_parse_canonical_bullet, _parse_legacy_bullet):
        bullet = parser(payload)
        if bullet is not None:
            return bullet
    return None
def _parse_canonical_bullet(body: str) -> MemoryBullet | None:
    """Parse a ``YYYY-MM-DD: text`` bullet body.

    Returns None when the body is shorter than 13 characters, the ``": "``
    separator is not at offset 10, the date prefix is rejected by
    ``date.fromisoformat``, or the text is blank.
    """
    date_part, separator, remainder = body[:10], body[10:12], body[12:]
    if len(body) < 13 or separator != ": ":
        return None
    try:
        parsed_date = date.fromisoformat(date_part)
    except ValueError:
        return None
    text = remainder.strip()
    return MemoryBullet(entry_date=parsed_date, text=text) if text else None
def _parse_legacy_bullet(body: str) -> MemoryBullet | None:
    """Parse a legacy ``(YYYY-MM-DD) [marker] text`` bullet body.

    Returns None when the body is shorter than 20 characters, does not have
    the ``(`` / ``) [`` punctuation at the fixed offsets, carries an invalid
    date, uses a marker outside ``LEGACY_MARKERS``, or has blank text. The
    marker itself is dropped from the resulting bullet.
    """
    if len(body) < 20 or not body.startswith("(") or body[11:14] != ") [":
        return None
    try:
        parsed_date = date.fromisoformat(body[1:11])
    except ValueError:
        return None
    # The marker runs from offset 14 up to the first "] " that follows it.
    marker, closer, remainder = body[14:].partition("] ")
    if not closer or marker not in LEGACY_MARKERS:
        return None
    text = remainder.strip()
    if not text:
        return None
    return MemoryBullet(entry_date=parsed_date, text=text)
def parse_memory_document(content: str | None) -> MemoryDocument:
    """Parse memory markdown into sections of bullets and raw lines.

    A ``##`` heading line starts a new explicit section. Before any heading,
    non-bullet lines are discarded, and the first bullet opens an implicit
    section named ``DEFAULT_LEGACY_SECTION``. Inside a section every line is
    kept: recognized bullets as ``MemoryBullet``, anything else (blank lines
    included) as ``MemoryRawLine``. Empty or ``None`` content yields an empty
    document.
    """
    if not content:
        return MemoryDocument()

    sections: list[MemorySection] = []
    heading: str | None = None
    explicit = True
    lines: list[MemoryLine] = []

    for raw_line in content.strip().splitlines():
        line = raw_line.rstrip()
        if is_section_heading(line):
            # Close the section in progress, if any, before opening a new one.
            if heading is not None:
                sections.append(
                    MemorySection(
                        heading=heading,
                        lines=lines,
                        explicit_heading=explicit,
                    )
                )
            heading, explicit, lines = heading_text(line), True, []
            continue

        bullet = parse_bullet_line(line)
        if heading is not None:
            lines.append(bullet if bullet is not None else MemoryRawLine(text=line))
        elif bullet is not None:
            # Heading-less bullet: open the implicit legacy section.
            heading, explicit, lines = DEFAULT_LEGACY_SECTION, False, [bullet]

    if heading is not None:
        sections.append(
            MemorySection(
                heading=heading,
                lines=lines,
                explicit_heading=explicit,
            )
        )
    return MemoryDocument(sections=sections)
def render_memory_document(document: MemoryDocument) -> str:
    """Render a parsed document back to canonical markdown.

    Each section becomes a ``## heading`` line followed by its lines; bullets
    are written as ``- YYYY-MM-DD: text`` and raw lines are emitted unchanged.
    Sections are trimmed and separated by one blank line.
    """
    blocks: list[str] = []
    for section in document.sections:
        body = [
            f"- {item.entry_date.isoformat()}: {item.text}"
            if isinstance(item, MemoryBullet)
            else item.text
            for item in section.lines
        ]
        block = "\n".join([f"## {section.heading}", *body]).strip()
        if block:
            blocks.append(block)
    return "\n\n".join(blocks).strip()
def extract_headings(memory: str | None) -> set[str]:
    """Return the normalized headings of all explicit sections in ``memory``.

    Implicit sections created for heading-less bullets are not included.
    """
    headings: set[str] = set()
    for section in parse_memory_document(memory).sections:
        if section.explicit_heading:
            headings.add(normalize_heading(section.heading))
    return headings
def has_explicit_heading(content: str) -> bool:
    """Return True when ``content`` parses to at least one explicit ``##`` section."""
    document = parse_memory_document(content)
    return document.has_explicit_heading
def nonstandard_bullets(content: str) -> list[str]:
    """Return one warning per ``- `` bullet that does not parse.

    Each warning quotes the trimmed bullet, cut to 80 characters with a
    trailing ``...`` when it is longer.
    """
    messages: list[str] = []
    for bullet in (raw.strip() for raw in content.splitlines()):
        if not bullet.startswith("- ") or parse_bullet_line(bullet) is not None:
            continue
        preview = bullet[:80]
        if len(bullet) > 80:
            preview += "..."
        messages.append(f"Non-standard memory bullet: {preview}")
    return messages

View file

@ -0,0 +1,20 @@
"""Prompts used by the memory service."""
# Template for the forced-rewrite LLM call. It is filled with ``str.format``
# using two fields: ``{target}`` (the character budget) and ``{content}``
# (the memory document to compress). Any literal brace added to this text
# would have to be doubled to survive ``str.format``.
FORCED_REWRITE_PROMPT = """\
You are a memory curator. The following memory document exceeds the character \
limit and must be shortened.
RULES:
1. Rewrite the document to be under {target} characters.
2. Output Markdown only. Use clear `##` headings and concise bullet points.
3. New-format bullets should look like: `- YYYY-MM-DD: memory text`.
4. If the input contains legacy markers like `(YYYY-MM-DD) [fact]`, preserve the
information but remove the inline marker in the output.
5. Preserve durable instructions and preferences before generic facts when
compressing personal memory.
6. Preserve existing headings when useful; merge duplicate headings and bullets.
7. Output ONLY the consolidated markdown no explanations, no wrapping.
<memory_document>
{content}
</memory_document>"""

View file

@ -0,0 +1,35 @@
"""LLM-backed memory rewrite helpers."""
from __future__ import annotations
import logging
from typing import Any
from langchain_core.messages import HumanMessage
from app.services.memory.prompts import FORCED_REWRITE_PROMPT
from app.services.memory.validation import MEMORY_HARD_LIMIT
from app.utils.content_utils import extract_text_content
logger = logging.getLogger(__name__)
async def forced_rewrite(content: str, llm: Any) -> str | None:
    """Ask ``llm`` to compress ``content`` under the hard limit.

    The prompt is ``FORCED_REWRITE_PROMPT`` filled with ``MEMORY_HARD_LIMIT``
    as the target size. Returns the model's trimmed text, or None when the
    reply is empty or any exception is raised along the way (the failure is
    logged, never propagated).
    """
    try:
        messages = [
            HumanMessage(
                content=FORCED_REWRITE_PROMPT.format(
                    target=MEMORY_HARD_LIMIT,
                    content=content,
                )
            )
        ]
        run_config = {"tags": ["surfsense:internal", "memory-rewrite"]}
        reply = await llm.ainvoke(messages, config=run_config)
        rewritten = extract_text_content(reply.content).strip()
        if rewritten:
            return rewritten
        logger.warning("Forced memory rewrite returned empty text")
        return None
    except Exception:
        logger.exception("Forced memory rewrite LLM call failed")
        return None

View file

@ -0,0 +1,19 @@
"""Schemas for memory API responses and structured extraction."""
from __future__ import annotations
from pydantic import BaseModel
class MemoryLimits(BaseModel):
    """Canonical memory size limits exposed to clients."""

    # Soft limit, in characters.
    soft: int
    # Hard limit, in characters.
    hard: int
class MemoryRead(BaseModel):
    """Memory document payload returned by user and team memory APIs."""

    # Markdown text of the memory document.
    memory_md: str
    # Size limits that accompany the document.
    limits: MemoryLimits

View file

@ -0,0 +1,247 @@
"""Canonical read/write/reset/extract service for markdown memory."""
from __future__ import annotations
import logging
from dataclasses import dataclass, field
from enum import StrEnum
from typing import Any, Literal
from uuid import UUID
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.db import SearchSpace, User
from app.services.memory.document import parse_memory_document, render_memory_document
from app.services.memory.rewrite import forced_rewrite
from app.services.memory.schemas import MemoryLimits
from app.services.memory.validation import (
MEMORY_HARD_LIMIT,
MEMORY_SOFT_LIMIT,
soft_limit_warning,
strip_preamble_to_first_heading,
validate_bullet_format,
validate_diff,
validate_heading_sanity,
validate_memory_scope,
validate_memory_size,
)
logger = logging.getLogger(__name__)
# Payloads that mean "leave memory unchanged". ``save_memory`` compares the
# upper-cased, preamble-stripped content against this set, so matching is
# case-insensitive but otherwise exact.
_NO_UPDATE_SENTINELS = frozenset(
    {
        "NO_UPDATE",
        "NO UPDATE",
        "NO_CHANGE",
        "NO CHANGE",
    }
)
class MemoryScope(StrEnum):
    """Which memory document an operation targets."""

    # Personal memory, stored on ``User.memory_md``.
    USER = "user"
    # Shared memory, stored on ``SearchSpace.shared_memory_md``.
    TEAM = "team"
@dataclass(frozen=True)
class SaveResult:
status: Literal["saved", "error", "no_op"]
message: str
memory_md: str = ""
warnings: list[str] = field(default_factory=list)
diff_warnings: list[str] = field(default_factory=list)
format_warnings: list[str] = field(default_factory=list)
notice: str | None = None
def to_dict(self) -> dict[str, Any]:
data: dict[str, Any] = {
"status": self.status,
"message": self.message,
"memory_md": self.memory_md,
}
if self.notice:
data["notice"] = self.notice
if self.warnings:
data["warnings"] = self.warnings
if len(self.warnings) == 1:
data["warning"] = self.warnings[0]
if self.diff_warnings:
data["diff_warnings"] = self.diff_warnings
if self.format_warnings:
data["format_warnings"] = self.format_warnings
return data
def memory_limits() -> MemoryLimits:
    """Return the soft and hard memory size limits as a ``MemoryLimits`` model."""
    limits = MemoryLimits(soft=MEMORY_SOFT_LIMIT, hard=MEMORY_HARD_LIMIT)
    return limits
def _normalize_scope(scope: MemoryScope | str) -> MemoryScope:
    """Coerce ``scope`` to a ``MemoryScope``; enum members pass through unchanged."""
    if isinstance(scope, MemoryScope):
        return scope
    return MemoryScope(scope)
def _normalize_user_id(target_id: str | UUID) -> UUID:
return UUID(target_id) if isinstance(target_id, str) else target_id
async def _load_target(
    *,
    scope: MemoryScope | str,
    target_id: str | int | UUID,
    session: AsyncSession,
) -> User | SearchSpace | None:
    """Fetch the row that owns the memory for ``scope``.

    User scope looks up ``User`` by UUID; any other scope looks up
    ``SearchSpace`` by ``int(target_id)``. Returns the first match or None.
    """
    if _normalize_scope(scope) is MemoryScope.USER:
        query = select(User).where(User.id == _normalize_user_id(target_id))  # type: ignore[arg-type]
    else:
        query = select(SearchSpace).where(SearchSpace.id == int(target_id))
    result = await session.execute(query)
    return result.scalars().first()
def _get_memory(target: User | SearchSpace, scope: MemoryScope) -> str:
    """Read the memory text for ``scope`` from ``target``; missing or empty yields ``""``."""
    attribute = "memory_md" if scope is MemoryScope.USER else "shared_memory_md"
    return getattr(target, attribute, None) or ""
def _set_memory(target: User | SearchSpace, scope: MemoryScope, content: str) -> None:
    """Assign ``content`` to the memory attribute that matches ``scope``."""
    attribute = "memory_md" if scope is MemoryScope.USER else "shared_memory_md"
    setattr(target, attribute, content)
async def read_memory(
    *,
    scope: MemoryScope | str,
    target_id: str | int | UUID,
    session: AsyncSession,
) -> str:
    """Return the stored memory markdown for the target, or ``""`` if it does not exist."""
    resolved_scope = _normalize_scope(scope)
    target = await _load_target(
        scope=resolved_scope,
        target_id=target_id,
        session=session,
    )
    return "" if target is None else _get_memory(target, resolved_scope)
async def save_memory(
    *,
    scope: MemoryScope | str,
    target_id: str | int | UUID,
    content: str,
    session: AsyncSession,
    llm: Any | None = None,
) -> SaveResult:
    """Validate, normalize and persist ``content`` as the target's memory.

    Steps, in order: load the target row, strip any preamble before the first
    ``##`` heading, short-circuit on a no-update sentinel, optionally ask the
    LLM to compress oversized content, run the size and heading checks, run
    the team-scope heading check, re-render the content through the document
    parser, commit, and finally compute advisory warnings.

    Args:
        scope: ``MemoryScope`` member or its string value.
        target_id: User id (user scope) or search space id (team scope).
        content: Proposed memory markdown.
        session: Async SQLAlchemy session used to load and commit the target.
        llm: Optional model used only to compress content over the hard limit.

    Returns:
        A ``SaveResult`` with status ``"saved"``, ``"no_op"`` or ``"error"``.
        Apart from the two earliest error returns (non-string payload, target
        not found), non-saved results carry the existing memory in
        ``memory_md``.
    """
    normalized = _normalize_scope(scope)
    if not isinstance(content, str):
        return SaveResult(
            status="error",
            message="Internal error: memory payload must be a string.",
        )
    target = await _load_target(scope=normalized, target_id=target_id, session=session)
    if target is None:
        return SaveResult(
            status="error",
            message="User not found."
            if normalized is MemoryScope.USER
            else "Search space not found.",
        )
    old_memory = _get_memory(target, normalized)
    next_content = strip_preamble_to_first_heading(content.strip())
    notice: str | None = None
    warnings: list[str] = []
    # Sentinel payloads leave the stored memory untouched.
    if next_content.upper() in _NO_UPDATE_SENTINELS:
        return SaveResult(
            status="no_op",
            message="No memory update requested.",
            memory_md=old_memory,
        )
    # Oversized content gets one LLM compression attempt; the rewrite is only
    # adopted when it is actually shorter. The size check below still applies.
    if len(next_content) > MEMORY_HARD_LIMIT and llm is not None:
        rewritten = await forced_rewrite(next_content, llm)
        if rewritten is not None and len(rewritten) < len(next_content):
            next_content = strip_preamble_to_first_heading(rewritten)
            notice = "Memory was automatically rewritten to fit within limits."
    # Blocking checks: the first failure aborts the save.
    for validation in (
        validate_memory_size(next_content),
        validate_heading_sanity(next_content),
    ):
        if validation:
            return SaveResult(
                status="error",
                message=validation["message"],
                memory_md=old_memory,
            )
    scope_error, scope_warnings = validate_memory_scope(
        next_content,
        normalized.value,
        old_memory=old_memory,
    )
    warnings.extend(scope_warnings)
    if scope_error:
        return SaveResult(
            status="error",
            message=scope_error["message"],
            memory_md=old_memory,
            warnings=warnings,
        )
    # Round-trip through the parser so the stored text is in canonical form.
    # NOTE(review): this runs after the size check, so the stored text can
    # differ in length from the text that was validated.
    next_content = render_memory_document(parse_memory_document(next_content))
    try:
        _set_memory(target, normalized, next_content)
        session.add(target)
        await session.commit()
    except Exception as e:
        logger.exception("Failed to update %s memory: %s", normalized.value, e)
        await session.rollback()
        return SaveResult(
            status="error",
            message=f"Failed to update {normalized.value} memory: {e}",
            memory_md=old_memory,
        )
    # Advisory checks: computed after the commit, they never block the save.
    diff_warnings = validate_diff(old_memory, next_content)
    format_warnings = validate_bullet_format(next_content)
    warning = soft_limit_warning(next_content)
    if warning:
        warnings.append(warning)
    return SaveResult(
        status="saved",
        message=(
            "Memory updated."
            if normalized is MemoryScope.USER
            else "Team memory updated."
        ),
        memory_md=next_content,
        warnings=warnings,
        diff_warnings=diff_warnings,
        format_warnings=format_warnings,
        notice=notice,
    )
async def reset_memory(
    *,
    scope: MemoryScope | str,
    target_id: str | int | UUID,
    session: AsyncSession,
) -> SaveResult:
    """Clear the target's memory by saving an empty document without an LLM."""
    outcome = await save_memory(
        scope=scope,
        target_id=target_id,
        content="",
        session=session,
        llm=None,
    )
    return outcome

View file

@ -0,0 +1,140 @@
"""Validation helpers for markdown-backed memory."""
from __future__ import annotations
from typing import Literal
from app.services.memory.document import (
extract_headings,
has_explicit_heading,
nonstandard_bullets,
parse_memory_document,
)
# Length in characters above which ``soft_limit_warning`` emits a warning.
MEMORY_SOFT_LIMIT = 18_000
# Length in characters above which ``validate_memory_size`` rejects content.
MEMORY_HARD_LIMIT = 25_000
# Headings not allowed to be newly introduced in team memory. Entries are
# compared against the normalized headings returned by ``extract_headings``.
_FORBIDDEN_TEAM_HEADINGS = {
    "preferences",
    "instructions",
    "personal notes",
    "personal instructions",
}
def has_markdown_heading(content: str) -> bool:
    """Return True when ``content`` contains at least one explicit ``##`` heading."""
    found = has_explicit_heading(content)
    return found
def strip_preamble_to_first_heading(content: str) -> str:
    """Drop everything before the first ``##`` heading, if there is one.

    Without a heading the content is returned unchanged apart from trimming.
    The result is always stripped of surrounding whitespace.
    """
    lines = content.splitlines()
    first_heading = next(
        (
            index
            for index, line in enumerate(lines)
            if line.startswith("## ") and line[3:].strip()
        ),
        None,
    )
    kept = content if first_heading is None else "\n".join(lines[first_heading:])
    return kept.strip()
def validate_memory_size(content: str) -> dict[str, str] | None:
    """Return an error dict when ``content`` exceeds the hard limit, else None."""
    length = len(content)
    if length <= MEMORY_HARD_LIMIT:
        return None
    message = (
        f"Memory exceeds {MEMORY_HARD_LIMIT:,} character limit "
        f"({length:,} chars). Consolidate by merging related items, "
        "removing outdated entries, and shortening descriptions."
    )
    return {"status": "error", "message": message}
def validate_heading_sanity(content: str) -> dict[str, str] | None:
    """Reject long heading-less prose; accept everything else.

    Content passes when it is blank, has an explicit ``##`` heading, is at
    most 40 characters after trimming, or parses to at least one section
    (heading-less legacy bullets). Otherwise an error dict is returned.
    """
    body = content.strip()
    # Checks are ordered cheapest-first and short-circuit on the first pass.
    acceptable = (
        not body
        or has_markdown_heading(body)
        or len(body) <= 40
        or bool(parse_memory_document(body).sections)
    )
    if acceptable:
        return None
    return {
        "status": "error",
        "message": "Memory must be markdown with at least one ## heading.",
    }
def validate_memory_scope(
    content: str,
    scope: Literal["user", "team"],
    *,
    old_memory: str | None = None,
) -> tuple[dict[str, str] | None, list[str]]:
    """Check team memory for personal headings.

    Only the ``"team"`` scope is checked. Forbidden headings already present
    in ``old_memory`` are tolerated and reported as a warning; forbidden
    headings that are new in ``content`` produce an error.

    Returns:
        ``(error, warnings)`` where ``error`` is None when the content is
        acceptable.
    """
    if scope != "team":
        return None, []

    previous = extract_headings(old_memory) & _FORBIDDEN_TEAM_HEADINGS
    proposed = extract_headings(content) & _FORBIDDEN_TEAM_HEADINGS

    warnings: list[str] = []
    grandfathered = sorted(proposed & previous)
    if grandfathered:
        warnings.append(
            f"Team memory contains legacy personal headings: {', '.join(grandfathered)}"
            ". Please consolidate them into team-safe headings."
        )

    introduced = sorted(proposed - previous)
    if not introduced:
        return None, warnings

    error = {
        "status": "error",
        "message": (
            f"Team memory cannot introduce personal headings: {', '.join(introduced)}"
            ". Use team-safe headings instead."
        ),
    }
    return error, warnings
def validate_bullet_format(content: str) -> list[str]:
    """Return warnings for bullets in ``content`` that do not parse as memory bullets."""
    findings = nonstandard_bullets(content)
    return findings
def validate_diff(old_memory: str | None, new_memory: str) -> list[str]:
    """Return warnings about content that may have been lost in an update.

    Flags explicit headings present in ``old_memory`` but missing from
    ``new_memory``, and a new length below 40% of the old one. With no
    previous memory there is nothing to compare, so the list is empty.
    """
    if not old_memory:
        return []

    findings: list[str] = []
    missing = extract_headings(old_memory) - extract_headings(new_memory)
    if missing:
        findings.append(
            f"Sections removed: {', '.join(sorted(missing))}. If unintentional, restore them from the memory document."
        )

    # old_memory is non-empty here, so the previous length is always positive.
    before, after = len(old_memory), len(new_memory)
    if after < before * 0.4:
        findings.append(
            f"Memory shrank significantly ({before:,} -> {after:,} chars). Possible data loss."
        )
    return findings
def soft_limit_warning(content: str) -> str | None:
    """Return a consolidation warning when ``content`` is over the soft limit, else None."""
    length = len(content)
    if length <= MEMORY_SOFT_LIMIT:
        return None
    return (
        f"Memory is at {length:,}/{MEMORY_HARD_LIMIT:,} characters. "
        "Consolidate by merging related items and removing less important entries."
    )