mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-25 19:15:18 +02:00
Merge remote-tracking branch 'upstream/dev' into improvement-agent-speed
Resolves: surfsense_backend/app/agents/new_chat/middleware/memory_injection.py - Took both imports: upstream moved MEMORY_HARD_LIMIT/SOFT_LIMIT to app.services.memory; kept our perf-logger import for timing. Pulls in upstream changes: - Memory document feature (services/memory refactor, removal of app.agents.new_chat.memory_extraction and background extraction in stream_new_chat — agent now drives memory via update_memory tool). - BACKEND_URL env refactor across web tool-ui/editor/chat/dashboard/lib. - GitHub Actions backend test workflow + pre-commit biome bump. - Token-display polish in MessageInfoDropdown; save_memory no-update sentinel. Verified: 1723 unit tests pass, ruff clean. No semantic regression in stream_new_chat (their memory-extraction deletion and our preflight removal touch different functions).
This commit is contained in:
commit
49da7a57df
79 changed files with 1992 additions and 2296 deletions
32
surfsense_backend/app/services/memory/__init__.py
Normal file
32
surfsense_backend/app/services/memory/__init__.py
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
"""First-class memory service for user and team markdown memory."""
|
||||
|
||||
from .schemas import MemoryLimits, MemoryRead
|
||||
from .service import (
|
||||
MemoryScope,
|
||||
SaveResult,
|
||||
memory_limits,
|
||||
read_memory,
|
||||
reset_memory,
|
||||
save_memory,
|
||||
)
|
||||
from .validation import (
|
||||
MEMORY_HARD_LIMIT,
|
||||
MEMORY_SOFT_LIMIT,
|
||||
validate_bullet_format,
|
||||
validate_memory_scope,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"MEMORY_HARD_LIMIT",
|
||||
"MEMORY_SOFT_LIMIT",
|
||||
"MemoryLimits",
|
||||
"MemoryRead",
|
||||
"MemoryScope",
|
||||
"SaveResult",
|
||||
"memory_limits",
|
||||
"read_memory",
|
||||
"reset_memory",
|
||||
"save_memory",
|
||||
"validate_bullet_format",
|
||||
"validate_memory_scope",
|
||||
]
|
||||
200
surfsense_backend/app/services/memory/document.py
Normal file
200
surfsense_backend/app/services/memory/document.py
Normal file
|
|
@ -0,0 +1,200 @@
|
|||
"""Memory-specific markdown document model and canonical renderer.
|
||||
|
||||
This intentionally parses only SurfSense memory's small markdown contract:
|
||||
``##`` sections with dated bullet items. Unknown lines are preserved so user
|
||||
edits are not lost, while legacy marker bullets are normalized on render.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import date
|
||||
|
||||
# Heading assigned to bullets that appear before any explicit ``##`` heading.
DEFAULT_LEGACY_SECTION = "Memory"
# Inline ``[marker]`` tags recognised by the legacy bullet parser.
LEGACY_MARKERS = frozenset({"fact", "pref", "instr"})
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class MemoryBullet:
    """A dated memory entry parsed from a ``- `` bullet line."""

    entry_date: date  # date parsed from the bullet's ISO date prefix
    text: str  # entry text, stripped of surrounding whitespace by the parsers
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class MemoryRawLine:
    """A section line that is not a recognised dated bullet; rendered verbatim."""

    text: str  # line content (the document parser stores it right-stripped)
|
||||
|
||||
|
||||
MemoryLine = MemoryBullet | MemoryRawLine
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class MemorySection:
    """One ``##`` section of a memory document plus the lines beneath it."""

    heading: str  # heading text without the leading ``## ``
    lines: list[MemoryLine] = field(default_factory=list)  # bullets and raw lines, in order
    # False when the parser synthesised the heading for bullets that preceded
    # any explicit ``##`` heading.
    explicit_heading: bool = True
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class MemoryDocument:
    """Parsed memory document: an ordered list of sections."""

    sections: list[MemorySection] = field(default_factory=list)

    @property
    def has_explicit_heading(self) -> bool:
        """Return True when at least one section has an explicit heading."""
        for section in self.sections:
            if section.explicit_heading:
                return True
        return False
|
||||
|
||||
|
||||
def is_section_heading(line: str) -> bool:
    """Return True when *line* starts with ``## `` and has a non-blank title."""
    prefix = "## "
    if not line.startswith(prefix):
        return False
    return line[len(prefix):].strip() != ""
|
||||
|
||||
|
||||
def heading_text(line: str) -> str:
    """Return the title of a ``## `` heading line with whitespace trimmed.

    The first three characters are dropped unconditionally, so the result is
    only meaningful for lines accepted by ``is_section_heading``.
    """
    title = line[len("## "):]
    return title.strip()
|
||||
|
||||
|
||||
def normalize_heading(heading: str) -> str:
    """Return a comparison key for *heading*.

    The heading is trimmed and lower-cased, every non-alphanumeric character is
    treated as a separator, and runs of separators collapse to a single space.
    """
    lowered = heading.strip().lower()
    # Turn every separator into a space, then let split/join collapse the runs
    # and drop any leading or trailing separator.
    spaced = "".join(char if char.isalnum() else " " for char in lowered)
    return " ".join(spaced.split())
|
||||
|
||||
|
||||
def parse_bullet_line(line: str) -> MemoryBullet | None:
    """Parse a ``- `` bullet line into a ``MemoryBullet``.

    The canonical ``YYYY-MM-DD: text`` form is tried first, then the legacy
    ``(YYYY-MM-DD) [marker] text`` form. Returns ``None`` when the line is not
    a bullet or matches neither form.
    """
    candidate = line.strip()
    if candidate[:2] != "- ":
        return None

    payload = candidate[2:]
    for parser in (_parse_canonical_bullet, _parse_legacy_bullet):
        bullet = parser(payload)
        if bullet is not None:
            return bullet
    return None
|
||||
|
||||
|
||||
def _parse_canonical_bullet(body: str) -> MemoryBullet | None:
|
||||
if len(body) < 13 or body[10:12] != ": ":
|
||||
return None
|
||||
try:
|
||||
entry_date = date.fromisoformat(body[:10])
|
||||
except ValueError:
|
||||
return None
|
||||
text = body[12:].strip()
|
||||
if not text:
|
||||
return None
|
||||
return MemoryBullet(entry_date=entry_date, text=text)
|
||||
|
||||
|
||||
def _parse_legacy_bullet(body: str) -> MemoryBullet | None:
|
||||
if len(body) < 20 or not body.startswith("("):
|
||||
return None
|
||||
if len(body) < 14 or body[11:14] != ") [":
|
||||
return None
|
||||
try:
|
||||
entry_date = date.fromisoformat(body[1:11])
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
marker_end = body.find("] ", 14)
|
||||
if marker_end == -1:
|
||||
return None
|
||||
marker = body[14:marker_end]
|
||||
if marker not in LEGACY_MARKERS:
|
||||
return None
|
||||
|
||||
text = body[marker_end + 2 :].strip()
|
||||
if not text:
|
||||
return None
|
||||
return MemoryBullet(entry_date=entry_date, text=text)
|
||||
|
||||
|
||||
def parse_memory_document(content: str | None) -> MemoryDocument:
    """Parse memory markdown into sections of bullets and raw lines.

    Each ``## `` heading opens a new explicit section. Before the first
    heading, lines that are not recognised bullets are dropped; the first
    recognised bullet opens an implicit ``DEFAULT_LEGACY_SECTION`` section.
    Inside a section every line is kept, either as a ``MemoryBullet`` or as a
    right-stripped ``MemoryRawLine``.
    """
    if not content:
        return MemoryDocument()

    parsed_sections: list[MemorySection] = []
    # Line list of the section currently being filled; None until one opens.
    open_lines: list[MemoryLine] | None = None

    def open_section(heading: str, *, explicit: bool) -> list[MemoryLine]:
        section = MemorySection(heading=heading, lines=[], explicit_heading=explicit)
        parsed_sections.append(section)
        return section.lines

    for raw_line in content.strip().splitlines():
        line = raw_line.rstrip()

        if is_section_heading(line):
            open_lines = open_section(heading_text(line), explicit=True)
            continue

        bullet = parse_bullet_line(line)

        if open_lines is None:
            # Nothing is open yet: only a recognised bullet may start the
            # implicit legacy section; other preamble lines are discarded.
            if bullet is not None:
                open_lines = open_section(DEFAULT_LEGACY_SECTION, explicit=False)
                open_lines.append(bullet)
            continue

        open_lines.append(MemoryRawLine(text=line) if bullet is None else bullet)

    return MemoryDocument(sections=parsed_sections)
|
||||
|
||||
|
||||
def render_memory_document(document: MemoryDocument) -> str:
    """Render *document* as canonical markdown.

    Every section is written as ``## heading`` followed by its lines. Bullets
    are rendered as ``- YYYY-MM-DD: text``, raw lines verbatim. Sections are
    separated by one blank line and the result is stripped.
    """

    def render_line(entry: MemoryLine) -> str:
        if isinstance(entry, MemoryBullet):
            return f"- {entry.entry_date.isoformat()}: {entry.text}"
        return entry.text

    blocks: list[str] = []
    for section in document.sections:
        parts = [f"## {section.heading}", *map(render_line, section.lines)]
        block = "\n".join(parts).strip()
        if block:
            blocks.append(block)
    return "\n\n".join(blocks).strip()
|
||||
|
||||
|
||||
def extract_headings(memory: str | None) -> set[str]:
    """Return the normalised headings of all explicit sections in *memory*."""
    found: set[str] = set()
    for section in parse_memory_document(memory).sections:
        if section.explicit_heading:
            found.add(normalize_heading(section.heading))
    return found
|
||||
|
||||
|
||||
def has_explicit_heading(content: str) -> bool:
    """Return True when *content* parses to at least one explicit ``##`` section."""
    document = parse_memory_document(content)
    return document.has_explicit_heading
|
||||
|
||||
|
||||
def nonstandard_bullets(content: str) -> list[str]:
    """Return one warning per ``- `` bullet that ``parse_bullet_line`` rejects.

    Each warning quotes the bullet, truncated to 80 characters with a trailing
    ``...`` when it is longer.
    """
    preview_limit = 80
    candidates = (raw.strip() for raw in content.splitlines())
    return [
        f"Non-standard memory bullet: {item[:preview_limit]}"
        f"{'...' if len(item) > preview_limit else ''}"
        for item in candidates
        if item.startswith("- ") and parse_bullet_line(item) is None
    ]
|
||||
20
surfsense_backend/app/services/memory/prompts.py
Normal file
20
surfsense_backend/app/services/memory/prompts.py
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
"""Prompts used by the memory service."""
|
||||
|
||||
FORCED_REWRITE_PROMPT = """\
|
||||
You are a memory curator. The following memory document exceeds the character \
|
||||
limit and must be shortened.
|
||||
|
||||
RULES:
|
||||
1. Rewrite the document to be under {target} characters.
|
||||
2. Output Markdown only. Use clear `##` headings and concise bullet points.
|
||||
3. New-format bullets should look like: `- YYYY-MM-DD: memory text`.
|
||||
4. If the input contains legacy markers like `(YYYY-MM-DD) [fact]`, preserve the
|
||||
information but remove the inline marker in the output.
|
||||
5. Preserve durable instructions and preferences before generic facts when
|
||||
compressing personal memory.
|
||||
6. Preserve existing headings when useful; merge duplicate headings and bullets.
|
||||
7. Output ONLY the consolidated markdown — no explanations, no wrapping.
|
||||
|
||||
<memory_document>
|
||||
{content}
|
||||
</memory_document>"""
|
||||
35
surfsense_backend/app/services/memory/rewrite.py
Normal file
35
surfsense_backend/app/services/memory/rewrite.py
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
"""LLM-backed memory rewrite helpers."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from langchain_core.messages import HumanMessage
|
||||
|
||||
from app.services.memory.prompts import FORCED_REWRITE_PROMPT
|
||||
from app.services.memory.validation import MEMORY_HARD_LIMIT
|
||||
from app.utils.content_utils import extract_text_content
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def forced_rewrite(content: str, llm: Any) -> str | None:
    """Ask *llm* to compress *content* to fit ``MEMORY_HARD_LIMIT``.

    Returns the stripped rewritten text, or ``None`` when the call raises or
    the model returns no text. Failures are logged, never propagated.
    """
    try:
        request = HumanMessage(
            content=FORCED_REWRITE_PROMPT.format(
                target=MEMORY_HARD_LIMIT,
                content=content,
            )
        )
        reply = await llm.ainvoke(
            [request],
            config={"tags": ["surfsense:internal", "memory-rewrite"]},
        )
        rewritten = extract_text_content(reply.content).strip()
    except Exception:
        logger.exception("Forced memory rewrite LLM call failed")
        return None

    if rewritten:
        return rewritten
    logger.warning("Forced memory rewrite returned empty text")
    return None
|
||||
19
surfsense_backend/app/services/memory/schemas.py
Normal file
19
surfsense_backend/app/services/memory/schemas.py
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
"""Schemas for memory API responses and structured extraction."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class MemoryLimits(BaseModel):
    """Canonical memory size limits exposed to clients.

    Both values are character counts; the service fills them from
    ``MEMORY_SOFT_LIMIT`` and ``MEMORY_HARD_LIMIT``.
    """

    soft: int  # size above which a save returns a consolidation warning
    hard: int  # size above which a save is rejected
|
||||
|
||||
|
||||
class MemoryRead(BaseModel):
    """Memory document payload returned by user and team memory APIs."""

    memory_md: str  # the stored markdown memory document
    limits: MemoryLimits  # size limits that apply to the document
|
||||
247
surfsense_backend/app/services/memory/service.py
Normal file
247
surfsense_backend/app/services/memory/service.py
Normal file
|
|
@ -0,0 +1,247 @@
|
|||
"""Canonical read/write/reset/extract service for markdown memory."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from enum import StrEnum
|
||||
from typing import Any, Literal
|
||||
from uuid import UUID
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.db import SearchSpace, User
|
||||
from app.services.memory.document import parse_memory_document, render_memory_document
|
||||
from app.services.memory.rewrite import forced_rewrite
|
||||
from app.services.memory.schemas import MemoryLimits
|
||||
from app.services.memory.validation import (
|
||||
MEMORY_HARD_LIMIT,
|
||||
MEMORY_SOFT_LIMIT,
|
||||
soft_limit_warning,
|
||||
strip_preamble_to_first_heading,
|
||||
validate_bullet_format,
|
||||
validate_diff,
|
||||
validate_heading_sanity,
|
||||
validate_memory_scope,
|
||||
validate_memory_size,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Payloads that mean "leave memory unchanged". save_memory compares the
# upper-cased, preamble-stripped content against this set and returns a
# ``no_op`` result on a match.
_NO_UPDATE_SENTINELS = frozenset(
    {
        "NO_UPDATE",
        "NO UPDATE",
        "NO_CHANGE",
        "NO CHANGE",
    }
)
|
||||
|
||||
|
||||
class MemoryScope(StrEnum):
    """Which memory document an operation targets."""

    USER = "user"  # a User row's ``memory_md``
    TEAM = "team"  # a SearchSpace row's ``shared_memory_md``
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class SaveResult:
    """Outcome of a memory save attempt, including any warnings raised."""

    status: Literal["saved", "error", "no_op"]
    message: str
    memory_md: str = ""
    warnings: list[str] = field(default_factory=list)
    diff_warnings: list[str] = field(default_factory=list)
    format_warnings: list[str] = field(default_factory=list)
    notice: str | None = None

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict form that omits empty optional fields.

        ``status``, ``message`` and ``memory_md`` are always present. A single
        warning is additionally exposed under the ``warning`` key.
        """
        payload: dict[str, Any] = {
            "status": self.status,
            "message": self.message,
            "memory_md": self.memory_md,
        }
        if self.notice:
            payload["notice"] = self.notice
        if self.warnings:
            payload["warnings"] = self.warnings
            if len(self.warnings) == 1:
                payload["warning"] = self.warnings[0]
        for key in ("diff_warnings", "format_warnings"):
            values = getattr(self, key)
            if values:
                payload[key] = values
        return payload
|
||||
|
||||
|
||||
def memory_limits() -> MemoryLimits:
    """Return the soft and hard memory size limits as a ``MemoryLimits`` model."""
    limits = MemoryLimits(soft=MEMORY_SOFT_LIMIT, hard=MEMORY_HARD_LIMIT)
    return limits
|
||||
|
||||
|
||||
def _normalize_scope(scope: MemoryScope | str) -> MemoryScope:
    """Coerce *scope* to a ``MemoryScope``; unknown strings raise ``ValueError``."""
    if isinstance(scope, MemoryScope):
        return scope
    return MemoryScope(scope)
|
||||
|
||||
|
||||
def _normalize_user_id(target_id: str | UUID) -> UUID:
|
||||
return UUID(target_id) if isinstance(target_id, str) else target_id
|
||||
|
||||
|
||||
async def _load_target(
    *,
    scope: MemoryScope | str,
    target_id: str | int | UUID,
    session: AsyncSession,
) -> User | SearchSpace | None:
    """Fetch the row that owns the memory for *scope*.

    User scope looks up ``User`` by UUID; any other scope looks up
    ``SearchSpace`` by integer id. Returns ``None`` when no row matches.
    """
    if _normalize_scope(scope) is MemoryScope.USER:
        query = select(User).where(User.id == _normalize_user_id(target_id))  # type: ignore[arg-type]
    else:
        query = select(SearchSpace).where(SearchSpace.id == int(target_id))
    rows = await session.execute(query)
    return rows.scalars().first()
|
||||
|
||||
|
||||
def _get_memory(target: User | SearchSpace, scope: MemoryScope) -> str:
    """Read the memory markdown from *target*; missing or empty values give ``""``."""
    attribute = "memory_md" if scope is MemoryScope.USER else "shared_memory_md"
    return getattr(target, attribute, None) or ""
|
||||
|
||||
|
||||
def _set_memory(target: User | SearchSpace, scope: MemoryScope, content: str) -> None:
    """Assign *content* to the memory attribute that matches *scope*."""
    attribute = "memory_md" if scope is MemoryScope.USER else "shared_memory_md"
    setattr(target, attribute, content)
|
||||
|
||||
|
||||
async def read_memory(
    *,
    scope: MemoryScope | str,
    target_id: str | int | UUID,
    session: AsyncSession,
) -> str:
    """Return the stored memory markdown, or ``""`` when the target is missing."""
    resolved_scope = _normalize_scope(scope)
    owner = await _load_target(
        scope=resolved_scope,
        target_id=target_id,
        session=session,
    )
    return "" if owner is None else _get_memory(owner, resolved_scope)
|
||||
|
||||
|
||||
async def save_memory(
    *,
    scope: MemoryScope | str,
    target_id: str | int | UUID,
    content: str,
    session: AsyncSession,
    llm: Any | None = None,
) -> SaveResult:
    """Validate, canonicalise and persist a markdown memory document.

    Steps, in order:

    1. Load the target row; a missing row yields an ``error`` result.
    2. Strip surrounding whitespace and any preamble before the first ``##``
       heading; a no-update sentinel yields a ``no_op`` result.
    3. If the content exceeds ``MEMORY_HARD_LIMIT`` and *llm* is given, try a
       forced rewrite and keep it only when it is shorter.
    4. Run size, heading and scope validation; the first failure yields an
       ``error`` result carrying the previously stored memory.
    5. Re-render the content in canonical form and commit it. Non-empty input
       that canonicalises to nothing is rejected instead of being stored.

    Args:
        scope: ``MemoryScope`` or its string value.
        target_id: User UUID (user scope) or search-space id (team scope).
        content: New memory markdown; ``""`` clears the memory.
        session: Session used to load and commit the target row.
        llm: Optional model used for the forced rewrite.

    Returns:
        A ``SaveResult`` with status ``saved``, ``no_op`` or ``error``.
    """
    normalized = _normalize_scope(scope)
    if not isinstance(content, str):
        return SaveResult(
            status="error",
            message="Internal error: memory payload must be a string.",
        )

    target = await _load_target(scope=normalized, target_id=target_id, session=session)
    if target is None:
        return SaveResult(
            status="error",
            message="User not found."
            if normalized is MemoryScope.USER
            else "Search space not found.",
        )

    old_memory = _get_memory(target, normalized)
    next_content = strip_preamble_to_first_heading(content.strip())
    notice: str | None = None
    warnings: list[str] = []

    if next_content.upper() in _NO_UPDATE_SENTINELS:
        return SaveResult(
            status="no_op",
            message="No memory update requested.",
            memory_md=old_memory,
        )

    if len(next_content) > MEMORY_HARD_LIMIT and llm is not None:
        rewritten = await forced_rewrite(next_content, llm)
        # Only accept the rewrite when it actually reduced the size.
        if rewritten is not None and len(rewritten) < len(next_content):
            next_content = strip_preamble_to_first_heading(rewritten)
            notice = "Memory was automatically rewritten to fit within limits."

    for validation in (
        validate_memory_size(next_content),
        validate_heading_sanity(next_content),
    ):
        if validation:
            return SaveResult(
                status="error",
                message=validation["message"],
                memory_md=old_memory,
            )

    scope_error, scope_warnings = validate_memory_scope(
        next_content,
        normalized.value,
        old_memory=old_memory,
    )
    warnings.extend(scope_warnings)
    if scope_error:
        return SaveResult(
            status="error",
            message=scope_error["message"],
            memory_md=old_memory,
            warnings=warnings,
        )

    canonical = render_memory_document(parse_memory_document(next_content))
    if next_content and not canonical:
        # Short heading-less text passes validate_heading_sanity, but the
        # parser drops every line that is neither inside a section nor a dated
        # bullet. Persisting the empty result would silently erase the stored
        # memory while reporting success, so reject the payload instead.
        return SaveResult(
            status="error",
            message="Memory must be markdown with at least one ## heading.",
            memory_md=old_memory,
            warnings=warnings,
        )
    next_content = canonical

    try:
        _set_memory(target, normalized, next_content)
        session.add(target)
        await session.commit()
    except Exception as e:
        logger.exception("Failed to update %s memory: %s", normalized.value, e)
        await session.rollback()
        return SaveResult(
            status="error",
            message=f"Failed to update {normalized.value} memory: {e}",
            memory_md=old_memory,
        )

    # Post-save checks are advisory: they only add warnings to the result.
    diff_warnings = validate_diff(old_memory, next_content)
    format_warnings = validate_bullet_format(next_content)
    warning = soft_limit_warning(next_content)
    if warning:
        warnings.append(warning)

    return SaveResult(
        status="saved",
        message=(
            "Memory updated."
            if normalized is MemoryScope.USER
            else "Team memory updated."
        ),
        memory_md=next_content,
        warnings=warnings,
        diff_warnings=diff_warnings,
        format_warnings=format_warnings,
        notice=notice,
    )
|
||||
|
||||
|
||||
async def reset_memory(
    *,
    scope: MemoryScope | str,
    target_id: str | int | UUID,
    session: AsyncSession,
) -> SaveResult:
    """Clear the target's memory by saving an empty document without an LLM."""
    cleared = await save_memory(
        scope=scope,
        target_id=target_id,
        content="",
        session=session,
        llm=None,
    )
    return cleared
|
||||
140
surfsense_backend/app/services/memory/validation.py
Normal file
140
surfsense_backend/app/services/memory/validation.py
Normal file
|
|
@ -0,0 +1,140 @@
|
|||
"""Validation helpers for markdown-backed memory."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Literal
|
||||
|
||||
from app.services.memory.document import (
|
||||
extract_headings,
|
||||
has_explicit_heading,
|
||||
nonstandard_bullets,
|
||||
parse_memory_document,
|
||||
)
|
||||
|
||||
# Character count above which soft_limit_warning asks for consolidation.
MEMORY_SOFT_LIMIT = 18_000
# Character count above which validate_memory_size rejects the document.
MEMORY_HARD_LIMIT = 25_000

# Normalised headings that validate_memory_scope refuses to let team memory
# newly introduce (already-present ones only produce a warning).
_FORBIDDEN_TEAM_HEADINGS = {
    "preferences",
    "instructions",
    "personal notes",
    "personal instructions",
}
|
||||
|
||||
|
||||
def has_markdown_heading(content: str) -> bool:
    """Return True when *content* contains an explicit ``##`` section heading."""
    found = has_explicit_heading(content)
    return found
|
||||
|
||||
|
||||
def strip_preamble_to_first_heading(content: str) -> str:
    """Drop everything before the first ``## `` heading that has a title.

    When no such heading exists the content is returned unchanged apart from
    stripping surrounding whitespace.
    """
    rows = content.splitlines()
    start = next(
        (
            position
            for position, row in enumerate(rows)
            if row.startswith("## ") and row[3:].strip()
        ),
        None,
    )
    if start is None:
        return content.strip()
    return "\n".join(rows[start:]).strip()
|
||||
|
||||
|
||||
def validate_memory_size(content: str) -> dict[str, str] | None:
    """Return an error dict when *content* exceeds ``MEMORY_HARD_LIMIT``, else None."""
    size = len(content)
    if size <= MEMORY_HARD_LIMIT:
        return None

    message = (
        f"Memory exceeds {MEMORY_HARD_LIMIT:,} character limit "
        f"({size:,} chars). Consolidate by merging related items, "
        "removing outdated entries, and shortening descriptions."
    )
    return {"status": "error", "message": message}
|
||||
|
||||
|
||||
def validate_heading_sanity(content: str) -> dict[str, str] | None:
    """Reject longer heading-less content unless it parses as legacy bullets.

    Content passes when it is blank, has an explicit ``##`` heading, is at most
    40 characters long, or parses into at least one section.
    """
    body = content.strip()
    acceptable = (
        not body
        or has_markdown_heading(body)
        or len(body) <= 40
        or bool(parse_memory_document(body).sections)
    )
    if acceptable:
        return None
    return {
        "status": "error",
        "message": "Memory must be markdown with at least one ## heading.",
    }
|
||||
|
||||
|
||||
def validate_memory_scope(
    content: str,
    scope: Literal["user", "team"],
    *,
    old_memory: str | None = None,
) -> tuple[dict[str, str] | None, list[str]]:
    """Check team memory for personal headings.

    Forbidden headings already present in *old_memory* only produce a warning;
    forbidden headings that *content* newly introduces produce an error. Any
    scope other than ``"team"`` is not checked.

    Returns:
        ``(error, warnings)`` where *error* is ``None`` when the content is
        acceptable.
    """
    if scope != "team":
        return None, []

    before = extract_headings(old_memory) & _FORBIDDEN_TEAM_HEADINGS
    after = extract_headings(content) & _FORBIDDEN_TEAM_HEADINGS

    notes: list[str] = []
    kept = sorted(after & before)
    if kept:
        notes.append(
            "Team memory contains legacy personal headings: "
            + ", ".join(kept)
            + ". Please consolidate them into team-safe headings."
        )

    added = sorted(after - before)
    if not added:
        return None, notes

    error = {
        "status": "error",
        "message": (
            "Team memory cannot introduce personal headings: "
            + ", ".join(added)
            + ". Use team-safe headings instead."
        ),
    }
    return error, notes
|
||||
|
||||
|
||||
def validate_bullet_format(content: str) -> list[str]:
    """Return warnings for bullets in *content* that are not in a recognised form."""
    bullet_warnings = nonstandard_bullets(content)
    return bullet_warnings
|
||||
|
||||
|
||||
def validate_diff(old_memory: str | None, new_memory: str) -> list[str]:
    """Return warnings about suspicious differences between two memory versions.

    Warns when explicit headings disappeared and when the new text is shorter
    than 40% of the old one. An empty or missing *old_memory* never warns.
    """
    if not old_memory:
        return []

    notes: list[str] = []

    removed = extract_headings(old_memory) - extract_headings(new_memory)
    if removed:
        names = ", ".join(sorted(removed))
        notes.append(
            f"Sections removed: {names}. If unintentional, restore them from the memory document."
        )

    # old_memory is non-empty here, so its length is always positive.
    old_len, new_len = len(old_memory), len(new_memory)
    if new_len < old_len * 0.4:
        notes.append(
            f"Memory shrank significantly ({old_len:,} -> {new_len:,} chars). Possible data loss."
        )
    return notes
|
||||
|
||||
|
||||
def soft_limit_warning(content: str) -> str | None:
    """Return a consolidation hint when *content* exceeds ``MEMORY_SOFT_LIMIT``."""
    size = len(content)
    if size <= MEMORY_SOFT_LIMIT:
        return None
    return (
        f"Memory is at {size:,}/{MEMORY_HARD_LIMIT:,} characters. "
        "Consolidate by merging related items and removing less important entries."
    )
|
||||
Loading…
Add table
Add a link
Reference in a new issue