feat: update memory extraction and management protocols to enforce structured bullet formats, utilize user first names, and enhance validation for team and user memory entries

This commit is contained in:
Anish Sarkar 2026-04-10 04:36:51 +05:30
parent ad2a981a77
commit b8e1c9801b
8 changed files with 215 additions and 101 deletions

View file

@ -34,13 +34,12 @@ info, things that only matter for the current task.
If the message contains memorizable information, output the FULL updated \
memory document with the new facts merged into the existing content. Follow \
these rules:
- Use the same ## section structure as the existing memory.
- Preserve any existing ## headings; create new ones if useful.
- Keep entries as single concise bullet points (under 120 chars each).
- Every bullet MUST start with a (YYYY-MM-DD) date prefix.
- Every bullet MUST use format: - (YYYY-MM-DD) [fact|pref|instr] text
[fact] = durable facts, [pref] = preferences, [instr] = standing instructions.
- If a new fact contradicts an existing entry, update the existing entry.
- Do not duplicate information that is already present.
- Standard sections: \
"## About the user", "## Preferences", "## Instructions"
If nothing is worth remembering, output exactly: NO_UPDATE
@ -77,16 +76,13 @@ NOT worth remembering:
If the message contains memorizable team information, output the FULL updated \
team memory document with new facts merged into existing content. Follow these rules:
- Use the same ## section structure as the existing memory.
- Preserve any existing ## headings; create new ones if useful.
- Keep entries as single concise bullet points (under 120 chars each).
- Every bullet MUST start with a (YYYY-MM-DD) date prefix.
- Every bullet MUST use format: - (YYYY-MM-DD) [fact] text
Team memory uses ONLY the [fact] marker. Never use [pref] or [instr].
- If a new fact contradicts an existing entry, update the existing entry.
- Do not duplicate existing information.
- NEVER use personal sections like "## About the user", "## Preferences", \
or "## Instructions".
- Preserve neutral team phrasing; avoid person-specific memory unless role-anchored.
- Standard sections: "## Team decisions", "## Team conventions", \
"## Key facts", "## Current priorities"
If nothing is worth remembering, output exactly: NO_UPDATE

View file

@ -281,18 +281,16 @@ _MEMORY_TOOL_INSTRUCTIONS: dict[str, dict[str, str]] = {
- updated_memory: The FULL updated markdown document (not a diff).
Merge new facts with existing ones, update contradictions, remove outdated entries.
Treat every update as a curation pass — consolidate, don't just append.
- Every bullet MUST start with a (YYYY-MM-DD) date prefix indicating when it was recorded or last updated.
- Every bullet MUST use this format: - (YYYY-MM-DD) [marker] text
Markers:
[fact] durable facts (role, background, projects, tools, expertise)
[pref] preferences (response style, languages, formats, tools)
[instr] standing instructions (always/never do, response rules)
- Keep it concise and well under the character limit shown in <user_memory>.
- You MUST organize memory using these standard sections (add new `##` sections only if none of the standard ones fit):
## About the user
## Preferences
## Instructions
- Section guidance:
* About the user: role, background, company, durable identity context
* Preferences: languages, tools, frameworks, response style preferences
* Instructions: standing instructions, things to always/never do
- Use any `##` heading that fits. Headings are optional and freeform — organize
however makes sense for the content (e.g. ## Work, ## Research, ## Personal).
- Each entry MUST be a single bullet point. Keep entries concise (aim for under 120 chars each).
- During consolidation, prioritize keeping: identity/instructions > preferences.
- During consolidation, prioritize keeping: [instr] > [pref] > [fact].
""",
"shared": """
- update_memory: Update the team's shared memory document for this search space.
@ -311,18 +309,11 @@ _MEMORY_TOOL_INSTRUCTIONS: dict[str, dict[str, str]] = {
- updated_memory: The FULL updated markdown document (not a diff).
Merge new facts with existing ones, update contradictions, remove outdated entries.
Treat every update as a curation pass — consolidate, don't just append.
- Every bullet MUST start with a (YYYY-MM-DD) date prefix indicating when it was recorded or last updated.
- Every bullet MUST use this format: - (YYYY-MM-DD) [fact] text
Team memory uses ONLY the [fact] marker. Never use [pref] or [instr] in team memory.
- Keep it concise and well under the character limit shown in <team_memory>.
- You MUST organize memory using these standard sections (add new `##` sections only if none of the standard ones fit):
## Team decisions
## Conventions
## Key facts
## Current priorities
- Section guidance:
* Team decisions: agreed choices and durable technical/product decisions
* Conventions: coding standards, tools, processes, naming patterns
* Key facts: stable facts about org/team/system setup
* Current priorities: active projects, near-term goals, important blockers
- Use any `##` heading that fits. Headings are optional and freeform — organize
however makes sense for the content (e.g. ## Decisions, ## Architecture, ## Process).
- Each entry MUST be a single bullet point. Keep entries concise (aim for under 120 chars each).
- During consolidation, prioritize keeping: decisions/conventions > key facts > current priorities.
""",
@ -334,24 +325,27 @@ _MEMORY_TOOL_EXAMPLES: dict[str, dict[str, str]] = {
"private": """
- <user_memory> is empty. User: "I'm a space enthusiast, explain astrophage to me"
- The user casually shared a durable fact about themselves. Save it:
update_memory(updated_memory="## About the user\\n- (2025-03-15) Space enthusiast\\n")
update_memory(updated_memory="- (2025-03-15) [fact] Space enthusiast\\n")
- User: "Remember that I prefer concise answers over detailed explanations"
- Durable preference. You see the current <user_memory> and merge:
update_memory(updated_memory="## About the user\\n- (2025-03-15) Space enthusiast\\n\\n## Preferences\\n- (2025-03-15) Prefers concise answers over detailed explanations\\n...")
- Durable preference. Merge with existing memory:
update_memory(updated_memory="- (2025-03-15) [fact] Space enthusiast\\n- (2025-03-15) [pref] Prefers concise answers over detailed explanations\\n")
- User: "I actually moved to Tokyo last month"
- Updated fact, date prefix reflects when recorded:
update_memory(updated_memory="## About the user\\n- (2025-03-15) Lives in Tokyo (previously London)\\n...")
update_memory(updated_memory="- (2025-03-15) [fact] Lives in Tokyo (previously London)\\n...")
- User: "I'm a freelance photographer working on a nature documentary"
- Durable background info. Save it under About the user:
update_memory(updated_memory="## About the user\\n- (2025-03-15) Freelance photographer\\n- (2025-03-15) Working on a nature documentary\\n")
- Durable background info:
update_memory(updated_memory="- (2025-03-15) [fact] Freelance photographer\\n- (2025-03-15) [fact] Working on a nature documentary\\n")
- User: "Always respond in bullet points"
- Standing instruction:
update_memory(updated_memory="...\\n- (2025-03-15) [instr] Always respond in bullet points\\n")
""",
"shared": """
- User: "Let's remember that we decided to do weekly standup meetings on Mondays"
- Durable team decision:
update_memory(updated_memory="## Team decisions\\n- (2025-03-15) Weekly standup meetings on Mondays\\n...")
update_memory(updated_memory="- (2025-03-15) [fact] Weekly standup meetings on Mondays\\n...")
- User: "Our office is in downtown Seattle, 5th floor"
- Durable team fact:
update_memory(updated_memory="## Key facts\\n- (2025-03-15) Office location: downtown Seattle, 5th floor\\n...")
update_memory(updated_memory="- (2025-03-15) [fact] Office location: downtown Seattle, 5th floor\\n...")
""",
},
}

View file

@ -36,13 +36,11 @@ MEMORY_HARD_LIMIT = 25_000
_SECTION_HEADING_RE = re.compile(r"^##\s+(.+)$", re.MULTILINE)
_HEADING_NORMALIZE_RE = re.compile(r"\s+")
_USER_ONLY_HEADINGS = {"about the user", "preferences", "instructions"}
_TEAM_ONLY_HEADINGS = {
"team decisions",
"conventions",
"key facts",
"current priorities",
}
_MARKER_RE = re.compile(r"\[(fact|pref|instr)\]")
_BULLET_FORMAT_RE = re.compile(
r"^- \(\d{4}-\d{2}-\d{2}\) \[(fact|pref|instr)\] .+$"
)
_PERSONAL_ONLY_MARKERS = {"pref", "instr"}
# ---------------------------------------------------------------------------
@ -63,37 +61,40 @@ def _normalize_heading(heading: str) -> str:
def _validate_memory_scope(
content: str, scope: Literal["user", "team"]
) -> dict[str, Any] | None:
"""Reject cross-scope headings (user sections in team memory and vice versa)."""
headings = {_normalize_heading(h) for h in _extract_headings(content)}
if not headings:
"""Reject personal-only markers ([pref], [instr]) in team memory."""
if scope != "team":
return None
if scope == "team":
leaked = sorted(headings & _USER_ONLY_HEADINGS)
if leaked:
return {
"status": "error",
"message": (
"Team memory cannot include personal sections: "
+ ", ".join(leaked)
+ ". Use team sections only."
),
}
return None
leaked = sorted(headings & _TEAM_ONLY_HEADINGS)
markers = set(_MARKER_RE.findall(content))
leaked = sorted(markers & _PERSONAL_ONLY_MARKERS)
if leaked:
tags = ", ".join(f"[{m}]" for m in leaked)
return {
"status": "error",
"message": (
"User memory cannot include team sections: "
+ ", ".join(leaked)
+ ". Use personal sections only."
f"Team memory cannot include personal markers: {tags}. "
"Use [fact] only in team memory."
),
}
return None
def _validate_bullet_format(content: str) -> list[str]:
    """Collect a warning for every bullet line that breaks the memory bullet format.

    A line counts as a bullet when, after trimming surrounding whitespace, it
    begins with ``"- "``. Each bullet is matched against ``_BULLET_FORMAT_RE``
    (``- (YYYY-MM-DD) [fact|pref|instr] text``); headings, blank lines and
    other non-bullet lines are never inspected.

    Args:
        content: The memory document to inspect.

    Returns:
        One ``"Malformed bullet: ..."`` message per offending bullet, in
        document order. The quoted bullet is cut to 80 characters and suffixed
        with ``"..."`` when it is longer. Empty when every bullet conforms.
    """
    bullets = (
        candidate
        for candidate in (raw.strip() for raw in content.splitlines())
        if candidate.startswith("- ")
    )
    problems: list[str] = []
    for bullet in bullets:
        if _BULLET_FORMAT_RE.match(bullet):
            continue
        # Keep the warning short so a long malformed line cannot bloat the response.
        preview = bullet[:80]
        if len(bullet) > 80:
            preview += "..."
        problems.append(f"Malformed bullet: {preview}")
    return problems
def _validate_diff(old_memory: str | None, new_memory: str) -> list[str]:
"""Return a list of warning strings about suspicious changes."""
if not old_memory:
@ -163,13 +164,11 @@ limit and must be shortened.
RULES:
1. Rewrite the document to be under {target} characters.
2. Preserve all ## section headings.
3. Priority for keeping content: identity/instructions > preferences > \
current context.
2. Preserve any existing ## headings.
3. Priority for keeping content: [instr] > [pref] > [fact].
4. Merge duplicate entries, remove outdated entries, shorten verbose descriptions.
5. Each entry must be a single bullet point.
6. Every bullet MUST keep its (YYYY-MM-DD) date prefix.
7. Output ONLY the consolidated markdown no explanations, no wrapping.
5. Every bullet MUST have format: - (YYYY-MM-DD) [fact|pref|instr] text
6. Output ONLY the consolidated markdown — no explanations, no wrapping.
<memory_document>
{content}
@ -275,6 +274,10 @@ async def _save_memory(
if diff_warnings:
resp["diff_warnings"] = diff_warnings
format_warnings = _validate_bullet_format(content)
if format_warnings:
resp["format_warnings"] = format_warnings
warning = _soft_warning(content)
if warning:
resp["warning"] = warning

View file

@ -41,12 +41,13 @@ Apply the user's instruction to the existing memory document and output the \
FULL updated document.
RULES:
1. If the instruction asks to add something, add it in the appropriate \
## section with a (YYYY-MM-DD) date prefix using today's date.
1. If the instruction asks to add something, add it with format: \
- (YYYY-MM-DD) [fact|pref|instr] text, under an existing or new ## heading.
2. If the instruction asks to remove something, remove the matching entry.
3. If the instruction asks to change something, update the matching entry.
4. Preserve the existing ## section structure and all other entries.
5. Output ONLY the updated markdown no explanations, no wrapping.
4. Preserve existing ## headings and all other entries.
5. Every bullet must include a marker: [fact], [pref], or [instr].
6. Output ONLY the updated markdown — no explanations, no wrapping.
<current_memory>
{current_memory}

View file

@ -51,13 +51,12 @@ team's shared memory document. Apply the user's instruction to the existing \
memory document and output the FULL updated document.
RULES:
1. If the instruction asks to add something, add it in the appropriate \
## section with a (YYYY-MM-DD) date prefix using today's date.
1. If the instruction asks to add something, add it with format: \
- (YYYY-MM-DD) [fact] text, under an existing or new ## heading.
2. If the instruction asks to remove something, remove the matching entry.
3. If the instruction asks to change something, update the matching entry.
4. Preserve the existing ## section structure and all other entries.
5. NEVER use personal sections like "## About the user", "## Preferences", or
"## Instructions". Team memory must stay team-scoped.
4. Preserve existing ## headings and all other entries.
5. NEVER use [pref] or [instr] markers. Team memory uses [fact] only.
6. Output ONLY the updated markdown — no explanations, no wrapping.
<current_memory>

View file

@ -1,8 +1,12 @@
"""Unit tests for memory scope validation."""
"""Unit tests for memory scope validation and bullet format validation."""
import pytest
from app.agents.new_chat.tools.update_memory import _save_memory, _validate_memory_scope
from app.agents.new_chat.tools.update_memory import (
_save_memory,
_validate_bullet_format,
_validate_memory_scope,
)
pytestmark = pytest.mark.unit
@ -23,34 +27,132 @@ class _Recorder:
self.rollback_calls += 1
def test_validate_memory_scope_rejects_user_sections_in_team_scope() -> None:
content = "## About the user\n- (2026-04-10) Student studying DSA\n"
# ---------------------------------------------------------------------------
# _validate_memory_scope — marker-based
# ---------------------------------------------------------------------------
def test_validate_memory_scope_rejects_pref_marker_in_team_scope() -> None:
content = "- (2026-04-10) [pref] Prefers dark mode\n"
result = _validate_memory_scope(content, "team")
assert result is not None
assert result["status"] == "error"
assert "personal sections" in result["message"]
assert "[pref]" in result["message"]
def test_validate_memory_scope_rejects_team_sections_in_user_scope() -> None:
content = "## Team decisions\n- (2026-04-10) Python-first backend policy\n"
def test_validate_memory_scope_rejects_instr_marker_in_team_scope() -> None:
content = "- (2026-04-10) [instr] Always respond in Spanish\n"
result = _validate_memory_scope(content, "team")
assert result is not None
assert result["status"] == "error"
assert "[instr]" in result["message"]
def test_validate_memory_scope_rejects_both_personal_markers_in_team() -> None:
    """Team memory holding both [pref] and [instr] is rejected and both markers are named."""
    team_doc = "\n".join(
        [
            "- (2026-04-10) [pref] Prefers dark mode",
            "- (2026-04-10) [instr] Always respond in Spanish",
            "",  # keeps the trailing newline of the document
        ]
    )

    outcome = _validate_memory_scope(team_doc, "team")

    assert outcome is not None
    assert outcome["status"] == "error"
    message = outcome["message"]
    for tag in ("[instr]", "[pref]"):
        assert tag in message
def test_validate_memory_scope_allows_fact_in_team_scope() -> None:
    """A team document that only uses the [fact] marker passes scope validation."""
    team_doc = "- (2026-04-10) [fact] Office is in downtown Seattle\n"

    assert _validate_memory_scope(team_doc, "team") is None
def test_validate_memory_scope_allows_all_markers_in_user_scope() -> None:
content = (
"- (2026-04-10) [fact] Python developer\n"
"- (2026-04-10) [pref] Prefers concise answers\n"
"- (2026-04-10) [instr] Always use bullet points\n"
)
result = _validate_memory_scope(content, "user")
assert result is not None
assert result["status"] == "error"
assert "team sections" in result["message"]
assert result is None
def test_validate_memory_scope_normalizes_heading_case_and_spacing() -> None:
content = "## About The User \n- (2026-04-10) Student\n"
def test_validate_memory_scope_allows_any_heading_in_team() -> None:
content = (
"## Architecture\n"
"- (2026-04-10) [fact] Uses PostgreSQL for persistence\n"
)
result = _validate_memory_scope(content, "team")
assert result is not None
assert result["status"] == "error"
assert result is None
def test_validate_memory_scope_allows_any_heading_in_user() -> None:
    """A freeform ## heading in user memory does not trigger a scope error."""
    user_doc = "## My Projects\n- (2026-04-10) [fact] Working on SurfSense\n"

    assert _validate_memory_scope(user_doc, "user") is None
# ---------------------------------------------------------------------------
# _validate_bullet_format
# ---------------------------------------------------------------------------
def test_validate_bullet_format_passes_valid_bullets() -> None:
    """Well-formed bullets for every marker, under a heading, yield no warnings."""
    document = "\n".join(
        [
            "## Work",
            "- (2026-04-10) [fact] Senior Python developer",
            "- (2026-04-10) [pref] Prefers dark mode",
            "- (2026-04-10) [instr] Always respond in bullet points",
            "",  # keeps the trailing newline of the document
        ]
    )

    assert _validate_bullet_format(document) == []
def test_validate_bullet_format_warns_on_missing_marker() -> None:
    """A dated bullet without a [marker] produces exactly one malformed-bullet warning."""
    reported = _validate_bullet_format("- (2026-04-10) Senior Python developer\n")

    assert len(reported) == 1
    (only_warning,) = reported
    assert "Malformed bullet" in only_warning
def test_validate_bullet_format_warns_on_missing_date() -> None:
    """A bullet with a marker but no (YYYY-MM-DD) prefix produces exactly one warning."""
    reported = _validate_bullet_format("- [fact] Senior Python developer\n")

    assert len(reported) == 1
    (only_warning,) = reported
    assert "Malformed bullet" in only_warning
def test_validate_bullet_format_warns_on_unknown_marker() -> None:
    """A marker other than fact/pref/instr (here [context]) produces exactly one warning."""
    reported = _validate_bullet_format("- (2026-04-10) [context] Working on project X\n")

    assert len(reported) == 1
    (only_warning,) = reported
    assert "Malformed bullet" in only_warning
def test_validate_bullet_format_ignores_non_bullet_lines() -> None:
    """Headings and paragraph text are not bullets, so they never produce warnings."""
    document = "## Some Heading\nSome paragraph text\n"

    assert _validate_bullet_format(document) == []
def test_validate_bullet_format_warns_on_old_format_without_marker() -> None:
    """A date-only bullet under a heading is reported once; the heading itself is not."""
    legacy_doc = "## About the user\n- (2026-04-10) Likes cats\n"

    reported = _validate_bullet_format(legacy_doc)

    assert len(reported) == 1
# ---------------------------------------------------------------------------
# _save_memory — end-to-end with marker scope check
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_save_memory_blocks_cross_scope_write_before_commit() -> None:
async def test_save_memory_blocks_pref_in_team_before_commit() -> None:
recorder = _Recorder()
result = await _save_memory(
updated_memory="## About the user\n- (2026-04-10) Student\n",
updated_memory="- (2026-04-10) [pref] Prefers dark mode\n",
old_memory=None,
llm=None,
apply_fn=recorder.apply,
@ -65,9 +167,9 @@ async def test_save_memory_blocks_cross_scope_write_before_commit() -> None:
@pytest.mark.asyncio
async def test_save_memory_allows_valid_scope_and_commits() -> None:
async def test_save_memory_allows_fact_in_team_and_commits() -> None:
recorder = _Recorder()
content = "## Team decisions\n- (2026-04-10) Python-first backend policy\n"
content = "- (2026-04-10) [fact] Weekly standup on Mondays\n"
result = await _save_memory(
updated_memory=content,
old_memory=None,
@ -81,3 +183,22 @@ async def test_save_memory_allows_valid_scope_and_commits() -> None:
assert result["status"] == "saved"
assert recorder.commit_calls == 1
assert recorder.applied_content == content
@pytest.mark.asyncio
async def test_save_memory_includes_format_warnings() -> None:
    """A bullet missing its marker is still saved, and is reported under format_warnings."""
    recorder = _Recorder()
    malformed_doc = "- (2026-04-10) Missing marker text\n"

    outcome = await _save_memory(
        updated_memory=malformed_doc,
        old_memory=None,
        llm=None,
        apply_fn=recorder.apply,
        commit_fn=recorder.commit,
        rollback_fn=recorder.rollback,
        label="memory",
        scope="user",
    )

    # Format problems are advisory: the save goes through and the warning rides along.
    assert outcome["status"] == "saved"
    assert "format_warnings" in outcome
    format_warnings = outcome["format_warnings"]
    assert len(format_warnings) == 1

View file

@ -102,7 +102,7 @@ export function MemoryContent() {
}
};
const displayMemory = memory.replace(/\(\d{4}-\d{2}-\d{2}\)\s*/g, "");
// Hide the storage-only "(YYYY-MM-DD) [marker]" prefix when rendering memory.
// The marker part is optional so bullets stored in the earlier date-only form
// "(YYYY-MM-DD) text" still have their date prefix hidden.
const displayMemory = memory.replace(
  /\(\d{4}-\d{2}-\d{2}\)\s*(?:\[(?:fact|pref|instr)\]\s*)?/g,
  "",
);
const charCount = memory.length;
const getCounterColor = () => {

View file

@ -107,7 +107,7 @@ export function TeamMemoryManager({ searchSpaceId }: TeamMemoryManagerProps) {
}
};
const displayMemory = memory.replace(/\(\d{4}-\d{2}-\d{2}\)\s*/g, "");
// Hide the storage-only "(YYYY-MM-DD) [marker]" prefix when rendering team memory.
// The marker part is optional so bullets stored in the earlier date-only form
// "(YYYY-MM-DD) text" still have their date prefix hidden.
const displayMemory = memory.replace(
  /\(\d{4}-\d{2}-\d{2}\)\s*(?:\[(?:fact|pref|instr)\]\s*)?/g,
  "",
);
const charCount = memory.length;
const getCounterColor = () => {