mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-25 19:15:18 +02:00
feat: migrate legacy memory tables to markdown format and drop old structures
This commit is contained in:
parent
ac87f3fde0
commit
a5bf3af4f8
3 changed files with 250 additions and 108 deletions
|
|
@ -1,107 +0,0 @@
|
|||
"""Drop legacy user_memories and shared_memories tables
|
||||
|
||||
Revision ID: 122
|
||||
Revises: 121
|
||||
|
||||
The old row-per-fact memory system (user_memories, shared_memories tables and
|
||||
memorycategory enum) is replaced by memory_md / shared_memory_md TEXT columns
|
||||
added in migration 121.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Sequence
|
||||
|
||||
from alembic import op
|
||||
from app.config import config
|
||||
|
||||
revision: str = "122"
|
||||
down_revision: str | None = "121"
|
||||
branch_labels: str | Sequence[str] | None = None
|
||||
depends_on: str | Sequence[str] | None = None
|
||||
|
||||
EMBEDDING_DIM = config.embedding_model_instance.dimension
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Drop the legacy memory tables and the ``memorycategory`` enum type.

    The statements run in a fixed order: ``shared_memories``, then
    ``user_memories``, then the enum type. Each one uses ``IF EXISTS``.
    """
    drop_statements = (
        "DROP TABLE IF EXISTS shared_memories CASCADE;",
        "DROP TABLE IF EXISTS user_memories CASCADE;",
        "DROP TYPE IF EXISTS memorycategory;",
    )
    for statement in drop_statements:
        op.execute(statement)
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Recreate the legacy memory schema: enum type, both tables, their indexes.

    Only DDL is issued here; no rows are inserted into the recreated tables.
    Execution order is: enum type, ``user_memories`` and its indexes, then
    ``shared_memories`` and its indexes.
    """
    # The enum is created only when pg_type has no entry with that name.
    enum_sql = """
        DO $$
        BEGIN
            IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'memorycategory') THEN
                CREATE TYPE memorycategory AS ENUM (
                    'preference',
                    'fact',
                    'instruction',
                    'context'
                );
            END IF;
        END$$;
        """

    user_table_sql = f"""
        CREATE TABLE IF NOT EXISTS user_memories (
            id SERIAL PRIMARY KEY,
            created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
            user_id UUID NOT NULL REFERENCES "user"(id) ON DELETE CASCADE,
            search_space_id INTEGER REFERENCES searchspaces(id) ON DELETE CASCADE,
            memory_text TEXT NOT NULL,
            category memorycategory NOT NULL DEFAULT 'fact',
            embedding vector({EMBEDDING_DIM}),
            updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
        );
        """
    user_index_sql = (
        "CREATE INDEX IF NOT EXISTS ix_user_memories_user_id ON user_memories(user_id);",
        "CREATE INDEX IF NOT EXISTS ix_user_memories_search_space_id ON user_memories(search_space_id);",
        "CREATE INDEX IF NOT EXISTS ix_user_memories_updated_at ON user_memories(updated_at);",
        "CREATE INDEX IF NOT EXISTS ix_user_memories_category ON user_memories(category);",
        "CREATE INDEX IF NOT EXISTS ix_user_memories_user_search_space ON user_memories(user_id, search_space_id);",
        "CREATE INDEX IF NOT EXISTS user_memories_vector_index ON user_memories USING hnsw (embedding public.vector_cosine_ops);",
    )

    shared_table_sql = f"""
        CREATE TABLE IF NOT EXISTS shared_memories (
            id SERIAL PRIMARY KEY,
            created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
            updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
            search_space_id INTEGER NOT NULL REFERENCES searchspaces(id) ON DELETE CASCADE,
            created_by_id UUID NOT NULL REFERENCES "user"(id) ON DELETE CASCADE,
            memory_text TEXT NOT NULL,
            category memorycategory NOT NULL DEFAULT 'fact',
            embedding vector({EMBEDDING_DIM})
        );
        """
    shared_index_sql = (
        "CREATE INDEX IF NOT EXISTS ix_shared_memories_search_space_id ON shared_memories(search_space_id);",
        "CREATE INDEX IF NOT EXISTS ix_shared_memories_updated_at ON shared_memories(updated_at);",
        "CREATE INDEX IF NOT EXISTS ix_shared_memories_created_by_id ON shared_memories(created_by_id);",
        "CREATE INDEX IF NOT EXISTS shared_memories_vector_index ON shared_memories USING hnsw (embedding public.vector_cosine_ops);",
    )

    # Tables must exist before their indexes, and the enum before both tables.
    ordered_statements = [
        enum_sql,
        user_table_sql,
        *user_index_sql,
        shared_table_sql,
        *shared_index_sql,
    ]
    for statement in ordered_statements:
        op.execute(statement)
|
||||
|
|
@ -0,0 +1,249 @@
|
|||
"""Migrate row-per-fact memories to markdown, then drop legacy tables
|
||||
|
||||
Revision ID: 122
|
||||
Revises: 121
|
||||
|
||||
Converts user_memories rows into per-user markdown documents stored in
|
||||
user.memory_md, and shared_memories rows into per-search-space markdown
|
||||
stored in searchspaces.shared_memory_md. Then drops the old tables and
|
||||
the memorycategory enum.
|
||||
|
||||
The markdown format matches the new memory system:
|
||||
## Heading
|
||||
- (YYYY-MM-DD) [fact|pref|instr] memory text
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from collections import defaultdict
|
||||
from collections.abc import Sequence
|
||||
from uuid import UUID
|
||||
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy import inspect as sa_inspect
|
||||
|
||||
from alembic import op
|
||||
from app.config import config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
revision: str = "122"
|
||||
down_revision: str | None = "121"
|
||||
branch_labels: str | Sequence[str] | None = None
|
||||
depends_on: str | Sequence[str] | None = None
|
||||
|
||||
EMBEDDING_DIM = config.embedding_model_instance.dimension
|
||||
|
||||
# Legacy enum value -> inline marker written inside each bullet.
_CATEGORY_TO_MARKER = {
    "fact": "fact",
    "context": "fact",
    "preference": "pref",
    "instruction": "instr",
}

# Legacy enum value -> markdown section heading.
_CATEGORY_HEADING = {
    "fact": "Facts",
    "preference": "Preferences",
    "instruction": "Instructions",
    "context": "Context",
}

# Order in which sections are emitted.
_HEADING_ORDER = ["fact", "preference", "instruction", "context"]


def _build_markdown(rows: list[tuple]) -> str:
    """Build a markdown document from (memory_text, category, created_at) rows.

    Args:
        rows: Tuples of ``(memory_text, category, created_at)``.
            ``memory_text`` and ``category`` are passed through ``str``;
            ``created_at`` must provide ``strftime``.

    Returns:
        One ``## Heading`` section per category present, emitted in
        ``_HEADING_ORDER``. Each section lists its rows as
        ``- (YYYY-MM-DD) [marker] text`` bullets in input order. Sections
        are separated by a blank line and the document ends with exactly one
        newline. An empty ``rows`` yields ``"\\n"``.

    Rows whose category has no heading are filed under "Facts" rather than
    dropped, because the caller deletes the source tables afterwards.
    """
    fallback_category = "fact"
    by_category: dict[str, list[str]] = defaultdict(list)

    for memory_text, category, created_at in rows:
        cat = str(category)
        if cat not in _CATEGORY_HEADING:
            # An unrecognised category would otherwise land in a bucket that
            # is never rendered, silently losing the memory.
            cat = fallback_category
        marker = _CATEGORY_TO_MARKER.get(cat, "fact")
        date_str = created_at.strftime("%Y-%m-%d")
        # splitlines() also handles "\r\n" and "\r", so no stray carriage
        # return survives inside a single-line bullet.
        clean_text = " ".join(str(memory_text).splitlines()).strip()
        by_category[cat].append(f"- ({date_str}) [{marker}] {clean_text}")

    sections: list[str] = []
    for cat in _HEADING_ORDER:
        if cat in by_category:
            sections.append(f"## {_CATEGORY_HEADING[cat]}")
            sections.extend(by_category[cat])
            sections.append("")  # blank line between sections

    return "\n".join(sections).strip() + "\n"
|
||||
|
||||
|
||||
def _migrate_user_memories(conn: sa.engine.Connection) -> None:
    """Fold each user's ``user_memories`` rows into ``"user".memory_md``.

    Rows are read ordered by ``created_at`` and grouped per ``user_id``.
    A user whose ``memory_md`` already contains non-blank text is skipped;
    every other user receives the document built by ``_build_markdown``.
    """
    select_legacy = sa.text(
        "SELECT user_id, memory_text, category::text, created_at "
        "FROM user_memories ORDER BY created_at"
    )
    legacy_rows = conn.execute(select_legacy).fetchall()

    if not legacy_rows:
        logger.info("user_memories is empty, skipping data migration.")
        return

    # Group the (text, category, timestamp) triples by owning user.
    grouped: dict[UUID, list[tuple]] = defaultdict(list)
    for owner_id, text_value, category_name, created in legacy_rows:
        grouped[owner_id].append((text_value, category_name, created))

    select_current = sa.text('SELECT memory_md FROM "user" WHERE id = :uid')
    update_current = sa.text('UPDATE "user" SET memory_md = :md WHERE id = :uid')

    written = 0
    for owner_id, owner_rows in grouped.items():
        current = conn.execute(select_current, {"uid": owner_id}).scalar()

        # Never overwrite markdown that is already populated.
        if current and current.strip():
            logger.info("User %s already has memory_md, skipping.", owner_id)
            continue

        document = _build_markdown(owner_rows)
        conn.execute(update_current, {"md": document, "uid": owner_id})
        written += 1

    logger.info("Migrated user_memories for %d user(s).", written)
|
||||
|
||||
|
||||
def _migrate_shared_memories(conn: sa.engine.Connection) -> None:
    """Fold ``shared_memories`` rows into ``searchspaces.shared_memory_md``.

    Rows are read ordered by ``created_at`` and grouped per
    ``search_space_id``. A search space whose ``shared_memory_md`` already
    contains non-blank text is skipped; every other one receives the document
    built by ``_build_markdown``.
    """
    select_legacy = sa.text(
        "SELECT search_space_id, memory_text, category::text, created_at "
        "FROM shared_memories ORDER BY created_at"
    )
    legacy_rows = conn.execute(select_legacy).fetchall()

    if not legacy_rows:
        logger.info("shared_memories is empty, skipping data migration.")
        return

    # Group the (text, category, timestamp) triples by search space.
    grouped: dict[int, list[tuple]] = defaultdict(list)
    for target_space, text_value, category_name, created in legacy_rows:
        grouped[target_space].append((text_value, category_name, created))

    select_current = sa.text(
        "SELECT shared_memory_md FROM searchspaces WHERE id = :sid"
    )
    update_current = sa.text(
        "UPDATE searchspaces SET shared_memory_md = :md WHERE id = :sid"
    )

    written = 0
    for target_space, grouped_rows in grouped.items():
        current = conn.execute(select_current, {"sid": target_space}).scalar()

        # Never overwrite markdown that is already populated.
        if current and current.strip():
            logger.info("Search space %s already has shared_memory_md, skipping.", target_space)
            continue

        document = _build_markdown(grouped_rows)
        conn.execute(update_current, {"md": document, "sid": target_space})
        written += 1

    logger.info("Migrated shared_memories for %d search space(s).", written)
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Copy legacy memory rows into markdown columns, then drop the old schema.

    A data-migration step runs only when its source table is reported by the
    inspector. The drops always run afterwards and use ``IF EXISTS``.
    """
    bind = op.get_bind()
    present_tables = sa_inspect(bind).get_table_names()

    # User memories are migrated before shared memories.
    migration_steps = (
        ("user_memories", _migrate_user_memories),
        ("shared_memories", _migrate_shared_memories),
    )
    for table_name, migrate in migration_steps:
        if table_name in present_tables:
            migrate(bind)

    # The enum type is dropped last, after both tables.
    drop_statements = (
        "DROP TABLE IF EXISTS shared_memories CASCADE;",
        "DROP TABLE IF EXISTS user_memories CASCADE;",
        "DROP TYPE IF EXISTS memorycategory;",
    )
    for statement in drop_statements:
        op.execute(statement)
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Recreate the legacy memory schema: enum type, both tables, their indexes.

    Only DDL is issued here; the recreated tables are left empty and nothing
    is read back from the markdown columns. Execution order is: enum type,
    ``user_memories`` and its indexes, then ``shared_memories`` and its
    indexes.
    """
    # The enum is created only when pg_type has no entry with that name.
    enum_sql = """
        DO $$
        BEGIN
            IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'memorycategory') THEN
                CREATE TYPE memorycategory AS ENUM (
                    'preference',
                    'fact',
                    'instruction',
                    'context'
                );
            END IF;
        END$$;
        """

    user_table_sql = f"""
        CREATE TABLE IF NOT EXISTS user_memories (
            id SERIAL PRIMARY KEY,
            created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
            user_id UUID NOT NULL REFERENCES "user"(id) ON DELETE CASCADE,
            search_space_id INTEGER REFERENCES searchspaces(id) ON DELETE CASCADE,
            memory_text TEXT NOT NULL,
            category memorycategory NOT NULL DEFAULT 'fact',
            embedding vector({EMBEDDING_DIM}),
            updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
        );
        """
    user_index_sql = (
        "CREATE INDEX IF NOT EXISTS ix_user_memories_user_id ON user_memories(user_id);",
        "CREATE INDEX IF NOT EXISTS ix_user_memories_search_space_id ON user_memories(search_space_id);",
        "CREATE INDEX IF NOT EXISTS ix_user_memories_updated_at ON user_memories(updated_at);",
        "CREATE INDEX IF NOT EXISTS ix_user_memories_category ON user_memories(category);",
        "CREATE INDEX IF NOT EXISTS ix_user_memories_user_search_space ON user_memories(user_id, search_space_id);",
        "CREATE INDEX IF NOT EXISTS user_memories_vector_index ON user_memories USING hnsw (embedding public.vector_cosine_ops);",
    )

    shared_table_sql = f"""
        CREATE TABLE IF NOT EXISTS shared_memories (
            id SERIAL PRIMARY KEY,
            created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
            updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
            search_space_id INTEGER NOT NULL REFERENCES searchspaces(id) ON DELETE CASCADE,
            created_by_id UUID NOT NULL REFERENCES "user"(id) ON DELETE CASCADE,
            memory_text TEXT NOT NULL,
            category memorycategory NOT NULL DEFAULT 'fact',
            embedding vector({EMBEDDING_DIM})
        );
        """
    shared_index_sql = (
        "CREATE INDEX IF NOT EXISTS ix_shared_memories_search_space_id ON shared_memories(search_space_id);",
        "CREATE INDEX IF NOT EXISTS ix_shared_memories_updated_at ON shared_memories(updated_at);",
        "CREATE INDEX IF NOT EXISTS ix_shared_memories_created_by_id ON shared_memories(created_by_id);",
        "CREATE INDEX IF NOT EXISTS shared_memories_vector_index ON shared_memories USING hnsw (embedding public.vector_cosine_ops);",
    )

    # Tables must exist before their indexes, and the enum before both tables.
    ordered_statements = [
        enum_sql,
        user_table_sql,
        *user_index_sql,
        shared_table_sql,
        *shared_index_sql,
    ]
    for statement in ordered_statements:
        op.execute(statement)
|
||||
|
|
@ -190,7 +190,7 @@ export function MemoryContent() {
|
|||
value={editQuery}
|
||||
onChange={(e) => setEditQuery(e.target.value)}
|
||||
onKeyDown={handleKeyDown}
|
||||
placeholder="Tell SurfSense what to remember or forget..."
|
||||
placeholder="Tell SurfSense what to remember or forget"
|
||||
disabled={editing}
|
||||
className="flex-1 bg-transparent text-sm outline-none placeholder:text-muted-foreground/70"
|
||||
/>
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue