SurfSense/surfsense_backend/alembic/versions/162_add_chunk_position.py
CREDO23 c6e71c851c feat(chunks): add explicit position column with backfill migration
Chunk ids stop reflecting document order once incremental re-indexing keeps
unchanged rows across edits. Backfill preserves the historical id ordering
so behavior is identical on day one.
2026-06-12 18:52:45 +02:00

51 lines
1.4 KiB
Python

"""add chunks.position for explicit document order
Incremental re-indexing keeps unchanged chunk rows, so auto-increment ids no
longer reflect document order. Backfill preserves the historical id ordering.
Revision ID: 162
Revises: 161
"""
from collections.abc import Sequence
from alembic import op
revision: str = "162"
down_revision: str | None = "161"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
op.execute(
"ALTER TABLE chunks ADD COLUMN IF NOT EXISTS position INTEGER NOT NULL DEFAULT 0;"
)
# Backfill: document order so far has been the insertion order (id).
op.execute(
"""
UPDATE chunks
SET position = numbered.rn
FROM (
SELECT id,
ROW_NUMBER() OVER (PARTITION BY document_id ORDER BY id) - 1 AS rn
FROM chunks
) AS numbered
WHERE chunks.id = numbered.id;
"""
)
op.execute(
"CREATE INDEX IF NOT EXISTS ix_chunks_position ON chunks(position);"
)
op.execute(
"CREATE INDEX IF NOT EXISTS ix_chunks_document_id_position "
"ON chunks(document_id, position);"
)
def downgrade() -> None:
op.execute("DROP INDEX IF EXISTS ix_chunks_document_id_position;")
op.execute("DROP INDEX IF EXISTS ix_chunks_position;")
op.execute("ALTER TABLE chunks DROP COLUMN IF EXISTS position;")