mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-26 21:39:43 +02:00
Chunk ids stop reflecting document order once incremental re-indexing keeps unchanged rows across edits. Backfill preserves the historical id ordering so behavior is identical on day one.
51 lines
1.4 KiB
Python
51 lines
1.4 KiB
Python
"""add chunks.position for explicit document order
|
|
|
|
Incremental re-indexing keeps unchanged chunk rows, so auto-increment ids no
|
|
longer reflect document order. Backfill preserves the historical id ordering.
|
|
|
|
Revision ID: 162
|
|
Revises: 161
|
|
"""
|
|
|
|
from collections.abc import Sequence
|
|
|
|
from alembic import op
|
|
|
|
# Revision identifiers for this migration script (matching the
# "Revision ID" / "Revises" lines in the module docstring).
revision: str = "162"
# Parent revision: this migration applies on top of revision "161".
down_revision: str | None = "161"
# No branch labels or cross-revision dependencies are declared.
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
|
|
|
|
|
|
def upgrade() -> None:
    """Add ``chunks.position``, backfill it from id order, and index it.

    Steps, executed in this order:

    1. Add an ``INTEGER NOT NULL DEFAULT 0`` column named ``position`` to
       ``chunks`` (skipped by the database if the column already exists).
    2. Backfill every row with its zero-based rank within its document,
       ranking rows of the same ``document_id`` by ascending ``id``.
    3. Create an index on ``position`` and a composite index on
       ``(document_id, position)``, each only if it does not exist yet.
    """
    add_position_column = (
        "ALTER TABLE chunks ADD COLUMN IF NOT EXISTS position INTEGER NOT NULL DEFAULT 0;"
    )

    # Until now the insertion order (id) was the document order, so the
    # per-document row number by id, shifted to start at 0, is the position.
    backfill_positions = """
        UPDATE chunks
        SET position = numbered.rn
        FROM (
            SELECT id,
                   ROW_NUMBER() OVER (PARTITION BY document_id ORDER BY id) - 1 AS rn
            FROM chunks
        ) AS numbered
        WHERE chunks.id = numbered.id;
        """

    create_position_index = (
        "CREATE INDEX IF NOT EXISTS ix_chunks_position ON chunks(position);"
    )
    create_document_position_index = (
        "CREATE INDEX IF NOT EXISTS ix_chunks_document_id_position "
        "ON chunks(document_id, position);"
    )

    # Order matters: the column must exist before the backfill, and the
    # indexes are built after the backfill has written the final values.
    for statement in (
        add_position_column,
        backfill_positions,
        create_position_index,
        create_document_position_index,
    ):
        op.execute(statement)
|
|
|
|
|
|
def downgrade() -> None:
    """Remove the position indexes and then the ``chunks.position`` column.

    Every statement uses ``IF EXISTS``, so a missing index or column is
    skipped by the database instead of raising an error.
    """
    teardown_statements = (
        # Indexes first (composite, then single-column), column last.
        "DROP INDEX IF EXISTS ix_chunks_document_id_position;",
        "DROP INDEX IF EXISTS ix_chunks_position;",
        "ALTER TABLE chunks DROP COLUMN IF EXISTS position;",
    )
    for statement in teardown_statements:
        op.execute(statement)
|