"""add chunks.position for explicit document order

Incremental re-indexing keeps unchanged chunk rows, so auto-increment ids no
longer reflect document order. The ``position`` column makes that order
explicit and is written by the indexing pipeline for every new or re-indexed
document.

This migration intentionally does NOT backfill historical rows. On a large,
heavily-indexed table (notably a multi-hundred-GB HNSW embedding index) a bulk
UPDATE of every chunk becomes a non-HOT update that rewrites every secondary
index per row -- turning a one-off migration into a multi-day operation.
Instead, existing rows keep ``position = 0`` and therefore order by the
``Chunk.id`` tiebreaker (identical to the pre-feature behavior); new and
re-indexed documents get correct positions from application code, and any
document needing exact order can simply be re-indexed on demand.

Revision ID: 165
Revises: 164
"""

from collections.abc import Sequence

from alembic import op

# Alembic revision identifiers: this migration is revision "165" and applies
# on top of revision "164". No branch labels or extra dependencies are
# declared.
revision: str = "165"
down_revision: str | None = "164"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None

# Leftover UNLOGGED scratch table from earlier backfill attempts; dropped here
# so re-running this migration converges the schema regardless of past state.
# The name is interpolated verbatim into the DROP TABLE statements issued by
# both upgrade() and downgrade().
SCRATCH_TABLE = "_chunk_position_backfill"


def upgrade() -> None:
    """Add ``chunks.position`` and remove the obsolete backfill scratch table.

    Every statement is guarded with ``IF [NOT] EXISTS``, so running this on a
    database where the column already exists (or the scratch table is already
    gone) is a no-op for that step. No existing rows are backfilled; they keep
    the column default of 0.
    """
    statements = (
        # A NOT NULL column with a constant default is a metadata-only change
        # on PostgreSQL 11+, so this stays fast even on very large tables.
        "ALTER TABLE chunks ADD COLUMN IF NOT EXISTS position INTEGER NOT NULL DEFAULT 0;",
        # Remove whatever the abandoned backfill approach left behind.
        f"DROP TABLE IF EXISTS {SCRATCH_TABLE};",
    )

    # Order matters only for readability of the migration log; execute the
    # statements in the sequence listed above.
    for sql in statements:
        op.execute(sql)


def downgrade() -> None:
    """Revert the migration by dropping ``chunks.position``.

    The scratch table is dropped first (if it is somehow still present) so the
    downgraded schema carries no remnants of the backfill attempts. Both
    statements use ``IF EXISTS`` and are therefore safe to run repeatedly.
    """
    drop_scratch_sql = f"DROP TABLE IF EXISTS {SCRATCH_TABLE};"
    drop_column_sql = "ALTER TABLE chunks DROP COLUMN IF EXISTS position;"

    op.execute(drop_scratch_sql)
    op.execute(drop_column_sql)