"""remove users that never logged back in (last_login IS NULL) Migration 103 added ``user.last_login``. Any user whose ``last_login`` is still NULL has never authenticated since that column existed, i.e. they never logged back in. This migration purges those users together with everything that hangs off them: the search spaces they own, and (via ON DELETE CASCADE) ``searchspaces -> documents -> chunks`` plus all other user/space-scoped rows. This runs BEFORE the chunks.position backfill (revision 165) on purpose: it removes a large amount of dead chunk data first, so the expensive backfill has far fewer rows to rewrite. Work is done in committed batches (not one giant cascading DELETE) so that on a large table it streams progress to the alembic console, keeps each transaction small, bounds WAL/bloat growth, and is resumable if interrupted. Revision ID: 164 Revises: 163 """ import logging import time from collections.abc import Sequence import sqlalchemy as sa from alembic import op revision: str = "164" down_revision: str | None = "163" branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None # Documents removed per committed batch. Each document delete cascades to its # chunks (via ix_chunks_document_id), so keep this modest to bound batch size. DOC_BATCH = 1_000 # Users removed per committed batch. Each cascades to owned search spaces and # the remaining space-/user-scoped rows. USER_BATCH = 500 # Minimum seconds between progress log lines (keeps the console readable). LOG_EVERY_SECONDS = 5.0 USER_SCRATCH = "_inactive_user_ids" DOC_SCRATCH = "_inactive_doc_ids" logger = logging.getLogger("alembic.runtime.migration") def _fmt_duration(seconds: float) -> str: seconds = int(seconds) h, rem = divmod(seconds, 3600) m, s = divmod(rem, 60) if h: return f"{h}h{m:02d}m{s:02d}s" if m: return f"{m}m{s:02d}s" return f"{s}s" def upgrade() -> None: bind = op.get_bind() # Run the heavy work outside the migration's single transaction so each # batch can commit on its own. with op.get_context().autocommit_block(): # Materialize the target user ids once. Rebuilt from scratch on every # run, so a re-run after an interruption simply picks up whoever still # has NULL last_login -> the migration is idempotent and resumable. op.execute(f"DROP TABLE IF EXISTS {USER_SCRATCH};") op.execute( f'CREATE UNLOGGED TABLE {USER_SCRATCH} AS ' 'SELECT id FROM "user" WHERE last_login IS NULL;' ) op.execute(f"ALTER TABLE {USER_SCRATCH} ADD PRIMARY KEY (id);") total_users = ( bind.execute(sa.text(f"SELECT count(*) FROM {USER_SCRATCH}")).scalar() or 0 ) if total_users == 0: logger.info("no users with NULL last_login; nothing to remove") op.execute(f"DROP TABLE IF EXISTS {USER_SCRATCH};") return logger.info( "found %s users with NULL last_login (never logged back in); " "removing them and all data in search spaces they own", f"{total_users:,}", ) # Documents living in search spaces owned by those users. Deleting these # explicitly (in batches) is what bounds the otherwise-unbounded # chunks cascade. op.execute(f"DROP TABLE IF EXISTS {DOC_SCRATCH};") op.execute( f""" CREATE UNLOGGED TABLE {DOC_SCRATCH} AS SELECT d.id FROM documents d JOIN searchspaces s ON s.id = d.search_space_id WHERE s.user_id IN (SELECT id FROM {USER_SCRATCH}); """ ) op.execute(f"ALTER TABLE {DOC_SCRATCH} ADD PRIMARY KEY (id);") total_docs = ( bind.execute(sa.text(f"SELECT count(*) FROM {DOC_SCRATCH}")).scalar() or 0 ) # Phase 1: delete documents (cascades chunks, document_versions, # document_files) in committed batches. logger.info( "phase 1/2: deleting %s documents (cascades their chunks) " "in batches of %s...", f"{total_docs:,}", f"{DOC_BATCH:,}", ) _batched_delete( bind, scratch=DOC_SCRATCH, target_table="documents", target_col="id", batch_size=DOC_BATCH, total=total_docs, label="documents", ) op.execute(f"DROP TABLE IF EXISTS {DOC_SCRATCH};") # Phase 2: delete the users themselves. This cascades the now-empty # search spaces plus all remaining user-/space-scoped rows. logger.info( "phase 2/2: deleting %s users (cascades search spaces and " "remaining data) in batches of %s...", f"{total_users:,}", f"{USER_BATCH:,}", ) _batched_delete( bind, scratch=USER_SCRATCH, target_table='"user"', target_col="id", batch_size=USER_BATCH, total=total_users, label="users", ) op.execute(f"DROP TABLE IF EXISTS {USER_SCRATCH};") logger.info("migration 164 finished") def _batched_delete( bind: sa.engine.Connection, *, scratch: str, target_table: str, target_col: str, batch_size: int, total: int, label: str, ) -> None: """Pop ids from ``scratch`` and delete the matching rows, one committed batch at a time, logging progress. Atomic per batch: the row delete and the scratch pop happen in a single statement, so an interrupted run leaves the scratch table in sync with what has actually been deleted.""" started = time.monotonic() last_log = 0.0 done = 0 stmt = sa.text( f""" WITH batch AS ( SELECT id FROM {scratch} LIMIT :n ), deleted AS ( DELETE FROM {target_table} WHERE {target_col} IN (SELECT id FROM batch) ), popped AS ( DELETE FROM {scratch} WHERE id IN (SELECT id FROM batch) RETURNING id ) SELECT count(*) FROM popped """ ) while True: popped = bind.execute(stmt, {"n": batch_size}).scalar() or 0 if popped == 0: break done += popped now = time.monotonic() if now - last_log >= LOG_EVERY_SECONDS or done >= total: elapsed = now - started pct = (100.0 * done / total) if total else 100.0 eta = (elapsed / pct * (100.0 - pct)) if pct > 0 else 0.0 logger.info( "%s deleted: %.1f%% (%s/%s) elapsed %s eta %s", label, pct, f"{done:,}", f"{total:,}", _fmt_duration(elapsed), _fmt_duration(eta), ) last_log = now logger.info( "deleted %s %s in %s", f"{done:,}", label, _fmt_duration(time.monotonic() - started), ) def downgrade() -> None: # Irreversible: deleted users and their cascaded data cannot be restored. # No-op so the downgrade chain can still pass through this revision. logger.warning( "migration 164 (remove_inactive_users) is irreversible; " "downgrade is a no-op (deleted users/data are not restored)" )