From e13ca675d97c7308ed77b754fdcc97afc393dd59 Mon Sep 17 00:00:00 2001 From: "DESKTOP-RTLN3BA\\$punk" Date: Thu, 2 Apr 2026 20:26:34 -0700 Subject: [PATCH 1/2] chore: optimize zero publication column migration process - Updated migration instructions to emphasize stopping zero-cache before running the migration and resetting and restarting it afterwards. - Added a function to terminate backends holding locks that would block the migration. - Set a lock timeout so the migration fails fast instead of waiting indefinitely for a lock. --- ..._optimize_zero_publication_column_lists.py | 29 ++++++++++++++++--- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/surfsense_backend/alembic/versions/117_optimize_zero_publication_column_lists.py b/surfsense_backend/alembic/versions/117_optimize_zero_publication_column_lists.py index 3c2d34c76..78a26a381 100644 --- a/surfsense_backend/alembic/versions/117_optimize_zero_publication_column_lists.py +++ b/surfsense_backend/alembic/versions/117_optimize_zero_publication_column_lists.py @@ -11,10 +11,11 @@ to FULL for the old Electric SQL setup (migration 66/75/76). With DEFAULT (primary-key) identity, column-list publications only need to include the PK — not every column. -After running this migration you MUST: - 1. Stop zero-cache - 2. Delete / reset the zero-cache data volume - 3. Restart zero-cache (it will do a fresh initial sync) +IMPORTANT — run this migration with zero-cache stopped, in this order: + 1. Stop zero-cache (it holds replication locks that will deadlock DDL) + 2. Run: alembic upgrade head + 3. Delete / reset the zero-cache data volume + 4. 
Restart zero-cache (it will do a fresh initial sync) Revision ID: 117 Revises: 116 @@ -62,9 +63,29 @@ CREATE PUBLICATION {PUBLICATION_NAME} FOR TABLE """ +def _terminate_blocked_pids(conn, table: str) -> None: + """Kill backends whose locks on *table* would block our AccessExclusiveLock.""" + conn.execute( + sa.text( + "SELECT pg_terminate_backend(l.pid) " + "FROM pg_locks l " + "JOIN pg_class c ON c.oid = l.relation " + "WHERE c.relname = :tbl " + " AND l.pid != pg_backend_pid()" + ), + {"tbl": table}, + ) + + def upgrade() -> None: conn = op.get_bind() + conn.execute(sa.text("SET lock_timeout = '10s'")) + + for tbl in sorted(TABLES_WITH_FULL_IDENTITY): + _terminate_blocked_pids(conn, tbl) + conn.execute(sa.text(f'LOCK TABLE "{tbl}" IN ACCESS EXCLUSIVE MODE')) + for tbl in TABLES_WITH_FULL_IDENTITY: conn.execute(sa.text(f'ALTER TABLE "{tbl}" REPLICA IDENTITY DEFAULT')) From 6b06d3abb1ce5674339db4d18d52a49f5cc9eb76 Mon Sep 17 00:00:00 2001 From: "DESKTOP-RTLN3BA\\$punk" Date: Thu, 2 Apr 2026 20:38:10 -0700 Subject: [PATCH 2/2] chore: optimize zero publication column migration process - Updated migration instructions to emphasize stopping zero-cache before running the migration and resetting and restarting it afterwards. - Added a function to terminate backends holding locks that would block the migration. - Set a lock timeout so the migration fails fast instead of waiting indefinitely for a lock. --- ..._optimize_zero_publication_column_lists.py | 29 ++++++++++++++++--- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/surfsense_backend/alembic/versions/117_optimize_zero_publication_column_lists.py b/surfsense_backend/alembic/versions/117_optimize_zero_publication_column_lists.py index 3c2d34c76..78a26a381 100644 --- a/surfsense_backend/alembic/versions/117_optimize_zero_publication_column_lists.py +++ b/surfsense_backend/alembic/versions/117_optimize_zero_publication_column_lists.py @@ -11,10 +11,11 @@ to FULL for the old Electric SQL setup (migration 66/75/76). 
With DEFAULT (primary-key) identity, column-list publications only need to include the PK — not every column. -After running this migration you MUST: - 1. Stop zero-cache - 2. Delete / reset the zero-cache data volume - 3. Restart zero-cache (it will do a fresh initial sync) +IMPORTANT — run this migration with zero-cache stopped, in this order: + 1. Stop zero-cache (it holds replication locks that will deadlock DDL) + 2. Run: alembic upgrade head + 3. Delete / reset the zero-cache data volume + 4. Restart zero-cache (it will do a fresh initial sync) Revision ID: 117 Revises: 116 @@ -62,9 +63,29 @@ CREATE PUBLICATION {PUBLICATION_NAME} FOR TABLE """ +def _terminate_blocked_pids(conn, table: str) -> None: + """Kill backends whose locks on *table* would block our AccessExclusiveLock.""" + conn.execute( + sa.text( + "SELECT pg_terminate_backend(l.pid) " + "FROM pg_locks l " + "JOIN pg_class c ON c.oid = l.relation " + "WHERE c.relname = :tbl " + " AND l.pid != pg_backend_pid()" + ), + {"tbl": table}, + ) + + def upgrade() -> None: conn = op.get_bind() + conn.execute(sa.text("SET lock_timeout = '10s'")) + + for tbl in sorted(TABLES_WITH_FULL_IDENTITY): + _terminate_blocked_pids(conn, tbl) + conn.execute(sa.text(f'LOCK TABLE "{tbl}" IN ACCESS EXCLUSIVE MODE')) + for tbl in TABLES_WITH_FULL_IDENTITY: conn.execute(sa.text(f'ALTER TABLE "{tbl}" REPLICA IDENTITY DEFAULT'))