Merge pull request #577 from CREDO23/fix-alembic-version-files

[Fix] Alembic version files
This commit was authored by Rohan Verma on 2025-12-13 14:05:59 -08:00 and committed via GitHub.
commit 136451fff5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 178 additions and 83 deletions

View file

@ -47,8 +47,48 @@ def upgrade() -> None:
def downgrade() -> None:
    """Downgrade schema - remove the unique_identifier_hash column idempotently.

    Each drop (constraint, index, column) is wrapped in a PostgreSQL DO $$
    block with an existence check, so the migration can be re-run safely or
    applied to databases where some objects were already removed.
    """
    # Drop the unique constraint if it exists
    op.execute("""
        DO $$
        BEGIN
            IF EXISTS (
                SELECT 1
                FROM pg_constraint c
                JOIN pg_class t ON t.oid = c.conrelid
                WHERE c.conname = 'uq_documents_unique_identifier_hash'
                AND t.relname = 'documents'
            ) THEN
                ALTER TABLE documents DROP CONSTRAINT uq_documents_unique_identifier_hash;
            END IF;
        END$$;
    """)
    # Drop the index if it exists
    op.execute("""
        DO $$
        BEGIN
            IF EXISTS (
                SELECT 1
                FROM pg_indexes
                WHERE tablename = 'documents'
                AND indexname = 'ix_documents_unique_identifier_hash'
            ) THEN
                DROP INDEX ix_documents_unique_identifier_hash;
            END IF;
        END$$;
    """)
    # Drop the column if it exists
    op.execute("""
        DO $$
        BEGIN
            IF EXISTS (
                SELECT 1
                FROM information_schema.columns
                WHERE table_name='documents'
                AND column_name='unique_identifier_hash'
            ) THEN
                ALTER TABLE documents DROP COLUMN unique_identifier_hash;
            END IF;
        END$$;
    """)

View file

@ -12,6 +12,7 @@ of a search space, rather than being per-user.
"""
import sqlalchemy as sa
from sqlalchemy import inspect
from alembic import op
@ -23,25 +24,29 @@ depends_on = None
def upgrade():
# Add LLM preference columns to searchspaces table
op.add_column(
"searchspaces",
sa.Column("long_context_llm_id", sa.Integer(), nullable=True),
)
op.add_column(
"searchspaces",
sa.Column("fast_llm_id", sa.Integer(), nullable=True),
)
op.add_column(
"searchspaces",
sa.Column("strategic_llm_id", sa.Integer(), nullable=True),
)
connection = op.get_bind()
inspector = inspect(connection)
columns = [col["name"] for col in inspector.get_columns("searchspaces")]
# Add LLM preference columns to searchspaces table if they don't exist
if "long_context_llm_id" not in columns:
op.add_column(
"searchspaces",
sa.Column("long_context_llm_id", sa.Integer(), nullable=True),
)
if "fast_llm_id" not in columns:
op.add_column(
"searchspaces",
sa.Column("fast_llm_id", sa.Integer(), nullable=True),
)
if "strategic_llm_id" not in columns:
op.add_column(
"searchspaces",
sa.Column("strategic_llm_id", sa.Integer(), nullable=True),
)
# Migrate existing preferences from user_search_space_preferences to searchspaces
# We take the owner's preferences (the user who created the search space)
connection = op.get_bind()
# Get all search spaces and their owner's preferences
# Take the owner's preferences (the user who created the search space)
connection.execute(
sa.text("""
UPDATE searchspaces ss
@ -57,7 +62,14 @@ def upgrade():
def downgrade():
    """Remove the per-search-space LLM preference columns, if present.

    Inspects the live schema first so re-running the downgrade (or running
    it against a database where columns were already dropped) does not fail.
    """
    connection = op.get_bind()
    inspector = inspect(connection)
    columns = [col["name"] for col in inspector.get_columns("searchspaces")]

    # Remove columns only if they exist
    if "strategic_llm_id" in columns:
        op.drop_column("searchspaces", "strategic_llm_id")
    if "fast_llm_id" in columns:
        op.drop_column("searchspaces", "fast_llm_id")
    if "long_context_llm_id" in columns:
        op.drop_column("searchspaces", "long_context_llm_id")

View file

@ -27,26 +27,39 @@ depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
"""Upgrade schema - Add BlockNote fields and trigger population task."""
# Add the columns
op.add_column(
"documents",
sa.Column(
"blocknote_document", postgresql.JSONB(astext_type=sa.Text()), nullable=True
),
)
op.add_column(
"documents",
sa.Column(
"content_needs_reindexing",
sa.Boolean(),
nullable=False,
server_default=sa.false(),
),
)
op.add_column(
"documents",
sa.Column("last_edited_at", sa.TIMESTAMP(timezone=True), nullable=True),
)
# Get existing columns to avoid duplicates
conn = op.get_bind()
existing_columns = [
col["name"] for col in sa.inspect(conn).get_columns("documents")
]
# Add the columns if they don't exist
if "blocknote_document" not in existing_columns:
op.add_column(
"documents",
sa.Column(
"blocknote_document",
postgresql.JSONB(astext_type=sa.Text()),
nullable=True,
),
)
if "content_needs_reindexing" not in existing_columns:
op.add_column(
"documents",
sa.Column(
"content_needs_reindexing",
sa.Boolean(),
nullable=False,
server_default=sa.false(),
),
)
if "last_edited_at" not in existing_columns:
op.add_column(
"documents",
sa.Column("last_edited_at", sa.TIMESTAMP(timezone=True), nullable=True),
)
# Trigger the Celery task to populate blocknote_document for existing documents
try:
@ -60,7 +73,6 @@ def upgrade() -> None:
"✓ Queued Celery task to populate blocknote_document for existing documents"
)
except Exception as e:
# If Celery is not available or task queueing fails, log but don't fail the migration
print(f"⚠ Warning: Could not queue blocknote population task: {e}")
print(" You can manually trigger it later with:")
print(

View file

@ -12,6 +12,7 @@ for efficient time-based filtering.
from collections.abc import Sequence
import sqlalchemy as sa
from sqlalchemy import inspect
from alembic import op
@ -24,19 +25,28 @@ depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
    """Upgrade schema - Add updated_at field with index to documents.

    Skips the work entirely when the column already exists (e.g. the
    migration was partially applied before), making the upgrade idempotent.
    """
    connection = op.get_bind()
    inspector = inspect(connection)
    columns = [col["name"] for col in inspector.get_columns("documents")]

    if "updated_at" not in columns:
        op.add_column(
            "documents",
            sa.Column("updated_at", sa.TIMESTAMP(timezone=True), nullable=True),
        )
        # Index is created together with the column; if the column already
        # existed we assume the index does too.
        op.create_index(
            "ix_documents_updated_at",
            "documents",
            ["updated_at"],
        )
def downgrade() -> None:
    """Downgrade schema - Remove updated_at field and index, if present."""
    connection = op.get_bind()
    inspector = inspect(connection)
    columns = [col["name"] for col in inspector.get_columns("documents")]

    if "updated_at" in columns:
        # if_exists handles cases where the index wasn't created
        # (migration modified after it was applied)
        op.drop_index("ix_documents_updated_at", table_name="documents", if_exists=True)
        op.drop_column("documents", "updated_at")

View file

@ -12,6 +12,7 @@ to track all document updates (indexers, processors, and editor).
from collections.abc import Sequence
import sqlalchemy as sa
from sqlalchemy import inspect, text
from alembic import op
@ -24,29 +25,33 @@ depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
    """Upgrade schema - Migrate last_edited_at to updated_at, then remove last_edited_at.

    The whole migration is guarded on the presence of last_edited_at so it is
    a no-op when re-run (the column is dropped in step 3 of the first run).
    """
    conn = op.get_bind()
    inspector = inspect(conn)
    columns = [col["name"] for col in inspector.get_columns("documents")]

    if "last_edited_at" in columns:
        # Step 1: Copy last_edited_at values to updated_at where updated_at is NULL
        conn.execute(
            text("""
            UPDATE documents
            SET updated_at = last_edited_at
            WHERE last_edited_at IS NOT NULL
            AND updated_at IS NULL
            """)
        )

        # Step 2: For documents where both exist, use the most recent timestamp
        conn.execute(
            text("""
            UPDATE documents
            SET updated_at = GREATEST(updated_at, last_edited_at)
            WHERE last_edited_at IS NOT NULL
            AND updated_at IS NOT NULL
            """)
        )

        # Step 3: Drop the last_edited_at column
        op.drop_column("documents", "last_edited_at")
def downgrade() -> None:

View file

@ -66,6 +66,22 @@ def upgrade() -> None:
def downgrade() -> None:
    """Remove the content_hash unique constraint, index, and column, if present.

    Uses the SQLAlchemy inspector to check the live schema first, so the
    downgrade is safe to re-run.
    """
    bind = op.get_bind()
    inspector = inspect(bind)

    # Get existing constraints and indexes on documents
    constraints = [c["name"] for c in inspector.get_unique_constraints("documents")]
    indexes = [i["name"] for i in inspector.get_indexes("documents")]
    columns = [col["name"] for col in inspector.get_columns("documents")]

    # Drop unique constraint if it exists
    if "uq_documents_content_hash" in constraints:
        op.drop_constraint("uq_documents_content_hash", "documents", type_="unique")

    # Drop index if it exists
    if "ix_documents_content_hash" in indexes:
        op.drop_index("ix_documents_content_hash", table_name="documents")

    # Drop column if it exists
    if "content_hash" in columns:
        op.drop_column("documents", "content_hash")