revert: restore alembic migrations to match upstream/dev

- Reverted alembic migration files to upstream/dev version
- Added new migration 44_add_bookstack_connector_enums.py from upstream
- Removed local modifications to migrations 1, 40, and 43
This commit is contained in:
CREDO23 2025-12-08 10:53:12 +00:00
parent 1d49378363
commit 6566ae209f
3 changed files with 106 additions and 125 deletions

View file

@@ -2,12 +2,17 @@
Revision ID: 1
Revises:
"""
from collections.abc import Sequence
from alembic import op
# Import pgvector if needed for other types, though not for this ENUM change
# import pgvector
# revision identifiers, used by Alembic.
revision: str = "1"
down_revision: str | None = None
@@ -16,24 +21,10 @@ depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
# Ensure the enum type exists
op.execute(
"""
DO $$
BEGIN
IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'searchsourceconnectortype') THEN
CREATE TYPE searchsourceconnectortype AS ENUM(
'SERPER_API',
'TAVILY_API',
'SLACK_CONNECTOR',
'NOTION_CONNECTOR'
);
END IF;
END$$;
"""
)
# ### commands auto generated by Alembic - please adjust! ###
# Add the new enum value if it doesn't exist
# Manually add the command to add the enum value
# Note: It's generally better to let autogenerate handle this, but we're bypassing it
op.execute(
"""
DO $$
@@ -52,31 +43,30 @@ END$$;
"""
)
# Pass for the rest, as autogenerate didn't run to add other schema details
pass
# ### end Alembic commands ###
def downgrade() -> None:
# Removing an enum value safely requires recreating the type
# ### commands auto generated by Alembic - please adjust! ###
# Downgrading removal of an enum value is complex and potentially dangerous
# if the value is in use. Often omitted or requires manual SQL based on context.
# For now, we'll just pass. If you needed to reverse this, you'd likely
# have to manually check if 'GITHUB_CONNECTOR' is used in the table
# and then potentially recreate the type without it.
op.execute(
"""
DO $$
BEGIN
-- Rename existing type
ALTER TYPE searchsourceconnectortype RENAME TO searchsourceconnectortype_old;
-- Create new type without GITHUB_CONNECTOR
CREATE TYPE searchsourceconnectortype AS ENUM(
'SERPER_API',
'TAVILY_API',
'SLACK_CONNECTOR',
'NOTION_CONNECTOR'
);
-- Update table columns to use new type
ALTER TABLE search_source_connectors
ALTER COLUMN connector_type TYPE searchsourceconnectortype
USING connector_type::text::searchsourceconnectortype;
-- Drop old type
DROP TYPE searchsourceconnectortype_old;
END$$;
"""
"ALTER TYPE searchsourceconnectortype RENAME TO searchsourceconnectortype_old"
)
op.execute(
"CREATE TYPE searchsourceconnectortype AS ENUM('SERPER_API', 'TAVILY_API', 'SLACK_CONNECTOR', 'NOTION_CONNECTOR')"
)
op.execute(
"ALTER TABLE search_source_connectors ALTER COLUMN connector_type TYPE searchsourceconnectortype USING "
"connector_type::text::searchsourceconnectortype"
)
op.execute("DROP TYPE searchsourceconnectortype_old")
pass
# ### end Alembic commands ###

View file

@@ -1,6 +1,19 @@
"""Move LLM preferences from user-level to search space level
Revision ID: 40
Revises: 39
Create Date: 2024-11-27
This migration moves LLM preferences (long_context_llm_id, fast_llm_id, strategic_llm_id)
from the user_search_space_preferences table to the searchspaces table itself.
This change supports the RBAC model where LLM preferences are shared by all members
of a search space, rather than being per-user.
"""
import sqlalchemy as sa
from alembic import op
from sqlalchemy import inspect
# revision identifiers, used by Alembic.
revision = "40"
@@ -10,32 +23,26 @@ depends_on = None
def upgrade():
conn = op.get_bind()
inspector = inspect(conn)
# Add LLM preference columns to searchspaces table
op.add_column(
"searchspaces",
sa.Column("long_context_llm_id", sa.Integer(), nullable=True),
)
op.add_column(
"searchspaces",
sa.Column("fast_llm_id", sa.Integer(), nullable=True),
)
op.add_column(
"searchspaces",
sa.Column("strategic_llm_id", sa.Integer(), nullable=True),
)
existing_cols = {col["name"] for col in inspector.get_columns("searchspaces")}
# Migrate existing preferences from user_search_space_preferences to searchspaces
# We take the owner's preferences (the user who created the search space)
connection = op.get_bind()
# Add columns only if they don't already exist
if "long_context_llm_id" not in existing_cols:
op.add_column(
"searchspaces",
sa.Column("long_context_llm_id", sa.Integer(), nullable=True),
)
if "fast_llm_id" not in existing_cols:
op.add_column(
"searchspaces",
sa.Column("fast_llm_id", sa.Integer(), nullable=True),
)
if "strategic_llm_id" not in existing_cols:
op.add_column(
"searchspaces",
sa.Column("strategic_llm_id", sa.Integer(), nullable=True),
)
# Migrate existing data
conn.execute(
# Get all search spaces and their owner's preferences
connection.execute(
sa.text("""
UPDATE searchspaces ss
SET
@@ -50,16 +57,7 @@ def upgrade():
def downgrade():
conn = op.get_bind()
inspector = inspect(conn)
existing_cols = {col["name"] for col in inspector.get_columns("searchspaces")}
# Drop columns only if they exist
if "strategic_llm_id" in existing_cols:
op.drop_column("searchspaces", "strategic_llm_id")
if "fast_llm_id" in existing_cols:
op.drop_column("searchspaces", "fast_llm_id")
if "long_context_llm_id" in existing_cols:
op.drop_column("searchspaces", "long_context_llm_id")
# Remove LLM preference columns from searchspaces table
op.drop_column("searchspaces", "strategic_llm_id")
op.drop_column("searchspaces", "fast_llm_id")
op.drop_column("searchspaces", "long_context_llm_id")

View file

@@ -16,7 +16,6 @@ import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from alembic import op
from sqlalchemy import inspect
# revision identifiers, used by Alembic.
revision: str = "43"
@@ -26,57 +25,51 @@ depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
"""Upgrade schema - Add BlockNote fields (idempotent)."""
"""Upgrade schema - Add BlockNote fields and trigger population task."""
conn = op.get_bind()
inspector = inspect(conn)
existing_cols = {c["name"] for c in inspector.get_columns("documents")}
# Add the columns
op.add_column(
"documents",
sa.Column(
"blocknote_document", postgresql.JSONB(astext_type=sa.Text()), nullable=True
),
)
op.add_column(
"documents",
sa.Column(
"content_needs_reindexing",
sa.Boolean(),
nullable=False,
server_default=sa.false(),
),
)
op.add_column(
"documents",
sa.Column("last_edited_at", sa.TIMESTAMP(timezone=True), nullable=True),
)
# Add blocknote_document (JSONB) if it does not exist
if "blocknote_document" not in existing_cols:
op.add_column(
"documents",
sa.Column(
"blocknote_document",
postgresql.JSONB(astext_type=sa.Text()),
nullable=True,
),
# Trigger the Celery task to populate blocknote_document for existing documents
try:
from app.tasks.celery_tasks.blocknote_migration_tasks import (
populate_blocknote_for_documents_task,
)
# Add content_needs_reindexing (boolean) if it does not exist
if "content_needs_reindexing" not in existing_cols:
op.add_column(
"documents",
sa.Column(
"content_needs_reindexing",
sa.Boolean(),
nullable=False,
server_default=sa.false(),
),
# Queue the task to run asynchronously
populate_blocknote_for_documents_task.apply_async()
print(
"✓ Queued Celery task to populate blocknote_document for existing documents"
)
# Add last_edited_at (timestamp with tz) if it does not exist
if "last_edited_at" not in existing_cols:
op.add_column(
"documents",
sa.Column("last_edited_at", sa.TIMESTAMP(timezone=True), nullable=True),
except Exception as e:
# If Celery is not available or task queueing fails, log but don't fail the migration
print(f"⚠ Warning: Could not queue blocknote population task: {e}")
print(" You can manually trigger it later with:")
print(
" celery -A app.celery_app call app.tasks.celery_tasks.blocknote_migration_tasks.populate_blocknote_for_documents_task"
)
# NOTE: We intentionally do NOT import or queue Celery tasks here.
# Running background jobs during migrations causes hard-to-debug failures.
# After running migrations, trigger the backfill task manually (instructions below).
def downgrade() -> None:
"""Downgrade schema - Remove BlockNote fields (only if present)."""
conn = op.get_bind()
inspector = inspect(conn)
existing_cols = {c["name"] for c in inspector.get_columns("documents")}
if "last_edited_at" in existing_cols:
op.drop_column("documents", "last_edited_at")
if "content_needs_reindexing" in existing_cols:
op.drop_column("documents", "content_needs_reindexing")
if "blocknote_document" in existing_cols:
op.drop_column("documents", "blocknote_document")
"""Downgrade schema - Remove BlockNote fields."""
op.drop_column("documents", "last_edited_at")
op.drop_column("documents", "content_needs_reindexing")
op.drop_column("documents", "blocknote_document")