diff --git a/surfsense_backend/alembic/versions/1_add_github_connector_enum.py b/surfsense_backend/alembic/versions/1_add_github_connector_enum.py index a359f8962..235908b1f 100644 --- a/surfsense_backend/alembic/versions/1_add_github_connector_enum.py +++ b/surfsense_backend/alembic/versions/1_add_github_connector_enum.py @@ -2,12 +2,17 @@ Revision ID: 1 Revises: + """ from collections.abc import Sequence from alembic import op +# Import pgvector if needed for other types, though not for this ENUM change +# import pgvector + + # revision identifiers, used by Alembic. revision: str = "1" down_revision: str | None = None @@ -16,24 +21,10 @@ depends_on: str | Sequence[str] | None = None def upgrade() -> None: - # Ensure the enum type exists - op.execute( - """ -DO $$ -BEGIN - IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'searchsourceconnectortype') THEN - CREATE TYPE searchsourceconnectortype AS ENUM( - 'SERPER_API', - 'TAVILY_API', - 'SLACK_CONNECTOR', - 'NOTION_CONNECTOR' - ); - END IF; -END$$; -""" - ) + # ### commands auto generated by Alembic - please adjust! ### - # Add the new enum value if it doesn't exist + # Manually add the command to add the enum value + # Note: It's generally better to let autogenerate handle this, but we're bypassing it op.execute( """ DO $$ @@ -52,31 +43,30 @@ END$$; """ ) + # Pass for the rest, as autogenerate didn't run to add other schema details + pass + # ### end Alembic commands ### + def downgrade() -> None: - # Removing an enum value safely requires recreating the type + # ### commands auto generated by Alembic - please adjust! ### + + # Downgrading removal of an enum value is complex and potentially dangerous + # if the value is in use. Often omitted or requires manual SQL based on context. + # For now, we'll just pass. If you needed to reverse this, you'd likely + # have to manually check if 'GITHUB_CONNECTOR' is used in the table + # and then potentially recreate the type without it. op.execute( - """ -DO $$ -BEGIN - -- Rename existing type - ALTER TYPE searchsourceconnectortype RENAME TO searchsourceconnectortype_old; - - -- Create new type without GITHUB_CONNECTOR - CREATE TYPE searchsourceconnectortype AS ENUM( - 'SERPER_API', - 'TAVILY_API', - 'SLACK_CONNECTOR', - 'NOTION_CONNECTOR' - ); - - -- Update table columns to use new type - ALTER TABLE search_source_connectors - ALTER COLUMN connector_type TYPE searchsourceconnectortype - USING connector_type::text::searchsourceconnectortype; - - -- Drop old type - DROP TYPE searchsourceconnectortype_old; -END$$; -""" + "ALTER TYPE searchsourceconnectortype RENAME TO searchsourceconnectortype_old" ) + op.execute( + "CREATE TYPE searchsourceconnectortype AS ENUM('SERPER_API', 'TAVILY_API', 'SLACK_CONNECTOR', 'NOTION_CONNECTOR')" + ) + op.execute( + "ALTER TABLE search_source_connectors ALTER COLUMN connector_type TYPE searchsourceconnectortype USING " + "connector_type::text::searchsourceconnectortype" + ) + op.execute("DROP TYPE searchsourceconnectortype_old") + + pass + # ### end Alembic commands ### diff --git a/surfsense_backend/alembic/versions/40_move_llm_preferences_to_searchspace.py b/surfsense_backend/alembic/versions/40_move_llm_preferences_to_searchspace.py index 5f6ccb852..1067cffcc 100644 --- a/surfsense_backend/alembic/versions/40_move_llm_preferences_to_searchspace.py +++ b/surfsense_backend/alembic/versions/40_move_llm_preferences_to_searchspace.py @@ -1,6 +1,19 @@ +"""Move LLM preferences from user-level to search space level + +Revision ID: 40 +Revises: 39 +Create Date: 2024-11-27 + +This migration moves LLM preferences (long_context_llm_id, fast_llm_id, strategic_llm_id) +from the user_search_space_preferences table to the searchspaces table itself. + +This change supports the RBAC model where LLM preferences are shared by all members +of a search space, rather than being per-user. +""" + import sqlalchemy as sa + from alembic import op -from sqlalchemy import inspect # revision identifiers, used by Alembic. revision = "40" @@ -10,32 +23,26 @@ depends_on = None def upgrade(): - conn = op.get_bind() - inspector = inspect(conn) + # Add LLM preference columns to searchspaces table + op.add_column( + "searchspaces", + sa.Column("long_context_llm_id", sa.Integer(), nullable=True), + ) + op.add_column( + "searchspaces", + sa.Column("fast_llm_id", sa.Integer(), nullable=True), + ) + op.add_column( + "searchspaces", + sa.Column("strategic_llm_id", sa.Integer(), nullable=True), + ) - existing_cols = {col["name"] for col in inspector.get_columns("searchspaces")} + # Migrate existing preferences from user_search_space_preferences to searchspaces + # We take the owner's preferences (the user who created the search space) + connection = op.get_bind() - # Add columns only if they don't already exist - if "long_context_llm_id" not in existing_cols: - op.add_column( - "searchspaces", - sa.Column("long_context_llm_id", sa.Integer(), nullable=True), - ) - - if "fast_llm_id" not in existing_cols: - op.add_column( - "searchspaces", - sa.Column("fast_llm_id", sa.Integer(), nullable=True), - ) - - if "strategic_llm_id" not in existing_cols: - op.add_column( - "searchspaces", - sa.Column("strategic_llm_id", sa.Integer(), nullable=True), - ) - - # Migrate existing data - conn.execute( + # Get all search spaces and their owner's preferences + connection.execute( sa.text(""" UPDATE searchspaces ss SET @@ -50,16 +57,7 @@ def upgrade(): def downgrade(): - conn = op.get_bind() - inspector = inspect(conn) - existing_cols = {col["name"] for col in inspector.get_columns("searchspaces")} - - # Drop columns only if they exist - if "strategic_llm_id" in existing_cols: - op.drop_column("searchspaces", "strategic_llm_id") - - if "fast_llm_id" in existing_cols: - op.drop_column("searchspaces", "fast_llm_id") - - if "long_context_llm_id" in existing_cols: - op.drop_column("searchspaces", "long_context_llm_id") + # Remove LLM preference columns from searchspaces table + op.drop_column("searchspaces", "strategic_llm_id") + op.drop_column("searchspaces", "fast_llm_id") + op.drop_column("searchspaces", "long_context_llm_id") diff --git a/surfsense_backend/alembic/versions/43_add_blocknote_fields_to_documents.py b/surfsense_backend/alembic/versions/43_add_blocknote_fields_to_documents.py index ee9230a44..32e7780eb 100644 --- a/surfsense_backend/alembic/versions/43_add_blocknote_fields_to_documents.py +++ b/surfsense_backend/alembic/versions/43_add_blocknote_fields_to_documents.py @@ -16,7 +16,6 @@ import sqlalchemy as sa from sqlalchemy.dialects import postgresql from alembic import op -from sqlalchemy import inspect # revision identifiers, used by Alembic. revision: str = "43" @@ -26,57 +25,51 @@ depends_on: str | Sequence[str] | None = None def upgrade() -> None: - """Upgrade schema - Add BlockNote fields (idempotent).""" + """Upgrade schema - Add BlockNote fields and trigger population task.""" - conn = op.get_bind() - inspector = inspect(conn) - existing_cols = {c["name"] for c in inspector.get_columns("documents")} + # Add the columns + op.add_column( + "documents", + sa.Column( + "blocknote_document", postgresql.JSONB(astext_type=sa.Text()), nullable=True + ), + ) + op.add_column( + "documents", + sa.Column( + "content_needs_reindexing", + sa.Boolean(), + nullable=False, + server_default=sa.false(), + ), + ) + op.add_column( + "documents", + sa.Column("last_edited_at", sa.TIMESTAMP(timezone=True), nullable=True), + ) - # Add blocknote_document (JSONB) if doest not exist - if "blocknote_document" not in existing_cols: - op.add_column( - "documents", - sa.Column( - "blocknote_document", - postgresql.JSONB(astext_type=sa.Text()), - nullable=True, - ), + # Trigger the Celery task to populate blocknote_document for existing documents + try: + from app.tasks.celery_tasks.blocknote_migration_tasks import ( + populate_blocknote_for_documents_task, ) - # Add content_needs_reindexing (boolean) if doest not exist - if "content_needs_reindexing" not in existing_cols: - op.add_column( - "documents", - sa.Column( - "content_needs_reindexing", - sa.Boolean(), - nullable=False, - server_default=sa.false(), - ), + # Queue the task to run asynchronously + populate_blocknote_for_documents_task.apply_async() + print( + "✓ Queued Celery task to populate blocknote_document for existing documents" ) - - # Add last_edited_at (timestamp with tz) if doest not exist - if "last_edited_at" not in existing_cols: - op.add_column( - "documents", - sa.Column("last_edited_at", sa.TIMESTAMP(timezone=True), nullable=True), + except Exception as e: + # If Celery is not available or task queueing fails, log but don't fail the migration + print(f"⚠ Warning: Could not queue blocknote population task: {e}") + print(" You can manually trigger it later with:") + print( + " celery -A app.celery_app call app.tasks.celery_tasks.blocknote_migration_tasks.populate_blocknote_for_documents_task" ) - # NOTE: We intentionally do NOT import or queue Celery tasks here. - # Running background jobs during migrations causes hard-to-debug failures. - # After running migrations, trigger the backfill task manually (instructions below). - def downgrade() -> None: - """Downgrade schema - Remove BlockNote fields (only if present).""" - - conn = op.get_bind() - inspector = inspect(conn) - existing_cols = {c["name"] for c in inspector.get_columns("documents")} - - if "last_edited_at" in existing_cols: - op.drop_column("documents", "last_edited_at") - if "content_needs_reindexing" in existing_cols: - op.drop_column("documents", "content_needs_reindexing") - if "blocknote_document" in existing_cols: - op.drop_column("documents", "blocknote_document") + """Downgrade schema - Remove BlockNote fields.""" + op.drop_column("documents", "last_edited_at") + op.drop_column("documents", "content_needs_reindexing") + op.drop_column("documents", "blocknote_document")