diff --git a/surfsense_backend/alembic/versions/29_add_unique_identifier_hash_to_documents.py b/surfsense_backend/alembic/versions/29_add_unique_identifier_hash_to_documents.py
index cf3486473..4cceaf86f 100644
--- a/surfsense_backend/alembic/versions/29_add_unique_identifier_hash_to_documents.py
+++ b/surfsense_backend/alembic/versions/29_add_unique_identifier_hash_to_documents.py
@@ -47,8 +47,15 @@ def upgrade() -> None:
 
 
 def downgrade() -> None:
-    op.drop_constraint(
-        op.f("uq_documents_unique_identifier_hash"), "documents", type_="unique"
-    )
-    op.drop_index(op.f("ix_documents_unique_identifier_hash"), table_name="documents")
-    op.drop_column("documents", "unique_identifier_hash")
+    # Idempotent downgrade. PostgreSQL's native IF EXISTS clauses resolve names
+    # through the search_path, so the existence check and the DROP always hit
+    # the same object; a catalog lookup by bare name can match another schema.
+    op.execute(
+        "ALTER TABLE IF EXISTS documents "
+        "DROP CONSTRAINT IF EXISTS uq_documents_unique_identifier_hash"
+    )
+    op.execute("DROP INDEX IF EXISTS ix_documents_unique_identifier_hash")
+    op.execute(
+        "ALTER TABLE IF EXISTS documents "
+        "DROP COLUMN IF EXISTS unique_identifier_hash"
+    )
diff --git a/surfsense_backend/alembic/versions/40_move_llm_preferences_to_searchspace.py b/surfsense_backend/alembic/versions/40_move_llm_preferences_to_searchspace.py
index 1067cffcc..d582f9681 100644
--- a/surfsense_backend/alembic/versions/40_move_llm_preferences_to_searchspace.py
+++ b/surfsense_backend/alembic/versions/40_move_llm_preferences_to_searchspace.py
@@ -12,6 +12,7 @@ of a search space, rather than being per-user.
 """
 
 import sqlalchemy as sa
+from sqlalchemy import inspect
 
 from alembic import op
 
@@ -23,25 +24,29 @@ depends_on = None
 
 
 def upgrade():
-    # Add LLM preference columns to searchspaces table
-    op.add_column(
-        "searchspaces",
-        sa.Column("long_context_llm_id", sa.Integer(), nullable=True),
-    )
-    op.add_column(
-        "searchspaces",
-        sa.Column("fast_llm_id", sa.Integer(), nullable=True),
-    )
-    op.add_column(
-        "searchspaces",
-        sa.Column("strategic_llm_id", sa.Integer(), nullable=True),
-    )
+    connection = op.get_bind()
+    inspector = inspect(connection)
+    columns = [col["name"] for col in inspector.get_columns("searchspaces")]
+
+    # Add LLM preference columns to searchspaces table if they don't exist
+    if "long_context_llm_id" not in columns:
+        op.add_column(
+            "searchspaces",
+            sa.Column("long_context_llm_id", sa.Integer(), nullable=True),
+        )
+    if "fast_llm_id" not in columns:
+        op.add_column(
+            "searchspaces",
+            sa.Column("fast_llm_id", sa.Integer(), nullable=True),
+        )
+    if "strategic_llm_id" not in columns:
+        op.add_column(
+            "searchspaces",
+            sa.Column("strategic_llm_id", sa.Integer(), nullable=True),
+        )
 
     # Migrate existing preferences from user_search_space_preferences to searchspaces
-    # We take the owner's preferences (the user who created the search space)
-    connection = op.get_bind()
-
-    # Get all search spaces and their owner's preferences
+    # Take the owner's preferences (the user who created the search space)
     connection.execute(
         sa.text("""
             UPDATE searchspaces ss
@@ -57,7 +62,14 @@ def upgrade():
 
 
 def downgrade():
-    # Remove LLM preference columns from searchspaces table
-    op.drop_column("searchspaces", "strategic_llm_id")
-    op.drop_column("searchspaces", "fast_llm_id")
-    op.drop_column("searchspaces", "long_context_llm_id")
+    connection = op.get_bind()
+    inspector = inspect(connection)
+    columns = [col["name"] for col in inspector.get_columns("searchspaces")]
+
+    # Remove columns only if they exist
+    if "strategic_llm_id" in columns:
+        op.drop_column("searchspaces", "strategic_llm_id")
+    if "fast_llm_id" in columns:
+        op.drop_column("searchspaces", "fast_llm_id")
+    if "long_context_llm_id" in columns:
+        op.drop_column("searchspaces", "long_context_llm_id")
diff --git a/surfsense_backend/alembic/versions/43_add_blocknote_fields_to_documents.py b/surfsense_backend/alembic/versions/43_add_blocknote_fields_to_documents.py
index 32e7780eb..5206bb659 100644
--- a/surfsense_backend/alembic/versions/43_add_blocknote_fields_to_documents.py
+++ b/surfsense_backend/alembic/versions/43_add_blocknote_fields_to_documents.py
@@ -27,26 +27,39 @@ depends_on: str | Sequence[str] | None = None
 
 def upgrade() -> None:
     """Upgrade schema - Add BlockNote fields and trigger population task."""
-    # Add the columns
-    op.add_column(
-        "documents",
-        sa.Column(
-            "blocknote_document", postgresql.JSONB(astext_type=sa.Text()), nullable=True
-        ),
-    )
-    op.add_column(
-        "documents",
-        sa.Column(
-            "content_needs_reindexing",
-            sa.Boolean(),
-            nullable=False,
-            server_default=sa.false(),
-        ),
-    )
-    op.add_column(
-        "documents",
-        sa.Column("last_edited_at", sa.TIMESTAMP(timezone=True), nullable=True),
-    )
+    # Get existing columns to avoid duplicates
+    conn = op.get_bind()
+    existing_columns = [
+        col["name"] for col in sa.inspect(conn).get_columns("documents")
+    ]
+
+    # Add the columns if they don't exist
+    if "blocknote_document" not in existing_columns:
+        op.add_column(
+            "documents",
+            sa.Column(
+                "blocknote_document",
+                postgresql.JSONB(astext_type=sa.Text()),
+                nullable=True,
+            ),
+        )
+
+    if "content_needs_reindexing" not in existing_columns:
+        op.add_column(
+            "documents",
+            sa.Column(
+                "content_needs_reindexing",
+                sa.Boolean(),
+                nullable=False,
+                server_default=sa.false(),
+            ),
+        )
+
+    if "last_edited_at" not in existing_columns:
+        op.add_column(
+            "documents",
+            sa.Column("last_edited_at", sa.TIMESTAMP(timezone=True), nullable=True),
+        )
 
     # Trigger the Celery task to populate blocknote_document for existing documents
     try:
diff --git a/surfsense_backend/alembic/versions/45_add_updated_at_to_documents.py b/surfsense_backend/alembic/versions/45_add_updated_at_to_documents.py
index 8a0d3b875..39572d76e 100644
--- a/surfsense_backend/alembic/versions/45_add_updated_at_to_documents.py
+++ b/surfsense_backend/alembic/versions/45_add_updated_at_to_documents.py
@@ -12,6 +12,7 @@ for efficient time-based filtering.
 from collections.abc import Sequence
 
 import sqlalchemy as sa
+from sqlalchemy import inspect
 
 from alembic import op
 
@@ -24,19 +25,33 @@ depends_on: str | Sequence[str] | None = None
 
 def upgrade() -> None:
     """Upgrade schema - Add updated_at field with index to documents."""
-    op.add_column(
-        "documents",
-        sa.Column("updated_at", sa.TIMESTAMP(timezone=True), nullable=True),
-    )
-    op.create_index(
-        "ix_documents_updated_at",
-        "documents",
-        ["updated_at"],
-    )
+    connection = op.get_bind()
+    inspector = inspect(connection)
+    columns = [col["name"] for col in inspector.get_columns("documents")]
+
+    if "updated_at" not in columns:
+        op.add_column(
+            "documents",
+            sa.Column("updated_at", sa.TIMESTAMP(timezone=True), nullable=True),
+        )
+
+    # Guard the index separately from the column: a database that already has
+    # the column but not the index (e.g. a partially applied run) must still
+    # end up indexed, and a re-run must not fail on an existing index.
+    op.create_index(
+        "ix_documents_updated_at",
+        "documents",
+        ["updated_at"],
+        if_not_exists=True,
+    )
 
 
 def downgrade() -> None:
     """Downgrade schema - Remove updated_at field and index."""
-    # Use if_exists to handle cases where index wasn't created (migration modified after apply)
-    op.drop_index("ix_documents_updated_at", table_name="documents", if_exists=True)
-    op.drop_column("documents", "updated_at")
+    connection = op.get_bind()
+    inspector = inspect(connection)
+    columns = [col["name"] for col in inspector.get_columns("documents")]
+
+    if "updated_at" in columns:
+        op.drop_index("ix_documents_updated_at", table_name="documents", if_exists=True)
+        op.drop_column("documents", "updated_at")
diff --git a/surfsense_backend/alembic/versions/46_remove_last_edited_at_from_documents.py b/surfsense_backend/alembic/versions/46_remove_last_edited_at_from_documents.py
index 958a91807..dc6e43f8c 100644
--- a/surfsense_backend/alembic/versions/46_remove_last_edited_at_from_documents.py
+++ b/surfsense_backend/alembic/versions/46_remove_last_edited_at_from_documents.py
@@ -12,6 +12,7 @@ to track all document updates (indexers, processors, and editor).
 from collections.abc import Sequence
 
 import sqlalchemy as sa
+from sqlalchemy import inspect, text
 
 from alembic import op
 
@@ -24,29 +25,33 @@ depends_on: str | Sequence[str] | None = None
 
 def upgrade() -> None:
     """Upgrade schema - Migrate last_edited_at to updated_at, then remove last_edited_at."""
-    # Step 1: Copy last_edited_at values to updated_at where updated_at is NULL
-    # This preserves edit timestamps for documents that were edited via BlockNote
-    op.execute(
-        """
-        UPDATE documents
-        SET updated_at = last_edited_at
-        WHERE last_edited_at IS NOT NULL
-        AND updated_at IS NULL
-    """
-    )
+    conn = op.get_bind()
+    inspector = inspect(conn)
+    columns = [col["name"] for col in inspector.get_columns("documents")]
 
-    # Step 2: For documents where both exist, use the most recent timestamp
-    op.execute(
-        """
-        UPDATE documents
-        SET updated_at = GREATEST(updated_at, last_edited_at)
-        WHERE last_edited_at IS NOT NULL
-        AND updated_at IS NOT NULL
-    """
-    )
+    if "last_edited_at" in columns:
+        # Step 1: Copy last_edited_at values to updated_at where updated_at is NULL
+        conn.execute(
+            text("""
+                UPDATE documents
+                SET updated_at = last_edited_at
+                WHERE last_edited_at IS NOT NULL
+                AND updated_at IS NULL
+            """)
+        )
 
-    # Step 3: Drop the last_edited_at column
-    op.drop_column("documents", "last_edited_at")
+        # Step 2: For documents where both exist, use the most recent timestamp
+        conn.execute(
+            text("""
+                UPDATE documents
+                SET updated_at = GREATEST(updated_at, last_edited_at)
+                WHERE last_edited_at IS NOT NULL
+                AND updated_at IS NOT NULL
+            """)
+        )
+
+        # Step 3: Drop the last_edited_at column
+        op.drop_column("documents", "last_edited_at")
 
 
 def downgrade() -> None:
diff --git a/surfsense_backend/alembic/versions/8_add_content_hash_to_documents.py b/surfsense_backend/alembic/versions/8_add_content_hash_to_documents.py
index 6fa65a858..0a733efd9 100644
--- a/surfsense_backend/alembic/versions/8_add_content_hash_to_documents.py
+++ b/surfsense_backend/alembic/versions/8_add_content_hash_to_documents.py
@@ -66,6 +66,22 @@ def upgrade() -> None:
 
 
 def downgrade() -> None:
-    op.drop_constraint(op.f("uq_documents_content_hash"), "documents", type_="unique")
-    op.drop_index(op.f("ix_documents_content_hash"), table_name="documents")
-    op.drop_column("documents", "content_hash")
+    bind = op.get_bind()
+    inspector = inspect(bind)
+
+    # Get existing constraints and indexes on documents
+    constraints = [c["name"] for c in inspector.get_unique_constraints("documents")]
+    indexes = [i["name"] for i in inspector.get_indexes("documents")]
+    columns = [col["name"] for col in inspector.get_columns("documents")]
+
+    # Drop unique constraint if it exists
+    if "uq_documents_content_hash" in constraints:
+        op.drop_constraint("uq_documents_content_hash", "documents", type_="unique")
+
+    # Drop index if it exists
+    if "ix_documents_content_hash" in indexes:
+        op.drop_index("ix_documents_content_hash", table_name="documents")
+
+    # Drop column if it exists
+    if "content_hash" in columns:
+        op.drop_column("documents", "content_hash")