From d256fdc7a5a30e8548c3eb2e728ac311cb59e183 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 13 Jan 2026 20:12:51 +0200 Subject: [PATCH 1/9] Make migration 1 idempotent --- .../versions/1_add_github_connector_enum.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/surfsense_backend/alembic/versions/1_add_github_connector_enum.py b/surfsense_backend/alembic/versions/1_add_github_connector_enum.py index 235908b1f..ee008d8c5 100644 --- a/surfsense_backend/alembic/versions/1_add_github_connector_enum.py +++ b/surfsense_backend/alembic/versions/1_add_github_connector_enum.py @@ -7,6 +7,8 @@ Revises: from collections.abc import Sequence +import sqlalchemy as sa + from alembic import op # Import pgvector if needed for other types, though not for this ENUM change @@ -20,9 +22,25 @@ branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None +def enum_exists(enum_name: str) -> bool: + """Check if an enum type exists in the database.""" + conn = op.get_bind() + result = conn.execute( + sa.text( + "SELECT EXISTS (SELECT 1 FROM pg_type WHERE typname = :enum_name)" + ), + {"enum_name": enum_name}, + ) + return result.scalar() + + def upgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### + # Skip if the enum doesn't exist (fresh DB after downgrade - create_db_and_tables will handle it) + if not enum_exists("searchsourceconnectortype"): + return + # Manually add the command to add the enum value # Note: It's generally better to let autogenerate handle this, but we're bypassing it op.execute( @@ -51,6 +69,10 @@ END$$; def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### + # Skip if the enum doesn't exist + if not enum_exists("searchsourceconnectortype"): + return + # Downgrading removal of an enum value is complex and potentially dangerous # if the value is in use. Often omitted or requires manual SQL based on context. # For now, we'll just pass. If you needed to reverse this, you'd likely From 2af555f1d7309cf1e247d052549b36b12cd46b59 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 13 Jan 2026 20:12:58 +0200 Subject: [PATCH 2/9] Make migration 5 idempotent --- .../versions/5_remove_title_char_limit.py | 104 ++++++++++-------- 1 file changed, 61 insertions(+), 43 deletions(-) diff --git a/surfsense_backend/alembic/versions/5_remove_title_char_limit.py b/surfsense_backend/alembic/versions/5_remove_title_char_limit.py index 2e4cd56d1..afdbaa803 100644 --- a/surfsense_backend/alembic/versions/5_remove_title_char_limit.py +++ b/surfsense_backend/alembic/versions/5_remove_title_char_limit.py @@ -18,59 +18,77 @@ branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None -def upgrade() -> None: - # Alter Chat table - op.alter_column( - "chats", - "title", - existing_type=sa.String(200), - type_=sa.String(), - existing_nullable=False, +def table_exists(table_name: str) -> bool: + """Check if a table exists in the database.""" + conn = op.get_bind() + result = conn.execute( + sa.text( + "SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = :table_name)" + ), + {"table_name": table_name}, ) + return result.scalar() + + +def upgrade() -> None: + # Alter Chat table (may not exist on fresh databases, removed in migration 49) + if table_exists("chats"): + op.alter_column( + "chats", + "title", + existing_type=sa.String(200), + type_=sa.String(), + existing_nullable=False, + ) # Alter Document table - op.alter_column( - "documents", - "title", - existing_type=sa.String(200), - type_=sa.String(), - existing_nullable=False, - ) + if table_exists("documents"): + op.alter_column( + "documents", + "title", + existing_type=sa.String(200), + type_=sa.String(), + existing_nullable=False, + ) # Alter Podcast table - op.alter_column( - "podcasts", - "title", - existing_type=sa.String(200), - type_=sa.String(), - existing_nullable=False, - ) + if table_exists("podcasts"): + op.alter_column( + "podcasts", + "title", + existing_type=sa.String(200), + type_=sa.String(), + existing_nullable=False, + ) def downgrade() -> None: # Revert Chat table - op.alter_column( - "chats", - "title", - existing_type=sa.String(), - type_=sa.String(200), - existing_nullable=False, - ) + if table_exists("chats"): + op.alter_column( + "chats", + "title", + existing_type=sa.String(), + type_=sa.String(200), + existing_nullable=False, + ) # Revert Document table - op.alter_column( - "documents", - "title", - existing_type=sa.String(), - type_=sa.String(200), - existing_nullable=False, - ) + if table_exists("documents"): + op.alter_column( + "documents", + "title", + existing_type=sa.String(), + type_=sa.String(200), + existing_nullable=False, + ) # Revert Podcast table - op.alter_column( - "podcasts", - "title", - existing_type=sa.String(), - type_=sa.String(200), - existing_nullable=False, - ) + if table_exists("podcasts"): + op.alter_column( + "podcasts", + "title", + existing_type=sa.String(), + type_=sa.String(200), + existing_nullable=False, + ) From 8ed295c0533df4195174e539f0a2af2ac7d1977d Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 13 Jan 2026 20:13:03 +0200 Subject: [PATCH 3/9] Make migration 10 idempotent --- ...e_chattype_enum_to_qna_report_structure.py | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/surfsense_backend/alembic/versions/10_update_chattype_enum_to_qna_report_structure.py b/surfsense_backend/alembic/versions/10_update_chattype_enum_to_qna_report_structure.py index 665585a85..a4f6db0b8 100644 --- a/surfsense_backend/alembic/versions/10_update_chattype_enum_to_qna_report_structure.py +++ b/surfsense_backend/alembic/versions/10_update_chattype_enum_to_qna_report_structure.py @@ -6,6 +6,8 @@ Revises: 9 from collections.abc import Sequence +import sqlalchemy as sa + from alembic import op # revision identifiers, used by Alembic. @@ -18,9 +20,37 @@ depends_on: str | Sequence[str] | None = None CHAT_TYPE_ENUM = "chattype" +def enum_exists(enum_name: str) -> bool: + """Check if an enum type exists in the database.""" + conn = op.get_bind() + result = conn.execute( + sa.text( + "SELECT EXISTS (SELECT 1 FROM pg_type WHERE typname = :enum_name)" + ), + {"enum_name": enum_name}, + ) + return result.scalar() + + +def table_exists(table_name: str) -> bool: + """Check if a table exists in the database.""" + conn = op.get_bind() + result = conn.execute( + sa.text( + "SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = :table_name)" + ), + {"table_name": table_name}, + ) + return result.scalar() + + def upgrade() -> None: """Upgrade schema - replace ChatType enum values with new QNA/REPORT structure.""" + # Skip if chats table or chattype enum doesn't exist (fresh database) + if not table_exists("chats") or not enum_exists(CHAT_TYPE_ENUM): + return + # Old enum name for temporary storage old_enum_name = f"{CHAT_TYPE_ENUM}_old" @@ -72,6 +102,10 @@ def upgrade() -> None: def downgrade() -> None: """Downgrade schema - revert ChatType enum to old GENERAL/DEEP/DEEPER/DEEPEST structure.""" + # Skip if chats table or chattype enum doesn't exist + if not table_exists("chats") or not enum_exists(CHAT_TYPE_ENUM): + return + # Old enum name for temporary storage old_enum_name = f"{CHAT_TYPE_ENUM}_old" From 60a7269ce82c87fe962bf81243bec9eed23d20bf Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 13 Jan 2026 20:13:08 +0200 Subject: [PATCH 4/9] Make migration 24 idempotent --- .../alembic/versions/24_fix_null_chat_types.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/surfsense_backend/alembic/versions/24_fix_null_chat_types.py b/surfsense_backend/alembic/versions/24_fix_null_chat_types.py index e0d371f1e..e513605f0 100644 --- a/surfsense_backend/alembic/versions/24_fix_null_chat_types.py +++ b/surfsense_backend/alembic/versions/24_fix_null_chat_types.py @@ -7,6 +7,8 @@ Revises: 23 from collections.abc import Sequence +import sqlalchemy as sa + from alembic import op # revision identifiers, used by Alembic. @@ -16,11 +18,27 @@ branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None +def table_exists(table_name: str) -> bool: + """Check if a table exists in the database.""" + conn = op.get_bind() + result = conn.execute( + sa.text( + "SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = :table_name)" + ), + {"table_name": table_name}, + ) + return result.scalar() + + def upgrade() -> None: """ Fix any chats with NULL type values by setting them to QNA. This handles edge cases from previous migrations where type values were not properly migrated. """ + # Skip if chats table doesn't exist (fresh database) + if not table_exists("chats"): + return + # Update any NULL type values to QNA (the default chat type) op.execute( """ From e99e2134fa1b958827712dfebc013699015d64ea Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 13 Jan 2026 20:13:13 +0200 Subject: [PATCH 5/9] Make migration 34 idempotent --- .../34_add_podcast_staleness_detection.py | 69 ++++++++++++------- 1 file changed, 44 insertions(+), 25 deletions(-) diff --git a/surfsense_backend/alembic/versions/34_add_podcast_staleness_detection.py b/surfsense_backend/alembic/versions/34_add_podcast_staleness_detection.py index 4991cd58e..74bb7fe86 100644 --- a/surfsense_backend/alembic/versions/34_add_podcast_staleness_detection.py +++ b/surfsense_backend/alembic/versions/34_add_podcast_staleness_detection.py @@ -10,6 +10,8 @@ Revises: 33 from collections.abc import Sequence +import sqlalchemy as sa + from alembic import op # revision identifiers @@ -19,42 +21,59 @@ branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None +def table_exists(table_name: str) -> bool: + """Check if a table exists in the database.""" + conn = op.get_bind() + result = conn.execute( + sa.text( + "SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = :table_name)" + ), + {"table_name": table_name}, + ) + return result.scalar() + + def upgrade() -> None: """Add columns only if they don't already exist (safe for re-runs).""" # Add 'state_version' column to chats table (default 1) - op.execute(""" - ALTER TABLE chats - ADD COLUMN IF NOT EXISTS state_version BIGINT DEFAULT 1 NOT NULL - """) + # Skip if chats table doesn't exist (fresh database) + if table_exists("chats"): + op.execute(""" + ALTER TABLE chats + ADD COLUMN IF NOT EXISTS state_version BIGINT DEFAULT 1 NOT NULL + """) # Add 'chat_state_version' column to podcasts table - op.execute(""" - ALTER TABLE podcasts - ADD COLUMN IF NOT EXISTS chat_state_version BIGINT - """) + if table_exists("podcasts"): + op.execute(""" + ALTER TABLE podcasts + ADD COLUMN IF NOT EXISTS chat_state_version BIGINT + """) - # Add 'chat_id' column to podcasts table - op.execute(""" - ALTER TABLE podcasts - ADD COLUMN IF NOT EXISTS chat_id INTEGER - """) + # Add 'chat_id' column to podcasts table + op.execute(""" + ALTER TABLE podcasts + ADD COLUMN IF NOT EXISTS chat_id INTEGER + """) def downgrade() -> None: """Remove columns only if they exist.""" - op.execute(""" - ALTER TABLE podcasts - DROP COLUMN IF EXISTS chat_state_version - """) + if table_exists("podcasts"): + op.execute(""" + ALTER TABLE podcasts + DROP COLUMN IF EXISTS chat_state_version + """) - op.execute(""" - ALTER TABLE podcasts - DROP COLUMN IF EXISTS chat_id - """) + op.execute(""" + ALTER TABLE podcasts + DROP COLUMN IF EXISTS chat_id + """) - op.execute(""" - ALTER TABLE chats - DROP COLUMN IF EXISTS state_version - """) + if table_exists("chats"): + op.execute(""" + ALTER TABLE chats + DROP COLUMN IF EXISTS state_version + """) From 6f1565555210faf9b1604700c98b4571a3d8f5ec Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 13 Jan 2026 20:13:19 +0200 Subject: [PATCH 6/9] Make migration 49 idempotent --- .../49_migrate_old_chats_to_new_chat.py | 80 +++++++++++++------ 1 file changed, 55 insertions(+), 25 deletions(-) diff --git a/surfsense_backend/alembic/versions/49_migrate_old_chats_to_new_chat.py b/surfsense_backend/alembic/versions/49_migrate_old_chats_to_new_chat.py index 61a3ddb48..ef38add26 100644 --- a/surfsense_backend/alembic/versions/49_migrate_old_chats_to_new_chat.py +++ b/surfsense_backend/alembic/versions/49_migrate_old_chats_to_new_chat.py @@ -62,8 +62,25 @@ def parse_timestamp(ts, fallback): return fallback +def table_exists(table_name: str) -> bool: + """Check if a table exists in the database.""" + conn = op.get_bind() + result = conn.execute( + sa.text( + "SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = :table_name)" + ), + {"table_name": table_name}, + ) + return result.scalar() + + def upgrade() -> None: """Migrate old chats to new_chat_threads and remove old tables.""" + # Skip if chats table doesn't exist (fresh database) + if not table_exists("chats"): + print("[Migration 49] Chats table does not exist, skipping migration") + return + connection = op.get_bind() # Get all old chats @@ -176,36 +193,49 @@ def upgrade() -> None: print("[Migration 49] Migration complete!") +def enum_exists(enum_name: str) -> bool: + """Check if an enum type exists in the database.""" + conn = op.get_bind() + result = conn.execute( + sa.text( + "SELECT EXISTS (SELECT 1 FROM pg_type WHERE typname = :enum_name)" + ), + {"enum_name": enum_name}, + ) + return result.scalar() + + def downgrade() -> None: """Recreate old chats table (data cannot be restored).""" - # Recreate chattype enum + # Skip if chats table already exists + if table_exists("chats"): + print("[Migration 49 Downgrade] Chats table already exists, skipping") + return + + # Recreate chattype enum if it doesn't exist + if not enum_exists("chattype"): + op.execute( + sa.text(""" + CREATE TYPE chattype AS ENUM ('QNA') + """) + ) + + # Recreate chats table using raw SQL to avoid SQLAlchemy trying to create the enum op.execute( sa.text(""" - CREATE TYPE chattype AS ENUM ('QNA') + CREATE TABLE chats ( + id SERIAL PRIMARY KEY, + type chattype NOT NULL, + title VARCHAR NOT NULL, + initial_connectors VARCHAR[], + messages JSON NOT NULL, + state_version BIGINT NOT NULL DEFAULT 1, + search_space_id INTEGER NOT NULL REFERENCES searchspaces(id) ON DELETE CASCADE, + created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW() + ) """) ) - - # Recreate chats table - op.create_table( - "chats", - sa.Column("id", sa.Integer(), primary_key=True, index=True), - sa.Column("type", sa.Enum("QNA", name="chattype"), nullable=False), - sa.Column("title", sa.String(), nullable=False, index=True), - sa.Column("initial_connectors", sa.ARRAY(sa.String()), nullable=True), - sa.Column("messages", sa.JSON(), nullable=False), - sa.Column("state_version", sa.BigInteger(), nullable=False, default=1), - sa.Column( - "search_space_id", - sa.Integer(), - sa.ForeignKey("searchspaces.id", ondelete="CASCADE"), - nullable=False, - ), - sa.Column( - "created_at", - sa.TIMESTAMP(timezone=True), - nullable=False, - server_default=sa.func.now(), - ), - ) + op.execute(sa.text("CREATE INDEX ix_chats_id ON chats (id)")) + op.execute(sa.text("CREATE INDEX ix_chats_title ON chats (title)")) print("[Migration 49 Downgrade] Chats table recreated (data not restored)") From dcc6e067bd3afe1a4fef45d934b4ea2dc7add727 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 13 Jan 2026 20:13:30 +0200 Subject: [PATCH 7/9] Make migration 52 idempotent --- .../versions/52_rename_llm_preference_columns.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/surfsense_backend/alembic/versions/52_rename_llm_preference_columns.py b/surfsense_backend/alembic/versions/52_rename_llm_preference_columns.py index cd1a1dbbc..08177ca70 100644 --- a/surfsense_backend/alembic/versions/52_rename_llm_preference_columns.py +++ b/surfsense_backend/alembic/versions/52_rename_llm_preference_columns.py @@ -39,7 +39,7 @@ def upgrade(): """ ) - # Rename columns (only if they exist with old names) + # Rename columns (only if source exists and target doesn't already exist) op.execute( """ DO $$ @@ -47,6 +47,9 @@ def upgrade(): IF EXISTS ( SELECT 1 FROM information_schema.columns WHERE table_name = 'searchspaces' AND column_name = 'fast_llm_id' + ) AND NOT EXISTS ( + SELECT 1 FROM information_schema.columns + WHERE table_name = 'searchspaces' AND column_name = 'agent_llm_id' ) THEN ALTER TABLE searchspaces RENAME COLUMN fast_llm_id TO agent_llm_id; END IF; @@ -61,6 +64,9 @@ def upgrade(): IF EXISTS ( SELECT 1 FROM information_schema.columns WHERE table_name = 'searchspaces' AND column_name = 'long_context_llm_id' + ) AND NOT EXISTS ( + SELECT 1 FROM information_schema.columns + WHERE table_name = 'searchspaces' AND column_name = 'document_summary_llm_id' ) THEN ALTER TABLE searchspaces RENAME COLUMN long_context_llm_id TO document_summary_llm_id; END IF; @@ -100,7 +106,7 @@ def downgrade(): """ ) - # Rename columns back + # Rename columns back (only if source exists and target doesn't already exist) op.execute( """ DO $$ @@ -108,6 +114,9 @@ def downgrade(): IF EXISTS ( SELECT 1 FROM information_schema.columns WHERE table_name = 'searchspaces' AND column_name = 'agent_llm_id' + ) AND NOT EXISTS ( + SELECT 1 FROM information_schema.columns + WHERE table_name = 'searchspaces' AND column_name = 'fast_llm_id' ) THEN ALTER TABLE searchspaces RENAME COLUMN agent_llm_id TO fast_llm_id; END IF; @@ -122,6 +131,9 @@ def downgrade(): IF EXISTS ( SELECT 1 FROM information_schema.columns WHERE table_name = 'searchspaces' AND column_name = 'document_summary_llm_id' + ) AND NOT EXISTS ( + SELECT 1 FROM information_schema.columns + WHERE table_name = 'searchspaces' AND column_name = 'long_context_llm_id' ) THEN ALTER TABLE searchspaces RENAME COLUMN document_summary_llm_id TO long_context_llm_id; END IF; From 443e877a591b224e581a1d187fa7b0e4794ee4f5 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 13 Jan 2026 20:13:50 +0200 Subject: [PATCH 8/9] Make migration 55 idempotent --- ...5_rename_google_drive_connector_to_file.py | 24 +++++++++++++++---- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/surfsense_backend/alembic/versions/55_rename_google_drive_connector_to_file.py b/surfsense_backend/alembic/versions/55_rename_google_drive_connector_to_file.py index 9ce57d95f..baaf1991f 100644 --- a/surfsense_backend/alembic/versions/55_rename_google_drive_connector_to_file.py +++ b/surfsense_backend/alembic/versions/55_rename_google_drive_connector_to_file.py @@ -60,14 +60,28 @@ def downgrade() -> None: connection = op.get_bind() - connection.execute( + # Only update if the target enum value exists (it won't on fresh databases) + result = connection.execute( text( """ - UPDATE documents - SET document_type = 'GOOGLE_DRIVE_CONNECTOR' - WHERE document_type = 'GOOGLE_DRIVE_FILE'; + SELECT EXISTS ( + SELECT 1 FROM pg_type t + JOIN pg_enum e ON t.oid = e.enumtypid + WHERE t.typname = 'documenttype' AND e.enumlabel = 'GOOGLE_DRIVE_CONNECTOR' + ); """ ) ) + enum_exists = result.scalar() - connection.commit() + if enum_exists: + connection.execute( + text( + """ + UPDATE documents + SET document_type = 'GOOGLE_DRIVE_CONNECTOR' + WHERE document_type = 'GOOGLE_DRIVE_FILE'; + """ + ) + ) + connection.commit() From 924e621a6bb64a683df02e0e99cb2486041950de Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 14 Jan 2026 17:04:37 +0200 Subject: [PATCH 9/9] feat: add migration 0 for initial schema setup --- .../alembic/versions/0_initial_schema.py | 54 +++++++++++++++++++ .../versions/1_add_github_connector_enum.py | 6 +-- 2 files changed, 55 insertions(+), 5 deletions(-) create mode 100644 surfsense_backend/alembic/versions/0_initial_schema.py diff --git a/surfsense_backend/alembic/versions/0_initial_schema.py b/surfsense_backend/alembic/versions/0_initial_schema.py new file mode 100644 index 000000000..77bd9dd1b --- /dev/null +++ b/surfsense_backend/alembic/versions/0_initial_schema.py @@ -0,0 +1,54 @@ +"""Initial schema setup + +Revision ID: 0 +Revises: None + +Creates all tables from SQLAlchemy models. Idempotent - safe to run on existing databases. +""" + +from collections.abc import Sequence + +import sqlalchemy as sa + +from alembic import op + +revision: str = "0" +down_revision: str | None = None +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + from app.db import Base + + connection = op.get_bind() + + # Create tables + op.execute(sa.text("CREATE EXTENSION IF NOT EXISTS vector")) + Base.metadata.create_all(bind=connection) + + # Set up indexes + op.execute( + sa.text( + "CREATE INDEX IF NOT EXISTS document_vector_index ON documents USING hnsw (embedding public.vector_cosine_ops)" + ) + ) + op.execute( + sa.text( + "CREATE INDEX IF NOT EXISTS document_search_index ON documents USING gin (to_tsvector('english', content))" + ) + ) + op.execute( + sa.text( + "CREATE INDEX IF NOT EXISTS chucks_vector_index ON chunks USING hnsw (embedding public.vector_cosine_ops)" + ) + ) + op.execute( + sa.text( + "CREATE INDEX IF NOT EXISTS chucks_search_index ON chunks USING gin (to_tsvector('english', content))" + ) + ) + + +def downgrade() -> None: + pass diff --git a/surfsense_backend/alembic/versions/1_add_github_connector_enum.py b/surfsense_backend/alembic/versions/1_add_github_connector_enum.py index ee008d8c5..6f3ee2a01 100644 --- a/surfsense_backend/alembic/versions/1_add_github_connector_enum.py +++ b/surfsense_backend/alembic/versions/1_add_github_connector_enum.py @@ -11,13 +11,9 @@ import sqlalchemy as sa from alembic import op -# Import pgvector if needed for other types, though not for this ENUM change -# import pgvector - - # revision identifiers, used by Alembic. revision: str = "1" -down_revision: str | None = None +down_revision: str | None = "0" branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None