diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f08081a6c..7986a6a85 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -31,6 +31,7 @@ repos: .*\.env\.template| .*/tests/.*| .*test.*\.py| + test_.*\.py| .github/workflows/.*\.yml| .github/workflows/.*\.yaml| .*pnpm-lock\.yaml| @@ -44,20 +45,22 @@ repos: rev: v0.12.5 hooks: - id: ruff - name: ruff-lint + name: ruff-check files: ^surfsense_backend/ - args: [--fix, --exit-non-zero-on-fix] + exclude: ^surfsense_backend/(test_.*\.py|.*test.*\.py) + args: [--fix] - id: ruff-format name: ruff-format files: ^surfsense_backend/ + exclude: ^surfsense_backend/(test_.*\.py|.*test.*\.py) - repo: https://github.com/PyCQA/bandit rev: 1.8.6 hooks: - id: bandit files: ^surfsense_backend/ - args: ['-f', 'json'] - exclude: ^surfsense_backend/(tests/|alembic/) + args: ['-f', 'json', '--severity-level', 'high', '--confidence-level', 'high'] + exclude: ^surfsense_backend/(tests/|test_.*\.py|.*test.*\.py|alembic/) # Frontend/Extension Hooks (TypeScript/JavaScript) - repo: https://github.com/pre-commit/mirrors-prettier diff --git a/surfsense_backend/alembic/versions/10_update_chattype_enum_to_qna_report_structure.py b/surfsense_backend/alembic/versions/10_update_chattype_enum_to_qna_report_structure.py index 543481fd6..665585a85 100644 --- a/surfsense_backend/alembic/versions/10_update_chattype_enum_to_qna_report_structure.py +++ b/surfsense_backend/alembic/versions/10_update_chattype_enum_to_qna_report_structure.py @@ -20,87 +20,101 @@ CHAT_TYPE_ENUM = "chattype" def upgrade() -> None: """Upgrade schema - replace ChatType enum values with new QNA/REPORT structure.""" - + # Old enum name for temporary storage old_enum_name = f"{CHAT_TYPE_ENUM}_old" - + # New enum values - new_values = ( - "QNA", - "REPORT_GENERAL", - "REPORT_DEEP", - "REPORT_DEEPER" - ) + new_values = ("QNA", "REPORT_GENERAL", "REPORT_DEEP", "REPORT_DEEPER") new_values_sql = ", ".join([f"'{v}'" for v in new_values]) - + # Table and column info table_name = "chats" column_name = "type" - + # Step 1: Rename the current enum type op.execute(f"ALTER TYPE {CHAT_TYPE_ENUM} RENAME TO {old_enum_name}") - + # Step 2: Create the new enum type with new values op.execute(f"CREATE TYPE {CHAT_TYPE_ENUM} AS ENUM({new_values_sql})") - + # Step 3: Add a temporary column with the new type - op.execute(f"ALTER TABLE {table_name} ADD COLUMN {column_name}_new {CHAT_TYPE_ENUM}") - + op.execute( + f"ALTER TABLE {table_name} ADD COLUMN {column_name}_new {CHAT_TYPE_ENUM}" + ) + # Step 4: Update the temporary column with mapped values - op.execute(f"UPDATE {table_name} SET {column_name}_new = 'QNA' WHERE {column_name}::text = 'GENERAL'") - op.execute(f"UPDATE {table_name} SET {column_name}_new = 'REPORT_DEEP' WHERE {column_name}::text = 'DEEP'") - op.execute(f"UPDATE {table_name} SET {column_name}_new = 'REPORT_DEEPER' WHERE {column_name}::text = 'DEEPER'") - op.execute(f"UPDATE {table_name} SET {column_name}_new = 'REPORT_DEEPER' WHERE {column_name}::text = 'DEEPEST'") - + op.execute( + f"UPDATE {table_name} SET {column_name}_new = 'QNA' WHERE {column_name}::text = 'GENERAL'" + ) + op.execute( + f"UPDATE {table_name} SET {column_name}_new = 'REPORT_DEEP' WHERE {column_name}::text = 'DEEP'" + ) + op.execute( + f"UPDATE {table_name} SET {column_name}_new = 'REPORT_DEEPER' WHERE {column_name}::text = 'DEEPER'" + ) + op.execute( + f"UPDATE {table_name} SET {column_name}_new = 'REPORT_DEEPER' WHERE {column_name}::text = 'DEEPEST'" + ) + # Step 5: Drop the old column op.execute(f"ALTER TABLE {table_name} DROP COLUMN {column_name}") - + # Step 6: Rename the new column to the original name - op.execute(f"ALTER TABLE {table_name} RENAME COLUMN {column_name}_new TO {column_name}") - + op.execute( + f"ALTER TABLE {table_name} RENAME COLUMN {column_name}_new TO {column_name}" + ) + # Step 7: Drop the old enum type op.execute(f"DROP TYPE {old_enum_name}") def downgrade() -> None: """Downgrade schema - revert ChatType enum to old GENERAL/DEEP/DEEPER/DEEPEST structure.""" - + # Old enum name for temporary storage old_enum_name = f"{CHAT_TYPE_ENUM}_old" - + # Original enum values - original_values = ( - "GENERAL", - "DEEP", - "DEEPER", - "DEEPEST" - ) + original_values = ("GENERAL", "DEEP", "DEEPER", "DEEPEST") original_values_sql = ", ".join([f"'{v}'" for v in original_values]) - + # Table and column info table_name = "chats" column_name = "type" - + # Step 1: Rename the current enum type op.execute(f"ALTER TYPE {CHAT_TYPE_ENUM} RENAME TO {old_enum_name}") - + # Step 2: Create the new enum type with original values op.execute(f"CREATE TYPE {CHAT_TYPE_ENUM} AS ENUM({original_values_sql})") - + # Step 3: Add a temporary column with the original type - op.execute(f"ALTER TABLE {table_name} ADD COLUMN {column_name}_new {CHAT_TYPE_ENUM}") - + op.execute( + f"ALTER TABLE {table_name} ADD COLUMN {column_name}_new {CHAT_TYPE_ENUM}" + ) + # Step 4: Update the temporary column with mapped values back to old values - op.execute(f"UPDATE {table_name} SET {column_name}_new = 'GENERAL' WHERE {column_name}::text = 'QNA'") - op.execute(f"UPDATE {table_name} SET {column_name}_new = 'GENERAL' WHERE {column_name}::text = 'REPORT_GENERAL'") - op.execute(f"UPDATE {table_name} SET {column_name}_new = 'DEEP' WHERE {column_name}::text = 'REPORT_DEEP'") - op.execute(f"UPDATE {table_name} SET {column_name}_new = 'DEEPER' WHERE {column_name}::text = 'REPORT_DEEPER'") - + op.execute( + f"UPDATE {table_name} SET {column_name}_new = 'GENERAL' WHERE {column_name}::text = 'QNA'" + ) + op.execute( + f"UPDATE {table_name} SET {column_name}_new = 'GENERAL' WHERE {column_name}::text = 'REPORT_GENERAL'" + ) + op.execute( + f"UPDATE {table_name} SET {column_name}_new = 'DEEP' WHERE {column_name}::text = 'REPORT_DEEP'" + ) + op.execute( + f"UPDATE {table_name} SET {column_name}_new = 'DEEPER' WHERE {column_name}::text = 'REPORT_DEEPER'" + ) + # Step 5: Drop the old column op.execute(f"ALTER TABLE {table_name} DROP COLUMN {column_name}") - + # Step 6: Rename the new column to the original name - op.execute(f"ALTER TABLE {table_name} RENAME COLUMN {column_name}_new TO {column_name}") - + op.execute( + f"ALTER TABLE {table_name} RENAME COLUMN {column_name}_new TO {column_name}" + ) + # Step 7: Drop the old enum type - op.execute(f"DROP TYPE {old_enum_name}") \ No newline at end of file + op.execute(f"DROP TYPE {old_enum_name}") diff --git a/surfsense_backend/alembic/versions/11_add_llm_config_table_and_relationships.py b/surfsense_backend/alembic/versions/11_add_llm_config_table_and_relationships.py index 4740cc277..f807f8b77 100644 --- a/surfsense_backend/alembic/versions/11_add_llm_config_table_and_relationships.py +++ b/surfsense_backend/alembic/versions/11_add_llm_config_table_and_relationships.py @@ -19,7 +19,7 @@ depends_on: str | Sequence[str] | None = None def upgrade() -> None: """Upgrade schema - add LiteLLMProvider enum, LLMConfig table and user LLM preferences.""" - + # Check if enum type exists and create if it doesn't op.execute(""" DO $$ @@ -29,7 +29,7 @@ def upgrade() -> None: END IF; END$$; """) - + # Create llm_configs table using raw SQL to avoid enum creation conflicts op.execute(""" CREATE TABLE llm_configs ( @@ -45,41 +45,70 @@ def upgrade() -> None: user_id UUID NOT NULL REFERENCES "user"(id) ON DELETE CASCADE ) """) - + # Create indexes - op.create_index(op.f('ix_llm_configs_id'), 'llm_configs', ['id'], unique=False) - op.create_index(op.f('ix_llm_configs_created_at'), 'llm_configs', ['created_at'], unique=False) - op.create_index(op.f('ix_llm_configs_name'), 'llm_configs', ['name'], unique=False) - + op.create_index(op.f("ix_llm_configs_id"), "llm_configs", ["id"], unique=False) + op.create_index( + op.f("ix_llm_configs_created_at"), "llm_configs", ["created_at"], unique=False + ) + op.create_index(op.f("ix_llm_configs_name"), "llm_configs", ["name"], unique=False) + # Add LLM preference columns to user table - op.add_column('user', sa.Column('long_context_llm_id', sa.Integer(), nullable=True)) - op.add_column('user', sa.Column('fast_llm_id', sa.Integer(), nullable=True)) - op.add_column('user', sa.Column('strategic_llm_id', sa.Integer(), nullable=True)) - + op.add_column("user", sa.Column("long_context_llm_id", sa.Integer(), nullable=True)) + op.add_column("user", sa.Column("fast_llm_id", sa.Integer(), nullable=True)) + op.add_column("user", sa.Column("strategic_llm_id", sa.Integer(), nullable=True)) + # Create foreign key constraints for LLM preferences - op.create_foreign_key(op.f('fk_user_long_context_llm_id_llm_configs'), 'user', 'llm_configs', ['long_context_llm_id'], ['id'], ondelete='SET NULL') - op.create_foreign_key(op.f('fk_user_fast_llm_id_llm_configs'), 'user', 'llm_configs', ['fast_llm_id'], ['id'], ondelete='SET NULL') - op.create_foreign_key(op.f('fk_user_strategic_llm_id_llm_configs'), 'user', 'llm_configs', ['strategic_llm_id'], ['id'], ondelete='SET NULL') + op.create_foreign_key( + op.f("fk_user_long_context_llm_id_llm_configs"), + "user", + "llm_configs", + ["long_context_llm_id"], + ["id"], + ondelete="SET NULL", + ) + op.create_foreign_key( + op.f("fk_user_fast_llm_id_llm_configs"), + "user", + "llm_configs", + ["fast_llm_id"], + ["id"], + ondelete="SET NULL", + ) + op.create_foreign_key( + op.f("fk_user_strategic_llm_id_llm_configs"), + "user", + "llm_configs", + ["strategic_llm_id"], + ["id"], + ondelete="SET NULL", + ) def downgrade() -> None: """Downgrade schema - remove LLMConfig table and user LLM preferences.""" - + # Drop foreign key constraints - op.drop_constraint(op.f('fk_user_strategic_llm_id_llm_configs'), 'user', type_='foreignkey') - op.drop_constraint(op.f('fk_user_fast_llm_id_llm_configs'), 'user', type_='foreignkey') - op.drop_constraint(op.f('fk_user_long_context_llm_id_llm_configs'), 'user', type_='foreignkey') - + op.drop_constraint( + op.f("fk_user_strategic_llm_id_llm_configs"), "user", type_="foreignkey" + ) + op.drop_constraint( + op.f("fk_user_fast_llm_id_llm_configs"), "user", type_="foreignkey" + ) + op.drop_constraint( + op.f("fk_user_long_context_llm_id_llm_configs"), "user", type_="foreignkey" + ) + # Drop LLM preference columns from user table - op.drop_column('user', 'strategic_llm_id') - op.drop_column('user', 'fast_llm_id') - op.drop_column('user', 'long_context_llm_id') - + op.drop_column("user", "strategic_llm_id") + op.drop_column("user", "fast_llm_id") + op.drop_column("user", "long_context_llm_id") + # Drop indexes and table - op.drop_index(op.f('ix_llm_configs_name'), table_name='llm_configs') - op.drop_index(op.f('ix_llm_configs_created_at'), table_name='llm_configs') - op.drop_index(op.f('ix_llm_configs_id'), table_name='llm_configs') - op.drop_table('llm_configs') - + op.drop_index(op.f("ix_llm_configs_name"), table_name="llm_configs") + op.drop_index(op.f("ix_llm_configs_created_at"), table_name="llm_configs") + op.drop_index(op.f("ix_llm_configs_id"), table_name="llm_configs") + op.drop_table("llm_configs") + # Drop LiteLLMProvider enum - op.execute("DROP TYPE IF EXISTS litellmprovider") \ No newline at end of file + op.execute("DROP TYPE IF EXISTS litellmprovider") diff --git a/surfsense_backend/alembic/versions/12_add_logs_table.py b/surfsense_backend/alembic/versions/12_add_logs_table.py index e3dacfc5e..9e12fe6b6 100644 --- a/surfsense_backend/alembic/versions/12_add_logs_table.py +++ b/surfsense_backend/alembic/versions/12_add_logs_table.py @@ -17,17 +17,17 @@ depends_on: str | Sequence[str] | None = None def upgrade() -> None: """Upgrade schema - add LogLevel and LogStatus enums and logs table.""" - + # Create LogLevel enum op.execute(""" CREATE TYPE loglevel AS ENUM ('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL') """) - - # Create LogStatus enum + + # Create LogStatus enum op.execute(""" CREATE TYPE logstatus AS ENUM ('IN_PROGRESS', 'SUCCESS', 'FAILED') """) - + # Create logs table op.execute(""" CREATE TABLE logs ( @@ -41,28 +41,28 @@ def upgrade() -> None: search_space_id INTEGER NOT NULL REFERENCES searchspaces(id) ON DELETE CASCADE ) """) - + # Create indexes - op.create_index(op.f('ix_logs_id'), 'logs', ['id'], unique=False) - op.create_index(op.f('ix_logs_created_at'), 'logs', ['created_at'], unique=False) - op.create_index(op.f('ix_logs_level'), 'logs', ['level'], unique=False) - op.create_index(op.f('ix_logs_status'), 'logs', ['status'], unique=False) - op.create_index(op.f('ix_logs_source'), 'logs', ['source'], unique=False) + op.create_index(op.f("ix_logs_id"), "logs", ["id"], unique=False) + op.create_index(op.f("ix_logs_created_at"), "logs", ["created_at"], unique=False) + op.create_index(op.f("ix_logs_level"), "logs", ["level"], unique=False) + op.create_index(op.f("ix_logs_status"), "logs", ["status"], unique=False) + op.create_index(op.f("ix_logs_source"), "logs", ["source"], unique=False) def downgrade() -> None: """Downgrade schema - remove logs table and enums.""" - + # Drop indexes - op.drop_index(op.f('ix_logs_source'), table_name='logs') - op.drop_index(op.f('ix_logs_status'), table_name='logs') - op.drop_index(op.f('ix_logs_level'), table_name='logs') - op.drop_index(op.f('ix_logs_created_at'), table_name='logs') - op.drop_index(op.f('ix_logs_id'), table_name='logs') - + op.drop_index(op.f("ix_logs_source"), table_name="logs") + op.drop_index(op.f("ix_logs_status"), table_name="logs") + op.drop_index(op.f("ix_logs_level"), table_name="logs") + op.drop_index(op.f("ix_logs_created_at"), table_name="logs") + op.drop_index(op.f("ix_logs_id"), table_name="logs") + # Drop logs table - op.drop_table('logs') - + op.drop_table("logs") + # Drop enums op.execute("DROP TYPE IF EXISTS logstatus") - op.execute("DROP TYPE IF EXISTS loglevel") \ No newline at end of file + op.execute("DROP TYPE IF EXISTS loglevel") diff --git a/surfsense_backend/alembic/versions/2_add_linear_connector_enum.py b/surfsense_backend/alembic/versions/2_add_linear_connector_enum.py index 91526120e..ffe629342 100644 --- a/surfsense_backend/alembic/versions/2_add_linear_connector_enum.py +++ b/surfsense_backend/alembic/versions/2_add_linear_connector_enum.py @@ -4,23 +4,24 @@ Revision ID: 2 Revises: e55302644c51 """ + from collections.abc import Sequence from alembic import op # revision identifiers, used by Alembic. -revision: str = '2' -down_revision: str | None = 'e55302644c51' +revision: str = "2" +down_revision: str | None = "e55302644c51" branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None def upgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### - + # Manually add the command to add the enum value op.execute("ALTER TYPE searchsourceconnectortype ADD VALUE 'LINEAR_CONNECTOR'") - + # Pass for the rest, as autogenerate didn't run to add other schema details pass # ### end Alembic commands ### @@ -28,10 +29,14 @@ def upgrade() -> None: def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### - + # Downgrading removal of an enum value requires recreating the type - op.execute("ALTER TYPE searchsourceconnectortype RENAME TO searchsourceconnectortype_old") - op.execute("CREATE TYPE searchsourceconnectortype AS ENUM('SERPER_API', 'TAVILY_API', 'SLACK_CONNECTOR', 'NOTION_CONNECTOR', 'GITHUB_CONNECTOR')") + op.execute( + "ALTER TYPE searchsourceconnectortype RENAME TO searchsourceconnectortype_old" + ) + op.execute( + "CREATE TYPE searchsourceconnectortype AS ENUM('SERPER_API', 'TAVILY_API', 'SLACK_CONNECTOR', 'NOTION_CONNECTOR', 'GITHUB_CONNECTOR')" + ) op.execute( "ALTER TABLE search_source_connectors ALTER COLUMN connector_type TYPE searchsourceconnectortype USING " "connector_type::text::searchsourceconnectortype" @@ -39,4 +44,4 @@ def downgrade() -> None: op.execute("DROP TYPE searchsourceconnectortype_old") pass - # ### end Alembic commands ### \ No newline at end of file + # ### end Alembic commands ### diff --git a/surfsense_backend/alembic/versions/3_add_linear_connector_to_documenttype_.py b/surfsense_backend/alembic/versions/3_add_linear_connector_to_documenttype_.py index 5b8e2bc2e..8c4625b69 100644 --- a/surfsense_backend/alembic/versions/3_add_linear_connector_to_documenttype_.py +++ b/surfsense_backend/alembic/versions/3_add_linear_connector_to_documenttype_.py @@ -4,24 +4,26 @@ Revision ID: 3 Revises: 2 """ + from collections.abc import Sequence from alembic import op # revision identifiers, used by Alembic. -revision: str = '3' -down_revision: str | None = '2' +revision: str = "3" +down_revision: str | None = "2" branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None # Define the ENUM type name and the new value -ENUM_NAME = 'documenttype' # Make sure this matches the name in your DB (usually lowercase class name) -NEW_VALUE = 'LINEAR_CONNECTOR' +ENUM_NAME = "documenttype" # Make sure this matches the name in your DB (usually lowercase class name) +NEW_VALUE = "LINEAR_CONNECTOR" + def upgrade() -> None: """Upgrade schema.""" op.execute(f"ALTER TYPE {ENUM_NAME} ADD VALUE '{NEW_VALUE}'") - + # Warning: This will delete all rows with the new value def downgrade() -> None: @@ -32,19 +34,19 @@ def downgrade() -> None: # Enum values *before* LINEAR_CONNECTOR was added old_values = ( - 'EXTENSION', - 'CRAWLED_URL', - 'FILE', - 'SLACK_CONNECTOR', - 'NOTION_CONNECTOR', - 'YOUTUBE_VIDEO', - 'GITHUB_CONNECTOR' + "EXTENSION", + "CRAWLED_URL", + "FILE", + "SLACK_CONNECTOR", + "NOTION_CONNECTOR", + "YOUTUBE_VIDEO", + "GITHUB_CONNECTOR", ) old_values_sql = ", ".join([f"'{v}'" for v in old_values]) # Table and column names (adjust if different) - table_name = 'documents' - column_name = 'document_type' + table_name = "documents" + column_name = "document_type" # 1. Rename the current enum type op.execute(f"ALTER TYPE {ENUM_NAME} RENAME TO {old_enum_name}") @@ -52,10 +54,8 @@ def downgrade() -> None: # 2. Create the new enum type with the old values op.execute(f"CREATE TYPE {ENUM_NAME} AS ENUM({old_values_sql})") - # 3. Update the table: - op.execute( - f"DELETE FROM {table_name} WHERE {column_name}::text = '{NEW_VALUE}'" - ) + # 3. Update the table: + op.execute(f"DELETE FROM {table_name} WHERE {column_name}::text = '{NEW_VALUE}'") # 4. Alter the column to use the new enum type (casting old values) op.execute( @@ -65,4 +65,4 @@ def downgrade() -> None: # 5. Drop the old enum type op.execute(f"DROP TYPE {old_enum_name}") - # ### end Alembic commands ### \ No newline at end of file + # ### end Alembic commands ### diff --git a/surfsense_backend/alembic/versions/4_add_linkup_api_enum.py b/surfsense_backend/alembic/versions/4_add_linkup_api_enum.py index c1d8cb0f3..26acec3d6 100644 --- a/surfsense_backend/alembic/versions/4_add_linkup_api_enum.py +++ b/surfsense_backend/alembic/versions/4_add_linkup_api_enum.py @@ -4,23 +4,24 @@ Revision ID: 4 Revises: 3 """ + from collections.abc import Sequence from alembic import op # revision identifiers, used by Alembic. -revision: str = '4' -down_revision: str | None = '3' +revision: str = "4" +down_revision: str | None = "3" branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None def upgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### - + # Manually add the command to add the enum value op.execute("ALTER TYPE searchsourceconnectortype ADD VALUE 'LINKUP_API'") - + # Pass for the rest, as autogenerate didn't run to add other schema details pass # ### end Alembic commands ### @@ -28,10 +29,14 @@ def upgrade() -> None: def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### - + # Downgrading removal of an enum value requires recreating the type - op.execute("ALTER TYPE searchsourceconnectortype RENAME TO searchsourceconnectortype_old") - op.execute("CREATE TYPE searchsourceconnectortype AS ENUM('SERPER_API', 'TAVILY_API', 'SLACK_CONNECTOR', 'NOTION_CONNECTOR', 'GITHUB_CONNECTOR', 'LINEAR_CONNECTOR')") + op.execute( + "ALTER TYPE searchsourceconnectortype RENAME TO searchsourceconnectortype_old" + ) + op.execute( + "CREATE TYPE searchsourceconnectortype AS ENUM('SERPER_API', 'TAVILY_API', 'SLACK_CONNECTOR', 'NOTION_CONNECTOR', 'GITHUB_CONNECTOR', 'LINEAR_CONNECTOR')" + ) op.execute( "ALTER TABLE search_source_connectors ALTER COLUMN connector_type TYPE searchsourceconnectortype USING " "connector_type::text::searchsourceconnectortype" @@ -39,4 +44,4 @@ def downgrade() -> None: op.execute("DROP TYPE searchsourceconnectortype_old") pass - # ### end Alembic commands ### \ No newline at end of file + # ### end Alembic commands ### diff --git a/surfsense_backend/alembic/versions/5_remove_title_char_limit.py b/surfsense_backend/alembic/versions/5_remove_title_char_limit.py index 4163c4605..2e4cd56d1 100644 --- a/surfsense_backend/alembic/versions/5_remove_title_char_limit.py +++ b/surfsense_backend/alembic/versions/5_remove_title_char_limit.py @@ -4,6 +4,7 @@ Revision ID: 5 Revises: 4 """ + from collections.abc import Sequence import sqlalchemy as sa @@ -11,47 +12,65 @@ import sqlalchemy as sa from alembic import op # revision identifiers, used by Alembic. -revision: str = '5' -down_revision: str | None = '4' +revision: str = "5" +down_revision: str | None = "4" branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None def upgrade() -> None: # Alter Chat table - op.alter_column('chats', 'title', - existing_type=sa.String(200), - type_=sa.String(), - existing_nullable=False) - + op.alter_column( + "chats", + "title", + existing_type=sa.String(200), + type_=sa.String(), + existing_nullable=False, + ) + # Alter Document table - op.alter_column('documents', 'title', - existing_type=sa.String(200), - type_=sa.String(), - existing_nullable=False) - + op.alter_column( + "documents", + "title", + existing_type=sa.String(200), + type_=sa.String(), + existing_nullable=False, + ) + # Alter Podcast table - op.alter_column('podcasts', 'title', - existing_type=sa.String(200), - type_=sa.String(), - existing_nullable=False) + op.alter_column( + "podcasts", + "title", + existing_type=sa.String(200), + type_=sa.String(), + existing_nullable=False, + ) def downgrade() -> None: # Revert Chat table - op.alter_column('chats', 'title', - existing_type=sa.String(), - type_=sa.String(200), - existing_nullable=False) - + op.alter_column( + "chats", + "title", + existing_type=sa.String(), + type_=sa.String(200), + existing_nullable=False, + ) + # Revert Document table - op.alter_column('documents', 'title', - existing_type=sa.String(), - type_=sa.String(200), - existing_nullable=False) - + op.alter_column( + "documents", + "title", + existing_type=sa.String(), + type_=sa.String(200), + existing_nullable=False, + ) + # Revert Podcast table - op.alter_column('podcasts', 'title', - existing_type=sa.String(), - type_=sa.String(200), - existing_nullable=False) \ No newline at end of file + op.alter_column( + "podcasts", + "title", + existing_type=sa.String(), + type_=sa.String(200), + existing_nullable=False, + ) diff --git a/surfsense_backend/alembic/versions/6_change_podcast_content_to_transcript.py b/surfsense_backend/alembic/versions/6_change_podcast_content_to_transcript.py index 8799ae7e8..8a23e867a 100644 --- a/surfsense_backend/alembic/versions/6_change_podcast_content_to_transcript.py +++ b/surfsense_backend/alembic/versions/6_change_podcast_content_to_transcript.py @@ -4,6 +4,7 @@ Revision ID: 6 Revises: 5 """ + from collections.abc import Sequence import sqlalchemy as sa @@ -12,8 +13,8 @@ from sqlalchemy.dialects.postgresql import JSON from alembic import op # revision identifiers, used by Alembic. -revision: str = '6' -down_revision: str | None = '5' +revision: str = "6" +down_revision: str | None = "5" branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None @@ -21,23 +22,33 @@ depends_on: str | Sequence[str] | None = None def upgrade() -> None: # Drop the old column and create a new one with the new name and type # We need to do this because PostgreSQL doesn't support direct column renames with type changes - op.add_column('podcasts', sa.Column('podcast_transcript', JSON, nullable=False, server_default='{}')) - + op.add_column( + "podcasts", + sa.Column("podcast_transcript", JSON, nullable=False, server_default="{}"), + ) + # Copy data from old column to new column # Convert text to JSON by storing it as a JSON string value - op.execute("UPDATE podcasts SET podcast_transcript = jsonb_build_object('text', podcast_content) WHERE podcast_content != ''") - + op.execute( + "UPDATE podcasts SET podcast_transcript = jsonb_build_object('text', podcast_content) WHERE podcast_content != ''" + ) + # Drop the old column - op.drop_column('podcasts', 'podcast_content') + op.drop_column("podcasts", "podcast_content") def downgrade() -> None: # Add back the original column - op.add_column('podcasts', sa.Column('podcast_content', sa.Text(), nullable=False, server_default='')) - + op.add_column( + "podcasts", + sa.Column("podcast_content", sa.Text(), nullable=False, server_default=""), + ) + # Copy data from JSON column back to text column # Extract the 'text' field if it exists, otherwise use empty string - op.execute("UPDATE podcasts SET podcast_content = COALESCE((podcast_transcript->>'text'), '')") - + op.execute( + "UPDATE podcasts SET podcast_content = COALESCE((podcast_transcript->>'text'), '')" + ) + # Drop the new column - op.drop_column('podcasts', 'podcast_transcript') \ No newline at end of file + op.drop_column("podcasts", "podcast_transcript") diff --git a/surfsense_backend/alembic/versions/7_remove_is_generated_column.py b/surfsense_backend/alembic/versions/7_remove_is_generated_column.py index e721e219a..041694441 100644 --- a/surfsense_backend/alembic/versions/7_remove_is_generated_column.py +++ b/surfsense_backend/alembic/versions/7_remove_is_generated_column.py @@ -4,6 +4,7 @@ Revision ID: 7 Revises: 6 """ + from collections.abc import Sequence import sqlalchemy as sa @@ -11,17 +12,20 @@ import sqlalchemy as sa from alembic import op # revision identifiers, used by Alembic. -revision: str = '7' -down_revision: str | None = '6' +revision: str = "7" +down_revision: str | None = "6" branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None def upgrade() -> None: # Drop the is_generated column - op.drop_column('podcasts', 'is_generated') + op.drop_column("podcasts", "is_generated") def downgrade() -> None: # Add back the is_generated column with its original constraints - op.add_column('podcasts', sa.Column('is_generated', sa.Boolean(), nullable=False, server_default='false')) \ No newline at end of file + op.add_column( + "podcasts", + sa.Column("is_generated", sa.Boolean(), nullable=False, server_default="false"), + ) diff --git a/surfsense_backend/alembic/versions/8_add_content_hash_to_documents.py b/surfsense_backend/alembic/versions/8_add_content_hash_to_documents.py index f9cf88ab5..10f68d447 100644 --- a/surfsense_backend/alembic/versions/8_add_content_hash_to_documents.py +++ b/surfsense_backend/alembic/versions/8_add_content_hash_to_documents.py @@ -3,6 +3,7 @@ Revision ID: 8 Revises: 7 """ + from collections.abc import Sequence import sqlalchemy as sa @@ -10,16 +11,16 @@ import sqlalchemy as sa from alembic import op # revision identifiers, used by Alembic. -revision: str = '8' -down_revision: str | None = '7' +revision: str = "8" +down_revision: str | None = "7" branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None def upgrade() -> None: # Add content_hash column as nullable first to handle existing data - op.add_column('documents', sa.Column('content_hash', sa.String(), nullable=True)) - + op.add_column("documents", sa.Column("content_hash", sa.String(), nullable=True)) + # Update existing documents to generate content hashes # Using SHA-256 hash of the content column with proper UTF-8 encoding op.execute(""" @@ -27,7 +28,7 @@ def upgrade() -> None: SET content_hash = encode(sha256(convert_to(content, 'UTF8')), 'hex') WHERE content_hash IS NULL """) - + # Handle duplicate content hashes by keeping only the oldest document for each hash # Delete newer documents with duplicate content hashes op.execute(""" @@ -38,19 +39,23 @@ def upgrade() -> None: GROUP BY content_hash ) """) - + # Now alter the column to match the model: nullable=False, index=True, unique=True - op.alter_column('documents', 'content_hash', - existing_type=sa.String(), - nullable=False) - op.create_index(op.f('ix_documents_content_hash'), 'documents', ['content_hash'], unique=False) - op.create_unique_constraint(op.f('uq_documents_content_hash'), 'documents', ['content_hash']) + op.alter_column( + "documents", "content_hash", existing_type=sa.String(), nullable=False + ) + op.create_index( + op.f("ix_documents_content_hash"), "documents", ["content_hash"], unique=False + ) + op.create_unique_constraint( + op.f("uq_documents_content_hash"), "documents", ["content_hash"] + ) def downgrade() -> None: # Remove constraints and index first - op.drop_constraint(op.f('uq_documents_content_hash'), 'documents', type_='unique') - op.drop_index(op.f('ix_documents_content_hash'), table_name='documents') - + op.drop_constraint(op.f("uq_documents_content_hash"), "documents", type_="unique") + op.drop_index(op.f("ix_documents_content_hash"), table_name="documents") + # Remove content_hash column from documents table - op.drop_column('documents', 'content_hash') \ No newline at end of file + op.drop_column("documents", "content_hash") diff --git a/surfsense_backend/alembic/versions/9_add_discord_connector_enum_and_documenttype.py b/surfsense_backend/alembic/versions/9_add_discord_connector_enum_and_documenttype.py index f4ee8378c..8be1e391d 100644 --- a/surfsense_backend/alembic/versions/9_add_discord_connector_enum_and_documenttype.py +++ b/surfsense_backend/alembic/versions/9_add_discord_connector_enum_and_documenttype.py @@ -83,7 +83,6 @@ def downgrade() -> None: # 4. Drop the old connector enum type op.execute(f"DROP TYPE {old_connector_enum_name}") - # Document Enum Downgrade Steps # 1. Rename the current document enum type op.execute(f"ALTER TYPE {DOCUMENT_ENUM} RENAME TO {old_document_enum_name}") diff --git a/surfsense_backend/alembic/versions/e55302644c51_add_github_connector_to_documenttype_.py b/surfsense_backend/alembic/versions/e55302644c51_add_github_connector_to_documenttype_.py index d691ac0db..3c7b3a772 100644 --- a/surfsense_backend/alembic/versions/e55302644c51_add_github_connector_to_documenttype_.py +++ b/surfsense_backend/alembic/versions/e55302644c51_add_github_connector_to_documenttype_.py @@ -4,24 +4,26 @@ Revision ID: e55302644c51 Revises: 1 """ + from collections.abc import Sequence from alembic import op # revision identifiers, used by Alembic. -revision: str = 'e55302644c51' -down_revision: str | None = '1' +revision: str = "e55302644c51" +down_revision: str | None = "1" branch_labels: str | Sequence[str] | None = None depends_on: str | Sequence[str] | None = None # Define the ENUM type name and the new value -ENUM_NAME = 'documenttype' # Make sure this matches the name in your DB (usually lowercase class name) -NEW_VALUE = 'GITHUB_CONNECTOR' +ENUM_NAME = "documenttype" # Make sure this matches the name in your DB (usually lowercase class name) +NEW_VALUE = "GITHUB_CONNECTOR" + def upgrade() -> None: """Upgrade schema.""" op.execute(f"ALTER TYPE {ENUM_NAME} ADD VALUE '{NEW_VALUE}'") - + # Warning: This will delete all rows with the new value def downgrade() -> None: @@ -32,18 +34,18 @@ def downgrade() -> None: # Enum values *before* GITHUB_CONNECTOR was added old_values = ( - 'EXTENSION', - 'CRAWLED_URL', - 'FILE', - 'SLACK_CONNECTOR', - 'NOTION_CONNECTOR', - 'YOUTUBE_VIDEO' + "EXTENSION", + "CRAWLED_URL", + "FILE", + "SLACK_CONNECTOR", + "NOTION_CONNECTOR", + "YOUTUBE_VIDEO", ) old_values_sql = ", ".join([f"'{v}'" for v in old_values]) # Table and column names (adjust if different) - table_name = 'documents' - column_name = 'document_type' + table_name = "documents" + column_name = "document_type" # 1. Rename the current enum type op.execute(f"ALTER TYPE {ENUM_NAME} RENAME TO {old_enum_name}") @@ -51,10 +53,8 @@ def downgrade() -> None: # 2. Create the new enum type with the old values op.execute(f"CREATE TYPE {ENUM_NAME} AS ENUM({old_values_sql})") - # 3. Update the table: - op.execute( - f"DELETE FROM {table_name} WHERE {column_name}::text = '{NEW_VALUE}'" - ) + # 3. Update the table: + op.execute(f"DELETE FROM {table_name} WHERE {column_name}::text = '{NEW_VALUE}'") # 4. Alter the column to use the new enum type (casting old values) op.execute( @@ -64,4 +64,4 @@ def downgrade() -> None: # 5. Drop the old enum type op.execute(f"DROP TYPE {old_enum_name}") - # ### end Alembic commands ### + # ### end Alembic commands ###