"""add document tables

Revision ID: dc33eef8dabe
Revises: dcb0a27d98c6
Create Date: 2026-01-16 13:40:17.808807

"""

from typing import Sequence, Union

import sqlalchemy as sa
from alembic import op
from pgvector.sqlalchemy import Vector
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision: str = "dc33eef8dabe"
down_revision: Union[str, None] = "dcb0a27d98c6"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None

# Single source of truth for the PostgreSQL enum backing
# ``knowledge_base_documents.processing_status``. It is needed when the type
# is created, when the column is declared, and when the type is dropped.
_STATUS_ENUM_NAME = "document_processing_status"
_STATUS_ENUM_VALUES = ("pending", "processing", "completed", "failed")


def _status_enum() -> sa.Enum:
    """Return the generic enum used to CREATE/DROP the status type."""
    return sa.Enum(*_STATUS_ENUM_VALUES, name=_STATUS_ENUM_NAME)


def _can_check_first() -> bool:
    """Return True when an existence check may be run before enum DDL.

    The check queries the database, so it is only possible with a live
    connection. In offline (``--sql``) mode the DDL is emitted
    unconditionally, exactly as before.
    """
    return not op.get_context().as_sql


def upgrade() -> None:
    """Create the knowledge-base document and chunk tables.

    Steps, in order:

    1. Enable the ``vector`` extension if it is not already present.
    2. Create the ``document_processing_status`` enum type (skipped when it
       already exists and a live connection is available).
    3. Create ``knowledge_base_documents`` and its indexes.
    4. Create ``knowledge_base_chunks`` and its indexes, including an
       IVFFlat cosine index on the ``embedding`` column.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    # Enable pgvector extension
    op.execute("CREATE EXTENSION IF NOT EXISTS vector")

    # The column below uses create_type=False, so the type must be created
    # explicitly here. checkfirst keeps a re-run after a partial failure from
    # aborting with "type already exists".
    _status_enum().create(op.get_bind(), checkfirst=_can_check_first())

    op.create_table(
        "knowledge_base_documents",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("document_uuid", sa.String(length=36), nullable=False),
        sa.Column("organization_id", sa.Integer(), nullable=False),
        sa.Column("filename", sa.String(length=500), nullable=False),
        sa.Column("file_size_bytes", sa.Integer(), nullable=True),
        sa.Column("file_hash", sa.String(length=64), nullable=True),
        sa.Column("mime_type", sa.String(length=100), nullable=True),
        sa.Column("source_url", sa.String(), nullable=True),
        sa.Column("total_chunks", sa.Integer(), nullable=False),
        sa.Column(
            "processing_status",
            postgresql.ENUM(
                *_STATUS_ENUM_VALUES,
                name=_STATUS_ENUM_NAME,
                # The type is created explicitly above; do not let
                # CREATE TABLE try to create it a second time.
                create_type=False,
            ),
            server_default=sa.text("'pending'::document_processing_status"),
            nullable=False,
        ),
        sa.Column("processing_error", sa.Text(), nullable=True),
        sa.Column("docling_metadata", sa.JSON(), nullable=False),
        sa.Column("custom_metadata", sa.JSON(), nullable=False),
        sa.Column("created_by", sa.Integer(), nullable=False),
        sa.Column("created_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column("updated_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column("is_active", sa.Boolean(), nullable=False),
        sa.Column("archived_at", sa.DateTime(timezone=True), nullable=True),
        sa.ForeignKeyConstraint(
            ["created_by"],
            ["users.id"],
        ),
        sa.ForeignKeyConstraint(
            ["organization_id"], ["organizations.id"], ondelete="CASCADE"
        ),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index(
        "ix_kb_documents_created_at",
        "knowledge_base_documents",
        ["created_at"],
        unique=False,
    )
    op.create_index(
        "ix_kb_documents_organization_id",
        "knowledge_base_documents",
        ["organization_id"],
        unique=False,
    )
    op.create_index(
        "ix_kb_documents_status",
        "knowledge_base_documents",
        ["processing_status"],
        unique=False,
    )
    # NOTE(review): document_uuid gets both this non-unique index and the
    # unique index created right after it. The non-unique one looks
    # redundant; it is kept here so the schema keeps matching whatever the
    # ORM models declare. Confirm against the models before removing it.
    op.create_index(
        "ix_kb_documents_uuid",
        "knowledge_base_documents",
        ["document_uuid"],
        unique=False,
    )
    op.create_index(
        op.f("ix_knowledge_base_documents_document_uuid"),
        "knowledge_base_documents",
        ["document_uuid"],
        unique=True,
    )

    op.create_table(
        "knowledge_base_chunks",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("document_id", sa.Integer(), nullable=False),
        sa.Column("organization_id", sa.Integer(), nullable=False),
        sa.Column("chunk_text", sa.Text(), nullable=False),
        sa.Column("contextualized_text", sa.Text(), nullable=True),
        sa.Column("chunk_index", sa.Integer(), nullable=False),
        sa.Column("chunk_metadata", sa.JSON(), nullable=False),
        sa.Column("embedding_model", sa.String(length=200), nullable=False),
        sa.Column("embedding_dimension", sa.Integer(), nullable=False),
        sa.Column("embedding", Vector(1536), nullable=True),
        sa.Column("token_count", sa.Integer(), nullable=True),
        sa.Column("created_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column("updated_at", sa.DateTime(timezone=True), nullable=True),
        sa.ForeignKeyConstraint(
            ["document_id"], ["knowledge_base_documents.id"], ondelete="CASCADE"
        ),
        sa.ForeignKeyConstraint(
            ["organization_id"], ["organizations.id"], ondelete="CASCADE"
        ),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index(
        "ix_kb_chunks_chunk_index",
        "knowledge_base_chunks",
        ["chunk_index"],
        unique=False,
    )
    op.create_index(
        "ix_kb_chunks_document_id",
        "knowledge_base_chunks",
        ["document_id"],
        unique=False,
    )
    # NOTE(review): this IVFFlat index is built while the table is still
    # empty. pgvector's guidance is, as far as I recall, to build IVFFlat
    # indexes once representative data exists; a REINDEX after the first
    # bulk load may be needed for good recall. TODO confirm.
    op.create_index(
        "ix_kb_chunks_embedding_ivfflat",
        "knowledge_base_chunks",
        ["embedding"],
        unique=False,
        postgresql_using="ivfflat",
        postgresql_with={"lists": 100},
        postgresql_ops={"embedding": "vector_cosine_ops"},
    )
    op.create_index(
        "ix_kb_chunks_organization_id",
        "knowledge_base_chunks",
        ["organization_id"],
        unique=False,
    )
    # ### end Alembic commands ###


def downgrade() -> None:
    """Drop everything created by :func:`upgrade`, in reverse order.

    The chunk table (and its indexes) is removed before the document table
    it references, and the enum type is dropped last, once no column uses
    it. The ``vector`` extension is intentionally left installed.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_index("ix_kb_chunks_organization_id", table_name="knowledge_base_chunks")
    op.drop_index(
        "ix_kb_chunks_embedding_ivfflat",
        table_name="knowledge_base_chunks",
        postgresql_using="ivfflat",
        postgresql_with={"lists": 100},
        postgresql_ops={"embedding": "vector_cosine_ops"},
    )
    op.drop_index("ix_kb_chunks_document_id", table_name="knowledge_base_chunks")
    op.drop_index("ix_kb_chunks_chunk_index", table_name="knowledge_base_chunks")
    op.drop_table("knowledge_base_chunks")

    op.drop_index(
        op.f("ix_knowledge_base_documents_document_uuid"),
        table_name="knowledge_base_documents",
    )
    op.drop_index("ix_kb_documents_uuid", table_name="knowledge_base_documents")
    op.drop_index("ix_kb_documents_status", table_name="knowledge_base_documents")
    op.drop_index(
        "ix_kb_documents_organization_id", table_name="knowledge_base_documents"
    )
    op.drop_index("ix_kb_documents_created_at", table_name="knowledge_base_documents")
    op.drop_table("knowledge_base_documents")

    # Drop the enum only after the table that uses it is gone. checkfirst
    # lets a re-run after a partially applied downgrade proceed instead of
    # failing on a type that was already removed.
    _status_enum().drop(op.get_bind(), checkfirst=_can_check_first())

    # Note: We don't drop the vector extension as it may be used by other tables
    # If you want to drop it, uncomment the following line:
    # op.execute('DROP EXTENSION IF EXISTS vector')
    # ### end Alembic commands ###