"""add document tables

Revision ID: dc33eef8dabe
Revises: dcb0a27d98c6
Create Date: 2026-01-16 13:40:17.808807

"""

from typing import Sequence, Union

import sqlalchemy as sa
from alembic import op
from pgvector.sqlalchemy import Vector
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
# `revision` is this migration's own ID; `down_revision` is the migration it
# revises (matches the "Revises:" line in the module docstring).
revision: str = "dc33eef8dabe"
down_revision: Union[str, None] = "dcb0a27d98c6"
# No branch labels or extra dependencies are declared for this revision.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Create the knowledge-base document and chunk tables.

    Steps, in order:

    1. Ensure the ``vector`` (pgvector) extension exists.
    2. Create the ``document_processing_status`` enum type, skipping it when
       the type is already present.
    3. Create ``knowledge_base_documents`` and its indexes.
    4. Create ``knowledge_base_chunks`` and its indexes, including an IVFFlat
       cosine-distance index on ``embedding``.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    # Enable pgvector extension
    op.execute("CREATE EXTENSION IF NOT EXISTS vector")

    # Single source for the enum labels so the CREATE TYPE statement and the
    # column definition below cannot drift apart.
    status_values = ("pending", "processing", "completed", "failed")

    # Only probe for an existing type when a live connection is available:
    # in offline (--sql) mode there is no database to query, so the CREATE
    # TYPE statement is emitted unconditionally, as before.
    check_existing = not op.get_context().as_sql
    sa.Enum(*status_values, name="document_processing_status").create(
        op.get_bind(), checkfirst=check_existing
    )
    op.create_table(
        "knowledge_base_documents",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("document_uuid", sa.String(length=36), nullable=False),
        sa.Column("organization_id", sa.Integer(), nullable=False),
        sa.Column("filename", sa.String(length=500), nullable=False),
        sa.Column("file_size_bytes", sa.Integer(), nullable=True),
        sa.Column("file_hash", sa.String(length=64), nullable=True),
        sa.Column("mime_type", sa.String(length=100), nullable=True),
        sa.Column("source_url", sa.String(), nullable=True),
        sa.Column("total_chunks", sa.Integer(), nullable=False),
        sa.Column(
            "processing_status",
            # create_type=False: the type was created explicitly above, so
            # create_table must not try to create it a second time.
            postgresql.ENUM(
                *status_values,
                name="document_processing_status",
                create_type=False,
            ),
            server_default=sa.text("'pending'::document_processing_status"),
            nullable=False,
        ),
        sa.Column("processing_error", sa.Text(), nullable=True),
        sa.Column("docling_metadata", sa.JSON(), nullable=False),
        sa.Column("custom_metadata", sa.JSON(), nullable=False),
        sa.Column("created_by", sa.Integer(), nullable=False),
        sa.Column("created_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column("updated_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column("is_active", sa.Boolean(), nullable=False),
        sa.Column("archived_at", sa.DateTime(timezone=True), nullable=True),
        sa.ForeignKeyConstraint(
            ["created_by"],
            ["users.id"],
        ),
        sa.ForeignKeyConstraint(
            ["organization_id"], ["organizations.id"], ondelete="CASCADE"
        ),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index(
        "ix_kb_documents_created_at",
        "knowledge_base_documents",
        ["created_at"],
        unique=False,
    )
    op.create_index(
        "ix_kb_documents_organization_id",
        "knowledge_base_documents",
        ["organization_id"],
        unique=False,
    )
    op.create_index(
        "ix_kb_documents_status",
        "knowledge_base_documents",
        ["processing_status"],
        unique=False,
    )
    # NOTE(review): this non-unique index covers the same column as the unique
    # index created right after it, so it looks redundant. It is kept here
    # because downgrade() drops it by name; removing it needs a coordinated
    # change to the model and to downgrade().
    op.create_index(
        "ix_kb_documents_uuid",
        "knowledge_base_documents",
        ["document_uuid"],
        unique=False,
    )
    op.create_index(
        op.f("ix_knowledge_base_documents_document_uuid"),
        "knowledge_base_documents",
        ["document_uuid"],
        unique=True,
    )
    op.create_table(
        "knowledge_base_chunks",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("document_id", sa.Integer(), nullable=False),
        sa.Column("organization_id", sa.Integer(), nullable=False),
        sa.Column("chunk_text", sa.Text(), nullable=False),
        sa.Column("contextualized_text", sa.Text(), nullable=True),
        sa.Column("chunk_index", sa.Integer(), nullable=False),
        sa.Column("chunk_metadata", sa.JSON(), nullable=False),
        sa.Column("embedding_model", sa.String(length=200), nullable=False),
        sa.Column("embedding_dimension", sa.Integer(), nullable=False),
        # NOTE(review): the vector column is fixed at 1536 dimensions even
        # though embedding_dimension is stored per row — confirm that other
        # dimensions are never expected in this column.
        sa.Column("embedding", Vector(1536), nullable=True),
        sa.Column("token_count", sa.Integer(), nullable=True),
        sa.Column("created_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column("updated_at", sa.DateTime(timezone=True), nullable=True),
        sa.ForeignKeyConstraint(
            ["document_id"], ["knowledge_base_documents.id"], ondelete="CASCADE"
        ),
        sa.ForeignKeyConstraint(
            ["organization_id"], ["organizations.id"], ondelete="CASCADE"
        ),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index(
        "ix_kb_chunks_chunk_index",
        "knowledge_base_chunks",
        ["chunk_index"],
        unique=False,
    )
    op.create_index(
        "ix_kb_chunks_document_id",
        "knowledge_base_chunks",
        ["document_id"],
        unique=False,
    )
    # Approximate-nearest-neighbour index for cosine distance on embeddings.
    # NOTE(review): the index is built while the table is still empty; the
    # pgvector docs suggest building IVFFlat indexes once data is present, so
    # a later REINDEX may be worthwhile — confirm against expected data volume.
    op.create_index(
        "ix_kb_chunks_embedding_ivfflat",
        "knowledge_base_chunks",
        ["embedding"],
        unique=False,
        postgresql_using="ivfflat",
        postgresql_with={"lists": 100},
        postgresql_ops={"embedding": "vector_cosine_ops"},
    )
    op.create_index(
        "ix_kb_chunks_organization_id",
        "knowledge_base_chunks",
        ["organization_id"],
        unique=False,
    )
    # ### end Alembic commands ###


def downgrade() -> None:
    """Drop the knowledge-base tables, their indexes and the status enum.

    Objects are removed in the reverse order of creation: chunk indexes and
    table first (the chunks table references the documents table), then the
    document indexes and table, and finally the
    ``document_processing_status`` enum type. The ``vector`` extension is
    intentionally left installed.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_index("ix_kb_chunks_organization_id", table_name="knowledge_base_chunks")
    op.drop_index(
        "ix_kb_chunks_embedding_ivfflat",
        table_name="knowledge_base_chunks",
        postgresql_using="ivfflat",
        postgresql_with={"lists": 100},
        postgresql_ops={"embedding": "vector_cosine_ops"},
    )
    op.drop_index("ix_kb_chunks_document_id", table_name="knowledge_base_chunks")
    op.drop_index("ix_kb_chunks_chunk_index", table_name="knowledge_base_chunks")
    op.drop_table("knowledge_base_chunks")
    op.drop_index(
        op.f("ix_knowledge_base_documents_document_uuid"),
        table_name="knowledge_base_documents",
    )
    op.drop_index("ix_kb_documents_uuid", table_name="knowledge_base_documents")
    op.drop_index("ix_kb_documents_status", table_name="knowledge_base_documents")
    op.drop_index(
        "ix_kb_documents_organization_id", table_name="knowledge_base_documents"
    )
    op.drop_index("ix_kb_documents_created_at", table_name="knowledge_base_documents")
    op.drop_table("knowledge_base_documents")

    # The enum can only be dropped once no column uses it, hence after the
    # table. Skip the drop if the type is already gone; the existence probe
    # needs a live connection, so in offline (--sql) mode the DROP TYPE
    # statement is emitted unconditionally, as before.
    check_existing = not op.get_context().as_sql
    sa.Enum(
        "pending",
        "processing",
        "completed",
        "failed",
        name="document_processing_status",
    ).drop(op.get_bind(), checkfirst=check_existing)

    # Note: We don't drop the vector extension as it may be used by other tables
    # If you want to drop it, uncomment the following line:
    # op.execute('DROP EXTENSION IF EXISTS vector')
    # ### end Alembic commands ###