diff --git a/surfsense_backend/alembic/versions/104_add_chunks_document_id_index.py b/surfsense_backend/alembic/versions/104_add_chunks_document_id_index.py
new file mode 100644
index 000000000..4e6dc6a15
--- /dev/null
+++ b/surfsense_backend/alembic/versions/104_add_chunks_document_id_index.py
@@ -0,0 +1,36 @@
+"""104_add_chunks_document_id_index
+
+Revision ID: 104
+Revises: 103
+Create Date: 2026-03-09
+
+Adds a B-tree index on chunks.document_id to speed up chunk lookups
+during hybrid search (both retrievers fetch chunks by document_id
+after RRF ranking selects the top documents).
+"""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+
+from alembic import op
+
+revision: str = "104"
+down_revision: str | None = "103"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    """Create ix_chunks_document_id on chunks(document_id) if it is missing."""
+    # Use the native IF NOT EXISTS guard (mirrors DROP INDEX IF EXISTS in
+    # downgrade). A pg_indexes lookup by table/index name alone spans every
+    # schema, so a same-named index elsewhere would wrongly skip creation.
+    op.execute(
+        "CREATE INDEX IF NOT EXISTS ix_chunks_document_id ON chunks (document_id)"
+    )
+
+
+def downgrade() -> None:
+    """Drop ix_chunks_document_id; does nothing when the index is absent."""
+    op.execute("DROP INDEX IF EXISTS ix_chunks_document_id")
diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py
index 9f0af4fc5..bc3d1c514 100644
--- a/surfsense_backend/app/db.py
+++ b/surfsense_backend/app/db.py
@@ -959,7 +959,7 @@ class Chunk(BaseModel, TimestampMixin):
     embedding = Column(Vector(config.embedding_model_instance.dimension))
 
     document_id = Column(
-        Integer, ForeignKey("documents.id", ondelete="CASCADE"), nullable=False
+        Integer, ForeignKey("documents.id", ondelete="CASCADE"), nullable=False, index=True
     )
     document = relationship("Document", back_populates="chunks")
 