perf: add missing index on chunks.document_id for faster search retrieval

This commit is contained in:
CREDO23 2026-03-09 21:16:53 +02:00
parent 6eabfe2396
commit fffef4cb5e
2 changed files with 42 additions and 1 deletions

View file

@ -0,0 +1,41 @@
"""104_add_chunks_document_id_index
Revision ID: 104
Revises: 103
Create Date: 2026-03-09
Adds a B-tree index on chunks.document_id to speed up chunk lookups
during hybrid search: both retrievers fetch chunks by document_id
after Reciprocal Rank Fusion (RRF) ranking selects the top documents.
"""
from __future__ import annotations
from collections.abc import Sequence
from alembic import op
revision: str = "104"
down_revision: str | None = "103"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
    """Create the index ``ix_chunks_document_id`` on ``chunks(document_id)``.

    The statement is idempotent: if a relation named
    ``ix_chunks_document_id`` already exists in the schema where the index
    would be created, PostgreSQL emits a notice and leaves it untouched.
    """
    # Use the native IF NOT EXISTS guard rather than probing pg_indexes by
    # hand. A pg_indexes lookup that filters only on tablename/indexname is
    # not schema-aware, so a same-named index on a "chunks" table in any
    # other schema would make the migration silently skip creating the
    # index on the table actually being migrated.
    op.execute(
        "CREATE INDEX IF NOT EXISTS ix_chunks_document_id "
        "ON chunks (document_id)"
    )
def downgrade() -> None:
    """Drop ``ix_chunks_document_id``; does nothing if it is already absent."""
    drop_index_sql = "DROP INDEX IF EXISTS ix_chunks_document_id"
    op.execute(drop_index_sql)

View file

@ -959,7 +959,7 @@ class Chunk(BaseModel, TimestampMixin):
embedding = Column(Vector(config.embedding_model_instance.dimension)) embedding = Column(Vector(config.embedding_model_instance.dimension))
document_id = Column( document_id = Column(
Integer, ForeignKey("documents.id", ondelete="CASCADE"), nullable=False Integer, ForeignKey("documents.id", ondelete="CASCADE"), nullable=False, index=True
) )
document = relationship("Document", back_populates="chunks") document = relationship("Document", back_populates="chunks")