From 1048490ba87f809dc6f95416bc81a872337d5b64 Mon Sep 17 00:00:00 2001
From: CREDO23
Date: Thu, 18 Jun 2026 20:06:26 +0200
Subject: [PATCH] feat: migrate chunks with start_char/end_char columns

---
 .../versions/166_add_chunk_char_spans.py      | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 surfsense_backend/alembic/versions/166_add_chunk_char_spans.py

diff --git a/surfsense_backend/alembic/versions/166_add_chunk_char_spans.py b/surfsense_backend/alembic/versions/166_add_chunk_char_spans.py
new file mode 100644
index 000000000..336711612
--- /dev/null
+++ b/surfsense_backend/alembic/versions/166_add_chunk_char_spans.py
@@ -0,0 +1,48 @@
+"""add chunks.start_char/end_char for citation offsets
+
+Char offsets into the document's source_markdown (half-open span) let citations
+resolve the exact passage a chunk came from. Nullable because historical rows
+have no span; they populate on the next connector sync or user edit/reindex.
+
+No backfill: a bulk UPDATE of every chunk on a large HNSW-indexed table rewrites
+every secondary index per row (see migration 165 for the same reasoning).
+
+Revision ID: 166
+Revises: 165
+"""
+
+from collections.abc import Sequence
+
+from alembic import op
+
+revision: str = "166"
+down_revision: str | None = "165"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    """Add the ``start_char`` and ``end_char`` INTEGER columns to ``chunks``.
+
+    Both statements carry ``IF NOT EXISTS``; neither declares ``NOT NULL`` or a
+    default, and no existing rows are updated here.
+    """
+    statements = (
+        "ALTER TABLE chunks ADD COLUMN IF NOT EXISTS start_char INTEGER;",
+        "ALTER TABLE chunks ADD COLUMN IF NOT EXISTS end_char INTEGER;",
+    )
+    for statement in statements:
+        op.execute(statement)
+
+
+def downgrade() -> None:
+    """Drop ``end_char`` and then ``start_char`` from ``chunks``.
+
+    Both statements carry ``IF EXISTS``.
+    """
+    statements = (
+        "ALTER TABLE chunks DROP COLUMN IF EXISTS end_char;",
+        "ALTER TABLE chunks DROP COLUMN IF EXISTS start_char;",
+    )
+    for statement in statements:
+        op.execute(statement)