From b89f242a89997f031dfdfa370ae6fa0f60d66f4e Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Thu, 18 Jun 2026 20:06:26 +0200 Subject: [PATCH] feat: add start_char/end_char span columns to chunk model --- surfsense_backend/app/db.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index 3f098d5d2..9aa217d2c 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -1467,6 +1467,11 @@ class Chunk(BaseModel, TimestampMixin): # ordering reads are document-scoped (covered by ix_chunks_document_id) and # building a position index on the large chunks table is not worth it. position = Column(Integer, nullable=False, server_default="0") + # Half-open char span into the document's source_markdown the chunk was cut + # from. Nullable: historical rows predate spans and are populated on reindex. + # Invariant for span-aware rows: source_markdown[start_char:end_char] == content. + start_char = Column(Integer, nullable=True) + end_char = Column(Integer, nullable=True) document_id = Column( Integer,