feat: add start_char/end_char span columns to chunk model

This commit is contained in:
CREDO23 2026-06-18 20:06:26 +02:00
parent b446897638
commit b89f242a89

View file

@@ -1467,6 +1467,11 @@ class Chunk(BaseModel, TimestampMixin):
# ordering reads are document-scoped (covered by ix_chunks_document_id) and
# building a position index on the large chunks table is not worth it.
position = Column(Integer, nullable=False, server_default="0")
# Half-open character span [start_char, end_char) into the document's
# source_markdown that this chunk was cut from. Nullable: historical rows
# predate spans and are populated on reindex.
# Invariant for span-aware rows: source_markdown[start_char:end_char] == content.
start_char = Column(Integer, nullable=True)
end_char = Column(Integer, nullable=True)
document_id = Column(
Integer,