Validate validity/rowids blob sizes in rescore KNN path

The rescore KNN loop read the validity and rowids blobs from the chunks
iterator without checking that their sizes matched what chunk_size implies.
A truncated or corrupt blob could cause out-of-bounds (OOB) reads in
bitmap_copy or in rowid array access. The flat KNN path already had these
checks.

Adds corruption tests: truncated rowids blob and truncated validity
blob both produce errors instead of crashes.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Alex Garcia 2026-03-31 17:49:40 -07:00
parent f2c9fb8f08
commit 5522e86cd2
2 changed files with 36 additions and 5 deletions

View file

@ -426,10 +426,18 @@ static int rescore_knn(vec0_vtab *p, vec0_cursor *pCur,
unsigned char *chunkValidity = unsigned char *chunkValidity =
(unsigned char *)sqlite3_column_blob(stmtChunks, 1); (unsigned char *)sqlite3_column_blob(stmtChunks, 1);
i64 *chunkRowids = (i64 *)sqlite3_column_blob(stmtChunks, 2); i64 *chunkRowids = (i64 *)sqlite3_column_blob(stmtChunks, 2);
int validityBytes = sqlite3_column_bytes(stmtChunks, 1);
int rowidsBytes = sqlite3_column_bytes(stmtChunks, 2);
if (!chunkValidity || !chunkRowids) { if (!chunkValidity || !chunkRowids) {
rc = SQLITE_ERROR; rc = SQLITE_ERROR;
goto cleanup; goto cleanup;
} }
// Validate blob sizes match chunk_size expectations
if (validityBytes < (p->chunk_size + 7) / 8 ||
rowidsBytes < p->chunk_size * (int)sizeof(i64)) {
rc = SQLITE_ERROR;
goto cleanup;
}
memset(chunk_distances, 0, p->chunk_size * sizeof(f32)); memset(chunk_distances, 0, p->chunk_size * sizeof(f32));
memset(chunk_topk_idxs, 0, k_oversample * sizeof(i32)); memset(chunk_topk_idxs, 0, k_oversample * sizeof(i32));

View file

@ -587,14 +587,37 @@ def test_corrupt_zeroblob_validity(db):
# Corrupt: replace rowids with a truncated blob (wrong size) # Corrupt: replace rowids with a truncated blob (wrong size)
db.execute("UPDATE t_chunks SET rowids = x'00'") db.execute("UPDATE t_chunks SET rowids = x'00'")
# Should not crash — may return wrong results or error # Should error, not crash — blob size validation catches the mismatch
try: with pytest.raises(sqlite3.OperationalError):
rows = db.execute( db.execute(
"SELECT rowid FROM t WHERE embedding MATCH ? ORDER BY distance LIMIT 1", "SELECT rowid FROM t WHERE embedding MATCH ? ORDER BY distance LIMIT 1",
[float_vec([1, 0, 0, 0, 0, 0, 0, 0])], [float_vec([1, 0, 0, 0, 0, 0, 0, 0])],
).fetchall() ).fetchall()
except sqlite3.OperationalError:
pass # Error is acceptable — crash is not
def test_corrupt_truncated_validity_blob(db):
    """KNN should error when rescore chunk validity blob is truncated.

    Builds a small rescore-indexed table, overwrites every chunk's
    ``validity`` blob with a single byte, and expects the KNN query to
    raise ``sqlite3.OperationalError``.
    """
    # Function-scope import, executed once instead of on every loop iteration.
    import random

    db.execute(
        "CREATE VIRTUAL TABLE t USING vec0("
        " embedding float[128] indexed by rescore(quantizer=bit)"
        ")"
    )
    for i in range(5):
        # A private generator seeded per row produces the same vectors as
        # reseeding the module-level RNG, without clobbering global random
        # state that other tests in the session may rely on.
        rng = random.Random(i)
        db.execute(
            "INSERT INTO t(rowid, embedding) VALUES (?, ?)",
            [i + 1, float_vec([rng.gauss(0, 1) for _ in range(128)])],
        )
    # Corrupt: truncate validity blob to 1 byte (should be chunk_size/8 = 128 bytes)
    db.execute("UPDATE t_chunks SET validity = x'FF'")
    with pytest.raises(sqlite3.OperationalError):
        db.execute(
            "SELECT rowid FROM t WHERE embedding MATCH ? ORDER BY distance LIMIT 1",
            [float_vec([1.0] * 128)],
        ).fetchall()
def test_rescore_text_pk_insert_knn_delete(db): def test_rescore_text_pk_insert_knn_delete(db):