From 5522e86cd237a3e15276a8d1f03e34fedadd7177 Mon Sep 17 00:00:00 2001 From: Alex Garcia Date: Tue, 31 Mar 2026 17:49:40 -0700 Subject: [PATCH] Validate validity/rowids blob sizes in rescore KNN path The rescore KNN loop read validity and rowids blobs from the chunks iterator without checking their sizes matched chunk_size expectations. A truncated or corrupt blob could cause OOB reads in bitmap_copy or rowid array access. The flat KNN path already had these checks. Adds corruption tests: truncated rowids blob and truncated validity blob both produce errors instead of crashes. Co-Authored-By: Claude Opus 4.6 (1M context) --- sqlite-vec-rescore.c | 8 ++++++++ tests/test-rescore.py | 33 ++++++++++++++++++++++++++++----- 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/sqlite-vec-rescore.c b/sqlite-vec-rescore.c index 1cf67bf..5432612 100644 --- a/sqlite-vec-rescore.c +++ b/sqlite-vec-rescore.c @@ -426,10 +426,18 @@ static int rescore_knn(vec0_vtab *p, vec0_cursor *pCur, unsigned char *chunkValidity = (unsigned char *)sqlite3_column_blob(stmtChunks, 1); i64 *chunkRowids = (i64 *)sqlite3_column_blob(stmtChunks, 2); + int validityBytes = sqlite3_column_bytes(stmtChunks, 1); + int rowidsBytes = sqlite3_column_bytes(stmtChunks, 2); if (!chunkValidity || !chunkRowids) { rc = SQLITE_ERROR; goto cleanup; } + // Validate blob sizes match chunk_size expectations + if (validityBytes < (p->chunk_size + 7) / 8 || + rowidsBytes < p->chunk_size * (int)sizeof(i64)) { + rc = SQLITE_ERROR; + goto cleanup; + } memset(chunk_distances, 0, p->chunk_size * sizeof(f32)); memset(chunk_topk_idxs, 0, k_oversample * sizeof(i32)); diff --git a/tests/test-rescore.py b/tests/test-rescore.py index 7c9c669..aa8586e 100644 --- a/tests/test-rescore.py +++ b/tests/test-rescore.py @@ -587,14 +587,37 @@ def test_corrupt_zeroblob_validity(db): # Corrupt: replace rowids with a truncated blob (wrong size) db.execute("UPDATE t_chunks SET rowids = x'00'") - # Should not crash — may return wrong results or error - try: - rows = db.execute( + # Should error, not crash — blob size validation catches the mismatch + with pytest.raises(sqlite3.OperationalError): + db.execute( "SELECT rowid FROM t WHERE embedding MATCH ? ORDER BY distance LIMIT 1", [float_vec([1, 0, 0, 0, 0, 0, 0, 0])], ).fetchall() - except sqlite3.OperationalError: - pass # Error is acceptable — crash is not + + +def test_corrupt_truncated_validity_blob(db): + """KNN should error when rescore chunk validity blob is truncated.""" + db.execute( + "CREATE VIRTUAL TABLE t USING vec0(" + " embedding float[128] indexed by rescore(quantizer=bit)" + ")" + ) + for i in range(5): + import random + random.seed(i) + db.execute( + "INSERT INTO t(rowid, embedding) VALUES (?, ?)", + [i + 1, float_vec([random.gauss(0, 1) for _ in range(128)])], + ) + + # Corrupt: truncate validity blob to 1 byte (should be chunk_size/8 = 128 bytes) + db.execute("UPDATE t_chunks SET validity = x'FF'") + + with pytest.raises(sqlite3.OperationalError): + db.execute( + "SELECT rowid FROM t WHERE embedding MATCH ? ORDER BY distance LIMIT 1", + [float_vec([1.0] * 128)], + ).fetchall() def test_rescore_text_pk_insert_knn_delete(db):