mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-04-25 08:46:49 +02:00
Validate validity/rowids blob sizes in rescore KNN path
The rescore KNN loop read the validity and rowids blobs from the chunks iterator without checking that their sizes matched what chunk_size requires. A truncated or corrupt blob could cause out-of-bounds (OOB) reads in bitmap_copy or in the rowid array access. The flat KNN path already had these checks. Adds corruption tests: a truncated rowids blob and a truncated validity blob both produce errors instead of crashes. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
f2c9fb8f08
commit
5522e86cd2
2 changed files with 36 additions and 5 deletions
|
|
@ -426,10 +426,18 @@ static int rescore_knn(vec0_vtab *p, vec0_cursor *pCur,
|
||||||
unsigned char *chunkValidity =
|
unsigned char *chunkValidity =
|
||||||
(unsigned char *)sqlite3_column_blob(stmtChunks, 1);
|
(unsigned char *)sqlite3_column_blob(stmtChunks, 1);
|
||||||
i64 *chunkRowids = (i64 *)sqlite3_column_blob(stmtChunks, 2);
|
i64 *chunkRowids = (i64 *)sqlite3_column_blob(stmtChunks, 2);
|
||||||
|
int validityBytes = sqlite3_column_bytes(stmtChunks, 1);
|
||||||
|
int rowidsBytes = sqlite3_column_bytes(stmtChunks, 2);
|
||||||
if (!chunkValidity || !chunkRowids) {
|
if (!chunkValidity || !chunkRowids) {
|
||||||
rc = SQLITE_ERROR;
|
rc = SQLITE_ERROR;
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
|
// Validate blob sizes match chunk_size expectations
|
||||||
|
if (validityBytes < (p->chunk_size + 7) / 8 ||
|
||||||
|
rowidsBytes < p->chunk_size * (int)sizeof(i64)) {
|
||||||
|
rc = SQLITE_ERROR;
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
|
||||||
memset(chunk_distances, 0, p->chunk_size * sizeof(f32));
|
memset(chunk_distances, 0, p->chunk_size * sizeof(f32));
|
||||||
memset(chunk_topk_idxs, 0, k_oversample * sizeof(i32));
|
memset(chunk_topk_idxs, 0, k_oversample * sizeof(i32));
|
||||||
|
|
|
||||||
|
|
@ -587,14 +587,37 @@ def test_corrupt_zeroblob_validity(db):
|
||||||
# Corrupt: replace rowids with a truncated blob (wrong size)
|
# Corrupt: replace rowids with a truncated blob (wrong size)
|
||||||
db.execute("UPDATE t_chunks SET rowids = x'00'")
|
db.execute("UPDATE t_chunks SET rowids = x'00'")
|
||||||
|
|
||||||
# Should not crash — may return wrong results or error
|
# Should error, not crash — blob size validation catches the mismatch
|
||||||
try:
|
with pytest.raises(sqlite3.OperationalError):
|
||||||
rows = db.execute(
|
db.execute(
|
||||||
"SELECT rowid FROM t WHERE embedding MATCH ? ORDER BY distance LIMIT 1",
|
"SELECT rowid FROM t WHERE embedding MATCH ? ORDER BY distance LIMIT 1",
|
||||||
[float_vec([1, 0, 0, 0, 0, 0, 0, 0])],
|
[float_vec([1, 0, 0, 0, 0, 0, 0, 0])],
|
||||||
).fetchall()
|
).fetchall()
|
||||||
except sqlite3.OperationalError:
|
|
||||||
pass # Error is acceptable — crash is not
|
|
||||||
|
def test_corrupt_truncated_validity_blob(db):
|
||||||
|
"""KNN should error when rescore chunk validity blob is truncated."""
|
||||||
|
db.execute(
|
||||||
|
"CREATE VIRTUAL TABLE t USING vec0("
|
||||||
|
" embedding float[128] indexed by rescore(quantizer=bit)"
|
||||||
|
")"
|
||||||
|
)
|
||||||
|
for i in range(5):
|
||||||
|
import random
|
||||||
|
random.seed(i)
|
||||||
|
db.execute(
|
||||||
|
"INSERT INTO t(rowid, embedding) VALUES (?, ?)",
|
||||||
|
[i + 1, float_vec([random.gauss(0, 1) for _ in range(128)])],
|
||||||
|
)
|
||||||
|
|
||||||
|
# Corrupt: truncate validity blob to 1 byte (should be chunk_size/8 = 128 bytes)
|
||||||
|
db.execute("UPDATE t_chunks SET validity = x'FF'")
|
||||||
|
|
||||||
|
with pytest.raises(sqlite3.OperationalError):
|
||||||
|
db.execute(
|
||||||
|
"SELECT rowid FROM t WHERE embedding MATCH ? ORDER BY distance LIMIT 1",
|
||||||
|
[float_vec([1.0] * 128)],
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
|
||||||
def test_rescore_text_pk_insert_knn_delete(db):
|
def test_rescore_text_pk_insert_knn_delete(db):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue