Add NULL checks after sqlite3_column_blob in rescore and DiskANN

sqlite3_column_blob() returns NULL for zero-length blobs or on OOM.
Several call sites in the rescore KNN and DiskANN node/vector read paths
passed the result directly to memcpy without checking, risking a NULL
dereference on corrupt or empty databases. IVF already had proper NULL checks.

Adds corruption regression tests that truncate shadow table blobs and
verify the query errors cleanly instead of crashing.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Alex Garcia 2026-03-31 14:31:49 -07:00
parent 9df59b4c03
commit 82f4eb08bf
4 changed files with 76 additions and 4 deletions

View file

@ -1149,3 +1149,30 @@ def test_diskann_large_batch_insert_500(db):
distances = [r[1] for r in rows]
for i in range(len(distances) - 1):
assert distances[i] <= distances[i + 1]
def test_corrupt_truncated_node_blob(db):
    """A KNN query over a truncated DiskANN node blob must not crash.

    The ``neighbor_ids`` blob of one node row is overwritten with a single
    byte before querying. Both outcomes are accepted: the query returns
    (possibly wrong) rows, or it raises ``sqlite3.OperationalError``.
    """
    db.execute("""
        CREATE VIRTUAL TABLE t USING vec0(
            emb float[8] INDEXED BY diskann(neighbor_quantizer=binary, n_neighbors=8)
        )
    """)

    # Populate rowids 1..5 with unit vectors, each pointing along its own axis.
    insert_sql = "INSERT INTO t(rowid, emb) VALUES (?, ?)"
    for idx in range(5):
        hot = idx % 8
        unit = [1.0 if axis == hot else 0.0 for axis in range(8)]
        db.execute(insert_sql, [idx + 1, _f32(unit)])

    # Corrupt a DiskANN node: replace neighbor_ids with a 1-byte blob (wrong size)
    corrupt_sql = (
        "UPDATE t_diskann_nodes00 SET neighbor_ids = x'00' WHERE rowid = 1"
    )
    db.execute(corrupt_sql)

    # Only a hard crash counts as failure; a clean SQLite error is tolerated.
    knn_sql = "SELECT rowid FROM t WHERE emb MATCH ? AND k=3"
    try:
        db.execute(knn_sql, [_f32([1, 0, 0, 0, 0, 0, 0, 0])]).fetchall()
    except sqlite3.OperationalError:
        pass  # Error is acceptable — crash is not

View file

@ -566,3 +566,32 @@ def test_multiple_vector_columns(db):
[float_vec([1.0] * 8)],
).fetchall()
assert rows[0]["rowid"] == 2
def test_corrupt_zeroblob_validity(db):
    """A KNN query must not crash when the rescore chunk rowids blob is truncated.

    The ``rowids`` column of every ``t_chunks`` row is overwritten with a
    single-byte blob before querying. Both outcomes are accepted: the query
    returns (possibly wrong) rows, or it raises ``sqlite3.OperationalError``.
    """
    db.execute(
        "CREATE VIRTUAL TABLE t USING vec0("
        " embedding float[8] indexed by rescore(quantizer=bit)"
        ")"
    )
    db.execute(
        "INSERT INTO t(rowid, embedding) VALUES (1, ?)",
        [float_vec([1, 0, 0, 0, 0, 0, 0, 0])],
    )
    db.execute(
        "INSERT INTO t(rowid, embedding) VALUES (2, ?)",
        [float_vec([0, 1, 0, 0, 0, 0, 0, 0])],
    )

    # Corrupt: replace rowids with a truncated blob (wrong size)
    db.execute("UPDATE t_chunks SET rowids = x'00'")

    # Should not crash — may return wrong results or error. The result set is
    # deliberately discarded: its contents are unspecified after corruption.
    try:
        db.execute(
            "SELECT rowid FROM t WHERE embedding MATCH ? ORDER BY distance LIMIT 1",
            [float_vec([1, 0, 0, 0, 0, 0, 0, 0])],
        ).fetchall()
    except sqlite3.OperationalError:
        pass  # Error is acceptable — crash is not