Reject NaN and Inf in float32 vector input

NaN/Inf values in vectors break heap/sort invariants in KNN, causing
wrong or unpredictable results. They are now rejected at parse time in
fvec_from_value() for both the blob and JSON text input paths, with a
clear error message identifying the offending element index.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Alex Garcia 2026-03-31 17:52:12 -07:00
parent 5522e86cd2
commit c4c23bd8ba
2 changed files with 46 additions and 1 deletions

View file

@ -984,8 +984,18 @@ static int fvec_from_value(sqlite3_value *value, f32 **vector,
return SQLITE_NOMEM; return SQLITE_NOMEM;
} }
memcpy(buf, blob, bytes); memcpy(buf, blob, bytes);
size_t n = bytes / sizeof(f32);
for (size_t i = 0; i < n; i++) {
if (isnan(buf[i]) || isinf(buf[i])) {
*pzErr = sqlite3_mprintf(
"invalid float32 vector: element %d is %s",
(int)i, isnan(buf[i]) ? "NaN" : "Inf");
sqlite3_free(buf);
return SQLITE_ERROR;
}
}
*vector = buf; *vector = buf;
*dimensions = bytes / sizeof(f32); *dimensions = n;
*cleanup = sqlite3_free; *cleanup = sqlite3_free;
return SQLITE_OK; return SQLITE_OK;
} }
@ -1053,6 +1063,13 @@ static int fvec_from_value(sqlite3_value *value, f32 **vector,
} }
f32 res = (f32)result; f32 res = (f32)result;
if (isnan(res) || isinf(res)) {
sqlite3_free(x.z);
*pzErr = sqlite3_mprintf(
"invalid float32 vector: element %d is %s",
(int)x.length, isnan(res) ? "NaN" : "Inf");
return SQLITE_ERROR;
}
array_append(&x, (const void *)&res); array_append(&x, (const void *)&res);
offset += (endptr - ptr); offset += (endptr - ptr);

View file

@ -365,6 +365,34 @@ def test_vec_distance_l1():
) )
def test_vec_reject_nan_inf():
    """Float32 blob vectors containing NaN, +Inf or -Inf must be rejected.

    Each malformed blob is passed to ``vec_length`` and is expected to raise
    ``sqlite3.OperationalError`` with a message naming the kind of bad value
    ("NaN" or "Inf"). A fully finite vector must still be accepted.
    """
    import struct

    # (non-finite value placed at index 1, substring expected in the error)
    bad_cases = [
        (float("nan"), "NaN"),
        (float("inf"), "Inf"),
        (float("-inf"), "Inf"),  # negative infinity is also reported as "Inf"
    ]
    for bad_value, expected in bad_cases:
        blob = struct.pack("4f", 1.0, bad_value, 3.0, 4.0)
        with pytest.raises(sqlite3.OperationalError, match=expected):
            db.execute("SELECT vec_length(?)", [blob])

    # NOTE: only the blob input path is exercised here; the JSON text input
    # path is not covered by this test.

    # Valid vectors still work
    ok_blob = struct.pack("4f", 1.0, 2.0, 3.0, 4.0)
    assert db.execute("SELECT vec_length(?)", [ok_blob]).fetchone()[0] == 4
def test_vec_distance_l2(): def test_vec_distance_l2():
vec_distance_l2 = lambda *args, a="?", b="?": db.execute( vec_distance_l2 = lambda *args, a="?", b="?": db.execute(
f"select vec_distance_l2({a}, {b})", args f"select vec_distance_l2({a}, {b})", args