mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-04-24 16:26:37 +02:00
Reject NaN and Inf in float32 vector input
NaN/Inf values in vectors break heap/sort invariants in KNN, causing wrong or unpredictable results. Now rejected at parse time in fvec_from_value() for both blob and JSON text input paths, with a clear error message identifying the offending element index. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
5522e86cd2
commit
c4c23bd8ba
2 changed files with 46 additions and 1 deletions
19
sqlite-vec.c
19
sqlite-vec.c
|
|
@ -984,8 +984,18 @@ static int fvec_from_value(sqlite3_value *value, f32 **vector,
|
|||
return SQLITE_NOMEM;
|
||||
}
|
||||
memcpy(buf, blob, bytes);
|
||||
size_t n = bytes / sizeof(f32);
|
||||
for (size_t i = 0; i < n; i++) {
|
||||
if (isnan(buf[i]) || isinf(buf[i])) {
|
||||
*pzErr = sqlite3_mprintf(
|
||||
"invalid float32 vector: element %d is %s",
|
||||
(int)i, isnan(buf[i]) ? "NaN" : "Inf");
|
||||
sqlite3_free(buf);
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
}
|
||||
*vector = buf;
|
||||
*dimensions = bytes / sizeof(f32);
|
||||
*dimensions = n;
|
||||
*cleanup = sqlite3_free;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
|
@ -1053,6 +1063,13 @@ static int fvec_from_value(sqlite3_value *value, f32 **vector,
|
|||
}
|
||||
|
||||
f32 res = (f32)result;
|
||||
if (isnan(res) || isinf(res)) {
|
||||
sqlite3_free(x.z);
|
||||
*pzErr = sqlite3_mprintf(
|
||||
"invalid float32 vector: element %d is %s",
|
||||
(int)x.length, isnan(res) ? "NaN" : "Inf");
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
array_append(&x, (const void *)&res);
|
||||
|
||||
offset += (endptr - ptr);
|
||||
|
|
|
|||
|
|
@ -365,6 +365,34 @@ def test_vec_distance_l1():
|
|||
)
|
||||
|
||||
|
||||
def test_vec_reject_nan_inf():
    """Float32 vectors passed as blobs must be rejected if any element is NaN or +/-Inf.

    Each case packs a 4-element float32 blob whose second element is the
    non-finite value and expects `vec_length()` to raise an
    `sqlite3.OperationalError` whose message names the kind of value
    ("NaN" or "Inf"). A blob of finite values must still be accepted.
    """
    import struct

    def pack_vec(second):
        # 4 native-endian float32 values; only the second element varies.
        return struct.pack("4f", 1.0, second, 3.0, 4.0)

    # (non-finite value, substring expected in the error message).
    # Negative infinity is reported with the same "Inf" label as positive.
    rejected = [
        (float("nan"), "NaN"),
        (float("inf"), "Inf"),
        (float("-inf"), "Inf"),
    ]
    for value, label in rejected:
        with pytest.raises(sqlite3.OperationalError, match=label):
            db.execute("SELECT vec_length(?)", [pack_vec(value)])

    # NOTE: the JSON text input path is not exercised by this test. JSON has
    # no NaN/Inf literal, so only the blob path (the primary input method)
    # is covered here.

    # Valid vectors still work
    ok_blob = pack_vec(2.0)
    assert db.execute("SELECT vec_length(?)", [ok_blob]).fetchone()[0] == 4
|
||||
|
||||
|
||||
def test_vec_distance_l2():
|
||||
vec_distance_l2 = lambda *args, a="?", b="?": db.execute(
|
||||
f"select vec_distance_l2({a}, {b})", args
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue