From c4c23bd8baaf70b079e3ead2675872dd452ba922 Mon Sep 17 00:00:00 2001
From: Alex Garcia
Date: Tue, 31 Mar 2026 17:52:12 -0700
Subject: [PATCH] Reject NaN and Inf in float32 vector input

NaN/Inf values in vectors break heap/sort invariants in KNN, causing
wrong or unpredictable results. Now rejected at parse time in
fvec_from_value() for both blob and JSON text input paths, with a
clear error message identifying the offending element index.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 sqlite-vec.c           | 19 ++++++++++++++++++-
 tests/test-loadable.py | 28 ++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/sqlite-vec.c b/sqlite-vec.c
index f239d47..7261436 100644
--- a/sqlite-vec.c
+++ b/sqlite-vec.c
@@ -984,8 +984,18 @@ static int fvec_from_value(sqlite3_value *value, f32 **vector,
       return SQLITE_NOMEM;
     }
     memcpy(buf, blob, bytes);
+    size_t n = bytes / sizeof(f32);
+    for (size_t i = 0; i < n; i++) {
+      if (isnan(buf[i]) || isinf(buf[i])) {
+        *pzErr = sqlite3_mprintf(
+            "invalid float32 vector: element %d is %s",
+            (int)i, isnan(buf[i]) ? "NaN" : "Inf");
+        sqlite3_free(buf);
+        return SQLITE_ERROR;
+      }
+    }
     *vector = buf;
-    *dimensions = bytes / sizeof(f32);
+    *dimensions = n;
     *cleanup = sqlite3_free;
     return SQLITE_OK;
   }
@@ -1053,6 +1063,13 @@ static int fvec_from_value(sqlite3_value *value, f32 **vector,
       }
 
       f32 res = (f32)result;
+      if (isnan(res) || isinf(res)) {
+        sqlite3_free(x.z);
+        *pzErr = sqlite3_mprintf(
+            "invalid float32 vector: element %d is %s",
+            (int)x.length, isnan(res) ? "NaN" : "Inf");
+        return SQLITE_ERROR;
+      }
       array_append(&x, (const void *)&res);
 
       offset += (endptr - ptr);
diff --git a/tests/test-loadable.py b/tests/test-loadable.py
index 1ac0cf3..0044144 100644
--- a/tests/test-loadable.py
+++ b/tests/test-loadable.py
@@ -365,6 +365,34 @@ def test_vec_distance_l1():
     )
 
 
+def test_vec_reject_nan_inf():
+    """Float32 blob vectors containing NaN or +/-Inf must be rejected."""
+    import struct
+
+    # NaN via blob
+    nan_blob = struct.pack("4f", 1.0, float("nan"), 3.0, 4.0)
+    with pytest.raises(sqlite3.OperationalError, match="NaN"):
+        db.execute("SELECT vec_length(?)", [nan_blob])
+
+    # Inf via blob
+    inf_blob = struct.pack("4f", 1.0, float("inf"), 3.0, 4.0)
+    with pytest.raises(sqlite3.OperationalError, match="Inf"):
+        db.execute("SELECT vec_length(?)", [inf_blob])
+
+    # -Inf via blob (the error message reports it as "Inf" as well)
+    ninf_blob = struct.pack("4f", 1.0, float("-inf"), 3.0, 4.0)
+    with pytest.raises(sqlite3.OperationalError, match="Inf"):
+        db.execute("SELECT vec_length(?)", [ninf_blob])
+
+    # NOTE: the JSON text path is not exercised here. JSON has no NaN
+    # literal (though strtod may still parse "NaN"), so only the blob
+    # path, the primary input method, is covered by this test.
+
+    # Valid vectors still work
+    ok_blob = struct.pack("4f", 1.0, 2.0, 3.0, 4.0)
+    assert db.execute("SELECT vec_length(?)", [ok_blob]).fetchone()[0] == 4
+
+
 def test_vec_distance_l2():
     vec_distance_l2 = lambda *args, a="?", b="?": db.execute(
         f"select vec_distance_l2({a}, {b})", args