Add UBSAN findings TODO and improve vec-mismatch fuzzer

Document three classes of undefined behavior found by UBSAN:
function pointer type mismatches, misaligned f32 reads, and
float-to-integer overflow in vec_quantize_int8.

Improve vec-mismatch fuzzer to cover all error-path cleanup patterns:
type mismatches, dimension mismatches, single-arg functions, and
both text and blob inputs.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Alex Garcia 2026-03-02 21:19:33 -08:00
parent 4ce1ef3c6f
commit b669801d31
9 changed files with 190 additions and 90 deletions

View file

@ -8,119 +8,170 @@
#include <assert.h>
/*
* Fuzz target for two-argument vector functions (vec_distance_*, vec_add,
* vec_sub) where the first argument is always a valid vector and the second
* is fuzz-derived. This exercises the ensure_vector_match() error paths
* where the first vector parses successfully but the second does not.
* Fuzz target that exercises error-path cleanup in vector functions.
*
* Critical coverage: when arg1 is TEXT (JSON-parsed), the cleanup function
* is sqlite3_free rather than a no-op, so cleanup bugs become observable.
* The key insight: when a vector is parsed from JSON TEXT, the cleanup
* function is sqlite3_free (heap allocator). When parsed from BLOB,
* cleanup is a no-op. Bugs in cleanup code (wrong pointer, missing
* cleanup, double-free) are only observable with the sqlite3_free path.
*
* The first byte selects the function. The remaining bytes form arg 2.
* This fuzzer systematically covers:
* 1. Valid JSON arg1 + invalid fuzz arg2 (parse failure -> cleanup of arg1)
* 2. Valid JSON arg1 + valid JSON arg2 with different dimensions
* (dimension mismatch -> cleanup of both)
* 3. Valid JSON arg1 + int8/bit blob arg2 with mismatched type
* (type mismatch -> cleanup of both)
* 4. Fuzz arg1 + valid JSON arg2 (parse failure of arg1, no cleanup)
* 5. Single-arg functions with JSON text (normal cleanup path)
* 6. Single-arg functions with fuzz text (parse failure path)
*/
/* Bind one of seven canned fixture values to parameter `param` of `stmt`.
 *
 * `mode % 7` picks the fixture:
 *   0..2  JSON text holding 4, 2 and 1 float elements
 *   3..4  raw float blobs holding 4 and 2 elements
 *   5     4-byte int8_t blob
 *   6     1-byte uint8_t blob (0xAA)
 *
 * Every fixture has static storage duration and is bound with SQLITE_STATIC.
 * When `mode % 7` is negative no fixture matches and nothing is bound.
 *
 * NOTE(review): earlier comments on the int8 and bit fixtures said a subtype
 * "must" be set, but this helper only calls sqlite3_bind_blob and never sets
 * one -- confirm that these blobs are interpreted as int8/bit vectors by the
 * functions under test. */
static void bind_valid_vector(sqlite3_stmt *stmt, int param, int mode) {
  static const float f32_blob_4d[] = {1.0f, 0.0f, 0.0f, 0.0f};
  static const float f32_blob_2d[] = {1.0f, 2.0f};
  static const int8_t int8_blob_4d[] = {10, 20, 30, 40};
  static const uint8_t bit_blob_1b[] = {0xAA};

  /* A negative length marks a NUL-terminated text fixture; a non-negative
   * length is the byte size of a blob fixture. */
  static const struct {
    const void *bytes;
    int len;
  } fixtures[] = {
    {"[1.0, 0.0, 0.0, 0.0]", -1},
    {"[1.0, 2.0]", -1},
    {"[1.0]", -1},
    {f32_blob_4d, (int)sizeof(f32_blob_4d)},
    {f32_blob_2d, (int)sizeof(f32_blob_2d)},
    {int8_blob_4d, (int)sizeof(int8_blob_4d)},
    {bit_blob_1b, (int)sizeof(bit_blob_1b)},
  };

  const int slot = mode % 7;
  if (slot < 0) {
    /* C's % keeps the sign of the dividend; leave the parameter unbound. */
    return;
  }

  if (fixtures[slot].len < 0) {
    sqlite3_bind_text(stmt, param, (const char *)fixtures[slot].bytes, -1,
                      SQLITE_STATIC);
  } else {
    sqlite3_bind_blob(stmt, param, fixtures[slot].bytes, fixtures[slot].len,
                      SQLITE_STATIC);
  }
}
/* Prepare `sql`, bind every `?` parameter, step the statement once and
 * finalize it.
 *
 *   db            open database connection
 *   sql           statement text to prepare
 *   arg1_mode     fixture selector for parameter 1 when it is not the fuzz slot
 *   arg2_mode     fixture selector for every other non-fuzz parameter
 *   fuzz          fuzz bytes (bound with SQLITE_STATIC, so they must outlive
 *                 the step call)
 *   fuzz_len      number of bytes in `fuzz`
 *   fuzz_arg      1-based index of the parameter that receives the fuzz
 *                 bytes; an index that matches no parameter (e.g. 0) means
 *                 every parameter gets a fixture
 *   fuzz_as_text  non-zero binds the fuzz bytes as TEXT, zero binds as BLOB
 *
 * If preparation fails the function returns without doing anything else.
 * The result of sqlite3_step is not inspected. */
static void run_query(sqlite3 *db, const char *sql,
                      int arg1_mode, int arg2_mode,
                      const uint8_t *fuzz, int fuzz_len,
                      int fuzz_arg, int fuzz_as_text) {
  sqlite3_stmt *prepared = NULL;
  if (sqlite3_prepare_v2(db, sql, -1, &prepared, NULL) != SQLITE_OK) {
    return;
  }

  const int param_count = sqlite3_bind_parameter_count(prepared);
  for (int idx = 1; idx <= param_count; ++idx) {
    if (idx != fuzz_arg) {
      /* Fixture slot: parameter 1 follows arg1_mode, the rest arg2_mode. */
      bind_valid_vector(prepared, idx, idx == 1 ? arg1_mode : arg2_mode);
      continue;
    }

    /* Fuzz slot: hand the raw bytes over as text or blob. */
    if (fuzz_as_text) {
      sqlite3_bind_text(prepared, idx, (const char *)fuzz, fuzz_len,
                        SQLITE_STATIC);
    } else {
      sqlite3_bind_blob(prepared, idx, fuzz, fuzz_len, SQLITE_STATIC);
    }
  }

  sqlite3_step(prepared);
  sqlite3_finalize(prepared);
}
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
if (size < 2) return 0;
if (size < 3) return 0;
int rc;
sqlite3 *db;
sqlite3_stmt *stmt = NULL;
rc = sqlite3_open(":memory:", &db);
assert(rc == SQLITE_OK);
rc = sqlite3_vec_init(db, NULL, NULL);
assert(rc == SQLITE_OK);
/* --- Decode fuzz control bytes --- */
uint8_t b0 = data[0];
uint8_t b1 = data[1];
uint8_t b2 = data[2];
const uint8_t *payload = data + 3;
int payload_size = (int)(size - 3);
/* Two-argument vector functions */
static const char *queries[] = {
"SELECT vec_distance_l2(?, ?)", /* 0 */
"SELECT vec_distance_cosine(?, ?)", /* 1 */
"SELECT vec_distance_l1(?, ?)", /* 2 */
"SELECT vec_distance_hamming(?, ?)", /* 3 */
"SELECT vec_add(?, ?)", /* 4 */
"SELECT vec_sub(?, ?)", /* 5 */
static const char *two_arg[] = {
"SELECT vec_distance_l2(?, ?)",
"SELECT vec_distance_cosine(?, ?)",
"SELECT vec_distance_l1(?, ?)",
"SELECT vec_distance_hamming(?, ?)",
"SELECT vec_add(?, ?)",
"SELECT vec_sub(?, ?)",
};
static const int nQueries = sizeof(queries) / sizeof(queries[0]);
/* Valid JSON vectors (TEXT) — parsed via fvec_from_value text path,
* which sets cleanup = sqlite3_free */
static const char *json_vecs[] = {
"[1.0, 0.0, 0.0, 0.0]", /* 4d */
"[1.0, 2.0]", /* 2d */
"[1.0]", /* 1d */
/* Single-argument vector functions that call cleanup */
static const char *one_arg[] = {
"SELECT vec_f32(?)",
"SELECT vec_int8(?)",
"SELECT vec_bit(?)",
"SELECT vec_length(?)",
"SELECT vec_type(?)",
"SELECT vec_to_json(?)",
"SELECT vec_normalize(?)",
"SELECT vec_quantize_binary(?)",
};
static const int nJsonVecs = sizeof(json_vecs) / sizeof(json_vecs[0]);
/* Valid blob vectors (BLOB) — parsed via fvec_from_value blob path,
* which sets cleanup = fvec_cleanup_noop */
static const float blob_vec[] = {1.0f, 0.0f, 0.0f, 0.0f};
int qIdx2 = b0 % 6;
int qIdx1 = b0 % 8;
int arg1_mode = b1 % 7;
int arg2_mode = b2 % 7;
uint8_t selector = data[0];
int qIdx = selector % nQueries;
/* Bits 3-4: select which valid vector and format for arg1 */
int arg1_mode = (selector / nQueries) % 4;
/*
* Phase 1: Two-arg functions fuzz arg2, valid arg1
* Exercises: parse-failure cleanup of arg1 (the fixed bug),
* type mismatch cleanup, dimension mismatch cleanup.
*/
/* arg2 as fuzz blob */
run_query(db, two_arg[qIdx2], arg1_mode, 0,
payload, payload_size, /*fuzz_arg=*/2, /*as_text=*/0);
/* arg2 as fuzz text */
run_query(db, two_arg[qIdx2], arg1_mode, 0,
payload, payload_size, /*fuzz_arg=*/2, /*as_text=*/1);
const uint8_t *payload = data + 1;
int payload_size = (int)(size - 1);
/*
* Phase 2: Two-arg functions fuzz arg1, valid arg2
* Exercises: parse-failure of arg1 (no cleanup needed), and
* type/dimension mismatch when arg1 parses to unexpected type.
*/
run_query(db, two_arg[qIdx2], 0, arg2_mode,
payload, payload_size, /*fuzz_arg=*/1, /*as_text=*/0);
run_query(db, two_arg[qIdx2], 0, arg2_mode,
payload, payload_size, /*fuzz_arg=*/1, /*as_text=*/1);
/* --- Test 1: valid arg1, fuzz arg2 --- */
rc = sqlite3_prepare_v2(db, queries[qIdx], -1, &stmt, NULL);
if (rc != SQLITE_OK) { sqlite3_close(db); return 0; }
/*
* Phase 3: Two-arg both valid but deliberately mismatched types/dims.
* arg1_mode and arg2_mode often produce different types or dimensions.
* Exercises: type mismatch (lines 1035-1042) and dimension mismatch
* (lines 1044-1051) with sqlite3_free cleanup on both sides.
*/
run_query(db, two_arg[qIdx2], arg1_mode, arg2_mode,
NULL, 0, /*fuzz_arg=*/0, /*as_text=*/0);
/* Bind arg1 as either JSON text or blob */
switch (arg1_mode) {
case 0: /* JSON text — triggers sqlite3_free cleanup */
sqlite3_bind_text(stmt, 1, json_vecs[0], -1, SQLITE_STATIC);
break;
case 1:
sqlite3_bind_text(stmt, 1, json_vecs[1], -1, SQLITE_STATIC);
break;
case 2:
sqlite3_bind_text(stmt, 1, json_vecs[2], -1, SQLITE_STATIC);
break;
case 3: /* blob — triggers noop cleanup */
sqlite3_bind_blob(stmt, 1, blob_vec, sizeof(blob_vec), SQLITE_STATIC);
break;
}
/* Bind arg2 as fuzz blob (most likely to fail parsing for non-4-aligned sizes) */
sqlite3_bind_blob(stmt, 2, payload, payload_size, SQLITE_STATIC);
sqlite3_step(stmt);
sqlite3_finalize(stmt);
stmt = NULL;
/* --- Test 2: same but arg2 as fuzz text --- */
rc = sqlite3_prepare_v2(db, queries[qIdx], -1, &stmt, NULL);
if (rc != SQLITE_OK) { sqlite3_close(db); return 0; }
switch (arg1_mode) {
case 0:
sqlite3_bind_text(stmt, 1, json_vecs[0], -1, SQLITE_STATIC);
break;
case 1:
sqlite3_bind_text(stmt, 1, json_vecs[1], -1, SQLITE_STATIC);
break;
case 2:
sqlite3_bind_text(stmt, 1, json_vecs[2], -1, SQLITE_STATIC);
break;
case 3:
sqlite3_bind_blob(stmt, 1, blob_vec, sizeof(blob_vec), SQLITE_STATIC);
break;
}
sqlite3_bind_text(stmt, 2, (const char *)payload, payload_size, SQLITE_STATIC);
sqlite3_step(stmt);
sqlite3_finalize(stmt);
stmt = NULL;
/* --- Test 3: fuzz arg1, valid arg2 --- */
rc = sqlite3_prepare_v2(db, queries[qIdx], -1, &stmt, NULL);
if (rc != SQLITE_OK) { sqlite3_close(db); return 0; }
sqlite3_bind_blob(stmt, 1, payload, payload_size, SQLITE_STATIC);
sqlite3_bind_text(stmt, 2, json_vecs[0], -1, SQLITE_STATIC);
sqlite3_step(stmt);
sqlite3_finalize(stmt);
/*
* Phase 4: Single-arg functions fuzz as blob and text.
* Exercises: parse failure paths in vec_f32, vec_int8, vec_bit, etc.
* Also exercises normal cleanup when fuzz data happens to be valid.
*/
run_query(db, one_arg[qIdx1], 0, 0,
payload, payload_size, /*fuzz_arg=*/1, /*as_text=*/0);
run_query(db, one_arg[qIdx1], 0, 0,
payload, payload_size, /*fuzz_arg=*/1, /*as_text=*/1);
sqlite3_close(db);
return 0;