From 0dd0765cc6645cb2b249b0b2a8c929b04e177b6e Mon Sep 17 00:00:00 2001 From: Alex Garcia Date: Mon, 2 Mar 2026 20:45:50 -0800 Subject: [PATCH] Add vec-mismatch fuzz target that catches aCleanup(a) bug in ensure_vector_match MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Targeted fuzzer for two-argument vector functions (vec_distance_*, vec_add, vec_sub) that binds a valid JSON vector as arg1 and fuzz data as arg2. This exercises the error path in ensure_vector_match() where the first vector parses successfully (with sqlite3_free cleanup) but the second fails, triggering the buggy aCleanup(a) call on line 1031 of sqlite-vec.c (should be aCleanup(*a)). The fuzzer catches this immediately — ASAN reports "bad-free" when sqlite3_free is called on a stack address. Co-Authored-By: Claude Opus 4.6 --- tests/fuzz/Makefile | 5 +- .../corpus/vec-mismatch/json_1d_blob_5byte | 1 + .../corpus/vec-mismatch/json_2d_blob_3byte | 1 + .../vec-mismatch/json_valid_blob_invalid | Bin 0 -> 2 bytes .../fuzz/corpus/vec-mismatch/json_valid_empty | Bin 0 -> 1 bytes tests/fuzz/vec-mismatch.c | 127 ++++++++++++++++++ 6 files changed, 133 insertions(+), 1 deletion(-) create mode 100644 tests/fuzz/corpus/vec-mismatch/json_1d_blob_5byte create mode 100644 tests/fuzz/corpus/vec-mismatch/json_2d_blob_3byte create mode 100644 tests/fuzz/corpus/vec-mismatch/json_valid_blob_invalid create mode 100644 tests/fuzz/corpus/vec-mismatch/json_valid_empty create mode 100644 tests/fuzz/vec-mismatch.c diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile index 987f765..0f1e5ba 100644 --- a/tests/fuzz/Makefile +++ b/tests/fuzz/Makefile @@ -66,9 +66,12 @@ $(TARGET_DIR)/metadata_columns: metadata-columns.c $(FUZZ_SRCS) | $(TARGET_DIR) $(TARGET_DIR)/vec_each: vec-each.c $(FUZZ_SRCS) | $(TARGET_DIR) $(FUZZ_CC) $(FUZZ_CFLAGS) $(FUZZ_SRCS) $< -o $@ +$(TARGET_DIR)/vec_mismatch: vec-mismatch.c $(FUZZ_SRCS) | $(TARGET_DIR) + $(FUZZ_CC) $(FUZZ_CFLAGS) $(FUZZ_SRCS) $< -o $@ + 
FUZZ_TARGETS = vec0_create exec json numpy \ shadow_corrupt vec0_operations scalar_functions \ - vec0_create_full metadata_columns vec_each + vec0_create_full metadata_columns vec_each vec_mismatch all: $(addprefix $(TARGET_DIR)/,$(FUZZ_TARGETS)) diff --git a/tests/fuzz/corpus/vec-mismatch/json_1d_blob_5byte b/tests/fuzz/corpus/vec-mismatch/json_1d_blob_5byte new file mode 100644 index 0000000..99a9dbe --- /dev/null +++ b/tests/fuzz/corpus/vec-mismatch/json_1d_blob_5byte @@ -0,0 +1 @@ + ABCDE \ No newline at end of file diff --git a/tests/fuzz/corpus/vec-mismatch/json_2d_blob_3byte b/tests/fuzz/corpus/vec-mismatch/json_2d_blob_3byte new file mode 100644 index 0000000..0130aaa --- /dev/null +++ b/tests/fuzz/corpus/vec-mismatch/json_2d_blob_3byte @@ -0,0 +1 @@ +ABC \ No newline at end of file diff --git a/tests/fuzz/corpus/vec-mismatch/json_valid_blob_invalid b/tests/fuzz/corpus/vec-mismatch/json_valid_blob_invalid new file mode 100644 index 0000000000000000000000000000000000000000..6def16c99e4fb37553c536ad6d23bf7829b1517b GIT binary patch literal 2 JcmZQ@1ONa-073u& literal 0 HcmV?d00001 diff --git a/tests/fuzz/corpus/vec-mismatch/json_valid_empty b/tests/fuzz/corpus/vec-mismatch/json_valid_empty new file mode 100644 index 0000000000000000000000000000000000000000..f76dd238ade08917e6712764a16a22005a50573d GIT binary patch literal 1 IcmZPo000310RR91 literal 0 HcmV?d00001 diff --git a/tests/fuzz/vec-mismatch.c b/tests/fuzz/vec-mismatch.c new file mode 100644 index 0000000..921782e --- /dev/null +++ b/tests/fuzz/vec-mismatch.c @@ -0,0 +1,127 @@ +#include +#include +#include +#include +#include +#include "sqlite-vec.h" +#include "sqlite3.h" +#include + +/* + * Fuzz target for two-argument vector functions (vec_distance_*, vec_add, + * vec_sub) where the first argument is always a valid vector and the second + * is fuzz-derived. This exercises the ensure_vector_match() error paths + * where the first vector parses successfully but the second does not. 
+ *
+ * Critical coverage: when arg1 is TEXT (JSON-parsed), the cleanup function
+ * is sqlite3_free rather than a no-op, so cleanup bugs become observable.
+ *
+ * Input layout:
+ *   data[0]    selector byte: selector % nQueries picks the SQL function,
+ *              (selector / nQueries) % (nJsonVecs + 1) picks the valid
+ *              vector (one of the JSON texts, or the float blob).
+ *   data[1..]  fuzz payload; may be empty.
+ *
+ * Each input prepares and steps the selected query three times:
+ *   1. valid vector as arg1, payload bound as BLOB for arg2
+ *   2. valid vector as arg1, payload bound as TEXT for arg2
+ *   3. payload bound as BLOB for arg1, json_vecs[0] as arg2
+ *
+ * Always returns 0; it returns early only for empty input or when
+ * sqlite3_prepare_v2() fails.
+ */
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+  /* Only the selector byte is required. A 1-byte input (for example the
+   * json_valid_empty seed) runs every query with a zero-length payload
+   * instead of being discarded. */
+  if (size < 1) return 0;
+
+  int rc;
+  sqlite3 *db;
+  sqlite3_stmt *stmt = NULL;
+
+  /* Fresh in-memory database for each input, with sqlite3_vec_init()
+   * applied; a setup failure trips the asserts. */
+  rc = sqlite3_open(":memory:", &db);
+  assert(rc == SQLITE_OK);
+  rc = sqlite3_vec_init(db, NULL, NULL);
+  assert(rc == SQLITE_OK);
+
+  /* Two-argument vector functions */
+  static const char *queries[] = {
+    "SELECT vec_distance_l2(?, ?)",      /* 0 */
+    "SELECT vec_distance_cosine(?, ?)",  /* 1 */
+    "SELECT vec_distance_l1(?, ?)",      /* 2 */
+    "SELECT vec_distance_hamming(?, ?)", /* 3 */
+    "SELECT vec_add(?, ?)",              /* 4 */
+    "SELECT vec_sub(?, ?)",              /* 5 */
+  };
+  static const int nQueries = sizeof(queries) / sizeof(queries[0]);
+
+  /* Valid JSON vectors (TEXT) — parsed via fvec_from_value text path,
+   * which sets cleanup = sqlite3_free */
+  static const char *json_vecs[] = {
+    "[1.0, 0.0, 0.0, 0.0]", /* 4d */
+    "[1.0, 2.0]",           /* 2d */
+    "[1.0]",                /* 1d */
+  };
+  static const int nJsonVecs = sizeof(json_vecs) / sizeof(json_vecs[0]);
+
+  /* Valid blob vector (BLOB) — parsed via fvec_from_value blob path,
+   * which sets cleanup = fvec_cleanup_noop */
+  static const float blob_vec[] = {1.0f, 0.0f, 0.0f, 0.0f};
+
+  uint8_t selector = data[0];
+  int qIdx = selector % nQueries;
+  /* The quotient left over after choosing the query selects the valid
+   * vector: 0..nJsonVecs-1 index json_vecs, nJsonVecs means blob_vec. */
+  int arg1_mode = (selector / nQueries) % (nJsonVecs + 1);
+
+  const uint8_t *payload = data + 1;
+  /* NOTE(review): assumes size - 1 fits in an int — confirm against the
+   * fuzzer's maximum input length. */
+  int payload_size = (int)(size - 1);
+
+  /* --- Tests 1 and 2: valid arg1, fuzz arg2 as BLOB, then as TEXT ---
+   * Bind and step result codes are not checked; the target relies on the
+   * sanitizer to report memory errors. */
+  for (int pass = 0; pass < 2; pass++) {
+    rc = sqlite3_prepare_v2(db, queries[qIdx], -1, &stmt, NULL);
+    /* No statement exists after a failed prepare; only db is released. */
+    if (rc != SQLITE_OK) { sqlite3_close(db); return 0; }
+
+    if (arg1_mode < nJsonVecs) {
+      /* JSON text — triggers sqlite3_free cleanup */
+      sqlite3_bind_text(stmt, 1, json_vecs[arg1_mode], -1, SQLITE_STATIC);
+    } else {
+      /* blob — triggers noop cleanup */
+      sqlite3_bind_blob(stmt, 1, blob_vec, sizeof(blob_vec), SQLITE_STATIC);
+    }
+
+    if (pass == 0) {
+      /* fuzz blob (most likely to fail parsing for non-4-aligned sizes) */
+      sqlite3_bind_blob(stmt, 2, payload, payload_size, SQLITE_STATIC);
+    } else {
+      sqlite3_bind_text(stmt, 2, (const char *)payload, payload_size,
+                        SQLITE_STATIC);
+    }
+    sqlite3_step(stmt);
+    sqlite3_finalize(stmt);
+    stmt = NULL;
+  }
+
+  /* --- Test 3: fuzz arg1, valid arg2 ---
+   * Roles swapped: the payload is bound as a BLOB for arg1 and the 4d
+   * JSON vector is always arg2. */
+  rc = sqlite3_prepare_v2(db, queries[qIdx], -1, &stmt, NULL);
+  if (rc != SQLITE_OK) { sqlite3_close(db); return 0; }
+
+  sqlite3_bind_blob(stmt, 1, payload, payload_size, SQLITE_STATIC);
+  sqlite3_bind_text(stmt, 2, json_vecs[0], -1, SQLITE_STATIC);
+  sqlite3_step(stmt);
+  sqlite3_finalize(stmt);
+
+  sqlite3_close(db);
+  return 0;
+}