diff --git a/Makefile b/Makefile
index 9e7c18f..1ebdbed 100644
--- a/Makefile
+++ b/Makefile
@@ -192,6 +192,46 @@ test-loadable-watch:
 test-unit:
 	$(CC) -DSQLITE_CORE -DSQLITE_VEC_TEST tests/test-unit.c sqlite-vec.c vendor/sqlite3.c -I./ -Ivendor -o $(prefix)/test-unit && $(prefix)/test-unit
 
+.PHONY: fuzz-build fuzz-quick fuzz-long
+
+# Build every libFuzzer harness via tests/fuzz/Makefile.
+fuzz-build:
+	$(MAKE) -C tests/fuzz all
+
+# run_fuzz_targets: run each built fuzz binary for $(1) seconds.
+# Binaries are named with '_' (shadow_corrupt) while dictionaries and the
+# committed seed corpora are named with '-' (shadow-corrupt), so the name is
+# translated with tr(1) rather than a bash-only pattern substitution, which
+# is a "Bad substitution" error when the recipe shell is a plain POSIX sh.
+# Every target still runs when an earlier one fails, but the recipe then
+# exits non-zero so a crash found by the fuzzer is not silently discarded.
+define run_fuzz_targets
+@status=0; \
+for target in tests/fuzz/targets/*; do \
+	[ -f "$$target" ] && [ -x "$$target" ] || continue; \
+	name=$$(basename "$$target"); \
+	stem=$$(printf '%s' "$$name" | tr '_' '-'); \
+	echo "=== Fuzzing $$name ==="; \
+	corpus="tests/fuzz/corpus/$$stem"; \
+	mkdir -p "$$corpus"; \
+	dict="tests/fuzz/$$stem.dict"; \
+	dict_flag=""; \
+	[ -f "$$dict" ] && dict_flag="-dict=$$dict"; \
+	"$$target" $$dict_flag \
+		-max_total_time=$(1) "$$corpus" 2>&1 \
+		|| { echo "*** $$name exited with an error" >&2; status=1; }; \
+done; \
+exit $$status
+endef
+
+fuzz-quick: fuzz-build
+	@echo "Running all fuzz targets for 30 seconds each..."
+	$(call run_fuzz_targets,30)
+
+fuzz-long: fuzz-build
+	@echo "Running all fuzz targets for 5 minutes each..."
+	$(call run_fuzz_targets,300)
+
 site-dev:
 	npm --prefix site run dev
 
diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile
index 7bd0e0a..987f765 100644
--- a/tests/fuzz/Makefile
+++ b/tests/fuzz/Makefile
@@ -1,48 +1,69 @@
+# Auto-detect clang with libFuzzer support.
+# Priority: Homebrew LLVM (macOS ARM) → Homebrew LLVM (macOS Intel) →
+#   versioned clang (Linux) → system clang
+# The probe runs once, at parse time, and only when FUZZ_CC was not already
+# provided by the caller; "?=" with $(shell ...) would instead re-run the
+# probe on every expansion of $(FUZZ_CC).
+ifeq ($(origin FUZZ_CC),undefined)
+FUZZ_CC := $(shell \
+  if [ -x /opt/homebrew/opt/llvm/bin/clang ]; then \
+    echo "/opt/homebrew/opt/llvm/bin/clang"; \
+  elif [ -x /usr/local/opt/llvm/bin/clang ]; then \
+    echo "/usr/local/opt/llvm/bin/clang"; \
+  elif command -v clang-18 >/dev/null 2>&1; then \
+    echo "clang-18"; \
+  elif command -v clang-17 >/dev/null 2>&1; then \
+    echo "clang-17"; \
+  elif command -v clang >/dev/null 2>&1; then \
+    echo "clang"; \
+  else \
+    echo "FUZZ_CC_NOT_FOUND"; \
+  fi)
+endif
 
-TARGET_DIR=./targets
+# AddressSanitizer + UndefinedBehaviorSanitizer + libFuzzer.
+# Override FUZZ_SANITIZERS to change (e.g., drop ubsan on Windows).
+FUZZ_SANITIZERS ?= -fsanitize=address,undefined,fuzzer
+
+# On macOS, Homebrew LLVM may need -Wl,-ld_classic to work with the system linker.
+# Evaluated once, and only when FUZZ_LDFLAGS was not provided by the caller.
+ifeq ($(origin FUZZ_LDFLAGS),undefined)
+FUZZ_LDFLAGS := $(shell \
+  if [ "$$(uname -s)" = "Darwin" ]; then \
+    echo "-Wl,-ld_classic"; \
+  fi)
+endif
+
+FUZZ_CFLAGS = $(FUZZ_SANITIZERS) -I ../../ -I ../../vendor -DSQLITE_CORE -g $(FUZZ_LDFLAGS)
+FUZZ_SRCS = ../../vendor/sqlite3.c ../../sqlite-vec.c
+
+TARGET_DIR = ./targets
+
+# Fuzz binaries to build. Binary names use '_' while the harness sources use
+# '-' (e.g. $(TARGET_DIR)/vec0_create is built from vec0-create.c).
+FUZZ_TARGETS = vec0_create exec json numpy \
+               shadow_corrupt vec0_operations scalar_functions \
+               vec0_create_full metadata_columns vec_each
+
+# The first rule below only creates the output directory, so name the default
+# goal explicitly; otherwise a bare "make" here would build no fuzz target.
+.DEFAULT_GOAL := all
 
 $(TARGET_DIR):
 	mkdir -p $@
 
-# ASAN_OPTIONS=detect_leaks=1 ./fuzz_json -detect_leaks=1 '-trace_malloc=[12]' tmp
-$(TARGET_DIR)/json: json.c $(TARGET_DIR)
-	/opt/homebrew/opt/llvm/bin/clang \
-	-fsanitize=address,fuzzer \
-	-I ../../ -I ../../vendor -DSQLITE_CORE -g \
-	../../vendor/sqlite3.c \
-	../../sqlite-vec.c \
-	$< \
-	-o $@
+# fuzz_target_rule: emit the build rule for fuzz binary $(1).
+# $(TARGET_DIR) is an order-only prerequisite: it must exist, but its mtime
+# (which changes whenever a binary is written into it) never forces a rebuild.
+define fuzz_target_rule
+$(TARGET_DIR)/$(1): $(subst _,-,$(1)).c $(FUZZ_SRCS) | $(TARGET_DIR)
+	$$(FUZZ_CC) $$(FUZZ_CFLAGS) $$(FUZZ_SRCS) $$< -o $$@
+endef
+$(foreach t,$(FUZZ_TARGETS),$(eval $(call fuzz_target_rule,$(t))))
 
-
-$(TARGET_DIR)/vec0_create: vec0-create.c ../../sqlite-vec.c $(TARGET_DIR)
-	/opt/homebrew/opt/llvm/bin/clang \
-	-fsanitize=address,fuzzer \
-	-I ../../ -I ../../vendor -DSQLITE_CORE -g \
-	../../vendor/sqlite3.c \
-	../../sqlite-vec.c \
-	$< \
-	-o $@
-
-$(TARGET_DIR)/numpy: numpy.c ../../sqlite-vec.c $(TARGET_DIR)
-	/opt/homebrew/opt/llvm/bin/clang \
-	-fsanitize=address,fuzzer \
-	-I ../../ -I ../../vendor -DSQLITE_CORE -g \
-	../../vendor/sqlite3.c \
-	../../sqlite-vec.c \
-	$< \
-	-o $@
-
-$(TARGET_DIR)/exec: exec.c ../../sqlite-vec.c $(TARGET_DIR)
-	/opt/homebrew/opt/llvm/bin/clang \
-	-fsanitize=address,fuzzer \
-	-I ../../ -I ../../vendor -DSQLITE_CORE -g \
-	../../vendor/sqlite3.c \
-	../../sqlite-vec.c \
-	$< \
-	-o $@
-
-all: $(TARGET_DIR)/json $(TARGET_DIR)/numpy $(TARGET_DIR)/json $(TARGET_DIR)/exec
+all: $(addprefix $(TARGET_DIR)/,$(FUZZ_TARGETS))
 
 clean:
 	rm -rf $(TARGET_DIR)/*
+
+.PHONY: all clean
diff --git a/tests/fuzz/corpus/exec/select1 b/tests/fuzz/corpus/exec/select1
new file mode 100644
index 0000000..7a8f142
--- /dev/null
+++
b/tests/fuzz/corpus/exec/select1 @@ -0,0 +1 @@ +SELECT 1 \ No newline at end of file diff --git a/tests/fuzz/corpus/exec/vec_version b/tests/fuzz/corpus/exec/vec_version new file mode 100644 index 0000000..6f0c7d3 --- /dev/null +++ b/tests/fuzz/corpus/exec/vec_version @@ -0,0 +1 @@ +SELECT vec_version() \ No newline at end of file diff --git a/tests/fuzz/corpus/json/empty b/tests/fuzz/corpus/json/empty new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/tests/fuzz/corpus/json/empty @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/tests/fuzz/corpus/json/valid_2d b/tests/fuzz/corpus/json/valid_2d new file mode 100644 index 0000000..0d64024 --- /dev/null +++ b/tests/fuzz/corpus/json/valid_2d @@ -0,0 +1 @@ +[0.5, -0.5] \ No newline at end of file diff --git a/tests/fuzz/corpus/json/valid_4d b/tests/fuzz/corpus/json/valid_4d new file mode 100644 index 0000000..3794bf5 --- /dev/null +++ b/tests/fuzz/corpus/json/valid_4d @@ -0,0 +1 @@ +[1.0, 2.0, 3.0, 4.0] \ No newline at end of file diff --git a/tests/fuzz/corpus/shadow-corrupt/target0 b/tests/fuzz/corpus/shadow-corrupt/target0 new file mode 100644 index 0000000..969007c Binary files /dev/null and b/tests/fuzz/corpus/shadow-corrupt/target0 differ diff --git a/tests/fuzz/corpus/shadow-corrupt/target1 b/tests/fuzz/corpus/shadow-corrupt/target1 new file mode 100644 index 0000000..250c80a Binary files /dev/null and b/tests/fuzz/corpus/shadow-corrupt/target1 differ diff --git a/tests/fuzz/corpus/shadow-corrupt/target2 b/tests/fuzz/corpus/shadow-corrupt/target2 new file mode 100644 index 0000000..02b1f3d Binary files /dev/null and b/tests/fuzz/corpus/shadow-corrupt/target2 differ diff --git a/tests/fuzz/corpus/shadow-corrupt/target3 b/tests/fuzz/corpus/shadow-corrupt/target3 new file mode 100644 index 0000000..9d1e225 Binary files /dev/null and b/tests/fuzz/corpus/shadow-corrupt/target3 differ diff --git a/tests/fuzz/corpus/shadow-corrupt/target4 b/tests/fuzz/corpus/shadow-corrupt/target4 new file mode 
100644 index 0000000..593a70c Binary files /dev/null and b/tests/fuzz/corpus/shadow-corrupt/target4 differ diff --git a/tests/fuzz/corpus/shadow-corrupt/target5 b/tests/fuzz/corpus/shadow-corrupt/target5 new file mode 100644 index 0000000..e34b3a4 Binary files /dev/null and b/tests/fuzz/corpus/shadow-corrupt/target5 differ diff --git a/tests/fuzz/corpus/vec0-operations/ins_del_ins b/tests/fuzz/corpus/vec0-operations/ins_del_ins new file mode 100644 index 0000000..9061010 Binary files /dev/null and b/tests/fuzz/corpus/vec0-operations/ins_del_ins differ diff --git a/tests/fuzz/corpus/vec0-operations/insert5 b/tests/fuzz/corpus/vec0-operations/insert5 new file mode 100644 index 0000000..779ead4 Binary files /dev/null and b/tests/fuzz/corpus/vec0-operations/insert5 differ diff --git a/tests/fuzz/metadata-columns.c b/tests/fuzz/metadata-columns.c new file mode 100644 index 0000000..d5d7492 --- /dev/null +++ b/tests/fuzz/metadata-columns.c @@ -0,0 +1,115 @@ +#include +#include +#include +#include +#include +#include "sqlite-vec.h" +#include "sqlite3.h" +#include + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + if (size < 8) return 0; + + int rc; + sqlite3 *db; + + rc = sqlite3_open(":memory:", &db); + assert(rc == SQLITE_OK); + rc = sqlite3_vec_init(db, NULL, NULL); + assert(rc == SQLITE_OK); + + rc = sqlite3_exec(db, + "CREATE VIRTUAL TABLE v USING vec0(" + " emb float[4]," + " flag boolean metadata," + " count integer metadata," + " score float metadata," + " label text metadata," + " aux_data text auxiliary" + ")", NULL, NULL, NULL); + if (rc != SQLITE_OK) { sqlite3_close(db); return 0; } + + // Prepare statements for insert and query + sqlite3_stmt *stmtInsert = NULL; + sqlite3_stmt *stmtKnn = NULL; + sqlite3_stmt *stmtKnnFilter = NULL; + sqlite3_stmt *stmtDelete = NULL; + + sqlite3_prepare_v2(db, + "INSERT INTO v(rowid, emb, flag, count, score, label, aux_data) " + "VALUES (?, ?, ?, ?, ?, ?, ?)", -1, &stmtInsert, NULL); + sqlite3_prepare_v2(db, + 
"SELECT rowid, distance FROM v WHERE emb MATCH ? LIMIT 3", + -1, &stmtKnn, NULL); + sqlite3_prepare_v2(db, + "SELECT rowid, distance FROM v WHERE emb MATCH ? AND flag = 1 LIMIT 3", + -1, &stmtKnnFilter, NULL); + sqlite3_prepare_v2(db, + "DELETE FROM v WHERE rowid = ?", -1, &stmtDelete, NULL); + + if (!stmtInsert || !stmtKnn || !stmtKnnFilter || !stmtDelete) goto cleanup; + + size_t i = 0; + while (i + 6 <= size) { + uint8_t op = data[i++] % 4; + uint8_t rowid_byte = data[i++]; + int64_t rowid = (int64_t)(rowid_byte % 50) + 1; + + switch (op) { + case 0: { + // INSERT with fuzz-derived vector and metadata + float vec[4]; + for (int j = 0; j < 4 && i < size; j++, i++) { + vec[j] = (float)((int8_t)data[i]) / 10.0f; + } + int flag_val = (i < size) ? data[i++] % 2 : 0; + int count_val = (i < size) ? (int)((int8_t)data[i++]) : 0; + float score_val = (i < size) ? (float)((int8_t)data[i++]) / 10.0f : 0.0f; + + sqlite3_reset(stmtInsert); + sqlite3_bind_int64(stmtInsert, 1, rowid); + sqlite3_bind_blob(stmtInsert, 2, vec, sizeof(vec), SQLITE_TRANSIENT); + sqlite3_bind_int(stmtInsert, 3, flag_val); + sqlite3_bind_int(stmtInsert, 4, count_val); + sqlite3_bind_double(stmtInsert, 5, (double)score_val); + sqlite3_bind_text(stmtInsert, 6, "label", -1, SQLITE_STATIC); + sqlite3_bind_text(stmtInsert, 7, "aux", -1, SQLITE_STATIC); + sqlite3_step(stmtInsert); + break; + } + case 1: { + // KNN query (no filter) + float qvec[4] = {1.0f, 0.0f, 0.0f, 0.0f}; + sqlite3_reset(stmtKnn); + sqlite3_bind_blob(stmtKnn, 1, qvec, sizeof(qvec), SQLITE_STATIC); + while (sqlite3_step(stmtKnn) == SQLITE_ROW) {} + break; + } + case 2: { + // KNN query WITH metadata filter + float qvec[4] = {0.0f, 1.0f, 0.0f, 0.0f}; + sqlite3_reset(stmtKnnFilter); + sqlite3_bind_blob(stmtKnnFilter, 1, qvec, sizeof(qvec), SQLITE_STATIC); + while (sqlite3_step(stmtKnnFilter) == SQLITE_ROW) {} + break; + } + case 3: { + // DELETE + sqlite3_reset(stmtDelete); + sqlite3_bind_int64(stmtDelete, 1, rowid); + 
sqlite3_step(stmtDelete); + break; + } + } + } + + sqlite3_exec(db, "SELECT * FROM v", NULL, NULL, NULL); + +cleanup: + sqlite3_finalize(stmtInsert); + sqlite3_finalize(stmtKnn); + sqlite3_finalize(stmtKnnFilter); + sqlite3_finalize(stmtDelete); + sqlite3_close(db); + return 0; +} diff --git a/tests/fuzz/numpy.c b/tests/fuzz/numpy.c index a2c8273..9e2900b 100644 --- a/tests/fuzz/numpy.c +++ b/tests/fuzz/numpy.c @@ -8,6 +8,9 @@ #include "sqlite3.h" #include +extern int sqlite3_vec_numpy_init(sqlite3 *db, char **pzErrMsg, + const sqlite3_api_routines *pApi); + int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { int rc = SQLITE_OK; sqlite3 *db; @@ -17,25 +20,17 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { assert(rc == SQLITE_OK); rc = sqlite3_vec_init(db, NULL, NULL); assert(rc == SQLITE_OK); - + rc = sqlite3_vec_numpy_init(db, NULL, NULL); + assert(rc == SQLITE_OK); rc = sqlite3_prepare_v2(db, "select * from vec_npy_each(?)", -1, &stmt, NULL); assert(rc == SQLITE_OK); sqlite3_bind_blob(stmt, 1, data, size, SQLITE_STATIC); rc = sqlite3_step(stmt); - if(rc != SQLITE_DONE || rc != SQLITE_ROW) { - sqlite3_finalize(stmt); - sqlite3_close(db); - return -1; + while (rc == SQLITE_ROW) { + rc = sqlite3_step(stmt); } - while(1) { - if(rc == SQLITE_DONE) break; - if(rc == SQLITE_ROW) continue; - sqlite3_finalize(stmt); - sqlite3_close(db); - return 1; - } sqlite3_finalize(stmt); sqlite3_close(db); return 0; diff --git a/tests/fuzz/scalar-functions.c b/tests/fuzz/scalar-functions.c new file mode 100644 index 0000000..2874195 --- /dev/null +++ b/tests/fuzz/scalar-functions.c @@ -0,0 +1,81 @@ +#include +#include +#include +#include +#include +#include "sqlite-vec.h" +#include "sqlite3.h" +#include + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + if (size < 2) return 0; + + int rc; + sqlite3 *db; + sqlite3_stmt *stmt = NULL; + + rc = sqlite3_open(":memory:", &db); + assert(rc == SQLITE_OK); + rc = sqlite3_vec_init(db, NULL, NULL); 
+ assert(rc == SQLITE_OK); + + static const char *queries[] = { + "SELECT vec_f32(cast(? as text))", /* 0: JSON text -> f32 */ + "SELECT vec_f32(?)", /* 1: blob -> f32 */ + "SELECT vec_int8(?)", /* 2: blob -> int8 */ + "SELECT vec_bit(?)", /* 3: blob -> bit */ + "SELECT vec_length(?)", /* 4: vector length */ + "SELECT vec_type(?)", /* 5: vector type string */ + "SELECT vec_to_json(?)", /* 6: vector -> JSON */ + "SELECT vec_normalize(?)", /* 7: normalize */ + "SELECT vec_quantize_binary(?)", /* 8: quantize to binary */ + "SELECT vec_quantize_int8(?, 'unit')", /* 9: quantize to int8 */ + "SELECT vec_distance_l2(?, ?)", /* 10: L2 distance */ + "SELECT vec_distance_cosine(?, ?)", /* 11: cosine distance */ + "SELECT vec_distance_l1(?, ?)", /* 12: L1 distance */ + "SELECT vec_distance_hamming(?, ?)", /* 13: hamming distance */ + "SELECT vec_add(?, ?)", /* 14: vector add */ + "SELECT vec_sub(?, ?)", /* 15: vector subtract */ + "SELECT vec_slice(?, 0, ?)", /* 16: vector slice */ + }; + static const int nQueries = sizeof(queries) / sizeof(queries[0]); + + int qIdx = data[0] % nQueries; + const uint8_t *payload = data + 1; + int payload_size = (int)(size - 1); + + rc = sqlite3_prepare_v2(db, queries[qIdx], -1, &stmt, NULL); + if (rc != SQLITE_OK) { + sqlite3_close(db); + return 0; + } + + int nParams = sqlite3_bind_parameter_count(stmt); + + // Bind param 1: fuzz payload as blob + sqlite3_bind_blob(stmt, 1, payload, payload_size, SQLITE_STATIC); + + if (nParams >= 2) { + if (qIdx == 16) { + // vec_slice 3rd param is integer (end index) + int end_idx = (payload_size > 0) ? (payload[0] % 64) : 0; + sqlite3_bind_int(stmt, 2, end_idx); + } else { + // For 2-param functions (distance, add, sub): split payload in half + int half = payload_size / 2; + sqlite3_bind_blob(stmt, 2, payload + half, + payload_size - half, SQLITE_STATIC); + } + } + + if (nParams >= 3) { + // vec_slice: param 3 is the end index + int end_idx = (payload_size > 1) ? 
(payload[1] % 64) : 0; + sqlite3_bind_int(stmt, 3, end_idx); + } + + sqlite3_step(stmt); + sqlite3_finalize(stmt); + sqlite3_close(db); + return 0; +} diff --git a/tests/fuzz/scalar-functions.dict b/tests/fuzz/scalar-functions.dict new file mode 100644 index 0000000..ae52618 --- /dev/null +++ b/tests/fuzz/scalar-functions.dict @@ -0,0 +1,8 @@ +json_vec1="[1.0, 2.0, 3.0, 4.0]" +json_vec2="[0.5, -0.5]" +json_empty="[]" +json_nan="[NaN]" +json_inf="[Infinity]" +json_large="[1e38, -1e38]" +unit="unit" +null="null" diff --git a/tests/fuzz/shadow-corrupt.c b/tests/fuzz/shadow-corrupt.c new file mode 100644 index 0000000..d5aa68f --- /dev/null +++ b/tests/fuzz/shadow-corrupt.c @@ -0,0 +1,114 @@ +#include +#include +#include +#include +#include +#include "sqlite-vec.h" +#include "sqlite3.h" +#include + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + if (size < 2) return 0; + + int rc; + sqlite3 *db; + + rc = sqlite3_open(":memory:", &db); + assert(rc == SQLITE_OK); + rc = sqlite3_vec_init(db, NULL, NULL); + assert(rc == SQLITE_OK); + + // Build a valid table with 3 vectors (float[4] = 16 bytes each) + // [1,0,0,0], [0,-1,0,1], [1,1,0,1] as little-endian float32 hex + rc = sqlite3_exec(db, + "CREATE VIRTUAL TABLE v USING vec0(emb float[4]);" + "INSERT INTO v(rowid, emb) VALUES (1, X'0000803f000000000000000000000000');" + "INSERT INTO v(rowid, emb) VALUES (2, X'00000000000080bf000000000000803f');" + "INSERT INTO v(rowid, emb) VALUES (3, X'0000803f0000803f000000000000803f');", + NULL, NULL, NULL); + if (rc != SQLITE_OK) { + sqlite3_close(db); + return 0; + } + + // Use first byte to select corruption strategy + int target = data[0] % 6; + const uint8_t *payload = data + 1; + int payload_size = (int)(size - 1); + + sqlite3_stmt *stmt = NULL; + + switch (target) { + case 0: { + // Corrupt _chunks validity blob with fuzz data + rc = sqlite3_prepare_v2(db, + "UPDATE v_chunks SET validity = ? 
WHERE rowid = 1", -1, &stmt, NULL); + if (rc == SQLITE_OK) { + sqlite3_bind_blob(stmt, 1, payload, payload_size, SQLITE_STATIC); + sqlite3_step(stmt); + sqlite3_finalize(stmt); + } + break; + } + case 1: { + // Corrupt _chunks rowids blob with fuzz data + rc = sqlite3_prepare_v2(db, + "UPDATE v_chunks SET rowids = ? WHERE rowid = 1", -1, &stmt, NULL); + if (rc == SQLITE_OK) { + sqlite3_bind_blob(stmt, 1, payload, payload_size, SQLITE_STATIC); + sqlite3_step(stmt); + sqlite3_finalize(stmt); + } + break; + } + case 2: { + // Corrupt _vector_chunks00 vectors blob with fuzz data + rc = sqlite3_prepare_v2(db, + "UPDATE v_vector_chunks00 SET vectors = ? WHERE rowid = 1", -1, &stmt, NULL); + if (rc == SQLITE_OK) { + sqlite3_bind_blob(stmt, 1, payload, payload_size, SQLITE_STATIC); + sqlite3_step(stmt); + sqlite3_finalize(stmt); + } + break; + } + case 3: { + // Set validity to NULL (violates NOT NULL but shadow tables are writable) + sqlite3_exec(db, + "UPDATE v_chunks SET validity = NULL WHERE rowid = 1", + NULL, NULL, NULL); + break; + } + case 4: { + // Set rowids to NULL + sqlite3_exec(db, + "UPDATE v_chunks SET rowids = NULL WHERE rowid = 1", + NULL, NULL, NULL); + break; + } + case 5: { + // Delete shadow table rows entirely (orphan the virtual table data) + sqlite3_exec(db, + "DELETE FROM v_vector_chunks00 WHERE rowid = 1", + NULL, NULL, NULL); + break; + } + } + + // Exercise all read paths — NONE should crash + sqlite3_exec(db, "SELECT * FROM v", NULL, NULL, NULL); + sqlite3_exec(db, "SELECT * FROM v WHERE rowid = 1", NULL, NULL, NULL); + sqlite3_exec(db, "SELECT * FROM v WHERE rowid = 2", NULL, NULL, NULL); + sqlite3_exec(db, + "SELECT rowid, distance FROM v " + "WHERE emb MATCH X'0000803f000000000000000000000000' LIMIT 3", + NULL, NULL, NULL); + sqlite3_exec(db, "DELETE FROM v WHERE rowid = 2", NULL, NULL, NULL); + sqlite3_exec(db, + "INSERT INTO v(rowid, emb) VALUES (4, X'0000803f000000000000000000000000')", + NULL, NULL, NULL); + sqlite3_exec(db, "DROP TABLE 
v", NULL, NULL, NULL); + + sqlite3_close(db); + return 0; +} diff --git a/tests/fuzz/targets/.gitignore b/tests/fuzz/targets/.gitignore new file mode 100644 index 0000000..d6b7ef3 --- /dev/null +++ b/tests/fuzz/targets/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/tests/fuzz/vec-each.c b/tests/fuzz/vec-each.c new file mode 100644 index 0000000..6ec3114 --- /dev/null +++ b/tests/fuzz/vec-each.c @@ -0,0 +1,32 @@ +#include +#include +#include +#include +#include +#include "sqlite-vec.h" +#include "sqlite3.h" +#include + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + int rc; + sqlite3 *db; + sqlite3_stmt *stmt = NULL; + + rc = sqlite3_open(":memory:", &db); + assert(rc == SQLITE_OK); + rc = sqlite3_vec_init(db, NULL, NULL); + assert(rc == SQLITE_OK); + + rc = sqlite3_prepare_v2(db, + "SELECT * FROM vec_each(?)", -1, &stmt, NULL); + assert(rc == SQLITE_OK); + + sqlite3_bind_blob(stmt, 1, data, (int)size, SQLITE_STATIC); + while (sqlite3_step(stmt) == SQLITE_ROW) { + // Consume all rows — just exercise the iteration path + } + + sqlite3_finalize(stmt); + sqlite3_close(db); + return 0; +} diff --git a/tests/fuzz/vec0-create-full.c b/tests/fuzz/vec0-create-full.c new file mode 100644 index 0000000..d385225 --- /dev/null +++ b/tests/fuzz/vec0-create-full.c @@ -0,0 +1,43 @@ +#include +#include +#include +#include +#include +#include "sqlite-vec.h" +#include "sqlite3.h" +#include + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + int rc; + sqlite3 *db; + + rc = sqlite3_open(":memory:", &db); + assert(rc == SQLITE_OK); + rc = sqlite3_vec_init(db, NULL, NULL); + assert(rc == SQLITE_OK); + + // Create table with fuzz input as column definitions + sqlite3_str *s = sqlite3_str_new(NULL); + assert(s); + sqlite3_str_appendall(s, "CREATE VIRTUAL TABLE v USING vec0("); + sqlite3_str_appendf(s, "%.*s", (int)size, data); + sqlite3_str_appendall(s, ")"); + char *zSql = sqlite3_str_finish(s); + assert(zSql); + + rc = sqlite3_exec(db, zSql, 
NULL, NULL, NULL); + sqlite3_free(zSql); + + if (rc == SQLITE_OK) { + // Table was created — try to use it. These may fail (errors are fine), + // but must never crash. + sqlite3_exec(db, "INSERT INTO v(rowid) VALUES (1)", NULL, NULL, NULL); + sqlite3_exec(db, "SELECT * FROM v", NULL, NULL, NULL); + sqlite3_exec(db, "SELECT * FROM v WHERE rowid = 1", NULL, NULL, NULL); + sqlite3_exec(db, "DELETE FROM v WHERE rowid = 1", NULL, NULL, NULL); + sqlite3_exec(db, "DROP TABLE v", NULL, NULL, NULL); + } + + sqlite3_close(db); + return 0; +} diff --git a/tests/fuzz/vec0-operations.c b/tests/fuzz/vec0-operations.c new file mode 100644 index 0000000..f99537f --- /dev/null +++ b/tests/fuzz/vec0-operations.c @@ -0,0 +1,95 @@ +#include +#include +#include +#include +#include +#include "sqlite-vec.h" +#include "sqlite3.h" +#include + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + if (size < 6) return 0; + + int rc; + sqlite3 *db; + sqlite3_stmt *stmtInsert = NULL; + sqlite3_stmt *stmtDelete = NULL; + sqlite3_stmt *stmtKnn = NULL; + sqlite3_stmt *stmtScan = NULL; + + rc = sqlite3_open(":memory:", &db); + assert(rc == SQLITE_OK); + rc = sqlite3_vec_init(db, NULL, NULL); + assert(rc == SQLITE_OK); + + rc = sqlite3_exec(db, + "CREATE VIRTUAL TABLE v USING vec0(emb float[4])", + NULL, NULL, NULL); + if (rc != SQLITE_OK) { sqlite3_close(db); return 0; } + + sqlite3_prepare_v2(db, + "INSERT INTO v(rowid, emb) VALUES (?, ?)", -1, &stmtInsert, NULL); + sqlite3_prepare_v2(db, + "DELETE FROM v WHERE rowid = ?", -1, &stmtDelete, NULL); + sqlite3_prepare_v2(db, + "SELECT rowid, distance FROM v WHERE emb MATCH ? 
LIMIT 3", + -1, &stmtKnn, NULL); + sqlite3_prepare_v2(db, + "SELECT rowid FROM v", -1, &stmtScan, NULL); + + if (!stmtInsert || !stmtDelete || !stmtKnn || !stmtScan) goto cleanup; + + size_t i = 0; + while (i + 2 <= size) { + uint8_t op = data[i++] % 4; + uint8_t rowid_byte = data[i++]; + int64_t rowid = (int64_t)(rowid_byte % 32) + 1; + + switch (op) { + case 0: { + // INSERT: consume 16 bytes for 4 floats, or use what's left + float vec[4] = {0.0f, 0.0f, 0.0f, 0.0f}; + for (int j = 0; j < 4 && i < size; j++, i++) { + vec[j] = (float)((int8_t)data[i]) / 10.0f; + } + sqlite3_reset(stmtInsert); + sqlite3_bind_int64(stmtInsert, 1, rowid); + sqlite3_bind_blob(stmtInsert, 2, vec, sizeof(vec), SQLITE_TRANSIENT); + sqlite3_step(stmtInsert); + break; + } + case 1: { + // DELETE + sqlite3_reset(stmtDelete); + sqlite3_bind_int64(stmtDelete, 1, rowid); + sqlite3_step(stmtDelete); + break; + } + case 2: { + // KNN query with a fixed query vector + float qvec[4] = {1.0f, 0.0f, 0.0f, 0.0f}; + sqlite3_reset(stmtKnn); + sqlite3_bind_blob(stmtKnn, 1, qvec, sizeof(qvec), SQLITE_STATIC); + while (sqlite3_step(stmtKnn) == SQLITE_ROW) {} + break; + } + case 3: { + // Full scan + sqlite3_reset(stmtScan); + while (sqlite3_step(stmtScan) == SQLITE_ROW) {} + break; + } + } + } + + // Final operations — must not crash regardless of prior state + sqlite3_exec(db, "SELECT * FROM v", NULL, NULL, NULL); + +cleanup: + sqlite3_finalize(stmtInsert); + sqlite3_finalize(stmtDelete); + sqlite3_finalize(stmtKnn); + sqlite3_finalize(stmtScan); + sqlite3_close(db); + return 0; +}