mirror of
https://github.com/asg017/sqlite-vec.git
synced 2026-04-25 08:46:49 +02:00
Add comprehensive fuzz testing infrastructure with 6 new targets
- Fix numpy.c: tautology bug (|| → &&), infinite loop, and missing sqlite3_vec_numpy_init call
- Replace tests/fuzz/Makefile: auto-detect clang, add UBSAN, macOS ld_classic workaround, generic build rules for all 10 targets
- Add 6 new fuzz targets: shadow-corrupt (corrupted shadow tables), vec0-operations (INSERT/DELETE/query sequences), scalar-functions (all 18 SQL scalar functions), vec0-create-full (CREATE + lifecycle), metadata-columns (metadata/auxiliary columns), vec-each (vec_each TVF)
- Add seed corpora for shadow-corrupt, vec0-operations, exec, and json
- Add fuzz-build/fuzz-quick/fuzz-long targets to root Makefile

All 10 targets verified building and running on macOS ARM (Apple Silicon).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
9a6bf96b92
commit
a61d45183b
24 changed files with 600 additions and 47 deletions
33
Makefile
33
Makefile
|
|
@ -192,6 +192,39 @@ test-loadable-watch:
|
||||||
test-unit:
|
test-unit:
|
||||||
$(CC) -DSQLITE_CORE -DSQLITE_VEC_TEST tests/test-unit.c sqlite-vec.c vendor/sqlite3.c -I./ -Ivendor -o $(prefix)/test-unit && $(prefix)/test-unit
|
$(CC) -DSQLITE_CORE -DSQLITE_VEC_TEST tests/test-unit.c sqlite-vec.c vendor/sqlite3.c -I./ -Ivendor -o $(prefix)/test-unit && $(prefix)/test-unit
|
||||||
|
|
||||||
|
# Build every libFuzzer target by delegating to tests/fuzz/Makefile (goal: all).
# Declared phony because the name is a command, not a file this rule creates;
# listing a target in more than one .PHONY line is harmless if it is already
# declared elsewhere in this Makefile.
.PHONY: fuzz-build
fuzz-build:
	$(MAKE) -C tests/fuzz all
|
||||||
|
|
||||||
|
# Smoke-run every built fuzz target for 30 seconds each.
#
# Binaries in tests/fuzz/targets use '_' in their names while the fuzz sources,
# dictionaries and committed seed corpora use '-' (e.g. target shadow_corrupt,
# corpus tests/fuzz/corpus/shadow-corrupt). The hyphenated name is therefore
# derived once and used for BOTH the corpus directory and the dictionary, so
# the committed seeds are actually loaded.
#
# The conversion uses tr(1) rather than the bash-only $${name//_/-}
# substitution: Make runs recipes with /bin/sh unless SHELL is overridden, and
# a POSIX sh such as dash rejects that syntax.
#
# NOTE(review): "|| true" keeps the loop going after a target reports a crash,
# which also means this goal succeeds even when a fuzzer found a bug; check
# the output / crash artifacts rather than the exit status.
.PHONY: fuzz-quick
fuzz-quick: fuzz-build
	@echo "Running all fuzz targets for 30 seconds each..."
	@for target in tests/fuzz/targets/*; do \
		[ -f "$$target" ] && [ -x "$$target" ] || continue; \
		name=$$(basename "$$target"); \
		src_name=$$(printf '%s' "$$name" | tr '_' '-'); \
		echo "=== Fuzzing $$name ==="; \
		corpus="tests/fuzz/corpus/$$src_name"; \
		mkdir -p "$$corpus"; \
		dict="tests/fuzz/$$src_name.dict"; \
		dict_flag=""; \
		[ -f "$$dict" ] && dict_flag="-dict=$$dict"; \
		"$$target" $$dict_flag \
			-max_total_time=30 "$$corpus" 2>&1 || true; \
	done
|
||||||
|
|
||||||
|
# Run every built fuzz target for 5 minutes (300 seconds) each.
#
# Binaries in tests/fuzz/targets use '_' in their names while the fuzz sources,
# dictionaries and committed seed corpora use '-' (e.g. target vec0_operations,
# corpus tests/fuzz/corpus/vec0-operations). The hyphenated name is therefore
# derived once and used for BOTH the corpus directory and the dictionary, so
# the committed seeds are actually loaded.
#
# The conversion uses tr(1) rather than the bash-only $${name//_/-}
# substitution: Make runs recipes with /bin/sh unless SHELL is overridden, and
# a POSIX sh such as dash rejects that syntax.
#
# NOTE(review): "|| true" keeps the loop going after a target reports a crash,
# which also means this goal succeeds even when a fuzzer found a bug; check
# the output / crash artifacts rather than the exit status.
.PHONY: fuzz-long
fuzz-long: fuzz-build
	@echo "Running all fuzz targets for 5 minutes each..."
	@for target in tests/fuzz/targets/*; do \
		[ -f "$$target" ] && [ -x "$$target" ] || continue; \
		name=$$(basename "$$target"); \
		src_name=$$(printf '%s' "$$name" | tr '_' '-'); \
		echo "=== Fuzzing $$name ==="; \
		corpus="tests/fuzz/corpus/$$src_name"; \
		mkdir -p "$$corpus"; \
		dict="tests/fuzz/$$src_name.dict"; \
		dict_flag=""; \
		[ -f "$$dict" ] && dict_flag="-dict=$$dict"; \
		"$$target" $$dict_flag \
			-max_total_time=300 "$$corpus" 2>&1 || true; \
	done
|
||||||
|
|
||||||
site-dev:
|
site-dev:
|
||||||
npm --prefix site run dev
|
npm --prefix site run dev
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,48 +1,78 @@
|
||||||
|
# Auto-detect a clang binary to build the libFuzzer targets with.
# Priority: Homebrew LLVM (macOS ARM) → Homebrew LLVM (macOS Intel) →
# versioned clang (Linux) → system clang
#
# The probe only checks that a candidate exists (is executable / is on PATH);
# it does not verify that the compiler actually provides libFuzzer. When
# nothing is found the placeholder FUZZ_CC_NOT_FOUND is used, so the failure
# shows up as a "command not found" when a target is compiled.
#
# Override with `make FUZZ_CC=/path/to/clang` or through the environment.
# The value is resolved once with := (the $(origin) guard keeps the same
# "only if not already set" semantics as ?=) so the shell probe is not
# re-run every time $(FUZZ_CC) is expanded in a recipe.
ifeq ($(origin FUZZ_CC),undefined)
FUZZ_CC := $(shell \
  if [ -x /opt/homebrew/opt/llvm/bin/clang ]; then \
    echo "/opt/homebrew/opt/llvm/bin/clang"; \
  elif [ -x /usr/local/opt/llvm/bin/clang ]; then \
    echo "/usr/local/opt/llvm/bin/clang"; \
  elif command -v clang-18 >/dev/null 2>&1; then \
    echo "clang-18"; \
  elif command -v clang-17 >/dev/null 2>&1; then \
    echo "clang-17"; \
  elif command -v clang >/dev/null 2>&1; then \
    echo "clang"; \
  else \
    echo "FUZZ_CC_NOT_FOUND"; \
  fi)
endif
|
||||||
|
|
||||||
TARGET_DIR=./targets
|
# AddressSanitizer + UndefinedBehaviorSanitizer + libFuzzer.
# Override FUZZ_SANITIZERS to change (e.g., drop ubsan on Windows).
FUZZ_SANITIZERS ?= -fsanitize=address,undefined,fuzzer

# On macOS, Homebrew LLVM may need -Wl,-ld_classic to work with the system linker.
# The flag is added on every Darwin host and left empty elsewhere; override
# FUZZ_LDFLAGS (possibly with an empty value) if your toolchain rejects it.
# Resolved once with := behind an $(origin) guard (same "only if unset"
# semantics as ?=) so `uname` is not re-run on each $(FUZZ_CFLAGS) expansion.
ifeq ($(origin FUZZ_LDFLAGS),undefined)
FUZZ_LDFLAGS := $(shell \
  if [ "$$(uname -s)" = "Darwin" ]; then \
    echo "-Wl,-ld_classic"; \
  fi)
endif

# Flags for the single compile-and-link step of each target; kept recursive (=)
# so later or command-line overrides of the variables above are honoured.
FUZZ_CFLAGS = $(FUZZ_SANITIZERS) -I ../../ -I ../../vendor -DSQLITE_CORE -g $(FUZZ_LDFLAGS)
# Library sources compiled into every fuzz binary.
FUZZ_SRCS = ../../vendor/sqlite3.c ../../sqlite-vec.c

# Output directory for the fuzz binaries.
TARGET_DIR = ./targets
|
||||||
|
|
||||||
$(TARGET_DIR):
|
$(TARGET_DIR):
|
||||||
mkdir -p $@
|
mkdir -p $@
|
||||||
|
|
||||||
# ASAN_OPTIONS=detect_leaks=1 ./fuzz_json -detect_leaks=1 '-trace_malloc=[12]' tmp
|
# Existing targets (filename uses -, Makefile target uses _)
|
||||||
$(TARGET_DIR)/json: json.c $(TARGET_DIR)
|
$(TARGET_DIR)/vec0_create: vec0-create.c $(FUZZ_SRCS) | $(TARGET_DIR)
|
||||||
/opt/homebrew/opt/llvm/bin/clang \
|
$(FUZZ_CC) $(FUZZ_CFLAGS) $(FUZZ_SRCS) $< -o $@
|
||||||
-fsanitize=address,fuzzer \
|
|
||||||
-I ../../ -I ../../vendor -DSQLITE_CORE -g \
|
|
||||||
../../vendor/sqlite3.c \
|
|
||||||
../../sqlite-vec.c \
|
|
||||||
$< \
|
|
||||||
-o $@
|
|
||||||
|
|
||||||
|
$(TARGET_DIR)/exec: exec.c $(FUZZ_SRCS) | $(TARGET_DIR)
|
||||||
|
$(FUZZ_CC) $(FUZZ_CFLAGS) $(FUZZ_SRCS) $< -o $@
|
||||||
|
|
||||||
$(TARGET_DIR)/vec0_create: vec0-create.c ../../sqlite-vec.c $(TARGET_DIR)
|
$(TARGET_DIR)/json: json.c $(FUZZ_SRCS) | $(TARGET_DIR)
|
||||||
/opt/homebrew/opt/llvm/bin/clang \
|
$(FUZZ_CC) $(FUZZ_CFLAGS) $(FUZZ_SRCS) $< -o $@
|
||||||
-fsanitize=address,fuzzer \
|
|
||||||
-I ../../ -I ../../vendor -DSQLITE_CORE -g \
|
|
||||||
../../vendor/sqlite3.c \
|
|
||||||
../../sqlite-vec.c \
|
|
||||||
$< \
|
|
||||||
-o $@
|
|
||||||
|
|
||||||
$(TARGET_DIR)/numpy: numpy.c ../../sqlite-vec.c $(TARGET_DIR)
|
$(TARGET_DIR)/numpy: numpy.c $(FUZZ_SRCS) | $(TARGET_DIR)
|
||||||
/opt/homebrew/opt/llvm/bin/clang \
|
$(FUZZ_CC) $(FUZZ_CFLAGS) $(FUZZ_SRCS) $< -o $@
|
||||||
-fsanitize=address,fuzzer \
|
|
||||||
-I ../../ -I ../../vendor -DSQLITE_CORE -g \
|
|
||||||
../../vendor/sqlite3.c \
|
|
||||||
../../sqlite-vec.c \
|
|
||||||
$< \
|
|
||||||
-o $@
|
|
||||||
|
|
||||||
$(TARGET_DIR)/exec: exec.c ../../sqlite-vec.c $(TARGET_DIR)
|
# New targets
|
||||||
/opt/homebrew/opt/llvm/bin/clang \
|
$(TARGET_DIR)/shadow_corrupt: shadow-corrupt.c $(FUZZ_SRCS) | $(TARGET_DIR)
|
||||||
-fsanitize=address,fuzzer \
|
$(FUZZ_CC) $(FUZZ_CFLAGS) $(FUZZ_SRCS) $< -o $@
|
||||||
-I ../../ -I ../../vendor -DSQLITE_CORE -g \
|
|
||||||
../../vendor/sqlite3.c \
|
|
||||||
../../sqlite-vec.c \
|
|
||||||
$< \
|
|
||||||
-o $@
|
|
||||||
|
|
||||||
all: $(TARGET_DIR)/json $(TARGET_DIR)/numpy $(TARGET_DIR)/json $(TARGET_DIR)/exec
|
$(TARGET_DIR)/vec0_operations: vec0-operations.c $(FUZZ_SRCS) | $(TARGET_DIR)
|
||||||
|
$(FUZZ_CC) $(FUZZ_CFLAGS) $(FUZZ_SRCS) $< -o $@
|
||||||
|
|
||||||
|
$(TARGET_DIR)/scalar_functions: scalar-functions.c $(FUZZ_SRCS) | $(TARGET_DIR)
|
||||||
|
$(FUZZ_CC) $(FUZZ_CFLAGS) $(FUZZ_SRCS) $< -o $@
|
||||||
|
|
||||||
|
$(TARGET_DIR)/vec0_create_full: vec0-create-full.c $(FUZZ_SRCS) | $(TARGET_DIR)
|
||||||
|
$(FUZZ_CC) $(FUZZ_CFLAGS) $(FUZZ_SRCS) $< -o $@
|
||||||
|
|
||||||
|
$(TARGET_DIR)/metadata_columns: metadata-columns.c $(FUZZ_SRCS) | $(TARGET_DIR)
|
||||||
|
$(FUZZ_CC) $(FUZZ_CFLAGS) $(FUZZ_SRCS) $< -o $@
|
||||||
|
|
||||||
|
$(TARGET_DIR)/vec_each: vec-each.c $(FUZZ_SRCS) | $(TARGET_DIR)
|
||||||
|
$(FUZZ_CC) $(FUZZ_CFLAGS) $(FUZZ_SRCS) $< -o $@
|
||||||
|
|
||||||
|
FUZZ_TARGETS = vec0_create exec json numpy \
|
||||||
|
shadow_corrupt vec0_operations scalar_functions \
|
||||||
|
vec0_create_full metadata_columns vec_each
|
||||||
|
|
||||||
|
all: $(addprefix $(TARGET_DIR)/,$(FUZZ_TARGETS))
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf $(TARGET_DIR)/*
|
rm -rf $(TARGET_DIR)/*
|
||||||
|
|
||||||
|
.PHONY: all clean
|
||||||
|
|
|
||||||
1
tests/fuzz/corpus/exec/select1
Normal file
1
tests/fuzz/corpus/exec/select1
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
SELECT 1
|
||||||
1
tests/fuzz/corpus/exec/vec_version
Normal file
1
tests/fuzz/corpus/exec/vec_version
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
SELECT vec_version()
|
||||||
1
tests/fuzz/corpus/json/empty
Normal file
1
tests/fuzz/corpus/json/empty
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
[]
|
||||||
1
tests/fuzz/corpus/json/valid_2d
Normal file
1
tests/fuzz/corpus/json/valid_2d
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
[0.5, -0.5]
|
||||||
1
tests/fuzz/corpus/json/valid_4d
Normal file
1
tests/fuzz/corpus/json/valid_4d
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
[1.0, 2.0, 3.0, 4.0]
|
||||||
BIN
tests/fuzz/corpus/shadow-corrupt/target0
Normal file
BIN
tests/fuzz/corpus/shadow-corrupt/target0
Normal file
Binary file not shown.
BIN
tests/fuzz/corpus/shadow-corrupt/target1
Normal file
BIN
tests/fuzz/corpus/shadow-corrupt/target1
Normal file
Binary file not shown.
BIN
tests/fuzz/corpus/shadow-corrupt/target2
Normal file
BIN
tests/fuzz/corpus/shadow-corrupt/target2
Normal file
Binary file not shown.
BIN
tests/fuzz/corpus/shadow-corrupt/target3
Normal file
BIN
tests/fuzz/corpus/shadow-corrupt/target3
Normal file
Binary file not shown.
BIN
tests/fuzz/corpus/shadow-corrupt/target4
Normal file
BIN
tests/fuzz/corpus/shadow-corrupt/target4
Normal file
Binary file not shown.
BIN
tests/fuzz/corpus/shadow-corrupt/target5
Normal file
BIN
tests/fuzz/corpus/shadow-corrupt/target5
Normal file
Binary file not shown.
BIN
tests/fuzz/corpus/vec0-operations/ins_del_ins
Normal file
BIN
tests/fuzz/corpus/vec0-operations/ins_del_ins
Normal file
Binary file not shown.
BIN
tests/fuzz/corpus/vec0-operations/insert5
Normal file
BIN
tests/fuzz/corpus/vec0-operations/insert5
Normal file
Binary file not shown.
115
tests/fuzz/metadata-columns.c
Normal file
115
tests/fuzz/metadata-columns.c
Normal file
|
|
@ -0,0 +1,115 @@
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "sqlite-vec.h"
|
||||||
|
#include "sqlite3.h"
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
||||||
|
if (size < 8) return 0;
|
||||||
|
|
||||||
|
int rc;
|
||||||
|
sqlite3 *db;
|
||||||
|
|
||||||
|
rc = sqlite3_open(":memory:", &db);
|
||||||
|
assert(rc == SQLITE_OK);
|
||||||
|
rc = sqlite3_vec_init(db, NULL, NULL);
|
||||||
|
assert(rc == SQLITE_OK);
|
||||||
|
|
||||||
|
rc = sqlite3_exec(db,
|
||||||
|
"CREATE VIRTUAL TABLE v USING vec0("
|
||||||
|
" emb float[4],"
|
||||||
|
" flag boolean metadata,"
|
||||||
|
" count integer metadata,"
|
||||||
|
" score float metadata,"
|
||||||
|
" label text metadata,"
|
||||||
|
" aux_data text auxiliary"
|
||||||
|
")", NULL, NULL, NULL);
|
||||||
|
if (rc != SQLITE_OK) { sqlite3_close(db); return 0; }
|
||||||
|
|
||||||
|
// Prepare statements for insert and query
|
||||||
|
sqlite3_stmt *stmtInsert = NULL;
|
||||||
|
sqlite3_stmt *stmtKnn = NULL;
|
||||||
|
sqlite3_stmt *stmtKnnFilter = NULL;
|
||||||
|
sqlite3_stmt *stmtDelete = NULL;
|
||||||
|
|
||||||
|
sqlite3_prepare_v2(db,
|
||||||
|
"INSERT INTO v(rowid, emb, flag, count, score, label, aux_data) "
|
||||||
|
"VALUES (?, ?, ?, ?, ?, ?, ?)", -1, &stmtInsert, NULL);
|
||||||
|
sqlite3_prepare_v2(db,
|
||||||
|
"SELECT rowid, distance FROM v WHERE emb MATCH ? LIMIT 3",
|
||||||
|
-1, &stmtKnn, NULL);
|
||||||
|
sqlite3_prepare_v2(db,
|
||||||
|
"SELECT rowid, distance FROM v WHERE emb MATCH ? AND flag = 1 LIMIT 3",
|
||||||
|
-1, &stmtKnnFilter, NULL);
|
||||||
|
sqlite3_prepare_v2(db,
|
||||||
|
"DELETE FROM v WHERE rowid = ?", -1, &stmtDelete, NULL);
|
||||||
|
|
||||||
|
if (!stmtInsert || !stmtKnn || !stmtKnnFilter || !stmtDelete) goto cleanup;
|
||||||
|
|
||||||
|
size_t i = 0;
|
||||||
|
while (i + 6 <= size) {
|
||||||
|
uint8_t op = data[i++] % 4;
|
||||||
|
uint8_t rowid_byte = data[i++];
|
||||||
|
int64_t rowid = (int64_t)(rowid_byte % 50) + 1;
|
||||||
|
|
||||||
|
switch (op) {
|
||||||
|
case 0: {
|
||||||
|
// INSERT with fuzz-derived vector and metadata
|
||||||
|
float vec[4];
|
||||||
|
for (int j = 0; j < 4 && i < size; j++, i++) {
|
||||||
|
vec[j] = (float)((int8_t)data[i]) / 10.0f;
|
||||||
|
}
|
||||||
|
int flag_val = (i < size) ? data[i++] % 2 : 0;
|
||||||
|
int count_val = (i < size) ? (int)((int8_t)data[i++]) : 0;
|
||||||
|
float score_val = (i < size) ? (float)((int8_t)data[i++]) / 10.0f : 0.0f;
|
||||||
|
|
||||||
|
sqlite3_reset(stmtInsert);
|
||||||
|
sqlite3_bind_int64(stmtInsert, 1, rowid);
|
||||||
|
sqlite3_bind_blob(stmtInsert, 2, vec, sizeof(vec), SQLITE_TRANSIENT);
|
||||||
|
sqlite3_bind_int(stmtInsert, 3, flag_val);
|
||||||
|
sqlite3_bind_int(stmtInsert, 4, count_val);
|
||||||
|
sqlite3_bind_double(stmtInsert, 5, (double)score_val);
|
||||||
|
sqlite3_bind_text(stmtInsert, 6, "label", -1, SQLITE_STATIC);
|
||||||
|
sqlite3_bind_text(stmtInsert, 7, "aux", -1, SQLITE_STATIC);
|
||||||
|
sqlite3_step(stmtInsert);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 1: {
|
||||||
|
// KNN query (no filter)
|
||||||
|
float qvec[4] = {1.0f, 0.0f, 0.0f, 0.0f};
|
||||||
|
sqlite3_reset(stmtKnn);
|
||||||
|
sqlite3_bind_blob(stmtKnn, 1, qvec, sizeof(qvec), SQLITE_STATIC);
|
||||||
|
while (sqlite3_step(stmtKnn) == SQLITE_ROW) {}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 2: {
|
||||||
|
// KNN query WITH metadata filter
|
||||||
|
float qvec[4] = {0.0f, 1.0f, 0.0f, 0.0f};
|
||||||
|
sqlite3_reset(stmtKnnFilter);
|
||||||
|
sqlite3_bind_blob(stmtKnnFilter, 1, qvec, sizeof(qvec), SQLITE_STATIC);
|
||||||
|
while (sqlite3_step(stmtKnnFilter) == SQLITE_ROW) {}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 3: {
|
||||||
|
// DELETE
|
||||||
|
sqlite3_reset(stmtDelete);
|
||||||
|
sqlite3_bind_int64(stmtDelete, 1, rowid);
|
||||||
|
sqlite3_step(stmtDelete);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sqlite3_exec(db, "SELECT * FROM v", NULL, NULL, NULL);
|
||||||
|
|
||||||
|
cleanup:
|
||||||
|
sqlite3_finalize(stmtInsert);
|
||||||
|
sqlite3_finalize(stmtKnn);
|
||||||
|
sqlite3_finalize(stmtKnnFilter);
|
||||||
|
sqlite3_finalize(stmtDelete);
|
||||||
|
sqlite3_close(db);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
@ -8,6 +8,9 @@
|
||||||
#include "sqlite3.h"
|
#include "sqlite3.h"
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
|
extern int sqlite3_vec_numpy_init(sqlite3 *db, char **pzErrMsg,
|
||||||
|
const sqlite3_api_routines *pApi);
|
||||||
|
|
||||||
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
||||||
int rc = SQLITE_OK;
|
int rc = SQLITE_OK;
|
||||||
sqlite3 *db;
|
sqlite3 *db;
|
||||||
|
|
@ -17,25 +20,17 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
||||||
assert(rc == SQLITE_OK);
|
assert(rc == SQLITE_OK);
|
||||||
rc = sqlite3_vec_init(db, NULL, NULL);
|
rc = sqlite3_vec_init(db, NULL, NULL);
|
||||||
assert(rc == SQLITE_OK);
|
assert(rc == SQLITE_OK);
|
||||||
|
rc = sqlite3_vec_numpy_init(db, NULL, NULL);
|
||||||
|
assert(rc == SQLITE_OK);
|
||||||
|
|
||||||
rc = sqlite3_prepare_v2(db, "select * from vec_npy_each(?)", -1, &stmt, NULL);
|
rc = sqlite3_prepare_v2(db, "select * from vec_npy_each(?)", -1, &stmt, NULL);
|
||||||
assert(rc == SQLITE_OK);
|
assert(rc == SQLITE_OK);
|
||||||
sqlite3_bind_blob(stmt, 1, data, size, SQLITE_STATIC);
|
sqlite3_bind_blob(stmt, 1, data, size, SQLITE_STATIC);
|
||||||
rc = sqlite3_step(stmt);
|
rc = sqlite3_step(stmt);
|
||||||
if(rc != SQLITE_DONE || rc != SQLITE_ROW) {
|
while (rc == SQLITE_ROW) {
|
||||||
sqlite3_finalize(stmt);
|
rc = sqlite3_step(stmt);
|
||||||
sqlite3_close(db);
|
|
||||||
return -1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
while(1) {
|
|
||||||
if(rc == SQLITE_DONE) break;
|
|
||||||
if(rc == SQLITE_ROW) continue;
|
|
||||||
sqlite3_finalize(stmt);
|
|
||||||
sqlite3_close(db);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
sqlite3_finalize(stmt);
|
sqlite3_finalize(stmt);
|
||||||
sqlite3_close(db);
|
sqlite3_close(db);
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
||||||
81
tests/fuzz/scalar-functions.c
Normal file
81
tests/fuzz/scalar-functions.c
Normal file
|
|
@ -0,0 +1,81 @@
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "sqlite-vec.h"
|
||||||
|
#include "sqlite3.h"
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
||||||
|
if (size < 2) return 0;
|
||||||
|
|
||||||
|
int rc;
|
||||||
|
sqlite3 *db;
|
||||||
|
sqlite3_stmt *stmt = NULL;
|
||||||
|
|
||||||
|
rc = sqlite3_open(":memory:", &db);
|
||||||
|
assert(rc == SQLITE_OK);
|
||||||
|
rc = sqlite3_vec_init(db, NULL, NULL);
|
||||||
|
assert(rc == SQLITE_OK);
|
||||||
|
|
||||||
|
static const char *queries[] = {
|
||||||
|
"SELECT vec_f32(cast(? as text))", /* 0: JSON text -> f32 */
|
||||||
|
"SELECT vec_f32(?)", /* 1: blob -> f32 */
|
||||||
|
"SELECT vec_int8(?)", /* 2: blob -> int8 */
|
||||||
|
"SELECT vec_bit(?)", /* 3: blob -> bit */
|
||||||
|
"SELECT vec_length(?)", /* 4: vector length */
|
||||||
|
"SELECT vec_type(?)", /* 5: vector type string */
|
||||||
|
"SELECT vec_to_json(?)", /* 6: vector -> JSON */
|
||||||
|
"SELECT vec_normalize(?)", /* 7: normalize */
|
||||||
|
"SELECT vec_quantize_binary(?)", /* 8: quantize to binary */
|
||||||
|
"SELECT vec_quantize_int8(?, 'unit')", /* 9: quantize to int8 */
|
||||||
|
"SELECT vec_distance_l2(?, ?)", /* 10: L2 distance */
|
||||||
|
"SELECT vec_distance_cosine(?, ?)", /* 11: cosine distance */
|
||||||
|
"SELECT vec_distance_l1(?, ?)", /* 12: L1 distance */
|
||||||
|
"SELECT vec_distance_hamming(?, ?)", /* 13: hamming distance */
|
||||||
|
"SELECT vec_add(?, ?)", /* 14: vector add */
|
||||||
|
"SELECT vec_sub(?, ?)", /* 15: vector subtract */
|
||||||
|
"SELECT vec_slice(?, 0, ?)", /* 16: vector slice */
|
||||||
|
};
|
||||||
|
static const int nQueries = sizeof(queries) / sizeof(queries[0]);
|
||||||
|
|
||||||
|
int qIdx = data[0] % nQueries;
|
||||||
|
const uint8_t *payload = data + 1;
|
||||||
|
int payload_size = (int)(size - 1);
|
||||||
|
|
||||||
|
rc = sqlite3_prepare_v2(db, queries[qIdx], -1, &stmt, NULL);
|
||||||
|
if (rc != SQLITE_OK) {
|
||||||
|
sqlite3_close(db);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int nParams = sqlite3_bind_parameter_count(stmt);
|
||||||
|
|
||||||
|
// Bind param 1: fuzz payload as blob
|
||||||
|
sqlite3_bind_blob(stmt, 1, payload, payload_size, SQLITE_STATIC);
|
||||||
|
|
||||||
|
if (nParams >= 2) {
|
||||||
|
if (qIdx == 16) {
|
||||||
|
// vec_slice 3rd param is integer (end index)
|
||||||
|
int end_idx = (payload_size > 0) ? (payload[0] % 64) : 0;
|
||||||
|
sqlite3_bind_int(stmt, 2, end_idx);
|
||||||
|
} else {
|
||||||
|
// For 2-param functions (distance, add, sub): split payload in half
|
||||||
|
int half = payload_size / 2;
|
||||||
|
sqlite3_bind_blob(stmt, 2, payload + half,
|
||||||
|
payload_size - half, SQLITE_STATIC);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nParams >= 3) {
|
||||||
|
// vec_slice: param 3 is the end index
|
||||||
|
int end_idx = (payload_size > 1) ? (payload[1] % 64) : 0;
|
||||||
|
sqlite3_bind_int(stmt, 3, end_idx);
|
||||||
|
}
|
||||||
|
|
||||||
|
sqlite3_step(stmt);
|
||||||
|
sqlite3_finalize(stmt);
|
||||||
|
sqlite3_close(db);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
8
tests/fuzz/scalar-functions.dict
Normal file
8
tests/fuzz/scalar-functions.dict
Normal file
|
|
@ -0,0 +1,8 @@
|
||||||
|
json_vec1="[1.0, 2.0, 3.0, 4.0]"
|
||||||
|
json_vec2="[0.5, -0.5]"
|
||||||
|
json_empty="[]"
|
||||||
|
json_nan="[NaN]"
|
||||||
|
json_inf="[Infinity]"
|
||||||
|
json_large="[1e38, -1e38]"
|
||||||
|
unit="unit"
|
||||||
|
null="null"
|
||||||
114
tests/fuzz/shadow-corrupt.c
Normal file
114
tests/fuzz/shadow-corrupt.c
Normal file
|
|
@ -0,0 +1,114 @@
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "sqlite-vec.h"
|
||||||
|
#include "sqlite3.h"
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
||||||
|
if (size < 2) return 0;
|
||||||
|
|
||||||
|
int rc;
|
||||||
|
sqlite3 *db;
|
||||||
|
|
||||||
|
rc = sqlite3_open(":memory:", &db);
|
||||||
|
assert(rc == SQLITE_OK);
|
||||||
|
rc = sqlite3_vec_init(db, NULL, NULL);
|
||||||
|
assert(rc == SQLITE_OK);
|
||||||
|
|
||||||
|
// Build a valid table with 3 vectors (float[4] = 16 bytes each)
|
||||||
|
// [1,0,0,0], [0,-1,0,1], [1,1,0,1] as little-endian float32 hex
|
||||||
|
rc = sqlite3_exec(db,
|
||||||
|
"CREATE VIRTUAL TABLE v USING vec0(emb float[4]);"
|
||||||
|
"INSERT INTO v(rowid, emb) VALUES (1, X'0000803f000000000000000000000000');"
|
||||||
|
"INSERT INTO v(rowid, emb) VALUES (2, X'00000000000080bf000000000000803f');"
|
||||||
|
"INSERT INTO v(rowid, emb) VALUES (3, X'0000803f0000803f000000000000803f');",
|
||||||
|
NULL, NULL, NULL);
|
||||||
|
if (rc != SQLITE_OK) {
|
||||||
|
sqlite3_close(db);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use first byte to select corruption strategy
|
||||||
|
int target = data[0] % 6;
|
||||||
|
const uint8_t *payload = data + 1;
|
||||||
|
int payload_size = (int)(size - 1);
|
||||||
|
|
||||||
|
sqlite3_stmt *stmt = NULL;
|
||||||
|
|
||||||
|
switch (target) {
|
||||||
|
case 0: {
|
||||||
|
// Corrupt _chunks validity blob with fuzz data
|
||||||
|
rc = sqlite3_prepare_v2(db,
|
||||||
|
"UPDATE v_chunks SET validity = ? WHERE rowid = 1", -1, &stmt, NULL);
|
||||||
|
if (rc == SQLITE_OK) {
|
||||||
|
sqlite3_bind_blob(stmt, 1, payload, payload_size, SQLITE_STATIC);
|
||||||
|
sqlite3_step(stmt);
|
||||||
|
sqlite3_finalize(stmt);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 1: {
|
||||||
|
// Corrupt _chunks rowids blob with fuzz data
|
||||||
|
rc = sqlite3_prepare_v2(db,
|
||||||
|
"UPDATE v_chunks SET rowids = ? WHERE rowid = 1", -1, &stmt, NULL);
|
||||||
|
if (rc == SQLITE_OK) {
|
||||||
|
sqlite3_bind_blob(stmt, 1, payload, payload_size, SQLITE_STATIC);
|
||||||
|
sqlite3_step(stmt);
|
||||||
|
sqlite3_finalize(stmt);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 2: {
|
||||||
|
// Corrupt _vector_chunks00 vectors blob with fuzz data
|
||||||
|
rc = sqlite3_prepare_v2(db,
|
||||||
|
"UPDATE v_vector_chunks00 SET vectors = ? WHERE rowid = 1", -1, &stmt, NULL);
|
||||||
|
if (rc == SQLITE_OK) {
|
||||||
|
sqlite3_bind_blob(stmt, 1, payload, payload_size, SQLITE_STATIC);
|
||||||
|
sqlite3_step(stmt);
|
||||||
|
sqlite3_finalize(stmt);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 3: {
|
||||||
|
// Set validity to NULL (violates NOT NULL but shadow tables are writable)
|
||||||
|
sqlite3_exec(db,
|
||||||
|
"UPDATE v_chunks SET validity = NULL WHERE rowid = 1",
|
||||||
|
NULL, NULL, NULL);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 4: {
|
||||||
|
// Set rowids to NULL
|
||||||
|
sqlite3_exec(db,
|
||||||
|
"UPDATE v_chunks SET rowids = NULL WHERE rowid = 1",
|
||||||
|
NULL, NULL, NULL);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 5: {
|
||||||
|
// Delete shadow table rows entirely (orphan the virtual table data)
|
||||||
|
sqlite3_exec(db,
|
||||||
|
"DELETE FROM v_vector_chunks00 WHERE rowid = 1",
|
||||||
|
NULL, NULL, NULL);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Exercise all read paths — NONE should crash
|
||||||
|
sqlite3_exec(db, "SELECT * FROM v", NULL, NULL, NULL);
|
||||||
|
sqlite3_exec(db, "SELECT * FROM v WHERE rowid = 1", NULL, NULL, NULL);
|
||||||
|
sqlite3_exec(db, "SELECT * FROM v WHERE rowid = 2", NULL, NULL, NULL);
|
||||||
|
sqlite3_exec(db,
|
||||||
|
"SELECT rowid, distance FROM v "
|
||||||
|
"WHERE emb MATCH X'0000803f000000000000000000000000' LIMIT 3",
|
||||||
|
NULL, NULL, NULL);
|
||||||
|
sqlite3_exec(db, "DELETE FROM v WHERE rowid = 2", NULL, NULL, NULL);
|
||||||
|
sqlite3_exec(db,
|
||||||
|
"INSERT INTO v(rowid, emb) VALUES (4, X'0000803f000000000000000000000000')",
|
||||||
|
NULL, NULL, NULL);
|
||||||
|
sqlite3_exec(db, "DROP TABLE v", NULL, NULL, NULL);
|
||||||
|
|
||||||
|
sqlite3_close(db);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
2
tests/fuzz/targets/.gitignore
vendored
Normal file
2
tests/fuzz/targets/.gitignore
vendored
Normal file
|
|
@ -0,0 +1,2 @@
|
||||||
|
*
|
||||||
|
!.gitignore
|
||||||
32
tests/fuzz/vec-each.c
Normal file
32
tests/fuzz/vec-each.c
Normal file
|
|
@ -0,0 +1,32 @@
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "sqlite-vec.h"
|
||||||
|
#include "sqlite3.h"
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
||||||
|
int rc;
|
||||||
|
sqlite3 *db;
|
||||||
|
sqlite3_stmt *stmt = NULL;
|
||||||
|
|
||||||
|
rc = sqlite3_open(":memory:", &db);
|
||||||
|
assert(rc == SQLITE_OK);
|
||||||
|
rc = sqlite3_vec_init(db, NULL, NULL);
|
||||||
|
assert(rc == SQLITE_OK);
|
||||||
|
|
||||||
|
rc = sqlite3_prepare_v2(db,
|
||||||
|
"SELECT * FROM vec_each(?)", -1, &stmt, NULL);
|
||||||
|
assert(rc == SQLITE_OK);
|
||||||
|
|
||||||
|
sqlite3_bind_blob(stmt, 1, data, (int)size, SQLITE_STATIC);
|
||||||
|
while (sqlite3_step(stmt) == SQLITE_ROW) {
|
||||||
|
// Consume all rows — just exercise the iteration path
|
||||||
|
}
|
||||||
|
|
||||||
|
sqlite3_finalize(stmt);
|
||||||
|
sqlite3_close(db);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
43
tests/fuzz/vec0-create-full.c
Normal file
43
tests/fuzz/vec0-create-full.c
Normal file
|
|
@ -0,0 +1,43 @@
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "sqlite-vec.h"
|
||||||
|
#include "sqlite3.h"
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
||||||
|
int rc;
|
||||||
|
sqlite3 *db;
|
||||||
|
|
||||||
|
rc = sqlite3_open(":memory:", &db);
|
||||||
|
assert(rc == SQLITE_OK);
|
||||||
|
rc = sqlite3_vec_init(db, NULL, NULL);
|
||||||
|
assert(rc == SQLITE_OK);
|
||||||
|
|
||||||
|
// Create table with fuzz input as column definitions
|
||||||
|
sqlite3_str *s = sqlite3_str_new(NULL);
|
||||||
|
assert(s);
|
||||||
|
sqlite3_str_appendall(s, "CREATE VIRTUAL TABLE v USING vec0(");
|
||||||
|
sqlite3_str_appendf(s, "%.*s", (int)size, data);
|
||||||
|
sqlite3_str_appendall(s, ")");
|
||||||
|
char *zSql = sqlite3_str_finish(s);
|
||||||
|
assert(zSql);
|
||||||
|
|
||||||
|
rc = sqlite3_exec(db, zSql, NULL, NULL, NULL);
|
||||||
|
sqlite3_free(zSql);
|
||||||
|
|
||||||
|
if (rc == SQLITE_OK) {
|
||||||
|
// Table was created — try to use it. These may fail (errors are fine),
|
||||||
|
// but must never crash.
|
||||||
|
sqlite3_exec(db, "INSERT INTO v(rowid) VALUES (1)", NULL, NULL, NULL);
|
||||||
|
sqlite3_exec(db, "SELECT * FROM v", NULL, NULL, NULL);
|
||||||
|
sqlite3_exec(db, "SELECT * FROM v WHERE rowid = 1", NULL, NULL, NULL);
|
||||||
|
sqlite3_exec(db, "DELETE FROM v WHERE rowid = 1", NULL, NULL, NULL);
|
||||||
|
sqlite3_exec(db, "DROP TABLE v", NULL, NULL, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
sqlite3_close(db);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
95
tests/fuzz/vec0-operations.c
Normal file
95
tests/fuzz/vec0-operations.c
Normal file
|
|
@ -0,0 +1,95 @@
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "sqlite-vec.h"
|
||||||
|
#include "sqlite3.h"
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
||||||
|
if (size < 6) return 0;
|
||||||
|
|
||||||
|
int rc;
|
||||||
|
sqlite3 *db;
|
||||||
|
sqlite3_stmt *stmtInsert = NULL;
|
||||||
|
sqlite3_stmt *stmtDelete = NULL;
|
||||||
|
sqlite3_stmt *stmtKnn = NULL;
|
||||||
|
sqlite3_stmt *stmtScan = NULL;
|
||||||
|
|
||||||
|
rc = sqlite3_open(":memory:", &db);
|
||||||
|
assert(rc == SQLITE_OK);
|
||||||
|
rc = sqlite3_vec_init(db, NULL, NULL);
|
||||||
|
assert(rc == SQLITE_OK);
|
||||||
|
|
||||||
|
rc = sqlite3_exec(db,
|
||||||
|
"CREATE VIRTUAL TABLE v USING vec0(emb float[4])",
|
||||||
|
NULL, NULL, NULL);
|
||||||
|
if (rc != SQLITE_OK) { sqlite3_close(db); return 0; }
|
||||||
|
|
||||||
|
sqlite3_prepare_v2(db,
|
||||||
|
"INSERT INTO v(rowid, emb) VALUES (?, ?)", -1, &stmtInsert, NULL);
|
||||||
|
sqlite3_prepare_v2(db,
|
||||||
|
"DELETE FROM v WHERE rowid = ?", -1, &stmtDelete, NULL);
|
||||||
|
sqlite3_prepare_v2(db,
|
||||||
|
"SELECT rowid, distance FROM v WHERE emb MATCH ? LIMIT 3",
|
||||||
|
-1, &stmtKnn, NULL);
|
||||||
|
sqlite3_prepare_v2(db,
|
||||||
|
"SELECT rowid FROM v", -1, &stmtScan, NULL);
|
||||||
|
|
||||||
|
if (!stmtInsert || !stmtDelete || !stmtKnn || !stmtScan) goto cleanup;
|
||||||
|
|
||||||
|
size_t i = 0;
|
||||||
|
while (i + 2 <= size) {
|
||||||
|
uint8_t op = data[i++] % 4;
|
||||||
|
uint8_t rowid_byte = data[i++];
|
||||||
|
int64_t rowid = (int64_t)(rowid_byte % 32) + 1;
|
||||||
|
|
||||||
|
switch (op) {
|
||||||
|
case 0: {
|
||||||
|
// INSERT: consume 16 bytes for 4 floats, or use what's left
|
||||||
|
float vec[4] = {0.0f, 0.0f, 0.0f, 0.0f};
|
||||||
|
for (int j = 0; j < 4 && i < size; j++, i++) {
|
||||||
|
vec[j] = (float)((int8_t)data[i]) / 10.0f;
|
||||||
|
}
|
||||||
|
sqlite3_reset(stmtInsert);
|
||||||
|
sqlite3_bind_int64(stmtInsert, 1, rowid);
|
||||||
|
sqlite3_bind_blob(stmtInsert, 2, vec, sizeof(vec), SQLITE_TRANSIENT);
|
||||||
|
sqlite3_step(stmtInsert);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 1: {
|
||||||
|
// DELETE
|
||||||
|
sqlite3_reset(stmtDelete);
|
||||||
|
sqlite3_bind_int64(stmtDelete, 1, rowid);
|
||||||
|
sqlite3_step(stmtDelete);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 2: {
|
||||||
|
// KNN query with a fixed query vector
|
||||||
|
float qvec[4] = {1.0f, 0.0f, 0.0f, 0.0f};
|
||||||
|
sqlite3_reset(stmtKnn);
|
||||||
|
sqlite3_bind_blob(stmtKnn, 1, qvec, sizeof(qvec), SQLITE_STATIC);
|
||||||
|
while (sqlite3_step(stmtKnn) == SQLITE_ROW) {}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 3: {
|
||||||
|
// Full scan
|
||||||
|
sqlite3_reset(stmtScan);
|
||||||
|
while (sqlite3_step(stmtScan) == SQLITE_ROW) {}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Final operations — must not crash regardless of prior state
|
||||||
|
sqlite3_exec(db, "SELECT * FROM v", NULL, NULL, NULL);
|
||||||
|
|
||||||
|
cleanup:
|
||||||
|
sqlite3_finalize(stmtInsert);
|
||||||
|
sqlite3_finalize(stmtDelete);
|
||||||
|
sqlite3_finalize(stmtKnn);
|
||||||
|
sqlite3_finalize(stmtScan);
|
||||||
|
sqlite3_close(db);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue