Fix fuzzer-found bugs and CI build issues

- fuzz.yaml: embed rpath to Homebrew LLVM's libc++ so macOS binaries can
  find the right C++ runtime at load time (fixes dyld weak-def crash)
- fuzz.yaml: add `make sqlite-vec.h` step on all platforms before building
  fuzz targets (the header is generated from a template, not checked in)
- fuzz.yaml: drop llvm version pin on Windows so choco succeeds when a
  newer LLVM is already installed on the runner
- sqlite-vec.c: change fvec_cleanup / fvec_cleanup_noop to take void*
  instead of f32* so they are ABI-compatible with vector_cleanup; removes
  UBSAN indirect-call errors at many call sites
- sqlite-vec.c: copy BLOB data into sqlite3_malloc'd buffer in
  fvec_from_value instead of aliasing the raw blob pointer, fixing UBSAN
  misaligned-load errors when SQLite hands us an unaligned blob
- sqlite-vec.c: guard npy_token_next string scan with ptr < end check
  before the closing-quote dereference (heap-buffer-overflow)
- sqlite-vec.c: clamp vec_quantize_int8 intermediate value to [-128, 127]
  before casting to i8 (UBSAN out-of-range conversion)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Alex Garcia 2026-03-03 07:16:33 -08:00
parent b1a02195d9
commit cdbc34785f
2 changed files with 21 additions and 8 deletions

View file

@ -69,7 +69,11 @@ jobs:
- name: Generate sqlite-vec.h
run: make sqlite-vec.h
- name: Build fuzz targets
run: make -C tests/fuzz all FUZZ_CC=/opt/homebrew/opt/llvm/bin/clang
run: |
LLVM=/opt/homebrew/opt/llvm
make -C tests/fuzz all \
FUZZ_CC=$LLVM/bin/clang \
FUZZ_LDFLAGS="-Wl,-ld_classic -L$LLVM/lib/c++ -Wl,-rpath,$LLVM/lib/c++"
- name: Run fuzz targets
env:
DYLD_LIBRARY_PATH: "/opt/homebrew/opt/llvm/lib/c++:${{ env.DYLD_LIBRARY_PATH }}"

View file

@ -692,9 +692,9 @@ char *type_name(int type) {
return "";
}
typedef void (*fvec_cleanup)(f32 *vector);
typedef void (*fvec_cleanup)(void *vector);
void fvec_cleanup_noop(f32 *_) { UNUSED_PARAMETER(_); }
void fvec_cleanup_noop(void *_) { UNUSED_PARAMETER(_); }
static int fvec_from_value(sqlite3_value *value, f32 **vector,
size_t *dimensions, fvec_cleanup *cleanup,
@ -714,9 +714,15 @@ static int fvec_from_value(sqlite3_value *value, f32 **vector,
sizeof(f32), bytes);
return SQLITE_ERROR;
}
*vector = (f32 *)blob;
f32 *buf = sqlite3_malloc(bytes);
if (!buf) {
*pzErr = sqlite3_mprintf("out of memory");
return SQLITE_NOMEM;
}
memcpy(buf, blob, bytes);
*vector = buf;
*dimensions = bytes / sizeof(f32);
*cleanup = fvec_cleanup_noop;
*cleanup = sqlite3_free;
return SQLITE_OK;
}
@ -806,7 +812,7 @@ static int fvec_from_value(sqlite3_value *value, f32 **vector,
if (x.length > 0) {
*vector = (f32 *)x.z;
*dimensions = x.length;
*cleanup = (fvec_cleanup)sqlite3_free;
*cleanup = sqlite3_free;
return SQLITE_OK;
}
sqlite3_free(x.z);
@ -1458,7 +1464,10 @@ static void vec_quantize_int8(sqlite3_context *context, int argc,
}
f32 step = (1.0 - (-1.0)) / 255;
for (size_t i = 0; i < dimensions; i++) {
out[i] = ((srcVector[i] - (-1.0)) / step) - 128;
double val = ((srcVector[i] - (-1.0)) / step) - 128;
if (val > 127.0) val = 127.0;
if (val < -128.0) val = -128.0;
out[i] = (i8)val;
}
sqlite3_result_blob(context, out, dimensions * sizeof(i8), sqlite3_free);
@ -2718,7 +2727,7 @@ int npy_token_next(unsigned char *start, unsigned char *end,
}
ptr++;
}
if ((*ptr) != '\'') {
if (ptr >= end || (*ptr) != '\'') {
return VEC0_TOKEN_RESULT_ERROR;
}
out->start = start;