sqlite-vec/.github/workflows/fuzz.yaml
Alex Garcia cdbc34785f Fix fuzzer-found bugs and CI build issues
- fuzz.yaml: embed rpath to Homebrew LLVM's libc++ so macOS binaries can
  find the right C++ runtime at load time (fixes dyld weak-def crash)
- fuzz.yaml: add `make sqlite-vec.h` step on all platforms before building
  fuzz targets (the header is generated from a template, not checked in)
- fuzz.yaml: drop llvm version pin on Windows so choco succeeds when a
  newer LLVM is already installed on the runner
- sqlite-vec.c: change fvec_cleanup / fvec_cleanup_noop to take void*
  instead of f32* so they are ABI-compatible with vector_cleanup; removes
  UBSAN indirect-call errors at many call sites
- sqlite-vec.c: copy BLOB data into sqlite3_malloc'd buffer in
  fvec_from_value instead of aliasing the raw blob pointer, fixing UBSAN
  misaligned-load errors when SQLite hands us an unaligned blob
- sqlite-vec.c: guard npy_token_next string scan with ptr < end check
  before the closing-quote dereference (heap-buffer-overflow)
- sqlite-vec.c: clamp vec_quantize_int8 intermediate value to [-128, 127]
  before casting to i8 (UBSAN out-of-range conversion)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-03 07:16:33 -08:00

163 lines
5.4 KiB
YAML

# Fuzzing workflow: builds the libFuzzer targets under tests/fuzz and runs
# each of them for a bounded amount of time on Linux, macOS and Windows.
name: Fuzz

on:
  # Every push to the main branch.
  push:
    branches:
      - main
  # Nightly at 02:00 UTC.
  # NOTE(review): this was described as being for "longer fuzzing sessions",
  # but scheduled events carry no `duration` input, so the jobs fall back to
  # their default duration — confirm whether a longer nightly run is intended.
  schedule:
    - cron: '0 2 * * *'
  # Manual trigger; `duration` is the per-target fuzzing time in seconds.
  workflow_dispatch:
    inputs:
      duration:
        description: 'Fuzz duration per target (seconds)'
        default: '60'

# The jobs only need to read the repository contents.
permissions:
  contents: read

jobs:
fuzz-linux:
  # Builds every fuzz target with clang-18 and runs each one for DURATION
  # seconds. The job fails if any target exits non-zero, and the crash
  # reproducers are then uploaded as an artifact.
  runs-on: ubuntu-22.04
  steps:
    - uses: actions/checkout@v4
    - name: Install LLVM 18
      run: |
        wget -qO- https://apt.llvm.org/llvm.sh | sudo bash -s -- 18
        # Expose the compiler name to the following steps.
        echo "FUZZ_CC=clang-18" >> "$GITHUB_ENV"
    - run: ./scripts/vendor.sh
    - name: Generate sqlite-vec.h
      # The header is generated from a template and is not checked in.
      run: make sqlite-vec.h
    - name: Build fuzz targets
      # FUZZ_LDFLAGS is deliberately passed empty here; only the macOS job
      # supplies extra linker flags.
      run: make -C tests/fuzz all FUZZ_CC="$FUZZ_CC" FUZZ_LDFLAGS=
    - name: Run fuzz targets
      env:
        # Handed over through the environment rather than interpolated into
        # the script, so the dispatch input is never parsed as shell code.
        DURATION: "${{ github.event.inputs.duration || '60' }}"
      run: |
        EXIT_CODE=0
        for target in tests/fuzz/targets/*; do
          [ -f "$target" ] && [ -x "$target" ] || continue
          name=$(basename "$target")
          echo "::group::Fuzzing $name ($DURATION seconds)"
          corpus="tests/fuzz/corpus/$name"
          mkdir -p "$corpus"
          # Target names use underscores, dictionary files use dashes.
          dict="tests/fuzz/${name//_/-}.dict"
          dict_flag=""
          [ -f "$dict" ] && dict_flag="-dict=$dict"
          # libFuzzer writes crash-*/leak-*/timeout-* files relative to the
          # working directory unless -artifact_prefix is given; send them to
          # tests/fuzz/ so the upload step can find them.
          # $dict_flag is intentionally unquoted: when empty it must vanish.
          if ! ASAN_OPTIONS=detect_leaks=1 "$target" $dict_flag \
              -artifact_prefix=tests/fuzz/ \
              -max_total_time="$DURATION" "$corpus" 2>&1; then
            echo "::error::Fuzz target $name found a crash!"
            EXIT_CODE=1
          fi
          echo "::endgroup::"
        done
        # Keep fuzzing the remaining targets after a crash; fail at the end.
        exit $EXIT_CODE
    - name: Upload crash artifacts
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: fuzz-crashes-linux
        path: |
          tests/fuzz/crash-*
          tests/fuzz/leak-*
          tests/fuzz/timeout-*
          tests/fuzz/oom-*
fuzz-macos:
  # Builds the fuzz targets with Homebrew's LLVM and runs each one for
  # DURATION seconds. The job fails if any target exits non-zero, and the
  # crash reproducers are then uploaded as an artifact.
  runs-on: macos-14
  steps:
    - uses: actions/checkout@v4
    - name: Install LLVM
      run: brew install llvm
    - run: ./scripts/vendor.sh
    - name: Generate sqlite-vec.h
      # The header is generated from a template and is not checked in.
      run: make sqlite-vec.h
    - name: Build fuzz targets
      # Link against Homebrew LLVM's libc++ and embed an rpath to it so the
      # binaries locate that C++ runtime at load time.
      run: |
        LLVM=/opt/homebrew/opt/llvm
        make -C tests/fuzz all \
          FUZZ_CC=$LLVM/bin/clang \
          FUZZ_LDFLAGS="-Wl,-ld_classic -L$LLVM/lib/c++ -Wl,-rpath,$LLVM/lib/c++"
    - name: Run fuzz targets
      env:
        # NOTE(review): the rpath embedded at link time already points at
        # this directory, and macOS may strip DYLD_* variables when the step
        # shell is a system binary — confirm this variable is still needed.
        DYLD_LIBRARY_PATH: "/opt/homebrew/opt/llvm/lib/c++:${{ env.DYLD_LIBRARY_PATH }}"
        # Handed over through the environment rather than interpolated into
        # the script, so the dispatch input is never parsed as shell code.
        DURATION: "${{ github.event.inputs.duration || '60' }}"
      run: |
        EXIT_CODE=0
        for target in tests/fuzz/targets/*; do
          [ -f "$target" ] && [ -x "$target" ] || continue
          name=$(basename "$target")
          echo "::group::Fuzzing $name ($DURATION seconds)"
          corpus="tests/fuzz/corpus/$name"
          mkdir -p "$corpus"
          # Target names use underscores, dictionary files use dashes.
          dict="tests/fuzz/${name//_/-}.dict"
          dict_flag=""
          [ -f "$dict" ] && dict_flag="-dict=$dict"
          # libFuzzer writes crash-*/leak-*/timeout-* files relative to the
          # working directory unless -artifact_prefix is given; send them to
          # tests/fuzz/ so the upload step can find them.
          # $dict_flag is intentionally unquoted: when empty it must vanish.
          if ! "$target" $dict_flag \
              -artifact_prefix=tests/fuzz/ \
              -max_total_time="$DURATION" "$corpus" 2>&1; then
            echo "::error::Fuzz target $name found a crash!"
            EXIT_CODE=1
          fi
          echo "::endgroup::"
        done
        # Keep fuzzing the remaining targets after a crash; fail at the end.
        exit $EXIT_CODE
    - name: Upload crash artifacts
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: fuzz-crashes-macos
        path: |
          tests/fuzz/crash-*
          tests/fuzz/leak-*
          tests/fuzz/timeout-*
          tests/fuzz/oom-*
fuzz-windows:
  # Best-effort: libFuzzer works on Windows via LLVM but ASAN/UBSAN
  # support is less reliable. Leak detection is not available.
  # Build and fuzz failures are reported as warnings and never fail a step.
  runs-on: windows-2022
  continue-on-error: true
  steps:
    - uses: actions/checkout@v4
    - name: Install LLVM
      # No version pin, so the install succeeds when the runner already
      # ships a newer LLVM.
      run: choco install llvm -y
    - run: bash ./scripts/vendor.sh
      shell: bash
    - name: Generate sqlite-vec.h
      shell: bash
      # The header is generated from a template and is not checked in.
      run: make sqlite-vec.h
    - name: Build fuzz targets
      shell: bash
      run: |
        export PATH="/c/Program Files/LLVM/bin:$PATH"
        cd tests/fuzz
        mkdir -p targets
        # One executable per C source; dashes in the file name become
        # underscores in the target name.
        for src in *.c; do
          name="${src%.c}"
          target_name="${name//-/_}"
          echo "Building $target_name from $src"
          clang -fsanitize=address,fuzzer \
            -I ../../ -I ../../vendor -DSQLITE_CORE -g \
            ../../vendor/sqlite3.c ../../sqlite-vec.c \
            "$src" -o "targets/${target_name}.exe" || {
              echo "Warning: failed to build $target_name (best-effort)"
            }
        done
    - name: Run fuzz targets
      shell: bash
      env:
        # Handed over through the environment rather than interpolated into
        # the script, so the dispatch input is never parsed as shell code.
        DURATION: "${{ github.event.inputs.duration || '60' }}"
      run: |
        export PATH="/c/Program Files/LLVM/bin:$PATH"
        for target in tests/fuzz/targets/*.exe; do
          [ -f "$target" ] || continue
          name=$(basename "$target" .exe)
          echo "=== Fuzzing $name ($DURATION seconds) ==="
          corpus="tests/fuzz/corpus/$name"
          mkdir -p "$corpus"
          # libFuzzer writes crash-* files relative to the working directory
          # unless -artifact_prefix is given; send them to tests/fuzz/ so
          # the upload step can find them.
          "$target" -artifact_prefix=tests/fuzz/ \
            -max_total_time="$DURATION" "$corpus" 2>&1 || {
              echo "Warning: $name found an issue or failed"
            }
        done
    - name: Upload crash artifacts
      # The run step above never fails (findings are downgraded to
      # warnings), so a failure() condition would skip the upload exactly
      # when a crash was found. Always attempt it and stay quiet when there
      # is nothing to upload.
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: fuzz-crashes-windows
        if-no-files-found: ignore
        path: |
          tests/fuzz/crash-*
          tests/fuzz/leak-*
          tests/fuzz/timeout-*
          tests/fuzz/oom-*